X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=bbdd462b4b560d38641a3b9566e448e4c08e7e17;hb=c671bf49230781ed80d9fa80577fed9b1b655a99;hp=c2b7d7b104fa9e8c1c0e9183390aa5b80e58edc4;hpb=63b0b9682c3d3636e47c48750a42f768b8e7574b;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index c2b7d7b1..bbdd462b 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -1,3 +1,17 @@ +#-- vim:sw=2:et +#++ +# +# :title: rbot utilities provider +# +# Author:: Tom Gilbert +# Author:: Giuseppe "Oblomov" Bilotta +# +# Copyright:: (C) 2002-2006 Tom Gilbert +# Copyright:: (C) 2007 Giuseppe Bilotta +# +# TODO some of these Utils should be rewritten as extensions to the appropriate +# standard Ruby classes and accordingly be moved to extends.rb + require 'net/http' require 'uri' require 'tempfile' @@ -314,6 +328,7 @@ module ::Irc secs_to_string_case(ret, hours, "hour", "hours") if hours > 0 mins, secs = secs.divmod SEC_PER_MIN secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0 + secs = secs.to_i secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty? case ret.length when 0 @@ -416,10 +431,11 @@ module ::Irc end end - H1_REGEX = /<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im - PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im + HX_REGEX = /<h(\d)(?:\s+[^>]*)?>.*?<\/h\1>/im + PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im + AFTER_PAR1_REGEX = /<\w+\s+[^>]*body[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im # Try to grab and IRCify the first HTML par (<p> tag) in the given string. - # If possible, grab the one after the first h1 heading + # If possible, grab the one after the first heading # # It is possible to pass some options to determine how the stripping # occurs. Currently, only one option is supported: @@ -431,7 +447,7 @@ module ::Irc strip = opts[:strip] strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String) - header_found = xml.match(H1_REGEX) + header_found = xml.match(HX_REGEX) if header_found header_found = $' debug "Found header: #{header_found[1].inspect}" @@ -456,6 +472,19 @@ module ::Irc txt.sub!(strip, '') if strip end end + + # Nothing yet ... let's get drastic: we ca + if txt.empty? + header_found = xml + while txt.empty? + candidate = header_found[AFTER_PAR1_REGEX] + break unless candidate + txt = candidate.ircify_html + header_found = $' + txt.sub!(strip, '') if strip + end + end + return txt end