X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=bbdd462b4b560d38641a3b9566e448e4c08e7e17;hb=c671bf49230781ed80d9fa80577fed9b1b655a99;hp=c2b7d7b104fa9e8c1c0e9183390aa5b80e58edc4;hpb=63b0b9682c3d3636e47c48750a42f768b8e7574b;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git

diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index c2b7d7b1..bbdd462b 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -1,3 +1,17 @@
+#-- vim:sw=2:et
+#++
+#
+# :title: rbot utilities provider
+#
+# Author:: Tom Gilbert <tom@linuxbrit.co.uk>
+# Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
+#
+# Copyright:: (C) 2002-2006 Tom Gilbert
+# Copyright:: (C) 2007 Giuseppe Bilotta
+#
+# TODO some of these Utils should be rewritten as extensions to the appropriate
+# standard Ruby classes and accordingly be moved to extends.rb
+
 require 'net/http'
 require 'uri'
 require 'tempfile'
@@ -314,6 +328,7 @@ module ::Irc
       secs_to_string_case(ret, hours, "hour", "hours") if hours > 0
       mins, secs = secs.divmod SEC_PER_MIN
       secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0
+      secs = secs.to_i
       secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty?
       case ret.length
       when 0
@@ -416,10 +431,11 @@ module ::Irc
       end
     end
 
-    H1_REGEX = /<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im
-    PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im
+    HX_REGEX = /<h(\d)(?:\s+[^>]*)?>.*?<\/h\1>/im
+    PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+    AFTER_PAR1_REGEX = /<\w+\s+[^>]*body[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
     # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
-    # If possible, grab the one after the first h1 heading
+    # If possible, grab the one after the first heading
     #
     # It is possible to pass some options to determine how the stripping
     # occurs. Currently, only one option is supported:
@@ -431,7 +447,7 @@ module ::Irc
       strip = opts[:strip]
       strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)
 
-      header_found = xml.match(H1_REGEX)
+      header_found = xml.match(HX_REGEX)
       if header_found
         header_found = $'
         debug "Found header: #{header_found[1].inspect}"
@@ -456,6 +472,19 @@ module ::Irc
 	  txt.sub!(strip, '') if strip
         end
       end
+
+      # Nothing yet ... let's get drastic: we can fall back to any tag whose attributes mention 'body'
+      if txt.empty?
+	header_found = xml
+        while txt.empty? 
+          candidate = header_found[AFTER_PAR1_REGEX]
+          break unless candidate
+          txt = candidate.ircify_html
+          header_found = $'
+	  txt.sub!(strip, '') if strip
+        end
+      end
+
       return txt
     end