+#-- vim:sw=2:et
+#++
+#
+# :title: rbot utilities provider
+#
+# Author:: Tom Gilbert <tom@linuxbrit.co.uk>
+# Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
+#
+# Copyright:: (C) 2002-2006 Tom Gilbert
+# Copyright:: (C) 2007 Giuseppe Bilotta
+#
+# TODO some of these Utils should be rewritten as extensions to the appropriate
+# standard Ruby classes and accordingly be moved to extends.rb
+
require 'net/http'
require 'uri'
require 'tempfile'
secs_to_string_case(ret, hours, "hour", "hours") if hours > 0
mins, secs = secs.divmod SEC_PER_MIN
secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0
+ secs = secs.to_i
secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty?
case ret.length
when 0
end
end
- H1_REGEX = /<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im
- PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im
+ HX_REGEX = /<h(\d)(?:\s+[^>]*)?>.*?<\/h\1>/im
+ PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+ AFTER_PAR1_REGEX = /<\w+\s+[^>]*body[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
# Try to grab and IRCify the first HTML par (<p> tag) in the given string.
- # If possible, grab the one after the first h1 heading
+ # If possible, grab the one after the first heading
#
# It is possible to pass some options to determine how the stripping
# occurs. Currently, only one option is supported:
strip = opts[:strip]
strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)
- header_found = xml.match(H1_REGEX)
+ header_found = xml.match(HX_REGEX)
if header_found
header_found = $'
debug "Found header: #{header_found[1].inspect}"
txt.sub!(strip, '') if strip
end
end
+
+ # Nothing yet ... let's get drastic: we can fall back to the text following any tag whose attributes mention "body"
+ if txt.empty?
+ header_found = xml
+ while txt.empty?
+ candidate = header_found[AFTER_PAR1_REGEX]
+ break unless candidate
+ txt = candidate.ircify_html
+ header_found = $'
+ txt.sub!(strip, '') if strip
+ end
+ end
+
return txt
end