txt = String.new
- h = %w{h1 h2 h3 h4 h5 h6}
- p = %w{p}
- ar = []
- h.each { |hx|
- p.each { |px|
- ar << "#{hx}~#{px}"
- }
- }
- h_p_css = ar.join("|")
- debug "css search: #{h_p_css}"
-
pre_h = pars = by_span = nil
while true
debug "Minimum number of spaces: #{min_spaces}"
# Initial attempt: <p> that follows <h\d>
- pre_h = doc/h_p_css if pre_h.nil?
- debug "Hx: found: #{pre_h.pretty_inspect}"
+ if pre_h.nil?
+ pre_h = Hpricot::Elements[]
+ found_h = false
+ doc.root.search("*") { |e|
+ case e.pathname
+ when /^h\d/
+ found_h = true
+ when 'p'
+ pre_h << e if found_h
+ end
+ }
+ debug "Hx: found: #{pre_h.pretty_inspect}"
+ end
+
pre_h.each { |p|
debug p
txt = p.to_html.ircify_html
# we don't need
if by_span.nil?
by_span = Hpricot::Elements[]
- pre_pars = doc/"div|span|td|tr|tbody|table"
- pre_pars.each { |el|
- by_span.push el if el.class =~ /body|message|text/i
+ doc.root.each("*") { |el|
+ by_span.push el if el.pathname =~ /^(?:div|span|td|tr|tbody|table)$/ and el[:class] =~ /body|message|text/i
}
debug "other \#1: found: #{by_span.pretty_inspect}"
end
fragreg = /.*?<a\s+[^>]*name=["']?#{frag}["']?.*?>/im
txt.sub!(fragreg,'')
end
- content = Utils.ircify_first_html_par(txt, :strip => title)
+ c_opts = opts.dup
+ c_opts[:strip] ||= title
+ content = Utils.ircify_first_html_par(txt, c_opts)
content = nil if content.empty?
return {:title => title, :content => content}
end
url = urls.shift
idx += 1
- # FIXME what happens if some big file is returned? We should share
- # code with the url plugin to only retrieve partial file content!
- xml = self.bot.httputil.get(url)
- if xml.nil?
- debug "Unable to retrieve #{url}"
- next
- end
- par = Utils.ircify_first_html_par(xml, opts)
- if par.empty?
- debug "No first par found\n#{xml}"
- # FIXME only do this if the 'url' plugin is loaded
- # TODO even better, put the code here
- # par = @bot.plugins['url'].get_title_from_html(xml)
- if par.empty?
- retval.push(nil)
- next
+ begin
+ info = Utils.get_html_info(URI.parse(url), opts)
+
+ par = info[:content]
+ retval.push(par)
+
+ if par
+ msg.reply "[#{idx}] #{par}", :overlong => :truncate if msg
+ count -=1
end
+ rescue
+ debug "Unable to retrieve #{url}: #{$!}"
+ next
end
- msg.reply "[#{idx}] #{par}", :overlong => :truncate if msg
- count -=1
- retval.push(par)
end
return retval
end