diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 17:31:24 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-09-18 17:31:24 +0000 |
commit | adb212bdfc678af04fa438b42ec06047a13a8f2c (patch) | |
tree | 4d7f0be9c6eb9e5e5cbef2ffc3e6d921b0064452 /lib | |
parent | 663a2b1553d400bca97c4490c82822d93dcf0a24 (diff) |
first_html_par: build lists 'manually' when using Hpricot
Hpricot selectors (like doc/"css path") don't return elements in their natural (depth-first) document order. To get the elements in that order, build the lists with custom searches that walk the document from its root instead.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 32 |
1 file changed, 16 insertions, 16 deletions
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index 32b05700..9b678def 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -498,25 +498,26 @@ module ::Irc txt = String.new - h = %w{h1 h2 h3 h4 h5 h6} - p = %w{p} - ar = [] - h.each { |hx| - p.each { |px| - ar << "#{hx}~#{px}" - } - } - h_p_css = ar.join("|") - debug "css search: #{h_p_css}" - pre_h = pars = by_span = nil while true debug "Minimum number of spaces: #{min_spaces}" # Initial attempt: <p> that follows <h\d> - pre_h = doc/h_p_css if pre_h.nil? - debug "Hx: found: #{pre_h.pretty_inspect}" + if pre_h.nil? + pre_h = Hpricot::Elements[] + found_h = false + doc.root.search("*") { |e| + case e.pathname + when /^h\d/ + found_h = true + when 'p' + pre_h << e if found_h + end + } + debug "Hx: found: #{pre_h.pretty_inspect}" + end + pre_h.each { |p| debug p txt = p.to_html.ircify_html @@ -551,9 +552,8 @@ module ::Irc # we don't need if by_span.nil? by_span = Hpricot::Elements[] - pre_pars = doc/"div|span|td|tr|tbody|table" - pre_pars.each { |el| - by_span.push el if el[:class] =~ /body|message|text/i + doc.root.each("*") { |el| + by_span.push el if el.pathname =~ /^(?:div|span|td|tr|tbody|table)$/ and el[:class] =~ /body|message|text/i } debug "other \#1: found: #{by_span.pretty_inspect}" end |