From 9ec5b9bab1b41cd2869b583d28afcffbb429d702 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Tue, 18 Sep 2007 23:40:42 +0000 Subject: first_html_par: make Hpricot handling more robust --- lib/rbot/core/utils/utils.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/rbot') diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index 335d0506..0582cd4b 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -513,7 +513,8 @@ module ::Irc if pre_h.nil? pre_h = Hpricot::Elements[] found_h = false - doc.root.search("*") { |e| + doc.search("*") { |e| + next if e.bogusetag? case e.pathname when /^h\d/ found_h = true @@ -558,7 +559,8 @@ module ::Irc # we don't need if by_span.nil? by_span = Hpricot::Elements[] - doc.root.search("*") { |el| + doc.search("*") { |el| + next if el.bogusetag? by_span.push el if el.pathname =~ AFTER_PAR_PATH and (el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS) } debug "other \#1: found: #{by_span.pretty_inspect}" -- cgit v1.2.3