diff options
-rw-r--r-- | data/rbot/plugins/search.rb | 33 | ||||
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 28 |
2 files changed, 33 insertions, 28 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb index 6fb1959a..e94661b0 100644 --- a/data/rbot/plugins/search.rb +++ b/data/rbot/plugins/search.rb @@ -82,37 +82,14 @@ class SearchPlugin < Plugin debug "Unable to retrieve #{url}" next end - # We get the first par after the first main heading, if possible - header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im) - txt = String.new - if header_found - debug "Found header: #{header_found[1].inspect}" - while txt.empty? - header_found = $' - candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] - break unless candidate - txt.replace candidate.ircify_html - end - end - # If we haven't found a first par yet, try to get it from the whole - # document - if txt.empty? - header_found = xml - while txt.empty? - candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] - break unless candidate - txt.replace candidate.ircify_html - header_found = $' - end - end - # Nothing yet, try title - if txt.empty? + par = Utils.ircify_first_html_par(xml) + if par.empty? debug "No first par found\n#{xml}" # FIXME only do this if the 'url' plugin is loaded - txt.replace @bot.plugins['url'].get_title_from_html(xml) - next if txt.empty? + par = @bot.plugins['url'].get_title_from_html(xml) + next if par.empty? end - m.reply "[#{idx}] #{txt}", :overlong => :truncate + m.reply "[#{idx}] #{par}", :overlong => :truncate first_pars -=1 end end diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index fc89e1c3..52375334 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -415,5 +415,33 @@ module ::Irc } end end + + # Try to grab and IRCify the first HTML par (<p> tag) in the given string. + # If possible, grab the one after the first h1 heading + def Utils.ircify_first_html_par(xml) + header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im) + txt = String.new + if header_found + debug "Found header: #{header_found[1].inspect}" + while txt.empty? 
+ header_found = $' + candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] + break unless candidate + txt = candidate.ircify_html + end + end + # If we haven't found a first par yet, try to get it from the whole + # document + if txt.empty? + header_found = xml + while txt.empty? + candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] + break unless candidate + txt = candidate.ircify_html + header_found = $' + end + end + return txt + end end end |