diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-04 23:44:56 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-04 23:44:56 +0000 |
commit | eb161b03f7b2012e1f33834220b9400848497173 (patch) | |
tree | f06436bc55ceeab886bd240bbd03b35df3eb9872 /data/rbot/plugins | |
parent | 609c06621ad4829afb88fcc31d7f74ce99b969bf (diff) |
More search.rb first_par fixups
Diffstat (limited to 'data/rbot/plugins')
-rw-r--r-- | data/rbot/plugins/search.rb | 21 |
1 files changed, 13 insertions, 8 deletions
```diff
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index f5bab421..3e1066f7 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -115,6 +115,9 @@ class SearchPlugin < Plugin
     while first_pars > 0 and urls.length > 0
       url.replace(urls.shift)
       idx += 1
+
+      # FIXME what happens if some big file is returned? We should share
+      # code with the url plugin to only retrieve partial file content!
       xml = @bot.httputil.get_cached(url)
       if xml.nil?
         debug "Unable to retrieve #{url}"
@@ -127,26 +130,28 @@ class SearchPlugin < Plugin
         debug "Found header: #{header_found[1].inspect}"
         while txt.empty?
           header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
         end
       end
       # If we haven't found a first par yet, try to get it from the whole
       # document
       if txt.empty?
-        txt = xml[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+        header_found = xml
         while txt.empty?
-          header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im].ircify_html
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
           break unless candidate
-          txt.replace candidate
+          txt.replace candidate.ircify_html
+          header_found = $'
         end
       end
-      # Nothing yet, give up
+      # Nothing yet, try title
       if txt.empty?
         debug "No first par found\n#{xml}"
-        next
+        # FIXME only do this if the 'url' plugin is loaded
+        txt.replace @bot.plugins['url'].get_title_from_html(xml)
+        next if txt.empty?
       end
       m.reply "[#{idx}] #{txt}".omissis_after(400)
       first_pars -=1
```