summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-01-28 23:16:43 +0000
committerGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-01-28 23:16:43 +0000
commitedd7e0e88e10628cbfbf2e86decbfcfd7d2b53ae (patch)
treeeb5093a13e46f4754bc2497d5e3749e1236b6cd1
parente1556de8aa3d4cd29db17908d3a14bbb133474bd (diff)
Searches can now return the first paragraph of the first 'n' hits. Wikipedia and Google searches can be configured separately, both for the number of hits returned and for the number of first paragraphs returned.
-rw-r--r--data/rbot/plugins/search.rb84
1 files changed, 78 insertions, 6 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index a498d47f..a035b831 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -4,17 +4,52 @@ Net::HTTP.version_1_2
GOOGLE_WAP_LINK = /<a accesskey="(\d)" href=".*?u=(.*?)">(.*?)<\/a>/im
+# Core-class extensions used by the search plugin to turn fetched HTML
+# snippets into short, IRC-friendly text.
+class ::String
+  # Truncate the string to at most +len+ characters, replacing the trailing
+  # whitespace and partial word at the cut point with "...".
+  #
+  # Returns the receiver itself when it is not longer than +len+. When the
+  # first +len+ characters contain no whitespace, the plain truncation is
+  # returned without an ellipsis.
+  def omissis_after(len)
+    return self unless length > len
+    # \z anchors at the real end of the truncated text; `$` would also match
+    # before an embedded newline and could cut in the middle of the text.
+    self[0...len].sub(/\s+\S*\z/, "...")
+  end
+
+  # Return a copy of the string with HTML markup converted for IRC:
+  # <b> and <i> tags become the Bold and Underline control codes, <p>/<br>
+  # tags and newlines become spaces, every other tag is dropped, whitespace
+  # runs are collapsed, HTML entities are decoded and the result is stripped.
+  #
+  # The receiver is left untouched.
+  def ircify_html
+    # Work on a copy: the gsub! calls below would otherwise rewrite the
+    # caller's string in place (and raise on a frozen receiver).
+    txt = dup
+    txt.gsub!(/<\/?b\s*>/, "#{Bold}")
+    txt.gsub!(/<\/?i\s*>/, "#{Underline}")
+    ## This would be a nice addition, but the results are horrible
+    ## Maybe make it configurable?
+    # txt.gsub!(/<\/?a( [^>]*)?>/, "#{Reverse}")
+    txt.gsub!(/<\/?(p|br)>/, ' ')
+    txt.gsub!("\n", ' ')
+    txt.gsub!(/<[^>]+>/, '')
+    txt.gsub!(/\s+/, ' ')
+    # Non-bang strip: strip! returns nil when there is nothing to strip,
+    # which would make this method return nil instead of the text.
+    Utils.decode_html_entities(txt).strip
+  end
+end
+
class SearchPlugin < Plugin
+ BotConfig.register BotConfigIntegerValue.new('google.hits',
+ :default => 3,
+ :desc => "Number of hits to return from Google searches")
+ BotConfig.register BotConfigIntegerValue.new('google.first_par',
+ :default => 0,
+ :desc => "When set to n > 0, the bot will return the first paragraph from the first n search hits")
+ BotConfig.register BotConfigIntegerValue.new('wikipedia.hits',
+ :default => 3,
+ :desc => "Number of hits to return from Wikipedia searches")
+ BotConfig.register BotConfigIntegerValue.new('wikipedia.first_par',
+ :default => 1,
+ :desc => "When set to n > 0, the bot will return the first paragraph from the first n wikipedia search hits")
+
def help(plugin, topic="")
case topic
- when "search"
- "search <string> => search google for <string>"
- when "google"
- "google <string> => search google for <string>"
+ when "search", "google"
+ "#{topic} <string> => search google for <string>"
when "wp"
"wp [<code>] <string> => search for <string> on Wikipedia. You can select a national <code> to only search the national Wikipedia"
else
- "search <string> (or: google <string>) => search google for <string> | wp <string> => search for <string> on Wikipedia"
+ "search <string> (or: google <string>) => search google for <string> | wp <string> => search for <string> on Wikipedia"
end
end
@@ -33,6 +68,7 @@ class SearchPlugin < Plugin
url = "http://www.google.com/wml/search?q=#{site}#{searchfor}"
+ hits = params[:hits] || @bot.config['google.hits']
begin
wml = @bot.httputil.get_cached(url)
@@ -45,21 +81,57 @@ class SearchPlugin < Plugin
m.reply "no results found for #{what}"
return
end
- results = results[0...3].map { |res|
+ urls = Array.new
+ results = results[0...hits].map { |res|
n = res[0]
t = Utils.decode_html_entities res[2].gsub(filter, '').strip
u = URI.unescape res[1]
+ urls.push(u)
"#{n}. #{Bold}#{t}#{Bold}: #{u}"
}.join(" | ")
m.reply "Results for #{what}: #{results}"
+
+ first_pars = params[:firstpar] || @bot.config['google.first_par']
+
+ idx = 0
+ while first_pars > 0 and urls.length > 0
+ url.replace(urls.shift)
+ idx += 1
+ xml = @bot.httputil.get_cached(url)
+ if xml.nil?
+ debug "Unable to retrieve #{url}"
+ next
+ end
+ # We get the first par after the first main heading, if possible
+ header_found = xml.match(/<h1( [^>]*)?>.*?<\/h1>/im)
+ txt = nil
+ if header_found
+ txt = header_found.post_match[/<p( [^>]*)?>.*?<\/p>/im]
+ end
+ # If we haven't found a first par yet, try to get it from the whole
+ # document
+ unless txt
+ txt = xml[/<p( [^>]*)?>.*?<\/p>/im]
+ end
+ # Nothing yet, give up
+ unless txt
+ debug "No first par found\n#{xml}"
+ next
+ end
+ m.reply "[#{idx}] #{txt.ircify_html}".omissis_after(400)
+ first_pars -=1
+ end
end
def wikipedia(m, params)
lang = params[:lang]
site = "#{lang.nil? ? '' : lang + '.'}wikipedia.org"
+ debug "Looking up things on #{site}"
params[:site] = site
params[:filter] = / - Wikipedia.*$/
+ params[:hits] = @bot.config['wikipedia.hits']
+ params[:firstpar] = @bot.config['wikipedia.first_par']
return google(m, params)
end
end