X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Fsearch.rb;h=aec8b1345611a2994a23f711e3573664ed7c6f81;hb=890cd1f8817a7ff8ad995d78091696429730a7c7;hp=28e3c8a3655c034f3e555644f65dff08b80ac1d1;hpb=09a5d3188420a10590917b7abc2de6cffb4327f5;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb index 28e3c8a3..aec8b134 100644 --- a/data/rbot/plugins/search.rb +++ b/data/rbot/plugins/search.rb @@ -16,10 +16,13 @@ # for most languages GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q=" -GOOGLE_WAP_SEARCH = "http://www.google.com/wml/search?hl=en&q=" -GOOGLE_WAP_LINK = /(.*?)<\/a>/im -GOOGLE_CALC_RESULT = %r{
(Web definitions for .*?)
(.*?)
(.*?)\s-\s+(.*?)<\/a>/im
+GOOGLE_WAP_LINK = /(.*?)<\/a>/im
+GOOGLE_CALC_RESULT = %r{.*?]*>(.+?)}
+GOOGLE_COUNT_RESULT = %r{Results 1<\/b> - 10<\/b> of about (.*)<\/b> for}
+GOOGLE_DEF_RESULT = %r{]*>(Web definitions for .*?)
(.*?)
(.*?)\s-\s+]+>(.+?)<(br|/td)>}
class SearchPlugin < Plugin
Config.register Config::IntegerValue.new('google.hits',
@@ -43,6 +46,8 @@ class SearchPlugin < Plugin
"gcalc ]
to only search the national Wikipedia"
when "unpedia"
@@ -54,6 +59,10 @@ class SearchPlugin < Plugin
def google(m, params)
what = params[:words].to_s
+ if what.match(/^define:/)
+ return google_define(m, what, params)
+ end
+
searchfor = CGI.escape what
# This method is also called by other methods to restrict searching to some sites
if params[:site]
@@ -68,10 +77,11 @@ class SearchPlugin < Plugin
url = GOOGLE_WAP_SEARCH + site + searchfor
hits = params[:hits] || @bot.config['google.hits']
+ hits = 1 if params[:lucky]
first_pars = params[:firstpar] || @bot.config['google.first_par']
- single = (hits == 1 and first_pars == 1)
+ single = params[:lucky] || (hits == 1 and first_pars == 1)
begin
wml = @bot.httputil.get(url)
@@ -81,27 +91,42 @@ class SearchPlugin < Plugin
return
end
results = wml.scan(GOOGLE_WAP_LINK)
+
if results.length == 0
m.reply "no results found for #{what}"
return
end
+
single ||= (results.length==1)
+
urls = Array.new
+ n = 0
results = results[0...hits].map { |res|
- n = res[0]
- t = Utils.decode_html_entities res[2].gsub(filter, '').strip
- u = URI.unescape res[1]
+ n += 1
+ t = res[2].ircify_html(:img => "[%{src} %{alt} %{dimensions}]").strip
+ u = URI.unescape(res[0] || res[1])
urls.push(u)
- single ? u : "#{n}. #{Bold}#{t}#{Bold}: #{u}"
- }.join(" | ")
+ "%{n}%{b}%{t}%{b}%{sep}%{u}" % {
+ :n => (single ? "" : "#{n}. "),
+ :sep => (single ? " -- " : ": "),
+ :b => Bold, :t => t, :u => u
+ }
+ }
+
+ if params[:lucky]
+ m.reply results.first
+ return
+ end
+
+ result_string = results.join(" | ")
# If we return a single, full result, change the output to a more compact representation
if single
- m.reply "Result for %s: %s -- %s" % [what, results, Utils.get_first_pars(urls, first_pars)], :overlong => :truncate
+ m.reply "Result for %s: %s -- %s" % [what, result_string, Utils.get_first_pars(urls, first_pars)], :overlong => :truncate
return
end
- m.reply "Results for #{what}: #{results}", :split_at => /\s+\|\s+/
+ m.reply "Results for #{what}: #{result_string}", :split_at => /\s+\|\s+/
return unless first_pars > 0
@@ -109,10 +134,43 @@ class SearchPlugin < Plugin
end
+ # Handle a "define:" query: fetch the regular Google results page for the
+ # query and reply with the text of its definitions section.
+ #
+ # m:: message to reply to
+ # what:: the complete query string (google hands it over when it starts with "define:")
+ # params:: handler parameters; not read by this method
+ def google_define(m, what, params)
+ begin
+ wml = @bot.httputil.get(GOOGLE_SEARCH + CGI.escape(what))
+ # a falsy response is treated the same way as a failed request
+ raise unless wml
+ rescue => e
+ m.reply "error googling for #{what}"
+ return
+ end
+
+ begin
+ # The section boundaries are found by looking for marker strings, each
+ # search starting where the previous marker was found; a missing marker
+ # aborts the lookup.
+ related_index = wml.index(/Related phrases:/, 0)
+ raise unless related_index
+ defs_index = wml.index(/Definitions of /, related_index)
+ raise unless defs_index
+ # NOTE(review): the next line is truncated in this copy of the patch -- the
+ # end-marker regexp and the start of the rescue clause are missing. Recover
+ # them from the repository before applying.
+ defs_end = wml.index(/ e
+ m.reply "no results found for #{what}"
+ return
+ end
+
+ # NOTE(review): related is assigned but never used below
+ related = wml[related_index...defs_index]
+ defs = wml[defs_index...defs_end]
+
+ # the reply is split at Underline followed by a space
+ m.reply defs.ircify_html(:a_href => Underline), :split_at => (Underline + ' ')
+
+ end
+
+  # Lucky search: flag the request as :lucky and let google do the work,
+  # which then replies with only the first result.
+  #
+  # m::      message to reply to
+  # params:: handler parameters; the :lucky flag is set on this same hash
+  def lucky(m, params)
+    params[:lucky] = true
+    google(m, params)
+  end
+
def gcalc(m, params)
what = params[:words].to_s
searchfor = CGI.escape(what)
-
+
debug "Getting gcalc thing: #{searchfor.inspect}"
url = GOOGLE_SEARCH + searchfor
@@ -124,24 +182,54 @@ class SearchPlugin < Plugin
end
debug "#{html.size} bytes of html recieved"
-
+
results = html.scan(GOOGLE_CALC_RESULT)
debug "results: #{results.inspect}"
-
+
if results.length != 1
m.reply "couldn't calculate #{what}"
return
end
-
+
result = results[0][0].ircify_html
debug "replying with: #{result.inspect}"
m.reply "#{result}"
end
+  # Reply with the total number of results Google reports for a query.
+  #
+  # m::      message to reply to
+  # params:: handler parameters; params[:words] holds the search terms
+  #
+  # The number is extracted from the regular results page with
+  # GOOGLE_COUNT_RESULT; anything other than exactly one match is reported
+  # as "couldn't count".
+  def gcount(m, params)
+    what = params[:words].to_s
+    searchfor = CGI.escape(what)
+
+    debug "Getting gcount thing: #{searchfor.inspect}"
+    url = GOOGLE_SEARCH + searchfor
+
+    begin
+      html = @bot.httputil.get(url)
+      # Treat a missing response as a failed request, as google_define does;
+      # otherwise html.size below raises outside of this rescue and the user
+      # never gets an answer.
+      raise unless html
+    rescue
+      m.reply "error googlecounting #{what}"
+      return
+    end
+
+    debug "#{html.size} bytes of html recieved"
+
+    results = html.scan(GOOGLE_COUNT_RESULT)
+    debug "results: #{results.inspect}"
+
+    if results.length != 1
+      m.reply "couldn't count #{what}"
+      return
+    end
+
+    # first capture group of the single match is the count
+    result = results[0][0].ircify_html
+    debug "replying with: #{result.inspect}"
+    m.reply "total results: #{result}"
+  end
+
def gdef(m, params)
what = params[:words].to_s
searchfor = CGI.escape("define " + what)
-
+
debug "Getting gdef thing: #{searchfor.inspect}"
url = GOOGLE_WAP_SEARCH + searchfor
@@ -155,16 +243,27 @@ class SearchPlugin < Plugin
debug html
results = html.scan(GOOGLE_DEF_RESULT)
debug "results: #{results.inspect}"
-
+
if results.length != 1
m.reply "couldn't find a definition for #{what} on Google"
return
end
-
- head = results[0][0].ircify_html
- text = results[0][1].ircify_html
- link = results[0][2]
+
+ gdef_link = "http://www.google.com" + CGI.unescapeHTML(results[0][0]) # could be used to extract all defs
+ head = results[0][1].ircify_html
+ text = results[0][2].ircify_html
+ link = results[0][3]
m.reply "#{head} -- #{link}\n#{text}"
+
+ ### gdef_link could be used for something like
+ # html_defs = @bot.httputil.get(gdef_link)
+ # related_index = html_defs.index(/Related phrases:/, 0)
+ # defs_index = html_defs.index(/Definitions of /, related_index)
+
+ # related = html_defs[related_index..defs_index]
+ # defs = html_defs[defs_index..-1]
+
+ # m.reply defs.gsub('
','