X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Fsearch.rb;h=f54c0df6bbadf558a8627783177315c22d0d674b;hb=95fab091ab9e2b42a6e8cfda72b231ef06971e39;hp=8b3b6848d11800084a22ac93a9b64d3c13802207;hpb=d372ec5babdbf5e6a9c49b21d16bcae74d0ff01c;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb index 8b3b6848..f54c0df6 100644 --- a/data/rbot/plugins/search.rb +++ b/data/rbot/plugins/search.rb @@ -17,11 +17,10 @@ GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q=" GOOGLE_WAP_SEARCH = "http://www.google.com/m/search?hl=en&q=" -# GOOGLE_WAP_LINK = /(.*?)<\/a>/im -GOOGLE_WAP_LINK = /(.*?)<\/a>/im -GOOGLE_CALC_RESULT = %r{.*?

]*>(.+?)} +GOOGLE_WAP_LINK = /result">(?:]*>)?]*>(.*?)<\/a>/im +GOOGLE_CALC_RESULT = %r{.*?]*>(.+?)} GOOGLE_COUNT_RESULT = %r{Results 1<\/b> - 10<\/b> of about (.*)<\/b> for} -GOOGLE_DEF_RESULT = %r{]*>(Web definitions for .*?)
(.*?)
(.*?)\s-\s+
\s*(.*?)\s*
\s*(.*?)]+>(.+?)<(br|/td)>} class SearchPlugin < Plugin @@ -90,7 +89,7 @@ class SearchPlugin < Plugin m.reply "error googling for #{what}" return end - results = wml.match('

').pre_match.scan(GOOGLE_WAP_LINK) + results = wml.scan(GOOGLE_WAP_LINK) if results.length == 0 m.reply "no results found for #{what}" @@ -98,27 +97,46 @@ class SearchPlugin < Plugin end single ||= (results.length==1) + pretty = [] - urls = Array.new - n = 0 - results = results[0...hits].map { |res| - n += 1 - t = res[2].ircify_html(:img => "[%{src} %{alt} %{dimensions}]").strip - u = URI.unescape(res[0] || res[1]) - urls.push(u) - "%{n}%{b}%{t}%{b}%{sep}%{u}" % { - :n => (single ? "" : "#{n}. "), - :sep => (single ? " -- " : ": "), - :b => Bold, :t => t, :u => u - } - } + begin + urls = Array.new + + debug results + results.each do |res| + t = res[1].ircify_html(:img => "[%{src} %{alt} %{dimensions}]").strip + u = res[0] + if u.sub!(%r{^http://www.google.com/aclk\?},'') + u = CGI::parse(u)['adurl'].first + debug "skipping ad for #{u}" + next + elsif u.sub!(%r{^http://www.google.com/gwt/x\?},'') + u = CGI::parse(u)['u'].first + elsif u.sub!(%r{^/url\?},'') + u = CGI::parse(u)['q'].first + end + urls.push(u) + pretty.push("%{n}%{b}%{t}%{b}%{sep}%{u}" % { + :n => (single ? "" : "#{urls.length}. "), + :sep => (single ? " -- " : ": "), + :b => Bold, :t => t, :u => u + }) + break if urls.length == hits + end + rescue => e + m.reply "failed to understand what google found for #{what}" + error e + debug wml + debug results + return + end if params[:lucky] - m.reply results.first + m.reply pretty.first return end - result_string = results.join(" | ") + result_string = pretty.join(" | ") # If we return a single, full result, change the output to a more compact representation if single @@ -172,7 +190,7 @@ class SearchPlugin < Plugin searchfor = CGI.escape(what) debug "Getting gcalc thing: #{searchfor.inspect}" - url = GOOGLE_SEARCH + searchfor + url = GOOGLE_WAP_SEARCH + searchfor begin html = @bot.httputil.get(url) @@ -182,18 +200,19 @@ class SearchPlugin < Plugin end debug "#{html.size} bytes of html recieved" + debug html - results = html.scan(GOOGLE_CALC_RESULT) - debug "results: #{results.inspect}" + candidates = html.match(/font-weight:bold">(.*?)<\/(?:span|div)>/) + debug "candidates: #{candidates.inspect}" - if results.length != 1 + if candidates.nil? m.reply "couldn't calculate #{what}" return end + result = candidates[1] - result = results[0][0].ircify_html debug "replying with: #{result.inspect}" - m.reply "#{result}" + m.reply result.ircify_html end def gcount(m, params) @@ -249,21 +268,9 @@ class SearchPlugin < Plugin return end - gdef_link = "http://www.google.com" + CGI.unescapeHTML(results[0][0]) # could be used to extract all defs - head = results[0][1].ircify_html - text = results[0][2].ircify_html - link = results[0][3] - m.reply "#{head} -- #{link}\n#{text}" - - ### gdef_link could be used for something like - # html_defs = @bot.httputil.get(gdef_link) - # related_index = html_defs.index(/Related phrases:/, 0) - # defs_index = html_defs.index(/Definitions of /, related_index) - - # related = html_defs[related_index..defs_index] - # defs = html_defs[defs_index..-1] - - # m.reply defs.gsub('
','

  • ').ircify_html + head = results[0][0].ircify_html + text = results[0][1].ircify_html + m.reply "#{head} -- #{text}" end def wikipedia(m, params)