From 63314f4987d7acc8a39a3680320e339cc94da66b Mon Sep 17 00:00:00 2001 From: Matthias Hecker Date: Fri, 3 Apr 2020 13:51:57 +0200 Subject: [PATCH] plugin(search): fix wolfram and gdef, removed some this removes gcount and gtime from the search plugin, google no longer provides this information easily --- data/rbot/plugins/search.rb | 94 ++++----------------------------- lib/rbot/core/utils/httputil.rb | 21 +++++++- 2 files changed, 31 insertions(+), 84 deletions(-) diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb index ec39c283..b48e937e 100644 --- a/data/rbot/plugins/search.rb +++ b/data/rbot/plugins/search.rb @@ -19,16 +19,12 @@ GOOGLE_SEARCH = "https://www.google.com/search?hl=en&oe=UTF-8&ie=UTF-8&gbv=1&q=" GOOGLE_WAP_SEARCH = "https://www.google.com/m/search?hl=en&ie=UTF-8&gbv=1&q=" GOOGLE_WAP_LINK = /]*>\s*]*>(.*?)\s*<\/div>/im GOOGLE_CALC_RESULT = />Calculator<\/span>(?:<\/?[^>]+>\s*)+([^<]+)/ -GOOGLE_COUNT_RESULT = %r{Results 1<\/b> - 10<\/b> of about (.*)<\/b> for} -GOOGLE_DEF_RESULT = %r{onebox_result">\s*(.*?)\s*
\s*(.*?)]+>(.+?)<(br|/td)>} - +GOOGLE_XPATH_DEF = "//img[@id='flex_text_audio_icon_chunk']/../../../../div[3]//text()" DDG_API_SEARCH = "http://api.duckduckgo.com/?format=xml&no_html=1&skip_disambig=1&no_redirect=0&q=" - WOLFRAM_API_SEARCH = "http://api.wolframalpha.com/v2/query?input=%{terms}&appid=%{key}&format=plaintext" + "&scantimeout=3.0&podtimeout=4.0&formattimeout=8.0&parsetimeout=5.0" + "&excludepodid=SeriesRepresentations:*" -WOLFRAM_API_KEY = "4EU37Y-TX9WJG3JH3" +WOLFRAM_API_KEY = "9RW6XR-QTL2JT7J4W" class SearchPlugin < Plugin Config.register Config::IntegerValue.new('duckduckgo.hits', @@ -61,8 +57,6 @@ class SearchPlugin < Plugin "gcalc => use the google calculator to find the answer to " when "gdef" "gdef => use the google define mechanism to find a definition of " - when "gtime" - "gtime => use the google clock to find the current time at " when "wa" "wa => searches WolframAlpha for " when "wp" @@ -312,36 +306,6 @@ class SearchPlugin < Plugin m.reply result.ircify_html end - def gcount(m, params) - what = params[:words].to_s - searchfor = CGI.escape(what) - - debug "Getting gcount thing: #{searchfor.inspect}" - url = GOOGLE_SEARCH + searchfor - - begin - html = @bot.httputil.get(url) - rescue => e - m.reply "error googlecounting #{what}" - return - end - - debug "#{html.size} bytes of html recieved" - - results = html.scan(GOOGLE_COUNT_RESULT) - debug "results: #{results.inspect}" - - if results.length != 1 - m.reply "couldn't count #{what}" - return - end - - result = results[0][0].ircify_html - debug "replying with: #{result.inspect}" - m.reply "total results: #{result}" - - end - def gdef(m, params) what = params[:words].to_s searchfor = CGI.escape("define " + what) @@ -350,24 +314,19 @@ class SearchPlugin < Plugin url = GOOGLE_WAP_SEARCH + searchfor begin - html = @bot.httputil.get(url) + resp = @bot.httputil.get(url, resp: true) rescue => e m.reply "error googledefining #{what}" return end - debug html - results = html.scan(GOOGLE_DEF_RESULT) - debug "results: #{results.inspect}" + results = resp.xpath(GOOGLE_XPATH_DEF).map(&:content) - if results.length != 1 + if results.empty? m.reply "couldn't find a definition for #{what} on Google" - return + else + m.reply "#{results.first} -- #{results[1..-1].join(' ')}" end - - head = results[0][0].ircify_html - text = results[0][1].ircify_html - m.reply "#{head} -- #{text}" end def wolfram(m, params) @@ -438,47 +397,16 @@ class SearchPlugin < Plugin params[:firstpar] = @bot.config['wikipedia.first_par'] return google(m, params) end - - def gtime(m, params) - where = params[:words].to_s - where.sub!(/^\s*in\s*/, '') - searchfor = CGI.escape("time in " + where) - url = GOOGLE_SEARCH + searchfor - - begin - html = @bot.httputil.get(url) - rescue => e - m.reply "Error googletiming #{where}" - return - end - - debug html - results = html.scan(GOOGLE_TIME_RESULT) - debug "results: #{results.inspect}" - - if results.length != 1 - m.reply "Couldn't find the time for #{where} on Google" - return - end - - time = results[0][0].ircify_html - m.reply "#{time}" - end end plugin = SearchPlugin.new - plugin.map "ddg *words", :action => 'duckduckgo', :threaded => true - plugin.map "search *words", :action => 'google', :threaded => true - plugin.map "google *words", :action => 'google', :threaded => true - plugin.map "lucky *words", :action => 'lucky', :threaded => true - -# Broken: -plugin.map "gcount *words", :action => 'gcount', :threaded => true - +plugin.map "ddg *words", :action => 'duckduckgo', :threaded => true +plugin.map "search *words", :action => 'google', :threaded => true +plugin.map "google *words", :action => 'google', :threaded => true +plugin.map "lucky *words", :action => 'lucky', :threaded => true plugin.map "gcalc *words", :action => 'gcalc', :threaded => true plugin.map "gdef *words", :action => 'gdef', :threaded => true -plugin.map "gtime *words", :action => 'gtime', :threaded => true plugin.map "wa *words", :action => 'wolfram', :threaded => true plugin.map "wp :lang *words", :action => 'wikipedia', :requirements => { :lang => /^\w\w\w?$/ }, :threaded => true plugin.map "wp *words", :action => 'wikipedia', :threaded => true diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb index f7871e5b..e629becb 100644 --- a/lib/rbot/core/utils/httputil.rb +++ b/lib/rbot/core/utils/httputil.rb @@ -29,6 +29,12 @@ rescue LoadError => e EOC end +begin + require 'nokogiri' +rescue LoadError => e + error "No nokogiri library found, some features might not be available!" +end + # To handle Gzipped pages require 'stringio' require 'zlib' @@ -163,6 +169,15 @@ module ::Net return self.body_to_utf(self.decompress_body(partial)) end + + def xpath(path) + document = Nokogiri::HTML.parse(self.body) + document.xpath(path) + end + + def to_json + JSON::parse(self.body) + end end end @@ -649,7 +664,11 @@ class HttpUtil resp = get_response(uri, options, &block) raise "http error: #{resp}" unless Net::HTTPOK === resp || Net::HTTPPartialContent === resp - return resp.body + if options[:resp] + return resp + else + return resp.body + end rescue Exception => e error e end -- 2.39.2