plugin(oxford): fix result handling, closes #37

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / oxford.rb
diff --git a/data/rbot/plugins/oxford.rb b/data/rbot/plugins/oxford.rb

index 990417133485870684aec2d69774e26d853214f7..5c9b0e4eb4f481f0f0b6411fcac11acf1284430e 100644 (file)
--- a/data/rbot/plugins/oxford.rb
+++ b/data/rbot/plugins/oxford.rb
@@ -8,52 +8,80 @@
  # Copyright:: (C) 2006-2007 Giuseppe Bilotta
  # License:: GPL v2
  #
+require 'cgi'
+require 'uri'
  
  class OxfordPlugin < Plugin
-  Config.register Config::IntegerValue.new('oxford.hits',
-    :default => 3,
-    :desc => "Number of hits to return from a dictionary lookup")
-  Config.register Config::IntegerValue.new('oxford.first_par',
-    :default => 0,
-    :desc => "When set to n > 0, the bot will return the first paragraph from the first n dictionary hits")
+  Config.register Config::IntegerValue.new(
+    'oxford.max_lines',
+    :default => 1,
+    :desc => 'The number of lines to respond with.')
  
    def initialize
      super
-    @oxurl = "http://www.oxforddictionaries.com/definition/english/%s"
+    @base_url = "https://www.lexico.com"
    end
  
    def help(plugin, topic="")
-    'oxford <word>: check for <word> on the oxford english dictionary.'
+    'oxford <word>: check for <word> on the lexico english dictionary (powered by oxford english dictionary).'
    end
  
    def oxford(m, params)
-    justcheck = params[:justcheck]
+    word = params[:word].join(' ')
  
-    word = params[:word].join
-    [word, word + "_1"].each { |check|
-      url = @oxurl % CGI.escape(check)
-      if params[:british]
-        url << "?view=uk"
-      end
-      h = @bot.httputil.get(url, :max_redir => 5)
-      if h
-       defs = h.split("<span class=\"definition\">")
-       defs = defs[1..-1].map {|d| d.split("</span>")[0]}
-        if defs.size == 0
-         return false if justcheck
-         m.reply "#{word} not found"
-         return false
-       end
-       m.reply("#{word}: #{url}") unless justcheck
-       defn = defs[0]
-        m.reply("#{Bold}%s#{Bold}: %s" % [word, defn.ircify_html(:nbsp => :space)], :overlong => :truncate)
-        return true
+    url = "#{@base_url}/definition/#{URI::encode word}"
+
+    begin
+      debug "searching definition for #{word.inspect}"
+
+      response = @bot.httputil.get(url, resp: true)
+      definition = parse_definition(response)
+
+      # try to find an alternative word (different spelling, typos, etc.)
+      if definition.empty?
+        debug "search for alternative spelling result"
+        url = title = nil
+        exact_matches = response.xpath('//div[@class="no-exact-matches"]//ul/li/a')
+        if not exact_matches.empty? and not exact_matches.first['href'].empty?
+          url = @base_url + exact_matches.first['href']
+          title = exact_matches.first.content
+        else
+          debug 'use web-service to find alternative result'
+          # otherwise fall back to their web service (json-p)
+          url = "#{@base_url}/search/dataset.js?dataset=noad&dictionary=en&query=#{CGI.escape word}"
+          response = @bot.httputil.get(url, headers: {'X-Requested-With': 'XMLHttpRequest'})
+          alternative = response.gsub(/\\/, '').scan(/href="([^"]+)">([^<]+)</)
+          url = @base_url + alternative.first[0]
+          title = alternative.first[1]
+        end
+
+        debug "search for alternative spelling result, returned title=#{title.inspect} url=#{url.inspect}"
+
+        if url and title
+          unless title.downcase == word.downcase
+            m.reply "did you mean: #{Bold}#{title.ircify_html}#{NormalText}?"
+          end
+          response = @bot.httputil.get(url, resp: true)
+          definition = parse_definition(response)
+        end
        end
-    }
+    rescue => e
+      m.reply "error accessing lexico url -> #{url}"
+      error e
+      return
+    end
+
+    unless definition.empty?
+      m.reply definition.ircify_html, max_lines: @bot.config['oxford.max_lines']
+    else
+      m.reply "couldn't find a definition for #{word} on oxford dictionary"
+    end
    end
  
-  def is_british?(word)
-    return oxford(nil, :word => word, :justcheck => true, :british => true)
+  private
+
+  def parse_definition(r)
+    r.xpath('//section[@class="gramb"]//text()').map(&:content).join(' ')
    end
  end