From 6cbe66cdd40ef0bc0d25ba37c22bb7e08404a09f Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sat, 14 Oct 2006 10:02:43 +0000 Subject: Update demauro plugin to use the WAP interface: much better, much faster --- data/rbot/plugins/demauro.rb | 96 +++++++++++--------------------------------- 1 file changed, 23 insertions(+), 73 deletions(-) (limited to 'data') diff --git a/data/rbot/plugins/demauro.rb b/data/rbot/plugins/demauro.rb index 12a784ff..b502238e 100644 --- a/data/rbot/plugins/demauro.rb +++ b/data/rbot/plugins/demauro.rb @@ -1,92 +1,42 @@ require 'erb' +DEMAURO_LEMMA = /(.*?)(?: - (.*?))<\/anchor>/ class DeMauroPlugin < Plugin - include ERB::Util - def initialize super @dmurl = "http://www.demauroparavia.it/" + @wapurl = "http://wap.demauroparavia.it/" end def help(plugin, topic="") - return "demauro => fornisce il link della definizione della parola dal dizionario De Mauro/Paravia" + return "demauro => provides a link to the definition of the word from the Italian dictionary De Mauro/Paravia" end def demauro(m, params) - parola = params[:parola] - url = @dmurl + "cerca?stringa=#{url_encode(parola)}" - uri = URI.parse(url) - http = @bot.httputil.get_proxy(uri) - xml = nil - defurls = Array.new - begin - http.start() { |http| - resp = http.get(uri.request_uri()) - case resp.code - when "200" - xml = resp.body - when "302" - loc = resp['location'] - if loc =~ /#{@dmurl}\d+/ - defurls << loc - end - else - debug resp.to_a - end - } - rescue => e - debug "HttpUtil.get exception: #{e.inspect}, while trying to get #{uri}" - debug e.backtrace.join("\n") - m.reply "C'è stato un errore nella ricerca" + parola = params[:parola].downcase + url = @wapurl + "index.php?lemma=#{ERB::Util.url_encode(parola)}" + xml = @bot.httputil.get(url) + if xml.nil? + info = @bot.httputil.last_response + info = info ? "(#{info.code} - #{info.message})" : "" + m.reply "An error occurred while looking for #{parola}#{info}" + return + end + if xml=~ /Non ho trovato occorrenze per/ + m.reply "Nothing found for #{parola}" return end - if xml - if xml=~ /Non ho trovato occorrenze per/ - m.reply "Parola non trovata" - return - else - xml.gsub(/href="(\d+)"/) { |match| - debug match.to_a.join(" || ") - defurls << "#{@dmurl}#{$1}" - } - end + entries = xml.scan(DEMAURO_LEMMA) + text = parola + if !entries.assoc(parola) and !entries.assoc(parola.upcase) + text += " not found. Similar words" end - lemmas = Array.new - defurls.each { |url| - uri = URI.parse(url) - http = @bot.httputil.get_proxy(uri) - begin - debug "Scanning #{url}" - http.start() { |http| - resp = http.get(uri.request_uri()) - case resp.code - when "200" - debug "Got data" - matched = /(.*)<\/span>(.*?)<\/span>/.match(resp.body) - dirtylemma = matched[1] - qual = matched[2] - lemma = dirtylemma.gsub(/<\/?span(?: class="pipelemma")?>/,"") - debug lemma - lemma = lemma.gsub(/1<\/sup>/,'¹').gsub(/2<\/sup>/,'²').gsub(/3<\/sup>/,'³') - lemma = lemma.gsub(/4<\/sup>/,'⁴').gsub(/5<\/sup>/,'⁵').gsub(/6<\/sup>/,'⁶') - lemma = lemma.gsub(/7<\/sup>/,'⁷').gsub(/8<\/sup>/,'⁸').gsub(/9<\/sup>/,'⁹') - debug lemma - lemma += " #{qual} (#{uri})" - lemmas << lemma - else - debug resp.to_a.join("\r") - end - } - rescue => e - debug "Exception '#{e.inspect}' while trying to get and parse #{uri}" - debug e.backtrace.join("\n") - m.reply "C'è stato un errore nell'elaborazione del risultato" - return - end - } - pre = lemmas.length > 1 ? "Lemmi trovati" : "Lemma trovato" - m.reply "#{pre}: #{lemmas.join(' ; ')}" + text += ": " + text += entries[0..5].map { |ar| + "#{ar[0]} - #{ar[1].gsub(/<\/?em>/,'')}: #{@dmurl}#{ar[2]}" + }.join(" | ") + m.reply text end end -- cgit v1.2.3