X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;ds=sidebyside;f=data%2Frbot%2Fplugins%2Fimdb.rb;h=5615ac00c4dd7e4cb07fc1a8b807738a8501d77d;hb=bbf28120c7975c1b5d464d35649d5a62c50bcd2f;hp=6cbdaf5386728dc09865491d57117872bc9bad39;hpb=edd1cf77be07ae507014574141e920ad23eb164d;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb index 6cbdaf53..5615ac00 100644 --- a/data/rbot/plugins/imdb.rb +++ b/data/rbot/plugins/imdb.rb @@ -6,9 +6,10 @@ # Author:: Arnaud Cornet # Copyright:: (C) 2005 Arnaud Cornet # License:: MIT license +# +# Notes by Giuseppe Bilotta: +# TODO return more than one match (configurable) -require 'net/http' -require 'cgi' require 'uri/common' class Imdb @@ -17,26 +18,34 @@ class Imdb end def search(rawstr) - str = URI.escape(rawstr) - @http = @bot.httputil.get_proxy(URI.parse("http://us.imdb.com/find?q=#{str}")) - @http.start + str = URI.escape(rawstr) << ";site=aka" + return do_search(str) + end + + def do_search(str) + resp = nil begin - resp, data = @http.get("/find?q=#{str}", "User-Agent" => "Mozilla/5.0") - rescue Net::ProtoRetriableError => detail - head = detail.data - if head.code == "301" or head.code == "302" - return head['location'].gsub(/http:\/\/us.imdb.com/, "").gsub(/\?.*/, "") - end + resp = @bot.httputil.get_response("http://us.imdb.com/find?q=#{str}", + :max_redir => -1) + rescue Exception => e + error e.message + warning e.backtrace.join("\n") + return nil end + if resp.code == "200" - m = /]*)>([^<]*)<\/a>/.match(resp.body) + m = /]*)>(?:[^<]*)<\/a>/.match(resp.body) if m url = m[1] - title = m[2] return url end elsif resp.code == "302" - return resp['location'].gsub(/http:\/\/us.imdb.com/, "").gsub(/\?.*/, "") + new_loc = resp['location'].gsub(/http:\/\/us.imdb.com/, "") + if new_loc.match(/\/find\?q=(.*)/) + return do_search($1) + else + return new_loc.gsub(/\?.*/, "") + end end return nil end @@ -47,24 +56,121 @@ class Imdb debug "IMDB: search returned NIL" return nil end - resp, data = @http.get(sr, "User-Agent" => - "Mozilla/5.0 (compatible; Konqueror/3.1; Linux)") + type = sr.match(/^\/([^\/]+)\//)[1].downcase.intern rescue nil + case type + when :title + return info_title(sr) + when :name + return info_name(sr) + else + return "#{sr}" + end + end + + def grab_info(info, body) + /
\s+
#{info}:<\/h5>\s+(.*?)<\/div>/mi.match(body)[1] rescue nil + end + + def info_title(sr) + resp = nil + begin + resp = @bot.httputil.get_response('http://us.imdb.com' + sr, + :max_redir => -1) + rescue Exception => e + error e.message + warning e.backtrace.join("\n") + return nil + end + if resp.code == "200" m = /([^<]*)<\/title>/.match(resp.body) return nil if !m - title = CGI.unescapeHTML(m[1]) + title = Utils.decode_html_entities(m[1]) - m = /<b>([0-9.]+)\/10<\/b> \(([0-9,]+) votes?\)/.match(resp.body) + m = /<b>([0-9.]+)\/10<\/b>\n?\r?\s+<small>\(<a href="ratings">([0-9,]+) votes?<\/a>\)<\/small>/.match(resp.body) return nil if !m score = m[1] votes = m[2] + plot = nil + data = grab_info(/Plot (?:Outline|Summary)/, resp.body) + if data + plot = "Plot: #{data.ircify_html.gsub(/\s+more$/,'')}" + end + genre = Array.new resp.body.scan(/<a href="\/Sections\/Genres\/[^\/]+\/">([^<]+)<\/a>/) do |gnr| genre << gnr end - return ["http://us.imdb.com" + sr, title, score, votes, - genre] + info = "#{title} : http://us.imdb.com#{sr}\n" + info << "Ratings: #{score}/10 (#{votes} voters). Genre: #{genre.join('/')}\n" + info << plot if plot + return info + end + return nil + end + + def info_name(sr) + resp = nil + begin + resp = @bot.httputil.get_response('http://us.imdb.com' + sr, + :max_redir => -1) + rescue Exception => e + error e.message + warning e.backtrace.join("\n") + return nil + end + + if resp.code == "200" + m = /<title>([^<]*)<\/title>/.match(resp.body) + return nil if !m + name = Utils.decode_html_entities(m[1]) + + birth = nil + data = grab_info("Date of Birth", resp.body) + if data + birth = "Birth: #{data.ircify_html.gsub(/\s+more$/,'')}" + end + + death = nil + data = grab_info("Date of Death", resp.body) + if data + death = "Death: #{data.ircify_html.gsub(/\s+more$/,'')}" + end + + movies = {} + + filmorate = nil + begin + filmorate = @bot.httputil.get("http://us.imdb.com" + sr + "filmorate") + rescue Exception + end + + if filmorate + filmorate.scan(/<div class="filmo">.*?<a href="\/title.*?<\/div>/m) { |str| + what = str.match(/<a name="[^"]+">([^<]+)<\/a>/)[1] rescue nil + # next unless what + next unless ['Actor', 'Director'].include?(what) + movies[what] = str.scan(/<a href="\/title\/[^"]+">([^<]+)<\/a>/)[0..2].map { |tit| + Utils.decode_html_entities(tit) + } + } + end + debug movies.inspect + + info = "#{name} : http://us.imdb.com#{sr}\n" + info << [birth, death].compact.join('. ') << "\n" + unless movies.empty? + info << "Top Movies:: " + ar = [] + movies.keys.sort.each { |key| + ar << key.dup + ar.last << ": " + movies[key].join(', ') + } + info << ar.join('. ') + end + return info + end return nil end @@ -83,8 +189,7 @@ class ImdbPlugin < Plugin m.reply "Nothing found for #{what}" return nil end - m.reply "#{info[1]} : #{info[0]}" - m.reply "Ratings: #{info[2]}/10 (#{info[3]} voters). Genre: #{info[4].join('/')}" + m.reply info end end