X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Fslashdot.rb;h=2c4a23618b8c91d8f551f53e2dd13b855c9befb5;hb=bc7efe2d4b360da0276287e6cc7f6a401609c162;hp=c9e35b9e9592badde10fe4689aa9765e17c55add;hpb=109fa2a5b63af113df2c6b21d44135efa0d94d70;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/slashdot.rb b/data/rbot/plugins/slashdot.rb index c9e35b9e..2c4a2361 100644 --- a/data/rbot/plugins/slashdot.rb +++ b/data/rbot/plugins/slashdot.rb @@ -1,3 +1,8 @@ +#-- vim:sw=2:et +#++ +# +# :title: Slashdot plugin for rbot + require 'rexml/document' class SlashdotPlugin < Plugin @@ -5,7 +10,59 @@ class SlashdotPlugin < Plugin def help(plugin, topic="") "slashdot search [=4] => search slashdot for , slashdot [=4] => return up to slashdot headlines (use negative max to return that many headlines, but all on one line.)" end - + + # This method defines a filter for /. pages. It's needed because the generic + # summarization grabs a comment, not the actual article. + # + # This filter relies on Hpricot being available, since REXML isn't too + # happy with the /. pages + def slashdot_filter(s) + return nil unless defined? Hpricot + loc = Utils.check_location(s, /slashdot\.org/) + return nil unless loc + h = Hpricot(s[:text]) + # If we have no title tag in a head tag, return as this is not + # a /. page (it's probably a Slashdot RSS + ht = h/"head/title" + return nil if ht.empty? + title = ht.first.to_html.ircify_html + arts = (h/"div.article") + return nil if arts.empty? + if arts.length > 1 + tits = [] + arts.each { |el| + # see if the div tag with generaltitle class is present + artitle = (el/"div.generaltitle").first + if artitle + tits << artitle.to_html.ircify_html + next + end + # otherwise, check for skin+datitle a tags + datitle = (el/"a.datitle").first + next unless datitle + skin = (el/"a.skin").first + artitle = [ + skin ? skin.innerHTML.ircify_html : nil, + datitle.innerHTML.ircify_html + ].compact.join(" ") + tits << artitle + } + content = tits.join(" | ") + else + det = (arts.first/"div.details").first.to_html.ircify_html + body = (arts.first/"div.body").first.to_html.ircify_html + content = [det, body].join(' ') + end + return {:title => title, :content => content} + end + + def initialize + super + if defined? Hpricot + @bot.register_filter(:slashdot, :htmlinfo) { |s| slashdot_filter(s) } + end + end + def search_slashdot(m, params) max = params[:limit].to_i search = params[:search].to_s @@ -18,9 +75,9 @@ class SlashdotPlugin < Plugin debug xml.inspect begin doc = Document.new xml - rescue REXML::ParseException => e - warning e.inspect - m.reply "couldn't parse output XML: #{e.class}" + rescue REXML::ParseException => err + warning err.inspect + m.reply "couldn't parse output XML: #{err.class}" return end unless doc @@ -42,7 +99,7 @@ class SlashdotPlugin < Plugin m.reply "search for #{search} failed" end end - + def slashdot(m, params) debug params.inspect max = params[:limit].to_i @@ -66,12 +123,12 @@ class SlashdotPlugin < Plugin max = 8 if max > 8 matches = Array.new doc.elements.each("*/story") {|e| - matches << [ e.elements["title"].text, - e.elements["author"].text, + matches << [ e.elements["title"].text, + e.elements["author"].text, e.elements["time"].text.gsub(/\d{4}-(\d{2})-(\d{2})/, "\\2/\\1").gsub(/:\d\d$/, "") ] done += 1 break if done >= max - } + } if oneline m.reply matches.collect{|mat| mat[0]}.join(" | ") else