From fce8443bb6ab99c3b36fc95583bb59275f019251 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Wed, 19 Mar 2008 13:14:25 +0100 Subject: slashdot plugin: provide an htmlinfo filter for /. pages --- data/rbot/plugins/slashdot.rb | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'data/rbot/plugins') diff --git a/data/rbot/plugins/slashdot.rb b/data/rbot/plugins/slashdot.rb index c9e35b9e..b02a5a25 100644 --- a/data/rbot/plugins/slashdot.rb +++ b/data/rbot/plugins/slashdot.rb @@ -1,3 +1,8 @@ +#-- vim:sw=2:et +#++ +# +# :title: Slashdot plugin for rbot + require 'rexml/document' class SlashdotPlugin < Plugin @@ -5,6 +10,40 @@ class SlashdotPlugin < Plugin def help(plugin, topic="") "slashdot search [=4] => search slashdot for , slashdot [=4] => return up to slashdot headlines (use negative max to return that many headlines, but all on one line.)" end + + # This method defines a filter for /. pages. It's needed because the generic + # summarization grabs a comment, not the actual article. + # + # This filter relies on Hpricot being available, since REXML isn't too + # happy with the /. pages + def slashdot_filter(s) + return nil unless defined? Hpricot + loc = Utils.check_location(s, /slashdot\.org/) + return nil unless loc + h = Hpricot(s[:text]) + title = (h/"head/title").first.to_html.ircify_html + arts = (h/"div.article") + if arts.length > 1 + tits = [] + arts.each { |el| + artitle = (el/"div.generaltitle").first.to_html.ircify_html + tits << artitle + } + content = tits.join(" | ") + else + det = (arts.first/"div.details").first.to_html.ircify_html + body = (arts.first/"div.body").first.to_html.ircify_html + content = [det, body].join(' ') + end + return {:title => title, :content => content} + end + + def initialize + super + if defined? Hpricot + @bot.register_filter(:slashdot, :htmlinfo) { |s| slashdot_filter(s) } + end + end def search_slashdot(m, params) max = params[:limit].to_i -- cgit v1.2.3