plugin(script): remove deprecated $SAFE

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / slashdot.rb
diff --git a/data/rbot/plugins/slashdot.rb b/data/rbot/plugins/slashdot.rb

index b09ac7a789706a477a88bfa19b3fce935295aed9..2c4a23618b8c91d8f551f53e2dd13b855c9befb5 100644 (file)
--- a/data/rbot/plugins/slashdot.rb
+++ b/data/rbot/plugins/slashdot.rb
@@ -1,63 +1,110 @@
+#-- vim:sw=2:et
+#++
+#
+# :title: Slashdot plugin for rbot
+
  require 'rexml/document'
-require 'uri/common'
  
  class SlashdotPlugin < Plugin
    include REXML
    def help(plugin, topic="")
      "slashdot search <string> [<max>=4] => search slashdot for <string>, slashdot [<max>=4] => return up to <max> slashdot headlines (use negative max to return that many headlines, but all on one line.)"
    end
-  def privmsg(m)
-    if m.params && m.params =~ /^search\s+(.*)\s+(\d+)$/
-      search = $1
-      limit = $2.to_i
-      search_slashdot m, search, limit
-    elsif m.params && m.params =~ /^search\s+(.*)$/
-      search = $1
-      search_slashdot m, search
-    elsif m.params && m.params =~ /^([-\d]+)$/
-      limit = $1.to_i
-      slashdot m, limit
+
+  # This method defines a filter for /. pages. It's needed because the generic
+  # summarization grabs a comment, not the actual article.
+  #
+  # This filter relies on Hpricot being available, since REXML isn't too
+  # happy with the /. pages
+  def slashdot_filter(s)
+    return nil unless defined? Hpricot
+    loc = Utils.check_location(s, /slashdot\.org/)
+    return nil unless loc
+    h = Hpricot(s[:text])
+    # If we have no title tag in a head tag, return as this is not
+    # a /. page (it's probably a Slashdot RSS feed)
+    ht = h/"head/title"
+    return nil if ht.empty?
+    title = ht.first.to_html.ircify_html
+    arts = (h/"div.article")
+    return nil if arts.empty?
+    if arts.length > 1
+      tits = []
+      arts.each { |el|
+        # see if the div tag with generaltitle class is present
+        artitle = (el/"div.generaltitle").first
+        if artitle
+          tits << artitle.to_html.ircify_html
+          next
+        end
+        # otherwise, check for skin+datitle a tags
+        datitle = (el/"a.datitle").first
+        next unless datitle
+        skin = (el/"a.skin").first
+        artitle = [
+          skin ? skin.innerHTML.ircify_html : nil,
+          datitle.innerHTML.ircify_html
+        ].compact.join(" ")
+        tits << artitle
+      }
+      content = tits.join(" | ")
      else
-      slashdot m
+      det = (arts.first/"div.details").first.to_html.ircify_html
+      body = (arts.first/"div.body").first.to_html.ircify_html
+      content = [det, body].join(' ')
      end
+    return {:title => title, :content => content}
    end
-  
-  def search_slashdot(m, search, max=4)
-    begin
-      xml = @bot.httputil.get(URI.parse("http://slashdot.org/search.pl?content_type=rss&query=#{URI.escape(search)}"))
-    rescue URI::InvalidURIError, URI::BadURIError => e
-      m.reply "illegal search string #{search}"
-      return
+
+  def initialize
+    super
+    if defined? Hpricot
+      @bot.register_filter(:slashdot, :htmlinfo) { |s| slashdot_filter(s) }
      end
+  end
+
+  def search_slashdot(m, params)
+    max = params[:limit].to_i
+    search = params[:search].to_s
+
+    xml = @bot.httputil.get("http://slashdot.org/search.pl?content_type=rss&query=#{CGI.escape(search)}")
      unless xml
        m.reply "search for #{search} failed"
        return
      end
+    debug xml.inspect
      begin
        doc = Document.new xml
-    rescue REXML::ParseException => e
-      puts e
-      m.reply "couldn't parse output XML: #{e.class}"
+    rescue REXML::ParseException => err
+      warning err.inspect
+      m.reply "couldn't parse output XML: #{err.class}"
        return
      end
      unless doc
        m.reply "search for #{search} failed"
        return
      end
+    debug doc.inspect
      max = 8 if max > 8
      done = 0
      doc.elements.each("*/item") {|e|
        desc = e.elements["title"].text
        desc.gsub!(/(.{150}).*/, '\1..')
-      reply = sprintf("%s | %s", e.elements["link"].text, desc)
+      reply = sprintf("%s | %s", e.elements["link"].text, desc.ircify_html)
        m.reply reply
        done += 1
        break if done >= max
      }
+    unless done > 0
+      m.reply "search for #{search} failed"
+    end
    end
-  
-  def slashdot(m, max=4)
-    xml = @bot.httputil.get(URI.parse("http://slashdot.org/slashdot.xml"))
+
+  def slashdot(m, params)
+    debug params.inspect
+    max = params[:limit].to_i
+    debug "max is #{max}"
+    xml = @bot.httputil.get('http://slashdot.org/slashdot.xml')
      unless xml
        m.reply "slashdot news parse failed"
        return
@@ -76,12 +123,12 @@ class SlashdotPlugin < Plugin
      max = 8 if max > 8
      matches = Array.new
      doc.elements.each("*/story") {|e|
-      matches << [ e.elements["title"].text, 
-                   e.elements["author"].text, 
+      matches << [ e.elements["title"].text,
+                   e.elements["author"].text,
                     e.elements["time"].text.gsub(/\d{4}-(\d{2})-(\d{2})/, "\\2/\\1").gsub(/:\d\d$/, "") ]
        done += 1
        break if done >= max
-    } 
+    }
      if oneline
        m.reply matches.collect{|mat| mat[0]}.join(" | ")
      else
@@ -92,4 +139,7 @@ class SlashdotPlugin < Plugin
    end
  end
  plugin = SlashdotPlugin.new
-plugin.register("slashdot")
+plugin.map 'slashdot search :limit *search', :action => 'search_slashdot',
+           :defaults => {:limit => 4}, :requirements => {:limit => /^-?\d+$/}
+plugin.map 'slashdot :limit', :defaults => {:limit => 4},
+                              :requirements => {:limit => /^-?\d+$/}