data/rbot/plugins/slashdot.rb

   1 #-- vim:sw=2:et
   2 #++
   3 #
   4 # :title: Slashdot plugin for rbot
   5
   6 require 'rexml/document'
   7
   8 class SlashdotPlugin < Plugin
   9   include REXML
  10   def help(plugin, topic="")
  11     "slashdot search <string> [<max>=4] => search slashdot for <string>, slashdot [<max>=4] => return up to <max> slashdot headlines (use negative max to return that many headlines, but all on one line.)"
  12   end
  13
  14   # This method defines a filter for /. pages. It's needed because the generic
  15   # summarization grabs a comment, not the actual article.
  16   #
  17   # This filter relies on Hpricot being available, since REXML isn't too
  18   # happy with the /. pages
  19   def slashdot_filter(s)
  20     return nil unless defined? Hpricot
  21     loc = Utils.check_location(s, /slashdot\.org/)
  22     return nil unless loc
  23     h = Hpricot(s[:text])
  24     # If we have no title tag in a head tag, return as this is not
  25     # a /. page (it's probably a Slashdot RSS
  26     ht = h/"head/title"
  27     return nil if ht.empty?
  28     title = ht.first.to_html.ircify_html
  29     arts = (h/"div.article")
  30     return nil if arts.empty?
  31     if arts.length > 1
  32       tits = []
  33       arts.each { |el|
  34         # see if the div tag with generaltitle class is present
  35         artitle = (el/"div.generaltitle").first
  36         if artitle
  37           tits << artitle.to_html.ircify_html
  38           next
  39         end
  40         # otherwise, check for skin+datitle a tags
  41         datitle = (el/"a.datitle").first
  42         next unless datitle
  43         skin = (el/"a.skin").first
  44         artitle = [
  45           skin ? skin.innerHTML.ircify_html : nil,
  46           datitle.innerHTML.ircify_html
  47         ].compact.join(" ")
  48         tits << artitle
  49       }
  50       content = tits.join(" | ")
  51     else
  52       det = (arts.first/"div.details").first.to_html.ircify_html
  53       body = (arts.first/"div.body").first.to_html.ircify_html
  54       content = [det, body].join(' ')
  55     end
  56     return {:title => title, :content => content}
  57   end
  58
  59   def initialize
  60     super
  61     if defined? Hpricot
  62       @bot.register_filter(:slashdot, :htmlinfo) { |s| slashdot_filter(s) }
  63     end
  64   end
  65
  66   def search_slashdot(m, params)
  67     max = params[:limit].to_i
  68     search = params[:search].to_s
  69
  70     xml = @bot.httputil.get("http://slashdot.org/search.pl?content_type=rss&query=#{CGI.escape(search)}")
  71     unless xml
  72       m.reply "search for #{search} failed"
  73       return
  74     end
  75     debug xml.inspect
  76     begin
  77       doc = Document.new xml
  78     rescue REXML::ParseException => e
  79       warning e.inspect
  80       m.reply "couldn't parse output XML: #{e.class}"
  81       return
  82     end
  83     unless doc
  84       m.reply "search for #{search} failed"
  85       return
  86     end
  87     debug doc.inspect
  88     max = 8 if max > 8
  89     done = 0
  90     doc.elements.each("*/item") {|e|
  91       desc = e.elements["title"].text
  92       desc.gsub!(/(.{150}).*/, '\1..')
  93       reply = sprintf("%s | %s", e.elements["link"].text, desc.ircify_html)
  94       m.reply reply
  95       done += 1
  96       break if done >= max
  97     }
  98     unless done > 0
  99       m.reply "search for #{search} failed"
 100     end
 101   end
 102
 103   def slashdot(m, params)
 104     debug params.inspect
 105     max = params[:limit].to_i
 106     debug "max is #{max}"
 107     xml = @bot.httputil.get('http://slashdot.org/slashdot.xml')
 108     unless xml
 109       m.reply "slashdot news parse failed"
 110       return
 111     end
 112     doc = Document.new xml
 113     unless doc
 114       m.reply "slashdot news parse failed (invalid xml)"
 115       return
 116     end
 117     done = 0
 118     oneline = false
 119     if max < 0
 120       max = (0 - max)
 121       oneline = true
 122     end
 123     max = 8 if max > 8
 124     matches = Array.new
 125     doc.elements.each("*/story") {|e|
 126       matches << [ e.elements["title"].text,
 127                    e.elements["author"].text,
 128                    e.elements["time"].text.gsub(/\d{4}-(\d{2})-(\d{2})/, "\\2/\\1").gsub(/:\d\d$/, "") ]
 129       done += 1
 130       break if done >= max
 131     }
 132     if oneline
 133       m.reply matches.collect{|mat| mat[0]}.join(" | ")
 134     else
 135       matches.each {|mat|
 136         m.reply sprintf("%36s | %8s | %8s", mat[0][0,36], mat[1][0,8], mat[2])
 137       }
 138     end
 139   end
 140 end
 141 plugin = SlashdotPlugin.new
 142 plugin.map 'slashdot search :limit *search', :action => 'search_slashdot',
 143            :defaults => {:limit => 4}, :requirements => {:limit => /^-?\d+$/}
 144 plugin.map 'slashdot :limit', :defaults => {:limit => 4},
 145                               :requirements => {:limit => /^-?\d+$/}