markov: refactor triplet learning

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / freshmeat.rb
diff --git a/data/rbot/plugins/freshmeat.rb b/data/rbot/plugins/freshmeat.rb

index 20fa724800fb228a693e463608f1eef3dc230e71..3ae887d1714d6fe0a43efdb8db7e74c8c0027986 100644 (file)
--- a/data/rbot/plugins/freshmeat.rb
+++ b/data/rbot/plugins/freshmeat.rb
@@ -1,27 +1,61 @@
+#-- vim:sw=2:et
+#++
+#
+# :title: Freshmeat plugin for rbot
+
  require 'rexml/document'
-require 'uri/common'
  
  class FreshmeatPlugin < Plugin
    include REXML
    def help(plugin, topic="")
      "freshmeat search [<max>=4] <string> => search freshmeat for <string>, freshmeat [<max>=4] => return up to <max> freshmeat headlines"
    end
-  
+
+  REL_ENTRY = %r{<a href="/(release)s/(\d+)/"><font color="#000000">(.*?)</font></a>}
+  PRJ_ENTRY = %r{<a href="/(project)s/(\S+?)/"><b>(.*?)</b></a>}
+
+  # Filter for freshmeat.net pages, needed because the generic summarization
+  # grabs a comment, not the actual article. Returns nil when s fails the
+  # location check or no entry matches, else a Hash with :title and :content.
+  def freshmeat_filter(s)
+    loc = Utils.check_location(s, /freshmeat\.net/)
+    return nil unless loc  # presumably: s is not a freshmeat.net page
+    entries = []
+    s[:text].scan(/#{REL_ENTRY}|#{PRJ_ENTRY}/) { |m|
+      entry = {  # groups 1-3 belong to REL_ENTRY, 4-6 to PRJ_ENTRY
+        :type => ($1 || $4).dup,  # "release" or "project"
+        :code => ($2 || $5).dup,  # digits (release) or non-space token (project)
+        :name => ($3 || $6).dup  # text inside the link
+      }
+      entries << entry  # :type and :code are collected but not used below
+    }
+    return nil if entries.empty?
+    title = s[:text].ircify_html_title
+    content = entries.inject([]) { |l, e| l << e[:name] }.join(" | ")
+    return {:title => title, :content => content}
+  end
+
+  def initialize  # registers freshmeat_filter with the bot on construction
+    super  # presumably sets @bot, used on the next line — confirm in Plugin
+    @bot.register_filter(:freshmeat, :htmlinfo) { |s| freshmeat_filter(s) }
+  end
+
    def search_freshmeat(m, params)
      max = params[:limit].to_i
      search = params[:search].to_s
      max = 8 if max > 8
-    begin
-      xml = @bot.httputil.get(URI.parse("http://freshmeat.net/search-xml/?orderby=locate_projectname_full_DESC&q=#{URI.escape(search)}"))
-    rescue URI::InvalidURIError, URI::BadURIError => e
-      m.reply "illegal search string #{search}"
-      return
-    end
+    xml = @bot.httputil.get("http://freshmeat.net/search-xml/?orderby=locate_projectname_full_DESC&q=#{CGI.escape(search)}")
      unless xml
        m.reply "search for #{search} failed"
        return
      end
-    doc = Document.new xml
+    doc = nil
+    begin
+      doc = Document.new xml
+    rescue
+      debug xml
+      error $!
+    end
      unless doc
        m.reply "search for #{search} failed"
        return
@@ -54,27 +88,33 @@ class FreshmeatPlugin < Plugin
        m.reply reply
      }
    end
-  
+
    def freshmeat(m, params)
      max = params[:limit].to_i
      max = 8 if max > 8
-    xml = @bot.httputil.get(URI.parse("http://images.feedstermedia.com/feedcache/ostg/freshmeat/fm-releases-global.xml"))
-    unless xml
-      m.reply "freshmeat news parse failed"
-      return
-    end
-    doc = Document.new xml
-    unless doc
+    begin
+      xml = @bot.httputil.get('http://freshmeat.net/backend/fm-releases-global.xml')
+      unless xml
+        m.reply "freshmeat news parse failed"
+        return
+      end
+      doc = Document.new xml
+      unless doc
+        m.reply "freshmeat news parse failed"
+        return
+      end
+    rescue
        m.reply "freshmeat news parse failed"
        return
      end
+
      matches = Array.new
      max_width = 60
      title_width = 0
      done = 0
      doc.elements.each("*/channel/item") {|e|
-      desc = e.elements["description"].text
-      title = e.elements["title"].text
+      desc = e.elements["description"].text.ircify_html
+      title = e.elements["title"].text.ircify_html
        #title.gsub!(/\s+\(.*\)\s*$/, "")
        title.strip!
        title_width = title.length if title.length > title_width
@@ -94,5 +134,5 @@ end
  plugin = FreshmeatPlugin.new
  plugin.map 'freshmeat search :limit *search', :action => 'search_freshmeat',
              :defaults => {:limit => 4}, :requirements => {:limit => /^\d+$/}
-plugin.map 'freshmeat :limit', :defaults => {:limit => 4}, 
+plugin.map 'freshmeat :limit', :defaults => {:limit => 4},
                                 :requirements => {:limit => /^\d+$/}