X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Frss.rb;h=e276d0a44bc3343d208e1a19f8e34e6b37e28323;hb=c076cffc3616290badcc5e14aeb06cb291021a53;hp=636385a5a6afb9591855a989a0d4b22d1e4c82c0;hpb=aded97a496200b6b801bf3e1a56e1a34592af079;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 636385a5..e276d0a4 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1,18 +1,23 @@ +#-- vim:sw=2:et +#++ +# # RSS feed plugin for RubyBot # (c) 2004 Stanislav Karchebny # (c) 2005 Ian Monroe # (c) 2005 Mark Kretschmann +# (c) 2006 Giuseppe Bilotta +# # Licensed under MIT License. require 'rss/parser' require 'rss/1.0' require 'rss/2.0' require 'rss/dublincore' -begin - # require 'rss/dublincore/2.0' -rescue - warning "Unable to load RSS libraries, RSS plugin functionality crippled" -end +# begin +# require 'rss/dublincore/2.0' +# rescue +# warning "Unable to load RSS libraries, RSS plugin functionality crippled" +# end class ::String def shorten(limit) @@ -54,42 +59,60 @@ class ::RssBlob end def watched_by?(who) - @watchers.include?(who) + # We need to check bot 'who' itself and the String form, because rss + # watches added before the new Irc framework represented watchers as + # Strings whereas they are now Channels. + # + @watchers.include?(who) || @watchers.include?(who.to_s) end def add_watch(who) if watched_by?(who) return nil end - @watchers << who unless watched_by?(who) + # TODO FIXME? should we just store watchers as Strings instead? + # This should then be @watchers << who.downcase + @watchers << who return who end def rm_watch(who) + # See comment to watched_by? + # @watchers.delete(who) + @watchers.delete(who.to_s) + end + + def to_a + [@handle,@url,@type,@watchers] end - # def to_ary - # [@handle,@url,@type,@watchers] - # end + def to_s(watchers=false) + if watchers + a = self.to_a.flatten + else + a = self.to_a[0,3] + end + a.join(" | ") + end end class RSSFeedsPlugin < Plugin + BotConfig.register BotConfigIntegerValue.new('rss.head_max', + :default => 30, :validate => Proc.new{|v| v > 0 && v < 200}, + :desc => "How many characters to use of a RSS item header") + + BotConfig.register BotConfigIntegerValue.new('rss.text_max', + :default => 90, :validate => Proc.new{|v| v > 0 && v < 400}, + :desc => "How many characters to use of a RSS item text") + + BotConfig.register BotConfigIntegerValue.new('rss.thread_sleep', + :default => 300, :validate => Proc.new{|v| v > 30}, + :desc => "How many seconds to sleep before checking RSS feeds again") + @@watchThreads = Hash.new @@mutex = Mutex.new - # Keep a 1:1 relation between commands and handlers - @@handlers = { - "rss" => "handle_rss", - "addrss" => "handle_addrss", - "rmrss" => "handle_rmrss", - "rmwatch" => "handle_rmwatch", - "listrss" => "handle_listrss", - "listwatches" => "handle_listrsswatch", - "rewatch" => "handle_rewatch", - "watchrss" => "handle_watchrss", - } - def initialize super kill_threads @@ -98,7 +121,7 @@ class RSSFeedsPlugin < Plugin else @feeds = Hash.new end - handle_rewatch + rewatch_rss end def watchlist @@ -120,95 +143,149 @@ class RSSFeedsPlugin < Plugin debug "Killing thread for #{url}" thread.kill } - # @@watchThreads.each { |url, thread| - # debug "Joining on killed thread for #{url}" - # thread.join - # } @@watchThreads = Hash.new } end def help(plugin,topic="") - "RSS Reader: rss name [limit] => read a named feed [limit maximum posts, default 5], addrss [force] name url => add a feed, listrss => list all available feeds, rmrss name => remove the named feed, watchrss url [type] => watch a rss feed for changes (type may be 'amarokblog', 'amarokforum', 'mediawiki', 'gmame' or empty - it defines special formatting of feed items), rewatch => restart all rss watches, rmwatch url => stop watching for changes in url, listwatches => see a list of watched feeds" + case topic + when "show" + "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}; #{Bold}limit#{Bold} can also be in the form a..b, to display a specific range of items" + when "list" + "rss list [#{Bold}handle#{Bold}] : list all rss feeds (matching #{Bold}handle#{Bold})" + when "watched" + "rss watched [#{Bold}handle#{Bold}] : list all watched rss feeds (matching #{Bold}handle#{Bold})" + when "add" + "rss add #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : add a new rss called #{Bold}handle#{Bold} from url #{Bold}url#{Bold} (of type #{Bold}type#{Bold})" + when /^(del(ete)?|rm)$/ + "rss del(ete)|rm #{Bold}handle#{Bold} : delete rss feed #{Bold}handle#{Bold}" + when "replace" + "rss replace #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : try to replace the url of rss called #{Bold}handle#{Bold} with #{Bold}url#{Bold} (of type #{Bold}type#{Bold}); only works if nobody else is watching it" + when "forcereplace" + "rss forcereplace #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : replace the url of rss called #{Bold}handle#{Bold} with #{Bold}url#{Bold} (of type #{Bold}type#{Bold})" + when "watch" + "rss watch #{Bold}handle#{Bold} [#{Bold}url#{Bold} [#{Bold}type#{Bold}]] : watch rss #{Bold}handle#{Bold} for changes; when the other parameters are present, it will be created if it doesn't exist yet" + when /(un|rm)watch/ + "rss unwatch|rmwatch #{Bold}handle#{Bold} : stop watching rss #{Bold}handle#{Bold} for changes" + when "rewatch" + "rss rewatch : restart threads that watch for changes in watched rss" + else + "manage RSS feeds: rss show|list|watched|add|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch" + end end - def report_problem(report, m=nil) - if m - m.reply report - else - warning report - end + def report_problem(report, e=nil, m=nil) + if m && m.respond_to?(:reply) + m.reply report + else + warning report + end + if e + debug e.inspect + debug e.backtrace.join("\n") if e.respond_to?(:backtrace) + end end - def privmsg(m) - meth = self.method(@@handlers[m.plugin]) - meth.call(m) - end + def show_rss(m, params) + handle = params[:handle] + lims = params[:limit].to_s.match(/(\d+)(?:..(\d+))?/) + debug lims.to_a.inspect + if lims[2] + ll = [[lims[1].to_i-1,lims[2].to_i-1].min, 0].max + ul = [[lims[1].to_i-1,lims[2].to_i-1].max, 14].min + rev = lims[1].to_i > lims[2].to_i + else + ll = 0 + ul = [[lims[1].to_i-1, 1].max, 14].min + rev = false + end - def handle_rss(m) - unless m.params - m.reply("incorrect usage: " + help(m.plugin)) + feed = @feeds.fetch(handle, nil) + unless feed + m.reply "I don't know any feeds named #{handle}" return end - limit = 5 - if m.params =~ /\s+(\d+)$/ - limit = $1.to_i - if limit < 1 || limit > 15 - m.reply("weird, limit not in [1..15], reverting to default") - limit = 5 - end - m.params.gsub!(/\s+\d+$/, '') + + m.reply "lemme fetch it..." + title = items = nil + @@mutex.synchronize { + title, items = fetchRss(feed, m) + } + return unless items + + # We sort the feeds in freshness order (newer ones first) + items = freshness_sort(items) + disp = items[ll..ul] + disp.reverse! if rev + + m.reply "Channel : #{title}" + disp.each do |item| + printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true}) end + end - url = '' - if m.params =~ /^https?:\/\// - url = m.params - @@mutex.synchronize { - @feeds[url] = RssBlob.new(url) - feed = @feeds[url] + def itemDate(item,ex=nil) + return item.pubDate if item.respond_to?(:pubDate) + return item.date if item.respond_to?(:date) + return ex + end + + def freshness_sort(items) + notime = Time.at(0) + items.sort { |a, b| + itemDate(b, notime) <=> itemDate(a, notime) + } + end + + def list_rss(m, params) + wanted = params[:handle] + reply = String.new + @@mutex.synchronize { + @feeds.each { |handle, feed| + next if wanted and !handle.match(wanted) + reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" + (reply << " (watched)") if feed.watched_by?(m.replyto) + reply << "\n" } - else - feed = @feeds.fetch(m.params, nil) - unless feed - m.reply(m.params + "? what is that feed about?") - return - end + } + if reply.empty? + reply = "no feeds found" + reply << " matching #{wanted}" if wanted end + m.reply reply + end - m.reply("Please wait, querying...") - title = items = nil + def watched_rss(m, params) + wanted = params[:handle] + reply = String.new @@mutex.synchronize { - title, items = fetchRss(feed, m) + watchlist.each { |handle, feed| + next if wanted and !handle.match(wanted) + next unless feed.watched_by?(m.replyto) + reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})\n" + } } - return unless items - m.reply("Channel : #{title}") - # TODO: optional by-date sorting if dates present - items[0...limit].each do |item| - printRssItem(m.replyto,item) + if reply.empty? + reply = "no watched feeds" + reply << " matching #{wanted}" if wanted end + m.reply reply end - def handle_addrss(m) - unless m.params - m.reply "incorrect usage: " + help(m.plugin) + def add_rss(m, params, force=false) + handle = params[:handle] + url = params[:url] + unless url.match(/https?/) + m.reply "I only deal with feeds from HTTP sources, so I can't use #{url} (maybe you forgot the handle?)" return end - if m.params =~ /^force / - forced = true - m.params.gsub!(/^force /, '') - end - feed = m.params.scan(/\S+/) - if feed.nil? or feed.length < 2 - m.reply("incorrect usage: " + help(m.plugin)) + type = params[:type] + if @feeds.fetch(handle, nil) && !force + m.reply "There is already a feed named #{handle} (URL: #{@feeds[handle].url})" return end - handle = feed[0] - handle.gsub!("|", '_') - url = feed[1] - type = feed[2] || nil - debug "Handle: #{handle.inspect}, Url: #{url.inspect}, Type: #{type.inspect}" - if @feeds.fetch(handle, nil) && !forced - m.reply("But there is already a feed named #{handle} with url #{@feeds[handle].url}") + unless url + m.reply "You must specify both a handle and an url to add an RSS feed" return end @@mutex.synchronize { @@ -222,8 +299,8 @@ class RSSFeedsPlugin < Plugin return handle end - def handle_rmrss(m) - feed = handle_rmwatch(m, true) + def del_rss(m, params, pass=false) + feed = unwatch_rss(m, params, true) if feed.watched? m.reply "someone else is watching #{feed.handle}, I won't remove it from my list" return @@ -231,16 +308,53 @@ class RSSFeedsPlugin < Plugin @@mutex.synchronize { @feeds.delete(feed.handle) } - m.okay + m.okay unless pass return end - def handle_rmwatch(m,pass=false) - unless m.params - m.reply "incorrect usage: " + help(m.plugin) - return + def replace_rss(m, params) + handle = params[:handle] + if @feeds.key?(handle) + del_rss(m, {:handle => handle}, true) + end + if @feeds.key?(handle) + m.reply "can't replace #{feed.handle}" + else + add_rss(m, params, true) + end + end + + def forcereplace_rss(m, params) + add_rss(m, params, true) + end + + def watch_rss(m, params) + handle = params[:handle] + url = params[:url] + type = params[:type] + if url + add_rss(m, params) + end + feed = nil + @@mutex.synchronize { + feed = @feeds.fetch(handle, nil) + } + if feed + @@mutex.synchronize { + if feed.add_watch(m.replyto) + watchRss(feed, m) + m.okay + else + m.reply "Already watching #{feed.handle}" + end + } + else + m.reply "Couldn't watch feed #{handle} (no such feed found)" end - handle = m.params + end + + def unwatch_rss(m, params, pass=false) + handle = params[:handle] unless @feeds.has_key?(handle) m.reply("dunno that feed") return @@ -263,37 +377,7 @@ class RSSFeedsPlugin < Plugin return feed end - def handle_listrss(m) - reply = '' - if @feeds.length == 0 - reply = "No feeds yet." - else - @@mutex.synchronize { - @feeds.each { |handle, feed| - reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" - (reply << " (watched)") if feed.watched_by?(m.replyto) - reply << "\n" - debug reply - } - } - end - m.reply reply - end - - def handle_listrsswatch(m) - reply = '' - if watchlist.length == 0 - reply = "No watched feeds yet." - else - watchlist.each { |handle, feed| - (reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})\n") if feed.watched_by?(m.replyto) - debug reply - } - end - m.reply reply - end - - def handle_rewatch(m=nil) + def rewatch_rss(m=nil, params=nil) kill_threads # Read watches from list. @@ -303,112 +387,112 @@ class RSSFeedsPlugin < Plugin m.okay if m end - def handle_watchrss(m) - unless m.params - m.reply "incorrect usage: " + help(m.plugin) - return - end - if m.params =~ /\s+/ - handle = handle_addrss(m) - else - handle = m.params - end - feed = nil - @@mutex.synchronize { - feed = @feeds.fetch(handle, nil) - } - if feed - @@mutex.synchronize { - if feed.add_watch(m.replyto) - watchRss(feed, m) - m.okay - else - m.reply "Already watching #{feed.handle}" - end - } - else - m.reply "Couldn't watch feed #{handle} (no such feed found)" - end - end - private def watchRss(feed, m=nil) if @@watchThreads.has_key?(feed.handle) - report_problem("watcher thread for #{feed.handle} is already running", m) + report_problem("watcher thread for #{feed.handle} is already running", nil, m) return end @@watchThreads[feed.handle] = Thread.new do - debug 'watchRss thread started.' + debug "watcher for #{feed} started" oldItems = [] firstRun = true + failures = 0 loop do begin - debug 'Fetching rss feed...' + debug "fetching #{feed}" title = newItems = nil @@mutex.synchronize { title, newItems = fetchRss(feed) } unless newItems - m.reply "no items in feed" - break - end - debug "Checking if new items are available" - if firstRun - debug "First run, we'll see next time" - firstRun = false + debug "no items in feed #{feed}" + failures +=1 else - otxt = oldItems.map { |item| item.to_s } - dispItems = newItems.reject { |item| - otxt.include?(item.to_s) - } - if dispItems.length > 0 - debug "Found #{dispItems.length} new items" - dispItems.each { |item| - debug "showing #{item.title}" - @@mutex.synchronize { - printFormattedRss(feed.watchers, item, feed.type) - } - } + debug "Checking if new items are available for #{feed}" + if firstRun + debug "First run, we'll see next time" + firstRun = false else - debug "No new items found" + otxt = oldItems.map { |item| item.to_s } + dispItems = newItems.reject { |item| + otxt.include?(item.to_s) + } + if dispItems.length > 0 + debug "Found #{dispItems.length} new items in #{feed}" + # When displaying watched feeds, publish them from older to newer + dispItems.reverse.each { |item| + @@mutex.synchronize { + printFormattedRss(feed, item) + } + } + else + debug "No new items found in #{feed}" + end end + oldItems = newItems.dup end - oldItems = newItems.dup rescue Exception => e - error "IO failed: #{e.inspect}" + error "Error watching #{feed}: #{e.inspect}" debug e.backtrace.join("\n") + failures += 1 end - seconds = 150 + rand(100) - debug "Thread going to sleep #{seconds} seconds.." + seconds = @bot.config['rss.thread_sleep'] * (failures + 1) + seconds += seconds * (rand(100)-50)/100 + debug "watcher for #{feed} going to sleep #{seconds} seconds.." sleep seconds end end end - def printRssItem(loc,item) - if item.kind_of?(RSS::RDF::Item) - @bot.say loc, item.title.chomp.riphtml.shorten(20) + " @ " + item.link - else - @bot.say loc, "#{item.pubDate.to_s.chomp+": " if item.pubDate}#{item.title.chomp.riphtml.shorten(20)+" :: " if item.title}#{" @ "+item.link.chomp if item.link}" - end - end - - def printFormattedRss(locs, item, type) - locs.each { |loc| - case type - when 'amarokblog' - @bot.say loc, "::#{item.category.content} just blogged at #{item.link}::" - @bot.say loc, "::#{item.title.chomp.riphtml} - #{item.description.chomp.riphtml.shorten(60)}::" - when 'amarokforum' - @bot.say loc, "::Forum:: #{item.pubDate.to_s.chomp+": " if item.pubDate}#{item.title.chomp.riphtml+" :: " if item.title}#{" @ "+item.link.chomp if item.link}" - when 'mediawiki' - @bot.say loc, "::Wiki:: #{item.title} has been edited by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(60)} #{item.link} ::" - debug "mediawiki #{item.title}" - when "gmame" - @bot.say loc, "::amarok-devel:: Message #{item.title} sent by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(60)}::" + def printFormattedRss(feed, item, opts=nil) + places = feed.watchers + handle = "::#{feed.handle}:: " + date = String.new + if opts + places = opts[:places] if opts.key?(:places) + handle = opts[:handle].to_s if opts.key?(:handle) + if opts.key?(:date) && opts[:date] + if item.respond_to?(:pubDate) + if item.pubDate.class <= Time + date = item.pubDate.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.pubDate.to_s + end + elsif item.respond_to?(:date) + if item.date.class <= Time + date = item.date.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.date.to_s + end + else + date = "(no date)" + end + date += " :: " + end + end + title = "#{Bold}#{item.title.chomp.riphtml}#{Bold}" if item.title + desc = item.description.gsub(/\s+/,' ').strip.riphtml.shorten(@bot.config['rss.text_max']) if item.description + link = item.link.chomp if item.link + places.each { |loc| + case feed.type + when 'blog' + @bot.say loc, "#{handle}#{date}#{item.category.content} blogged at #{link}" + @bot.say loc, "#{handle}#{title} - #{desc}" + when 'forum' + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" + when 'wiki' + @bot.say loc, "#{handle}#{date}#{item.title} has been edited by #{item.dc_creator}. #{desc} #{link}" + when 'gmame' + @bot.say loc, "#{handle}#{date}Message #{title} sent by #{item.dc_creator}. #{desc}" + when 'trac' + @bot.say loc, "#{handle}#{date}#{title} @ #{link}" + unless item.title =~ /^Changeset \[(\d+)\]/ + @bot.say loc, "#{handle}#{date}#{desc}" + end else - printRssItem(loc,item) + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" end } end @@ -416,46 +500,49 @@ class RSSFeedsPlugin < Plugin def fetchRss(feed, m=nil) begin # Use 60 sec timeout, cause the default is too low - xml = @bot.httputil.get_cached(feed.url,60,60) + # Do not use get_cached for RSS until we have proper cache handling + # xml = @bot.httputil.get_cached(feed.url,60,60) + xml = @bot.httputil.get(feed.url,60,60) rescue URI::InvalidURIError, URI::BadURIError => e - report_problem("invalid rss feed #{feed.url}", m) + report_problem("invalid rss feed #{feed.url}", e, m) + return + rescue => e + report_problem("error getting #{feed.url}", e, m) return end - debug 'fetched' + debug "fetched #{feed}" unless xml - report_problem("reading feed #{url} failed", m) + report_problem("reading feed #{feed} failed", nil, m) return end begin ## do validate parse rss = RSS::Parser.parse(xml) - debug 'parsed' + debug "parsed #{feed}" rescue RSS::InvalidRSSError ## do non validate parse for invalid RSS 1.0 begin rss = RSS::Parser.parse(xml, false) - rescue RSS::Error - report_problem("parsing rss stream failed, whoops =(", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, whoops =(", e, m) return end - rescue RSS::Error - report_problem("parsing rss stream failed, oioi", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, oioi", e, m) return rescue => e - report_problem("processing error occured, sorry =(", m) - debug e.inspect - debug e.backtrace.join("\n") + report_problem("processing error occured, sorry =(", e, m) return end items = [] if rss.nil? - report_problem("#{feed.url} does not include RSS 1.0 or 0.9x/2.0",m) + report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m) else begin - rss.output_encoding = "euc-jp" - rescue RSS::UnknownConvertMethod - report_problem("bah! something went wrong =(",m) + rss.output_encoding = 'UTF-8' + rescue RSS::UnknownConvertMethod => e + report_problem("bah! something went wrong =(", e, m) return end rss.channel.title ||= "Unknown" @@ -467,7 +554,7 @@ class RSSFeedsPlugin < Plugin end if items.empty? - report_problem("no items found in the feed, maybe try weed?",m) + report_problem("no items found in the feed, maybe try weed?", e, m) return end return [title, items] @@ -475,12 +562,39 @@ class RSSFeedsPlugin < Plugin end plugin = RSSFeedsPlugin.new -plugin.register("rss") -plugin.register("addrss") -plugin.register("rmrss") -plugin.register("rmwatch") -plugin.register("listrss") -plugin.register("rewatch") -plugin.register("watchrss") -plugin.register("listwatches") + +plugin.map 'rss show :handle :limit', + :action => 'show_rss', + :requirements => {:limit => /^\d+(?:\.\.\d+)?$/}, + :defaults => {:limit => 5} +plugin.map 'rss list :handle', + :action => 'list_rss', + :defaults => {:handle => nil} +plugin.map 'rss watched :handle', + :action => 'watched_rss', + :defaults => {:handle => nil} +plugin.map 'rss add :handle :url :type', + :action => 'add_rss', + :defaults => {:type => nil} +plugin.map 'rss del :handle', + :action => 'del_rss' +plugin.map 'rss delete :handle', + :action => 'del_rss' +plugin.map 'rss rm :handle', + :action => 'del_rss' +plugin.map 'rss replace :handle :url :type', + :action => 'replace_rss', + :defaults => {:type => nil} +plugin.map 'rss forcereplace :handle :url :type', + :action => 'forcereplace_rss', + :defaults => {:type => nil} +plugin.map 'rss watch :handle :url :type', + :action => 'watch_rss', + :defaults => {:url => nil, :type => nil} +plugin.map 'rss unwatch :handle', + :action => 'unwatch_rss' +plugin.map 'rss rmwatch :handle', + :action => 'unwatch_rss' +plugin.map 'rss rewatch', + :action => 'rewatch_rss'