X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Frss.rb;h=13689bf14efdfeb63136de3a437ecbd64a71a6a3;hb=9d29f400bb3a354779185d61049ce7cdfa7744ee;hp=0e6e5530730a06d104eaf8ed6f06943797d251f5;hpb=a325446ce0a058d4e561aabcff11ceb0ad3e3cf1;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 0e6e5530..13689bf1 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1,18 +1,23 @@ +#-- vim:sw=2:et +#++ +# # RSS feed plugin for RubyBot # (c) 2004 Stanislav Karchebny # (c) 2005 Ian Monroe # (c) 2005 Mark Kretschmann +# (c) 2006 Giuseppe Bilotta +# # Licensed under MIT License. require 'rss/parser' require 'rss/1.0' require 'rss/2.0' require 'rss/dublincore' -begin - # require 'rss/dublincore/2.0' -rescue - warning "Unable to load RSS libraries, RSS plugin functionality crippled" -end +# begin +# require 'rss/dublincore/2.0' +# rescue +# warning "Unable to load RSS libraries, RSS plugin functionality crippled" +# end class ::String def shorten(limit) @@ -26,10 +31,6 @@ class ::String def riphtml self.gsub(/<[^>]+>/, '').gsub(/&/,'&').gsub(/"/,'"').gsub(/</,'<').gsub(/>/,'>').gsub(/&ellip;/,'...').gsub(/'/, "'").gsub("\n",'') end - - def mysqlize - self.gsub(/'/, "''") - end end class ::RssBlob @@ -46,7 +47,17 @@ class ::RssBlob @handle = url end @type = type - @watchers = watchers + @watchers=[] + sanitize_watchers(watchers) + end + + # Downcase all watchers, possibly turning them into Strings if they weren't + def sanitize_watchers(list=@watchers) + ls = list.dup + @watchers.clear + ls.each { |w| + add_watch(w) + } end def watched? @@ -54,68 +65,104 @@ class ::RssBlob end def watched_by?(who) - @watchers.include?(who) + @watchers.include?(who.downcase) end def add_watch(who) if watched_by?(who) return nil end - @watchers << who unless watched_by?(who) + @watchers << who.downcase return who end def rm_watch(who) - @watchers.delete(who) + @watchers.delete(who.downcase) + end + + def to_a + [@handle,@url,@type,@watchers] end - # def to_ary - # [@handle,@url,@type,@watchers] - # end + def to_s(watchers=false) + if watchers + a = self.to_a.flatten + else + a = self.to_a[0,3] + end + a.compact.join(" | ") + end end class RSSFeedsPlugin < Plugin - @@watchThreads = Hash.new - @@mutex = Mutex.new + BotConfig.register BotConfigIntegerValue.new('rss.head_max', + :default => 30, :validate => Proc.new{|v| v > 0 && v < 200}, + :desc => "How many characters to use of a RSS item header") + + BotConfig.register BotConfigIntegerValue.new('rss.text_max', + :default => 90, :validate => Proc.new{|v| v > 0 && v < 400}, + :desc => "How many characters to use of a RSS item text") + + BotConfig.register BotConfigIntegerValue.new('rss.thread_sleep', + :default => 300, :validate => Proc.new{|v| v > 30}, + :desc => "How many seconds to sleep before checking RSS feeds again") def initialize super - kill_threads if @registry.has_key?(:feeds) @feeds = @registry[:feeds] + @feeds.keys.grep(/[A-Z]/) { |k| + @feeds[k.downcase] = @feeds[k] + @feeds.delete(k) + } + @feeds.each { |k, f| + f.sanitize_watchers + } else @feeds = Hash.new end + @watch = Hash.new rewatch_rss end + def name + "rss" + end + def watchlist @feeds.select { |h, f| f.watched? } end def cleanup - kill_threads + stop_watches end def save @registry[:feeds] = @feeds end - def kill_threads - @@mutex.synchronize { - # Abort all running threads. - @@watchThreads.each { |url, thread| - debug "Killing thread for #{url}" - thread.kill - } - @@watchThreads = Hash.new + def stop_watch(handle) + if @watch.has_key?(handle) + begin + debug "Stopping watch #{handle}" + @bot.timer.remove(@watch[handle]) + @watch.delete(handle) + rescue => e + report_problem("Failed to stop watch for #{handle}", e, nil) + end + end + end + + def stop_watches + @watch.each_key { |k| + stop_watch(k) } end def help(plugin,topic="") case topic when "show" - "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}" + "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}; #{Bold}limit#{Bold} can also be in the form a..b, to display a specific range of items" when "list" "rss list [#{Bold}handle#{Bold}] : list all rss feeds (matching #{Bold}handle#{Bold})" when "watched" @@ -139,47 +186,75 @@ class RSSFeedsPlugin < Plugin end end - def report_problem(report, m=nil) - if m + def report_problem(report, e=nil, m=nil) + if m && m.respond_to?(:reply) m.reply report else warning report end + if e + debug e.inspect + debug e.backtrace.join("\n") if e.respond_to?(:backtrace) + end end def show_rss(m, params) handle = params[:handle] - limit = params[:limit].to_i - limit = 15 if limit > 15 - limit = 1 if limit <= 0 - feed = @feeds.fetch(handle, nil) + lims = params[:limit].to_s.match(/(\d+)(?:..(\d+))?/) + debug lims.to_a.inspect + if lims[2] + ll = [[lims[1].to_i-1,lims[2].to_i-1].min, 0].max + ul = [[lims[1].to_i-1,lims[2].to_i-1].max, 14].min + rev = lims[1].to_i > lims[2].to_i + else + ll = 0 + ul = [[lims[1].to_i-1, 0].max, 14].min + rev = false + end + + feed = @feeds.fetch(handle.downcase, nil) unless feed m.reply "I don't know any feeds named #{handle}" return end - m.reply("Please wait, querying...") + + m.reply "lemme fetch it..." title = items = nil - @@mutex.synchronize { - title, items = fetchRss(feed, m) - } + title, items = fetchRss(feed, m) return unless items - m.reply("Channel : #{title}") - # TODO: optional by-date sorting if dates present - items[0...limit].reverse.each do |item| - printRssItem(m.replyto,item) + + # We sort the feeds in freshness order (newer ones first) + items = freshness_sort(items) + disp = items[ll..ul] + disp.reverse! if rev + + m.reply "Channel : #{title}" + disp.each do |item| + printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true}) end end + def itemDate(item,ex=nil) + return item.pubDate if item.respond_to?(:pubDate) and item.pubDate + return item.date if item.respond_to?(:date) and item.date + return ex + end + + def freshness_sort(items) + notime = Time.at(0) + items.sort { |a, b| + itemDate(b, notime) <=> itemDate(a, notime) + } + end + def list_rss(m, params) wanted = params[:handle] reply = String.new - @@mutex.synchronize { - @feeds.each { |handle, feed| - next if wanted and !handle.match(wanted) - reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" - (reply << " (watched)") if feed.watched_by?(m.replyto) - reply << "\n" - } + @feeds.each { |handle, feed| + next if wanted and !handle.match(/#{wanted}/i) + reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" + (reply << " (watched)") if feed.watched_by?(m.replyto) + reply << "\n" } if reply.empty? reply = "no feeds found" @@ -191,12 +266,10 @@ class RSSFeedsPlugin < Plugin def watched_rss(m, params) wanted = params[:handle] reply = String.new - @@mutex.synchronize { - watchlist.each { |handle, feed| - next if wanted and !handle.match(wanted) - next unless feed.watched_by?(m.replyto) - reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})\n" - } + watchlist.each { |handle, feed| + next if wanted and !handle.match(/#{wanted}/i) + next unless feed.watched_by?(m.replyto) + reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})\n" } if reply.empty? reply = "no watched feeds" @@ -208,18 +281,20 @@ class RSSFeedsPlugin < Plugin def add_rss(m, params, force=false) handle = params[:handle] url = params[:url] + unless url.match(/https?/) + m.reply "I only deal with feeds from HTTP sources, so I can't use #{url} (maybe you forgot the handle?)" + return + end type = params[:type] - if @feeds.fetch(handle, nil) && !force - m.reply "There is already a feed named #{handle} (URL: #{@feeds[handle].url})" + if @feeds.fetch(handle.downcase, nil) && !force + m.reply "There is already a feed named #{handle} (URL: #{@feeds[handle.downcase].url})" return end unless url m.reply "You must specify both a handle and an url to add an RSS feed" return end - @@mutex.synchronize { - @feeds[handle] = RssBlob.new(url,handle,type) - } + @feeds[handle.downcase] = RssBlob.new(url,handle,type) reply = "Added RSS #{url} named #{handle}" if type reply << " (format: #{type})" @@ -234,19 +309,17 @@ class RSSFeedsPlugin < Plugin m.reply "someone else is watching #{feed.handle}, I won't remove it from my list" return end - @@mutex.synchronize { - @feeds.delete(feed.handle) - } + @feeds.delete(feed.handle.downcase) m.okay unless pass return end def replace_rss(m, params) handle = params[:handle] - if @feeds.key?(handle) + if @feeds.key?(handle.downcase) del_rss(m, {:handle => handle}, true) end - if @feeds.key?(handle) + if @feeds.key?(handle.downcase) m.reply "can't replace #{feed.handle}" else add_rss(m, params, true) @@ -264,26 +337,21 @@ class RSSFeedsPlugin < Plugin if url add_rss(m, params) end - feed = nil - @@mutex.synchronize { - feed = @feeds.fetch(handle, nil) - } + feed = @feeds.fetch(handle.downcase, nil) if feed - @@mutex.synchronize { - if feed.add_watch(m.replyto) - watchRss(feed, m) - m.okay - else - m.reply "Already watching #{feed.handle}" - end - } + if feed.add_watch(m.replyto) + watchRss(feed, m) + m.okay + else + m.reply "Already watching #{feed.handle}" + end else m.reply "Couldn't watch feed #{handle} (no such feed found)" end end def unwatch_rss(m, params, pass=false) - handle = params[:handle] + handle = params[:handle].downcase unless @feeds.has_key?(handle) m.reply("dunno that feed") return @@ -295,19 +363,13 @@ class RSSFeedsPlugin < Plugin m.reply("#{m.replyto} wasn't watching #{feed.handle}") unless pass end if !feed.watched? - @@mutex.synchronize { - if @@watchThreads[handle].kind_of? Thread - @@watchThreads[handle].kill - debug "rmwatch: Killed thread for #{handle}" - @@watchThreads.delete(handle) - end - } + stop_watch(handle) end return feed end - def rewatch_rss(m=nil) - kill_threads + def rewatch_rss(m=nil, params=nil) + stop_watches # Read watches from list. watchlist.each{ |handle, feed| @@ -318,26 +380,28 @@ class RSSFeedsPlugin < Plugin private def watchRss(feed, m=nil) - if @@watchThreads.has_key?(feed.handle) - report_problem("watcher thread for #{feed.handle} is already running", m) + if @watch.has_key?(feed.handle) + report_problem("watcher thread for #{feed.handle} is already running", nil, m) return end - @@watchThreads[feed.handle] = Thread.new do - debug 'watchRss thread started.' - oldItems = [] - firstRun = true - loop do - begin - debug 'Fetching rss feed...' - title = newItems = nil - @@mutex.synchronize { - title, newItems = fetchRss(feed) - } - unless newItems - m.reply "no items in feed" - break - end - debug "Checking if new items are available" + status = Hash.new + status[:oldItems] = [] + status[:firstRun] = true + status[:failures] = 0 + @watch[feed.handle] = @bot.timer.add(0, status) { + debug "watcher for #{feed} started" + oldItems = status[:oldItems] + firstRun = status[:firstRun] + failures = status[:failures] + begin + debug "fetching #{feed}" + title = newItems = nil + title, newItems = fetchRss(feed) + unless newItems + debug "no items in feed #{feed}" + failures +=1 + else + debug "Checking if new items are available for #{feed}" if firstRun debug "First run, we'll see next time" firstRun = false @@ -347,53 +411,82 @@ class RSSFeedsPlugin < Plugin otxt.include?(item.to_s) } if dispItems.length > 0 - debug "Found #{dispItems.length} new items" - dispItems.each { |item| - debug "showing #{item.title}" - @@mutex.synchronize { - printFormattedRss(feed.watchers, item, feed.type) - } + debug "Found #{dispItems.length} new items in #{feed}" + # When displaying watched feeds, publish them from older to newer + dispItems.reverse.each { |item| + printFormattedRss(feed, item) } else - debug "No new items found" + debug "No new items found in #{feed}" end end oldItems = newItems.dup - rescue Exception => e - error "IO failed: #{e.inspect}" - debug e.backtrace.join("\n") end - - seconds = 150 + rand(100) - debug "Thread going to sleep #{seconds} seconds.." - sleep seconds + rescue Exception => e + error "Error watching #{feed}: #{e.inspect}" + debug e.backtrace.join("\n") + failures += 1 end - end + + status[:oldItems] = oldItems + status[:firstRun] = firstRun + status[:failures] = failures + + seconds = @bot.config['rss.thread_sleep'] * (failures + 1) + seconds += seconds * (rand(100)-50)/100 + debug "watcher for #{feed} going to sleep #{seconds} seconds.." + @bot.timer.reschedule(@watch[feed.handle], seconds) + } + debug "watcher for #{feed} added" end - def printRssItem(loc,item) - if item.kind_of?(RSS::RDF::Item) - @bot.say loc, item.title.chomp.riphtml.shorten(20) + " @ " + item.link - else - @bot.say loc, "#{item.pubDate.to_s.chomp+": " if item.pubDate}#{item.title.chomp.riphtml.shorten(20)+" :: " if item.title}#{" @ "+item.link.chomp if item.link}" - end - end - - def printFormattedRss(locs, item, type) - locs.each { |loc| - case type - when 'amarokblog' - @bot.say loc, "::#{item.category.content} just blogged at #{item.link}::" - @bot.say loc, "::#{item.title.chomp.riphtml} - #{item.description.chomp.riphtml.shorten(60)}::" - when 'amarokforum' - @bot.say loc, "::Forum:: #{item.pubDate.to_s.chomp+": " if item.pubDate}#{item.title.chomp.riphtml+" :: " if item.title}#{" @ "+item.link.chomp if item.link}" - when 'mediawiki' - @bot.say loc, "::Wiki:: #{item.title} has been edited by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(60)} #{item.link} ::" - debug "mediawiki #{item.title}" - when "gmame" - @bot.say loc, "::amarok-devel:: Message #{item.title} sent by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(60)}::" + def printFormattedRss(feed, item, opts=nil) + places = feed.watchers + handle = "::#{feed.handle}:: " + date = String.new + if opts + places = opts[:places] if opts.key?(:places) + handle = opts[:handle].to_s if opts.key?(:handle) + if opts.key?(:date) && opts[:date] + if item.respond_to?(:pubDate) + if item.pubDate.class <= Time + date = item.pubDate.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.pubDate.to_s + end + elsif item.respond_to?(:date) + if item.date.class <= Time + date = item.date.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.date.to_s + end + else + date = "(no date)" + end + date += " :: " + end + end + title = "#{Bold}#{item.title.chomp.riphtml}#{Bold}" if item.title + desc = item.description.gsub(/\s+/,' ').strip.riphtml.shorten(@bot.config['rss.text_max']) if item.description + link = item.link.chomp if item.link + places.each { |loc| + case feed.type + when 'blog' + @bot.say loc, "#{handle}#{date}#{item.category.content} blogged at #{link}" + @bot.say loc, "#{handle}#{title} - #{desc}" + when 'forum' + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" + when 'wiki' + @bot.say loc, "#{handle}#{date}#{item.title} has been edited by #{item.dc_creator}. #{desc} #{link}" + when 'gmame' + @bot.say loc, "#{handle}#{date}Message #{title} sent by #{item.dc_creator}. #{desc}" + when 'trac' + @bot.say loc, "#{handle}#{date}#{title} @ #{link}" + unless item.title =~ /^Changeset \[(\d+)\]/ + @bot.say loc, "#{handle}#{date}#{desc}" + end else - printRssItem(loc,item) + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" end } end @@ -401,46 +494,47 @@ class RSSFeedsPlugin < Plugin def fetchRss(feed, m=nil) begin # Use 60 sec timeout, cause the default is too low - xml = @bot.httputil.get_cached(feed.url,60,60) + xml = @bot.httputil.get_cached(feed.url, 60, 60) rescue URI::InvalidURIError, URI::BadURIError => e - report_problem("invalid rss feed #{feed.url}", m) + report_problem("invalid rss feed #{feed.url}", e, m) + return + rescue => e + report_problem("error getting #{feed.url}", e, m) return end - debug 'fetched' + debug "fetched #{feed}" unless xml - report_problem("reading feed #{url} failed", m) + report_problem("reading feed #{feed} failed", nil, m) return end begin ## do validate parse rss = RSS::Parser.parse(xml) - debug 'parsed' + debug "parsed #{feed}" rescue RSS::InvalidRSSError ## do non validate parse for invalid RSS 1.0 begin rss = RSS::Parser.parse(xml, false) - rescue RSS::Error - report_problem("parsing rss stream failed, whoops =(", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, whoops =(", e, m) return end - rescue RSS::Error - report_problem("parsing rss stream failed, oioi", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, oioi", e, m) return rescue => e - report_problem("processing error occured, sorry =(", m) - debug e.inspect - debug e.backtrace.join("\n") + report_problem("processing error occured, sorry =(", e, m) return end items = [] if rss.nil? - report_problem("#{feed.url} does not include RSS 1.0 or 0.9x/2.0",m) + report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m) else begin - rss.output_encoding = "euc-jp" - rescue RSS::UnknownConvertMethod - report_problem("bah! something went wrong =(",m) + rss.output_encoding = 'UTF-8' + rescue RSS::UnknownConvertMethod => e + report_problem("bah! something went wrong =(", e, m) return end rss.channel.title ||= "Unknown" @@ -452,7 +546,7 @@ class RSSFeedsPlugin < Plugin end if items.empty? - report_problem("no items found in the feed, maybe try weed?",m) + report_problem("no items found in the feed, maybe try weed?", e, m) return end return [title, items] @@ -463,7 +557,7 @@ plugin = RSSFeedsPlugin.new plugin.map 'rss show :handle :limit', :action => 'show_rss', - :requirements => {:limit => /^\d+$/}, + :requirements => {:limit => /^\d+(?:\.\.\d+)?$/}, :defaults => {:limit => 5} plugin.map 'rss list :handle', :action => 'list_rss', @@ -493,5 +587,6 @@ plugin.map 'rss unwatch :handle', :action => 'unwatch_rss' plugin.map 'rss rmwatch :handle', :action => 'unwatch_rss' -plugin.map 'rss rewatch :handle', +plugin.map 'rss rewatch', :action => 'rewatch_rss' +