X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Frss.rb;h=7313f252236d2507691fa075d64b834bf477b9a6;hb=b565bf81cbe456205b9f9fcf9e3960109c5b7de6;hp=af7901275b2e5a211552a63bd88c0efbc2ef881c;hpb=87e3751b3cbfc0de98a0b0b2dd44943e696b9eb9;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index af790127..7313f252 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1,18 +1,23 @@ +#-- vim:sw=2:et +#++ +# # RSS feed plugin for RubyBot # (c) 2004 Stanislav Karchebny # (c) 2005 Ian Monroe # (c) 2005 Mark Kretschmann +# (c) 2006 Giuseppe Bilotta +# # Licensed under MIT License. require 'rss/parser' require 'rss/1.0' require 'rss/2.0' require 'rss/dublincore' -begin - # require 'rss/dublincore/2.0' -rescue - warning "Unable to load RSS libraries, RSS plugin functionality crippled" -end +# begin +# require 'rss/dublincore/2.0' +# rescue +# warning "Unable to load RSS libraries, RSS plugin functionality crippled" +# end class ::String def shorten(limit) @@ -54,19 +59,28 @@ class ::RssBlob end def watched_by?(who) - @watchers.include?(who) + # We need to check bot 'who' itself and the String form, because rss + # watches added before the new Irc framework represented watchers as + # Strings whereas they are now Channels. + # + @watchers.include?(who) || @watchers.include?(who.to_s) end def add_watch(who) if watched_by?(who) return nil end - @watchers << who unless watched_by?(who) + # TODO FIXME? should we just store watchers as Strings instead? + # This should then be @watchers << who.downcase + @watchers << who return who end def rm_watch(who) + # See comment to watched_by? + # @watchers.delete(who) + @watchers.delete(who.to_s) end def to_a @@ -92,6 +106,10 @@ class RSSFeedsPlugin < Plugin :default => 90, :validate => Proc.new{|v| v > 0 && v < 400}, :desc => "How many characters to use of a RSS item text") + BotConfig.register BotConfigIntegerValue.new('rss.thread_sleep', + :default => 300, :validate => Proc.new{|v| v > 30}, + :desc => "How many seconds to sleep before checking RSS feeds again") + @@watchThreads = Hash.new @@mutex = Mutex.new @@ -106,6 +124,10 @@ class RSSFeedsPlugin < Plugin rewatch_rss end + def name + "rss" + end + def watchlist @feeds.select { |h, f| f.watched? } end @@ -132,7 +154,7 @@ class RSSFeedsPlugin < Plugin def help(plugin,topic="") case topic when "show" - "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}" + "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}; #{Bold}limit#{Bold} can also be in the form a..b, to display a specific range of items" when "list" "rss list [#{Bold}handle#{Bold}] : list all rss feeds (matching #{Bold}handle#{Bold})" when "watched" @@ -156,37 +178,69 @@ class RSSFeedsPlugin < Plugin end end - def report_problem(report, m=nil) - if m + def report_problem(report, e=nil, m=nil) + if m && m.respond_to?(:reply) m.reply report else warning report end + if e + debug e.inspect + debug e.backtrace.join("\n") if e.respond_to?(:backtrace) + end end def show_rss(m, params) handle = params[:handle] - limit = params[:limit].to_i - limit = 15 if limit > 15 - limit = 1 if limit <= 0 + lims = params[:limit].to_s.match(/(\d+)(?:..(\d+))?/) + debug lims.to_a.inspect + if lims[2] + ll = [[lims[1].to_i-1,lims[2].to_i-1].min, 0].max + ul = [[lims[1].to_i-1,lims[2].to_i-1].max, 14].min + rev = lims[1].to_i > lims[2].to_i + else + ll = 0 + ul = [[lims[1].to_i-1, 1].max, 14].min + rev = false + end + feed = @feeds.fetch(handle, nil) unless feed m.reply "I don't know any feeds named #{handle}" return end - m.reply("Please wait, querying...") + + m.reply "lemme fetch it..." title = items = nil @@mutex.synchronize { title, items = fetchRss(feed, m) } return unless items - m.reply("Channel : #{title}") - # TODO: optional by-date sorting if dates present - items[0...limit].reverse.each do |item| - printRssItem(m.replyto,item) + + # We sort the feeds in freshness order (newer ones first) + items = freshness_sort(items) + disp = items[ll..ul] + disp.reverse! if rev + + m.reply "Channel : #{title}" + disp.each do |item| + printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true}) end end + def itemDate(item,ex=nil) + return item.pubDate if item.respond_to?(:pubDate) + return item.date if item.respond_to?(:date) + return ex + end + + def freshness_sort(items) + notime = Time.at(0) + items.sort { |a, b| + itemDate(b, notime) <=> itemDate(a, notime) + } + end + def list_rss(m, params) wanted = params[:handle] reply = String.new @@ -225,6 +279,10 @@ class RSSFeedsPlugin < Plugin def add_rss(m, params, force=false) handle = params[:handle] url = params[:url] + unless url.match(/https?/) + m.reply "I only deal with feeds from HTTP sources, so I can't use #{url} (maybe you forgot the handle?)" + return + end type = params[:type] if @feeds.fetch(handle, nil) && !force m.reply "There is already a feed named #{handle} (URL: #{@feeds[handle].url})" @@ -323,7 +381,7 @@ class RSSFeedsPlugin < Plugin return feed end - def rewatch_rss(m=nil) + def rewatch_rss(m=nil, params=nil) kill_threads # Read watches from list. @@ -336,88 +394,109 @@ class RSSFeedsPlugin < Plugin private def watchRss(feed, m=nil) if @@watchThreads.has_key?(feed.handle) - report_problem("watcher thread for #{feed.handle} is already running", m) + report_problem("watcher thread for #{feed.handle} is already running", nil, m) return end @@watchThreads[feed.handle] = Thread.new do - debug 'watchRss thread started.' + debug "watcher for #{feed} started" oldItems = [] firstRun = true + failures = 0 loop do begin - debug 'Fetching rss feed...' + debug "fetching #{feed}" title = newItems = nil @@mutex.synchronize { title, newItems = fetchRss(feed) } unless newItems - m.reply "no items in feed" - break - end - debug "Checking if new items are available" - if firstRun - debug "First run, we'll see next time" - firstRun = false + debug "no items in feed #{feed}" + failures +=1 else - otxt = oldItems.map { |item| item.to_s } - dispItems = newItems.reject { |item| - otxt.include?(item.to_s) - } - if dispItems.length > 0 - debug "Found #{dispItems.length} new items" - dispItems.each { |item| - debug "showing #{item.title}" - @@mutex.synchronize { - printFormattedRss(feed, item) - } - } + debug "Checking if new items are available for #{feed}" + if firstRun + debug "First run, we'll see next time" + firstRun = false else - debug "No new items found" + otxt = oldItems.map { |item| item.to_s } + dispItems = newItems.reject { |item| + otxt.include?(item.to_s) + } + if dispItems.length > 0 + debug "Found #{dispItems.length} new items in #{feed}" + # When displaying watched feeds, publish them from older to newer + dispItems.reverse.each { |item| + @@mutex.synchronize { + printFormattedRss(feed, item) + } + } + else + debug "No new items found in #{feed}" + end end + oldItems = newItems.dup end - oldItems = newItems.dup rescue Exception => e - error "IO failed: #{e.inspect}" + error "Error watching #{feed}: #{e.inspect}" debug e.backtrace.join("\n") + failures += 1 end - seconds = 150 + rand(100) - debug "Thread going to sleep #{seconds} seconds.." + seconds = @bot.config['rss.thread_sleep'] * (failures + 1) + seconds += seconds * (rand(100)-50)/100 + debug "watcher for #{feed} going to sleep #{seconds} seconds.." sleep seconds end end end - def printRssItem(loc,item) - if item.kind_of?(RSS::RDF::Item) - @bot.say loc, item.title.chomp.riphtml.shorten(@bot.config['rss.head_max']) + " @ " + item.link - else - desc = String.new - desc << item.pubDate.to_s.chomp + ": " if item.pubDate - desc << item.title.chomp.riphtml.shorten(@bot.config['rss.head_max']) + " :: " if item.title - desc << " @ " + item.link.chomp if item.link - @bot.say loc, desc + def printFormattedRss(feed, item, opts=nil) + places = feed.watchers + handle = "::#{feed.handle}:: " + date = String.new + if opts + places = opts[:places] if opts.key?(:places) + handle = opts[:handle].to_s if opts.key?(:handle) + if opts.key?(:date) && opts[:date] + if item.respond_to?(:pubDate) + if item.pubDate.class <= Time + date = item.pubDate.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.pubDate.to_s + end + elsif item.respond_to?(:date) + if item.date.class <= Time + date = item.date.strftime("%Y/%m/%d %H.%M.%S") + else + date = item.date.to_s + end + else + date = "(no date)" + end + date += " :: " + end end - end - - def printFormattedRss(feed, item) - debug "Printing formatted item #{item.inspect} for feed #{feed.to_s}" - feed.watchers.each { |loc| + title = "#{Bold}#{item.title.chomp.riphtml}#{Bold}" if item.title + desc = item.description.gsub(/\s+/,' ').strip.riphtml.shorten(@bot.config['rss.text_max']) if item.description + link = item.link.chomp if item.link + places.each { |loc| case feed.type when 'blog' - @bot.say loc, "::#{feed.handle}:: #{item.category.content} just blogged at #{item.link}::" - @bot.say loc, "::#{feed.handle}:: #{item.title.chomp.riphtml} - #{item.description.chomp.riphtml.shorten(@bot.config['rss.text_max'])}::" + @bot.say loc, "#{handle}#{date}#{item.category.content} blogged at #{link}" + @bot.say loc, "#{handle}#{title} - #{desc}" when 'forum' - @bot.say loc, "::#{feed.handle}:: #{item.pubDate.to_s.chomp+": " if item.pubDate}#{item.title.chomp.riphtml+" :: " if item.title}#{" @ "+item.link.chomp if item.link}" + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" when 'wiki' - @bot.say loc, "::#{feed.handle}:: #{item.title} has been edited by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(@bot.config['rss.text_max'])} #{item.link} ::" + @bot.say loc, "#{handle}#{date}#{item.title} has been edited by #{item.dc_creator}. #{desc} #{link}" when 'gmame' - @bot.say loc, "::#{feed.handle}:: Message #{item.title} sent by #{item.dc_creator}. #{item.description.split("\n")[0].chomp.riphtml.shorten(@bot.config['rss.text_max'])} ::" + @bot.say loc, "#{handle}#{date}Message #{title} sent by #{item.dc_creator}. #{desc}" when 'trac' - @bot.say loc, "::#{feed.handle}:: #{item.title} :: #{item.link}" - @bot.say loc, "::#{feed.handle}:: #{item.description.gsub(/\s+/,' ').strip.riphtml.shorten(@bot.config['rss.text_max'])}" + @bot.say loc, "#{handle}#{date}#{title} @ #{link}" + unless item.title =~ /^Changeset \[(\d+)\]/ + @bot.say loc, "#{handle}#{date}#{desc}" + end else - printRssItem(loc,item) + @bot.say loc, "#{handle}#{date}#{title}#{' @ ' if item.title && item.link}#{link}" end } end @@ -425,46 +504,49 @@ class RSSFeedsPlugin < Plugin def fetchRss(feed, m=nil) begin # Use 60 sec timeout, cause the default is too low - xml = @bot.httputil.get_cached(feed.url,60,60) + # Do not use get_cached for RSS until we have proper cache handling + # xml = @bot.httputil.get_cached(feed.url,60,60) + xml = @bot.httputil.get(feed.url,60,60) rescue URI::InvalidURIError, URI::BadURIError => e - report_problem("invalid rss feed #{feed.url}", m) + report_problem("invalid rss feed #{feed.url}", e, m) + return + rescue => e + report_problem("error getting #{feed.url}", e, m) return end - debug 'fetched' + debug "fetched #{feed}" unless xml - report_problem("reading feed #{url} failed", m) + report_problem("reading feed #{feed} failed", nil, m) return end begin ## do validate parse rss = RSS::Parser.parse(xml) - debug 'parsed' + debug "parsed #{feed}" rescue RSS::InvalidRSSError ## do non validate parse for invalid RSS 1.0 begin rss = RSS::Parser.parse(xml, false) - rescue RSS::Error - report_problem("parsing rss stream failed, whoops =(", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, whoops =(", e, m) return end - rescue RSS::Error - report_problem("parsing rss stream failed, oioi", m) + rescue RSS::Error => e + report_problem("parsing rss stream failed, oioi", e, m) return rescue => e - report_problem("processing error occured, sorry =(", m) - debug e.inspect - debug e.backtrace.join("\n") + report_problem("processing error occured, sorry =(", e, m) return end items = [] if rss.nil? - report_problem("#{feed.url} does not include RSS 1.0 or 0.9x/2.0",m) + report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m) else begin - rss.output_encoding = "euc-jp" - rescue RSS::UnknownConvertMethod - report_problem("bah! something went wrong =(",m) + rss.output_encoding = 'UTF-8' + rescue RSS::UnknownConvertMethod => e + report_problem("bah! something went wrong =(", e, m) return end rss.channel.title ||= "Unknown" @@ -476,7 +558,7 @@ class RSSFeedsPlugin < Plugin end if items.empty? - report_problem("no items found in the feed, maybe try weed?",m) + report_problem("no items found in the feed, maybe try weed?", e, m) return end return [title, items] @@ -487,7 +569,7 @@ plugin = RSSFeedsPlugin.new plugin.map 'rss show :handle :limit', :action => 'show_rss', - :requirements => {:limit => /^\d+$/}, + :requirements => {:limit => /^\d+(?:\.\.\d+)?$/}, :defaults => {:limit => 5} plugin.map 'rss list :handle', :action => 'list_rss', @@ -517,5 +599,6 @@ plugin.map 'rss unwatch :handle', :action => 'unwatch_rss' plugin.map 'rss rmwatch :handle', :action => 'unwatch_rss' -plugin.map 'rss rewatch :handle', +plugin.map 'rss rewatch', :action => 'rewatch_rss' +