X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Frss.rb;h=f7e559f7601ed86a10a1e17c26da681e25e812cf;hb=90656f4203a0a989b6fb110d4a07598dd186b84c;hp=6d8d3c268384046fe70a0fd96218d634d9cff944;hpb=37f9fdca2fe1305febe801d169071e35d2ac3e91;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 6d8d3c26..f7e559f7 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -16,7 +16,7 @@ require 'rss' -# Try to load rss/content/2.0 so we can access the data in +# Try to load rss/content/2.0 so we can access the data in # tags. begin require 'rss/content/2.0' @@ -151,6 +151,28 @@ module ::RSS SlashModel::ELEMENTS.collect! {|name| "#{SLASH_PREFIX}_#{name}"} end + if self.const_defined? :Atom + # There are improper Atom feeds around that use the non-standard + # 'modified' element instead of the correct 'updated' one. Let's + # support it too. + module Atom + class Feed + class Modified < RSS::Element + include CommonModel + include DateConstruct + end + __send__("install_have_child_element", + "modified", URI, nil, "modified", :content) + + class Entry + Modified = Feed::Modified + __send__("install_have_child_element", + "modified", URI, nil, "modified", :content) + end + end + end + end + class Element class << self def def_bang(name, chain) @@ -162,18 +184,27 @@ module ::RSS end end + # Atom categories are squashed to their label only { :link => %w{link.href link}, :guid => %w{guid.content guid}, :content => %w{content.content content}, :description => %w{description.content description}, :title => %w{title.content title}, - :category => %w{category.content category}, + :category => %w{category.content category.label category}, :dc_subject => %w{dc_subject}, :author => %w{author.name.content author.name author}, :dc_creator => %w{dc_creator} }.each { |name, chain| def_bang name, chain } + def categories! + return nil unless self.respond_to? :categories + cats = categories.map do |c| + blank2nil { c.content rescue c.label rescue c rescue nil } + end.compact + cats.empty? ? nil : cats + end + protected def blank2nil(&block) x = yield @@ -185,7 +216,7 @@ end class ::RssBlob attr_accessor :url, :handle, :type, :refresh_rate, :xml, :title, :items, - :mutex, :watchers, :last_fetched, :http_cache + :mutex, :watchers, :last_fetched, :http_cache, :last_success def initialize(url,handle=nil,type=nil,watchers=[], xml=nil, lf = nil) @url = url @@ -203,6 +234,7 @@ class ::RssBlob @items = nil @mutex = Mutex.new @last_fetched = lf + @last_success = nil sanitize_watchers(watchers) end @@ -277,6 +309,14 @@ class RSSFeedsPlugin < Plugin :default => 300, :validate => Proc.new{|v| v > 30}, :desc => "How many seconds to sleep before checking RSS feeds again") + Config.register Config::IntegerValue.new('rss.announce_timeout', + :default => 0, + :desc => "Don't announce watched feed if these many seconds elapsed since the last successful update") + + Config.register Config::IntegerValue.new('rss.announce_max', + :default => 3, + :desc => "Maximum number of new items to announce when a watched feed is updated") + Config.register Config::BooleanValue.new('rss.show_updated', :default => true, :desc => "Whether feed items for which the description was changed should be shown as new") @@ -285,10 +325,15 @@ class RSSFeedsPlugin < Plugin :default => true, :desc => "Whether to display links from the text of a feed item.") + Config.register Config::EnumValue.new('rss.announce_method', + :values => ['say', 'notice'], + :default => 'say', + :desc => "Whether to display links from the text of a feed item.") + # Make an 'unique' ID for a given item, based on appropriate bot options - # Currently only suppored is bot.config['rss.show_updated']: when false, + # Currently only supported is bot.config['rss.show_updated']: when false, # only the guid/link is accounted for. - + def make_uid(item) uid = [item.guid! || item.link!] if @bot.config['rss.show_updated'] @@ -317,88 +362,98 @@ class RSSFeedsPlugin < Plugin # Auxiliary method used to collect two lines for rss output filters, # running substitutions against DataStream _s_ optionally joined - # with hash _h_ + # with hash _h_. + # + # For substitutions, *_wrap keys can be used to alter the content of + # other nonempty keys. If the value of *_wrap is a String, it will be + # put before and after the corresponding key; if it's an Array, the first + # and second elements will be used for wrapping; if it's nil, no wrapping + # will be done (useful to override a default wrapping). + # + # For example: + # :handle_wrap => '::':: + # will wrap s[:handle] by prefixing and postfixing it with '::' + # :date_wrap => [nil, ' :: ']:: + # will put ' :: ' after s[:date] def make_stream(line1, line2, s, h={}) ss = s.merge(h) - DataStream.new([line1, line2].compact.join("\n") % ss, ss) + subs = {} + wraps = {} + ss.each do |k, v| + kk = k.to_s.chomp!('_wrap') + if kk + nk = kk.intern + case v + when String + wraps[nk] = ss[nk].wrap_nonempty(v, v) + when Array + wraps[nk] = ss[nk].wrap_nonempty(*v) + when nil + # do nothing + else + warning "ignoring #{v.inspect} wrapping of unknown class" + end unless ss[nk].nil? + else + subs[k] = v + end + end + subs.merge! wraps + DataStream.new([line1, line2].compact.join("\n") % subs, ss) end - # Define default RSS filters + # Auxiliary method used to define rss output filters + def rss_type(key, &block) + @bot.register_filter(key, @outkey, &block) + end + + # Define default output filters (rss types), and load custom ones. + # Custom filters are looked for in the plugin's default filter locations + # and in rss/types.rb under botclass. + # Preferably, the rss_type method should be used in these files, e.g.: + # rss_type :my_type do |s| + # line1 = "%{handle} and some %{author} info" + # make_stream(line1, nil, s) + # end + # to define the new type 'my_type'. The keys available in the DataStream + # are: + # item:: + # the actual rss item + # handle:: + # the item handle + # date:: + # the item date + # title:: + # the item title + # desc, link, category, author:: + # the item description, link, category, author + # at:: + # the string ' @ ' if the item has both an title and a link + # handle_wrap, date_wrap, title_wrap, ...:: + # these keys can be defined to wrap the corresponding elements if they + # are nonempty. By default handle is wrapped with '::', date has a ' ::' + # appended and title is enbolden # - # TODO: load personal ones def define_filters - @outkey = :"rss.out" - @bot.register_filter(:headlines, @outkey) { |s| - line1 = (s[:handle].empty? ? "%{date}" : "%{handle}") << "%{title}" - make_stream(line1, nil, s) - } - @bot.register_filter(:blog, @outkey) { |s| - author = s[:author] ? (s[:author] + " ") : "" - abt = s[:category] ? "about #{s[:category]} " : "" - line1 = "%{handle}%{date}%{author}blogged %{abt}at %{link}" - line2 = "%{handle}%{title} - %{desc}" - make_stream(line1, line2, s, :author => author, :abt => abt) - } - @bot.register_filter(:photoblog, @outkey) { |s| - author = s[:author] ? (s[:author] + " ") : "" - abt = s[:category] ? "under #{s[:category]} " : "" - line1 = "%{handle}%{date}%{author}added an image %{abt}at %{link}" - line2 = "%{handle}%{title} - %{desc}" - make_stream(line1, line2, s, :author => author, :abt => abt) - } - @bot.register_filter(:news, @outkey) { |s| - line1 = "%{handle}%{date}%{title}%{at}%{link}" % s - line2 = "%{handle}%{date}%{desc}" % s - make_stream(line1, line2, s) - } - @bot.register_filter(:git, @outkey) { |s| - author = s[:author].sub(/@\S+?\s*>/, "@...>") + " " if s[:author] - line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}" - make_stream(line1, nil, s, :author => author) - } - @bot.register_filter(:forum, @outkey) { |s| - line1 = "%{handle}%{date}%{title}%{at}%{link}" - make_stream(line1, nil, s) - } - @bot.register_filter(:wiki, @outkey) { |s| - line1 = "%{handle}%{date}%{title}%{at}%{link}" - line1 << "has been edited by %{author}. %{desc}" - make_stream(line1, nil, s) - } - @bot.register_filter(:gmane, @outkey) { |s| - line1 = "%{handle}%{date}Message %{title} sent by %{author}. %{desc}" - make_stream(line1, nil, s) - } - @bot.register_filter(:trac, @outkey) { |s| - author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author] - line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}" - line2 = nil - unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/ - line2 = "%{handle}%{date}%{desc}" - end - make_stream(line1, line2, s, :author => author) - } - @bot.register_filter(:"/.", @outkey) { |s| - dept = "(from the #{s[:item].slash_department} dept) " rescue nil - sec = " in section #{s[:item].slash_section}" rescue nil - line1 = "%{handle}%{date}%{dept}%{title}%{at}%{link} " - line1 << "(posted by %{author}%{sec})" - make_stream(line1, nil, s, :dept => dept, :sec => sec) - } - @bot.register_filter(:default, @outkey) { |s| - line1 = "%{handle}%{date}%{title}%{at}%{link}" - line1 << " (by %{author})" if s[:author] - make_stream(line1, nil, s) - } + @outkey ||= :"rss.out" - # Define an HTML info filter too + # Define an HTML info filter @bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) } - # This is the output format used by the input filter - @bot.register_filter(:htmlinfo, @outkey) { |s| + rss_type :htmlinfo do |s| line1 = "%{title}%{at}%{link}" make_stream(line1, nil, s) - } + end + + # the default filter + rss_type :default do |s| + line1 = "%{handle}%{date}%{title}%{at}%{link}" + line1 << " (by %{author})" if s[:author] + make_stream(line1, nil, s) + end + + @user_types ||= datafile 'types.rb' + load_filters + load_filters :path => @user_types end FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/1999/02/22-rdf)} @@ -625,7 +680,12 @@ class RSSFeedsPlugin < Plugin m.reply "Channel : #{title}" disp.each do |item| - printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true}) + printFormattedRss(feed, item, { + :places => [m.replyto], + :handle => nil, + :date => true, + :announce_method => :say + }) end end @@ -644,19 +704,34 @@ class RSSFeedsPlugin < Plugin def list_rss(m, params) wanted = params[:handle] - reply = String.new - @feeds.each { |handle, feed| - next if wanted and !handle.match(/#{wanted}/i) - reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" - (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate - (reply << " (watched)") if feed.watched_by?(m.replyto) - reply << "\n" - } - if reply.empty? + listed = @feeds.keys + if wanted + wanted_rx = Regexp.new(wanted, true) + listed.reject! { |handle| !handle.match(wanted_rx) } + end + listed.sort! + debug listed + if @bot.config['send.max_lines'] > 0 and listed.size > @bot.config['send.max_lines'] + reply = listed.inject([]) do |ar, handle| + feed = @feeds[handle] + string = handle.dup + (string << " (#{feed.type})") if feed.type + (string << " (watched)") if feed.watched_by?(m.replyto) + ar << string + end.join(', ') + elsif listed.size > 0 + reply = listed.inject([]) do |ar, handle| + feed = @feeds[handle] + string = "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})" + (string << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate + (string << " (watched)") if feed.watched_by?(m.replyto) + ar << string + end.join("\n") + else reply = "no feeds found" reply << " matching #{wanted}" if wanted end - m.reply reply, :max_lines => reply.length + m.reply reply, :max_lines => 0 end def watched_rss(m, params) @@ -728,15 +803,28 @@ class RSSFeedsPlugin < Plugin end case params[:what].intern when :handle - new = params[:new].downcase - if @feeds.key?(new) and @feeds[new] + # preserve rename case, but beware of key + realnew = params[:new] + new = realnew.downcase + if feed.handle.downcase == new + if feed.handle == realnew + m.reply _("You want me to rename %{handle} to itself?") % { + :handle => feed.handle + } + return false + else + feed.mutex.synchronize do + feed.handle = realnew + end + end + elsif @feeds.key?(new) and @feeds[new] m.reply "There already is a feed with handle #{new}" return else feed.mutex.synchronize do @feeds[new] = feed @feeds.delete(handle) - feed.handle = new + feed.handle = realnew end handle = new end @@ -847,8 +935,8 @@ class RSSFeedsPlugin < Plugin stop_watches # Read watches from list. - watchlist.each{ |handle, feed| - watchRss(feed, m) + watchlist.each{ |hndl, fd| + watchRss(fd, m) } m.okay if m end @@ -857,7 +945,7 @@ class RSSFeedsPlugin < Plugin private def watchRss(feed, m=nil) if @watch.has_key?(feed.handle) - report_problem("watcher thread for #{feed.handle} is already running", nil, m) + # report_problem("watcher thread for #{feed.handle} is already running", nil, m) return end status = Hash.new @@ -873,7 +961,13 @@ class RSSFeedsPlugin < Plugin failures = status[:failures] begin debug "fetching #{feed}" - first_run = !feed.last_fetched + + first_run = !feed.last_success + if (!first_run && @bot.config['rss.announce_timeout'] > 0 && + (Time.now - feed.last_success > @bot.config['rss.announce_timeout'])) + debug "#{feed} wasn't polled for too long, supressing output" + first_run = true + end oldxml = feed.xml ? feed.xml.dup : nil unless fetchRss(feed, nil, feed.http_cache) failures += 1 @@ -931,7 +1025,19 @@ class RSSFeedsPlugin < Plugin } if dispItems.length > 0 + max = @bot.config['rss.announce_max'] debug "Found #{dispItems.length} new items in #{feed}" + if max > 0 and dispItems.length > max + debug "showing only the latest #{dispItems.length}" + feed.watchers.each do |loc| + @bot.say loc, (_("feed %{feed} had %{num} updates, showing the latest %{max}") % { + :feed => feed.handle, + :num => dispItems.length, + :max => max + }) + end + dispItems.slice!(max..-1) + end # When displaying watched feeds, publish them from older to newer dispItems.reverse.each { |item| printFormattedRss(feed, item) @@ -971,44 +1077,45 @@ class RSSFeedsPlugin < Plugin return seconds end - def printFormattedRss(feed, item, opts=nil) + def make_date(obj) + if obj.kind_of? Time + obj.strftime("%Y/%m/%d %H:%M") + else + obj.to_s + end + end + + def printFormattedRss(feed, item, options={}) # debug item - places = feed.watchers - handle = feed.handle.empty? ? "" : "::#{feed.handle}:: " - date = String.new - if opts - places = opts[:places] if opts.key?(:places) - handle = opts[:handle].to_s if opts.key?(:handle) - if opts.key?(:date) && opts[:date] - if item.respond_to?(:updated) - if item.updated.content.class <= Time - date = item.updated.content.strftime("%Y/%m/%d %H:%M") - else - date = item.updated.content.to_s - end - elsif item.respond_to?(:source) and item.source.respond_to?(:updated) - if item.source.updated.content.class <= Time - date = item.source.updated.content.strftime("%Y/%m/%d %H:%M") - else - date = item.source.updated.content.to_s - end - elsif item.respond_to?(:pubDate) - if item.pubDate.class <= Time - date = item.pubDate.strftime("%Y/%m/%d %H:%M") - else - date = item.pubDate.to_s - end - elsif item.respond_to?(:date) - if item.date.class <= Time - date = item.date.strftime("%Y/%m/%d %H:%M") - else - date = item.date.to_s - end - else - date = "(no date)" - end - date += " :: " + opts = { + :places => feed.watchers, + :handle => feed.handle, + :date => false, + :announce_method => @bot.config['rss.announce_method'] + }.merge options + + places = opts[:places] + announce_method = opts[:announce_method] + + handle = opts[:handle].to_s + + date = \ + if opts[:date] + if item.respond_to?(:updated) and item.updated + make_date(item.updated.content) + elsif item.respond_to?(:modified) and item.modified + make_date(item.modified.content) + elsif item.respond_to?(:source) and item.source.respond_to?(:updated) + make_date(item.source.updated.content) + elsif item.respond_to?(:pubDate) + make_date(item.pubDate) + elsif item.respond_to?(:date) + make_date(item.date) + else + "(no date)" end + else + String.new end tit_opt = {} @@ -1022,14 +1129,14 @@ class RSSFeedsPlugin < Plugin # visible in the URL anyway # TODO make this optional? base_title.sub!(/^Changeset \[([\da-f]{40})\]:/) { |c| "(git commit)"} if feed.type == 'trac' - title = "#{Bold}#{base_title.ircify_html(tit_opt)}#{Bold}" + title = base_title.ircify_html(tit_opt) end desc_opt = {} desc_opt[:limit] = @bot.config['rss.text_max'] desc_opt[:a_href] = :link_out if @bot.config['rss.show_links'] - # We prefer content_encoded here as it tends to provide more html formatting + # We prefer content_encoded here as it tends to provide more html formatting # for use with ircify_html. if item.respond_to?(:content_encoded) && item.content_encoded desc = item.content_encoded.ircify_html(desc_opt) @@ -1051,6 +1158,7 @@ class RSSFeedsPlugin < Plugin link = item.link! link.strip! if link + categories = item.categories! category = item.category! || item.dc_subject! category.strip! if category author = item.dc_creator! || item.author! @@ -1064,15 +1172,25 @@ class RSSFeedsPlugin < Plugin key = @bot.global_filter_name(feed.type, @outkey) key = @bot.global_filter_name(:default, @outkey) unless @bot.has_filter?(key) - output = @bot.filter(key, :item => item, :handle => handle, :date => date, - :title => title, :desc => desc, :link => link, - :category => category, :author => author, :at => at) + stream_hash = { + :item => item, + :handle => handle, + :handle_wrap => ['::', ':: '], + :date => date, + :date_wrap => [nil, ' :: '], + :title => title, + :title_wrap => Bold, + :desc => desc, :link => link, + :categories => categories, + :category => category, :author => author, :at => at + } + output = @bot.filter(key, stream_hash) return output if places.empty? places.each { |loc| output.to_s.each_line { |line| - @bot.say loc, line, :overlong => :truncate + @bot.__send__(announce_method, loc, line, :overlong => :truncate) } } end @@ -1111,6 +1229,7 @@ class RSSFeedsPlugin < Plugin end feed.mutex.synchronize do feed.xml = xml + feed.last_success = Time.now end return true end @@ -1119,25 +1238,33 @@ class RSSFeedsPlugin < Plugin return nil unless feed.xml feed.mutex.synchronize do xml = feed.xml - begin - ## do validate parse - rss = RSS::Parser.parse(xml) - debug "parsed and validated #{feed}" - rescue RSS::InvalidRSSError - ## do non validate parse for invalid RSS 1.0 + rss = nil + errors = [] + RSS::AVAILABLE_PARSERS.each do |parser| begin - rss = RSS::Parser.parse(xml, false) - debug "parsed but not validated #{feed}" + ## do validate parse + rss = RSS::Parser.parse(xml, true, true, parser) + debug "parsed and validated #{feed} with #{parser}" + break + rescue RSS::InvalidRSSError + begin + ## do non validate parse for invalid RSS 1.0 + rss = RSS::Parser.parse(xml, false, true, parser) + debug "parsed but not validated #{feed} with #{parser}" + break + rescue RSS::Error => e + errors << [parser, e, "parsing rss stream failed, whoops =("] + end rescue RSS::Error => e - report_problem("parsing rss stream failed, whoops =(", e, m) - return nil + errors << [parser, e, "parsing rss stream failed, oioi"] + rescue => e + errors << [parser, e, "processing error occured, sorry =("] end - rescue RSS::Error => e - report_problem("parsing rss stream failed, oioi", e, m) - return nil - rescue => e - report_problem("processing error occured, sorry =(", e, m) - return nil + end + unless errors.empty? + debug errors + self.send(:report_problem, errors.last[2], errors.last[1], m) + return nil unless rss end items = [] if rss.nil?