X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Frss.rb;h=9e85b416bb6bc6db5a50994008653e29a13c5087;hb=41ea4232df0ed0b3f922b0476444ca8e38f3301f;hp=3b8e8c7dcc51c8a2cda6a4df493eae357b8cd470;hpb=6f9bfa43ac907700fcba394e0f6b9d987b1192fb;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 3b8e8c7d..9e85b416 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -25,21 +25,6 @@ end module ::RSS - # Make an 'unique' ID for a given item, based on appropriate bot options - # Currently only suppored is bot.config['rss.show_updated']: when true, the - # description is included in the uid hashing, otherwise it's not - # - def RSS.item_uid_for_bot(item, opts={}) - options = { :show_updated => true}.merge(opts) - desc = nil - if options[:show_updated] - desc = item.content.content rescue item.description rescue nil - end - [(item.title.content rescue item.title rescue nil), - (item.link.href rescue item.link), - desc].hash - end - # Add support for Slashdot namespace in RDF. The code is just an adaptation # of the DublinCore code. unless defined?(SLASH_PREFIX) @@ -269,6 +254,38 @@ class RSSFeedsPlugin < Plugin :default => true, :desc => "Whether to display links from the text of a feed item.") + # Make an 'unique' ID for a given item, based on appropriate bot options + # Currently only suppored is bot.config['rss.show_updated']: when false, + # only the guid/link is accounted for. + + def block_rescue(df = nil, &block) + v = block.call rescue nil + (String === v && '' != v) ? v : nil + end + + def make_uid(item) + uid = [ + (block_rescue do item.guid.content end || + block_rescue do item.guid end || + block_rescue do item.link.href end || + block_rescue do item.link end + ) + ] + if @bot.config['rss.show_updated'] + uid.push( + block_rescue do item.content.content end || + block_rescue do item.description end + ) + uid.unshift( + block_rescue do item.title.content end || + block_rescue do item.title end + ) + end + # debug "taking hash of #{uid.inspect}" + uid.hash + end + + # We used to save the Mutex with the RssBlob, which was idiotic. And # since Mutexes dumped in one version might not be resotrable in another, # we need a few tricks to be able to restore data from other versions of Ruby @@ -284,10 +301,125 @@ class RSSFeedsPlugin < Plugin end end + # Auxiliary method used to collect two lines for rss output filters, + # running substitutions against DataStream _s_ optionally joined + # with hash _h_ + def make_stream(line1, line2, s, h={}) + ss = s.merge(h) + DataStream.new([line1, line2].compact.join("\n") % ss, ss) + end + + # Define default RSS filters + # + # TODO: load personal ones + def define_filters + @outkey = :"rss.out" + @bot.register_filter(:headlines, @outkey) { |s| + line1 = (s[:handle].empty? ? "%{date}" : "%{handle}") << "%{title}" + make_stream(line1, nil, s) + } + @bot.register_filter(:blog, @outkey) { |s| + author = s[:author] ? (s[:author] + " ") : "" + abt = s[:category] ? "about #{s[:category]} " : "" + line1 = "%{handle}%{date}%{author}blogged %{abt}at %{link}" + line2 = "%{handle}%{title} - %{desc}" + make_stream(line1, line2, s, :author => author, :abt => abt) + } + @bot.register_filter(:photoblog, @outkey) { |s| + author = s[:author] ? (s[:author] + " ") : "" + abt = s[:category] ? "under #{s[:category]} " : "" + line1 = "%{handle}%{date}%{author}added an image %{abt}at %{link}" + line2 = "%{handle}%{title} - %{desc}" + make_stream(line1, line2, s, :author => author, :abt => abt) + } + @bot.register_filter(:news, @outkey) { |s| + line1 = "%{handle}%{date}%{title}%{at}%{link}" % s + line2 = "%{handle}%{date}%{desc}" % s + make_stream(line1, line2, s) + } + @bot.register_filter(:git, @outkey) { |s| + author = s[:author] ? (s[:author] + " ") : "" + line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}" + make_stream(line1, nil, s, :author => author) + } + @bot.register_filter(:forum, @outkey) { |s| + line1 = "%{handle}%{date}%{title}%{at}%{link}" + make_stream(line1, nil, s) + } + @bot.register_filter(:wiki, @outkey) { |s| + line1 = "%{handle}%{date}%{title}%{at}%{link}" + line1 << "has been edited by %{author}. %{desc}" + make_stream(line1, nil, s) + } + @bot.register_filter(:gmane, @outkey) { |s| + line1 = "%{handle}%{date}Message %{title} sent by %{author}. %{desc}" + make_stream(line1, nil, s) + } + @bot.register_filter(:trac, @outkey) { |s| + author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author] + line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}" + line2 = nil + unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/ + line2 = "%{handle}%{date}%{desc}" + end + make_stream(line1, line2, s, :author => author) + } + @bot.register_filter(:"/.", @outkey) { |s| + dept = "(from the #{s[:item].slash_department} dept) " rescue nil + sec = " in section #{s[:item].slash_section}" rescue nil + line1 = "%{handle}%{date}%{dept}%{title}%{at}%{link} " + line1 << "(posted by %{author}%{sec})" + make_stream(line1, nil, s, :dept => dept, :sec => sec) + } + @bot.register_filter(:default, @outkey) { |s| + line1 = "%{handle}%{date}%{title}%{at}%{link}" + line1 << " (by %{author})" if s[:author] + make_stream(line1, nil, s) + } + + # Define an HTML info filter too + @bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) } + + # This is the output format used by the input filter + @bot.register_filter(:htmlinfo, @outkey) { |s| + line1 = "%{title}%{at}%{link}" + make_stream(line1, nil, s) + } + end + + FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/1999/02/22-rdf)} + def htmlinfo_filter(s) + return nil unless s[:headers] and s[:headers]['x-rbot-location'] + return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or + (s[:text].include?(" blob.title, :content => output.join(" | ")} + end + + # Display the known rss types + def rss_types(m, params) + ar = @bot.filter_names(@outkey) + ar.delete(:default) + m.reply ar.map { |k| k.to_s }.sort!.join(", ") + end + attr_reader :feeds def initialize super + + define_filters + if @registry.has_key?(:feeds) # When migrating from Ruby 1.8.5 to 1.8.6, dumped Mutexes may render the # data unrestorable. If this happens, we patch the data, thus allowing @@ -402,8 +534,10 @@ class RSSFeedsPlugin < Plugin "rss who watches #{Bold}handle#{Bold}: lists watches for rss #{Bold}handle#{Bold}" when "rewatch" "rss rewatch : restart threads that watch for changes in watched rss" + when "types" + "rss types : show the rss types for which an output format existi (all other types will use the default one)" else - "manage RSS feeds: rss show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches" + "manage RSS feeds: rss types|show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches" end end @@ -461,13 +595,12 @@ class RSSFeedsPlugin < Plugin fetched = fetchRss(feed, m, false) end return unless fetched or feed.xml - if not fetched and feed.items - m.reply "using old data" - else + if fetched or not feed.items parsed = parseRss(feed, m) - m.reply "using old data" unless parsed end return unless feed.items + m.reply "using old data" unless fetched and parsed + title = feed.title items = feed.items @@ -690,7 +823,7 @@ class RSSFeedsPlugin < Plugin if params and handle = params[:handle] feed = @feeds.fetch(handle.downcase, nil) if feed - @bot.timer.reschedule(@watch[feed.handle], 0) + @bot.timer.reschedule(@watch[feed.handle], (params[:delay] || 0).to_f) m.okay if m else m.reply _("no such feed %{handle}") % { :handle => handle } if m @@ -746,9 +879,8 @@ class RSSFeedsPlugin < Plugin otxt = [] # These are used for checking new items vs old ones - uid_opts = { :show_updated => @bot.config['rss.show_updated'] } oids = Set.new feed.items.map { |item| - uid = RSS.item_uid_for_bot(item, uid_opts) + uid = make_uid item otxt << item.to_s debug [uid, item].inspect debug [uid, otxt.last].inspect @@ -767,7 +899,7 @@ class RSSFeedsPlugin < Plugin # debug feed.xml dispItems = feed.items.reject { |item| - uid = RSS.item_uid_for_bot(item, uid_opts) + uid = make_uid item txt = item.to_s if oids.include?(uid) debug "rejecting old #{uid} #{item.inspect}" @@ -824,15 +956,15 @@ class RSSFeedsPlugin < Plugin end def select_nonempty(*ar) - debug ar - ret = ar.map { |i| (i && i.empty?) ? nil : i }.compact.first - (ret && ret.empty?) ? nil : ret + # debug ar + ar.each { |i| return i unless i.nil_or_empty? } + return nil end def printFormattedRss(feed, item, opts=nil) - debug item + # debug item places = feed.watchers - handle = "::#{feed.handle}:: " + handle = feed.handle.empty? ? "" : "::#{feed.handle}:: " date = String.new if opts places = opts[:places] if opts.key?(:places) @@ -893,7 +1025,7 @@ class RSSFeedsPlugin < Plugin desc = item.content_encoded.ircify_html(desc_opt) elsif item.respond_to?(:description) && item.description desc = item.description.ircify_html(desc_opt) - else + elsif item.respond_to?(:content) && item.content if item.content.type == "html" desc = item.content.content.ircify_html(desc_opt) else @@ -902,59 +1034,36 @@ class RSSFeedsPlugin < Plugin desc = desc.slice(0, desc_opt[:limit]) + "#{Reverse}...#{Reverse}" end end + else + desc = "(?)" end - link = item.link.href rescue item.link.chomp rescue nil + link = item.link.href rescue item.link rescue nil + link.strip! if link category = select_nonempty((item.category.content rescue nil), (item.dc_subject rescue nil)) + category.strip! if category author = select_nonempty((item.author.name.content rescue nil), (item.dc_creator rescue nil), (item.author rescue nil)) + author.strip! if author line1 = nil line2 = nil at = ((item.title && item.link) ? ' @ ' : '') - case feed.type - when 'blog' - author += " " if author - abt = category ? "about #{category} " : "" - line1 = "#{handle}#{date}#{author}blogged #{abt}at #{link}" - line2 = "#{handle}#{title} - #{desc}" - when 'photoblog' - author += " " if author - abt = category ? "under #{category} " : "" - line1 = "#{handle}#{date}#{author}added an image #{abt}at #{link}" - line2 = "#{handle}#{title} - #{desc}" - when 'news' - line1 = "#{handle}#{date}#{title} @ #{link}" - line2 = line2 = "#{handle}#{date}#{desc}" - when 'git' - author += " " if author - line1 = "#{handle}#{date}#{author}commited #{title} @ #{link}" - when 'forum' - line1 = "#{handle}#{date}#{title}#{at}#{link}" - when 'wiki' - line1 = "#{handle}#{date}#{title}#{at}#{link} has been edited by #{author}. #{desc}" - when 'gmane' - line1 = "#{handle}#{date}Message #{title} sent by #{author}. #{desc}" - when 'trac' - line1 = "#{handle}#{date}#{title} @ #{link}" - unless item.title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/ - line2 = "#{handle}#{date}#{desc}" - end - when '/.' - dept = "(from the #{item.slash_department} dept) " rescue nil - sec = " in section #{item.slash_section}" rescue nil + key = @bot.global_filter_name(feed.type, @outkey) + key = @bot.global_filter_name(:default, @outkey) unless @bot.has_filter?(key) + + output = @bot.filter(key, :item => item, :handle => handle, :date => date, + :title => title, :desc => desc, :link => link, + :category => category, :author => author, :at => at) + + return output if places.empty? - line1 = "#{handle}#{date}#{dept}#{title}#{at}#{link} (posted by #{author}#{sec})" - else - line1 = "#{handle}#{date}#{title}#{at}#{link}" - line1 << " (by #{author})" if author - end places.each { |loc| - @bot.say loc, line1, :overlong => :truncate - next unless line2 - @bot.say loc, line2, :overlong => :truncate + output.to_s.each_line { |line| + @bot.say loc, line, :overlong => :truncate + } } end @@ -1015,7 +1124,12 @@ class RSSFeedsPlugin < Plugin end items = [] if rss.nil? - report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m) + if xml.match(/xmlns\s*=\s*(['"])http:\/\/www.w3.org\/2005\/Atom\1/) and not defined?(RSS::Atom) + report_problem("#{feed.handle} @ #{feed.url} looks like an Atom feed, but your Ruby/RSS library doesn't seem to support it. Consider getting the latest version from http://raa.ruby-lang.org/project/rss/", nil, m) + else + report_problem("#{feed.handle} @ #{feed.url} doesn't seem to contain an RSS or Atom feed I can read", nil, m) + end + return nil else begin rss.output_encoding = 'UTF-8' @@ -1024,13 +1138,13 @@ class RSSFeedsPlugin < Plugin return nil end if rss.respond_to? :channel - rss.channel.title ||= "Unknown" + rss.channel.title ||= "(?)" title = rss.channel.title else title = rss.title.content end rss.items.each do |item| - item.title ||= "Unknown" + item.title ||= "(?)" items << item end end @@ -1039,7 +1153,7 @@ class RSSFeedsPlugin < Plugin report_problem("no items found in the feed, maybe try weed?", e, m) return nil end - feed.title = title + feed.title = title.strip feed.items = items return true end @@ -1103,5 +1217,7 @@ plugin.map 'rss unwatch :handle [in :chan]', :action => 'unwatch_rss' plugin.map 'rss rmwatch :handle [in :chan]', :action => 'unwatch_rss' -plugin.map 'rss rewatch [:handle]', +plugin.map 'rss rewatch [:handle] [:delay]', :action => 'rewatch_rss' +plugin.map 'rss types', + :action => 'rss_types'