require 'rss'
-# Try to load rss/content/2.0 so we can access the data in <content:encoded>
+# Try to load rss/content/2.0 so we can access the data in <content:encoded>
# tags.
begin
require 'rss/content/2.0'
SlashModel::ELEMENTS.collect! {|name| "#{SLASH_PREFIX}_#{name}"}
end
+
+ class Element
+ class << self
+ def def_bang(name, chain)
+ class_eval %<
+ def #{name}!
+ blank2nil { #{chain.join(' rescue ')} rescue nil }
+ end
+ >, *get_file_and_line_from_caller(0)
+ end
+ end
+
+ {
+ :link => %w{link.href link},
+ :guid => %w{guid.content guid},
+ :content => %w{content.content content},
+ :description => %w{description.content description},
+ :title => %w{title.content title},
+ :category => %w{category.content category},
+ :dc_subject => %w{dc_subject},
+ :author => %w{author.name.content author.name author},
+ :dc_creator => %w{dc_creator}
+ }.each { |name, chain| def_bang name, chain }
+
+ def categories!
+ return nil unless self.respond_to? :categories
+ cats = categories.map do |c|
+ blank2nil { c.content rescue c rescue nil }
+ end.compact
+ cats.empty? ? nil : cats
+ end
+
+ protected
+ def blank2nil(&block)
+ x = yield
+ (x && !x.empty?) ? x : nil
+ end
+ end
end
class ::RssBlob
attr_accessor :url, :handle, :type, :refresh_rate, :xml, :title, :items,
- :mutex, :watchers, :last_fetched
+ :mutex, :watchers, :last_fetched, :http_cache, :last_success
def initialize(url,handle=nil,type=nil,watchers=[], xml=nil, lf = nil)
@url = url
@type = type
@watchers=[]
@refresh_rate = nil
+ @http_cache = false
@xml = xml
@title = nil
@items = nil
@mutex = Mutex.new
@last_fetched = lf
+ @last_success = nil
sanitize_watchers(watchers)
end
:default => 300, :validate => Proc.new{|v| v > 30},
:desc => "How many seconds to sleep before checking RSS feeds again")
+ Config.register Config::IntegerValue.new('rss.announce_timeout',
+ :default => 0,
+ :desc => "Don't announce watched feed if these many seconds elapsed since the last successful update")
+
+ Config.register Config::IntegerValue.new('rss.announce_max',
+ :default => 3,
+ :desc => "Maximum number of new items to announce when a watched feed is updated")
+
Config.register Config::BooleanValue.new('rss.show_updated',
:default => true,
:desc => "Whether feed items for which the description was changed should be shown as new")
:default => true,
:desc => "Whether to display links from the text of a feed item.")
+ # Name of the bot method ('say' or 'notice') used to deliver each output
+ # line of a feed item; it is read as the default :announce_method when
+ # items are printed.
+ Config.register Config::EnumValue.new('rss.announce_method',
+ :values => ['say', 'notice'],
+ :default => 'say',
+ :desc => "Method used to announce new items of watched feeds (say or notice)")
+
# Make an 'unique' ID for a given item, based on appropriate bot options
# Currently only suppored is bot.config['rss.show_updated']: when false,
# only the guid/link is accounted for.
-
- def block_rescue(df = nil, &block)
- v = block.call rescue nil
- (String === v && '' != v) ? v : nil
- end
def make_uid(item)
- uid = [
- (block_rescue do item.guid.content end ||
- block_rescue do item.guid end ||
- block_rescue do item.link.href end ||
- block_rescue do item.link end
- )
- ]
+ uid = [item.guid! || item.link!]
if @bot.config['rss.show_updated']
- uid.push(
- block_rescue do item.content.content end ||
- block_rescue do item.description end
- )
- uid.unshift(
- block_rescue do item.title.content end ||
- block_rescue do item.title end
- )
+ uid.push(item.content! || item.description!)
+ uid.unshift item.title!
end
# debug "taking hash of #{uid.inspect}"
uid.hash
# We used to save the Mutex with the RssBlob, which was idiotic. And
- # since Mutexes dumped in one version might not be resotrable in another,
+ # since Mutexes dumped in one version might not be restorable in another,
# we need a few tricks to be able to restore data from other versions of Ruby
#
# When migrating 1.8.6 => 1.8.5, all we need to do is define an empty
# Auxiliary method used to collect two lines for rss output filters,
# running substitutions against DataStream _s_ optionally joined
- # with hash _h_
+ # with hash _h_.
+ #
+ # For substitutions, *_wrap keys can be used to alter the content of
+ # other nonempty keys. If the value of *_wrap is a String, it will be
+ # put before and after the corresponding key; if it's an Array, the first
+ # and second elements will be used for wrapping; if it's nil, no wrapping
+ # will be done (useful to override a default wrapping).
+ #
+ # For example:
+ # :handle_wrap => '::'::
+ # will wrap s[:handle] by prefixing and postfixing it with '::'
+ # :date_wrap => [nil, ' :: ']::
+ # will put ' :: ' after s[:date]
def make_stream(line1, line2, s, h={})
ss = s.merge(h)
- DataStream.new([line1, line2].compact.join("\n") % ss, ss)
+ subs = {}
+ wraps = {}
+ ss.each do |k, v|
+ kk = k.to_s.chomp!('_wrap')
+ if kk
+ nk = kk.intern
+ case v
+ when String
+ wraps[nk] = ss[nk].wrap_nonempty(v, v)
+ when Array
+ wraps[nk] = ss[nk].wrap_nonempty(*v)
+ when nil
+ # do nothing
+ else
+ warning "ignoring #{v.inspect} wrapping of unknown class"
+ end
+ else
+ subs[k] = v
+ end
+ end
+ subs.merge! wraps
+ DataStream.new([line1, line2].compact.join("\n") % subs, ss)
+ end
+
+ # Auxiliary method used to define rss output filters
+ def rss_type(key, &block)
+ @bot.register_filter(key, @outkey, &block)
end
- # Define default RSS filters
+ # Define default output filters (rss types), and load custom ones.
+ # Custom filters are looked for in the plugin's default filter locations
+ # and in rss/types.rb under botclass.
+ # Preferably, the rss_type method should be used in these files, e.g.:
+ # rss_type :my_type do |s|
+ # line1 = "%{handle} and some %{author} info"
+ # make_stream(line1, nil, s)
+ # end
+ # to define the new type 'my_type'. The keys available in the DataStream
+ # are:
+ # item::
+ # the actual rss item
+ # handle::
+ # the item handle
+ # date::
+ # the item date
+ # title::
+ # the item title
+ # desc, link, category, author::
+ # the item description, link, category, author
+ # at::
+ # the string ' @ ' if the item has both a title and a link
+ # handle_wrap, date_wrap, title_wrap, ...::
+ # these keys can be defined to wrap the corresponding elements if they
+ # are nonempty. By default handle is wrapped with '::', date has a ' ::'
+ # appended and title is emboldened
#
- # TODO: load personal ones
def define_filters
- @outkey = :"rss.out"
- @bot.register_filter(:headlines, @outkey) { |s|
- line1 = "%{handle}%{title}"
- make_stream(line1, nil, s)
- }
- @bot.register_filter(:blog, @outkey) { |s|
- author = s[:author] ? (s[:author] + " ") : ""
- abt = s[:category] ? "about #{s[:category]} " : ""
- line1 = "%{handle}%{date}%{author}blogged %{abt}at %{link}"
- line2 = "%{handle}%{title} - %{desc}"
- make_stream(line1, line2, s, :author => author, :abt => abt)
- }
- @bot.register_filter(:photoblog, @outkey) { |s|
- author = s[:author] ? (s[:author] + " ") : ""
- abt = s[:category] ? "under #{s[:category]} " : ""
- line1 = "%{handle}%{date}%{author}added an image %{abt}at %{link}"
- line2 = "%{handle}%{title} - %{desc}"
- make_stream(line1, line2, s, :author => author, :abt => abt)
- }
- @bot.register_filter(:news, @outkey) { |s|
- line1 = "%{handle}%{date}%{title}%{at}%{link}" % s
- line2 = "%{handle}%{date}%{desc}" % s
- make_stream(line1, line2, s)
- }
- @bot.register_filter(:git, @outkey) { |s|
- author = s[:author] ? (s[:author] + " ") : ""
- line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}"
- make_stream(line1, nil, s, :author => author)
- }
- @bot.register_filter(:forum, @outkey) { |s|
- line1 = "%{handle}%{date}%{title}%{at}%{link}"
- make_stream(line1, nil, s)
- }
- @bot.register_filter(:wiki, @outkey) { |s|
- line1 = "%{handle}%{date}%{title}%{at}%{link}"
- line1 << "has been edited by %{author}. %{desc}"
- make_stream(line1, nil, s)
- }
- @bot.register_filter(:gmane, @outkey) { |s|
- line1 = "%{handle}%{date}Message %{title} sent by %{author}. %{desc}"
- make_stream(line1, nil, s)
- }
- @bot.register_filter(:trac, @outkey) { |s|
- author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author]
- line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}"
- line2 = nil
- unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/
- line2 = "%{handle}%{date}%{desc}"
- end
- make_stream(line1, line2, s, :author => author)
- }
- @bot.register_filter(:"/.", @outkey) { |s|
- dept = "(from the #{s[:item].slash_department} dept) " rescue nil
- sec = " in section #{s[:item].slash_section}" rescue nil
- line1 = "%{handle}%{date}%{dept}%{title}%{at}%{link} "
- line1 << "(posted by %{author}%{sec})"
- make_stream(line1, nil, s, :dept => dept, :sec => sec)
- }
- @bot.register_filter(:default, @outkey) { |s|
- line1 = "%{handle}%{date}%{title}%{at}%{link}"
- line1 << " (by %{author})" if s[:author]
- make_stream(line1, nil, s)
- }
+ @outkey ||= :"rss.out"
- # Define an HTML info filter too
+ # Define an HTML info filter
@bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) }
-
# This is the output format used by the input filter
- @bot.register_filter(:htmlinfo, @outkey) { |s|
+ rss_type :htmlinfo do |s|
line1 = "%{title}%{at}%{link}"
make_stream(line1, nil, s)
- }
+ end
+
+ # the default filter
+ rss_type :default do |s|
+ line1 = "%{handle}%{date}%{title}%{at}%{link}"
+ line1 << " (by %{author})" if s[:author]
+ make_stream(line1, nil, s)
+ end
+
+ @user_types ||= datafile 'types.rb'
+ load_filters
+ load_filters :path => @user_types
end
FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/1999/02/22-rdf)}
}
@feeds = @registry[:feeds]
- raise unless @feeds
+ raise LoadError, "corrupted feed database" unless @feeds
@registry.recovery = nil
when "rewatch"
"rss rewatch : restart threads that watch for changes in watched rss"
when "types"
- "rss types : show the rss types for which an output format existi (all other types will use the default one)"
+ "rss types : show the rss types for which an output format exist (all other types will use the default one)"
else
"manage RSS feeds: rss types|show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches"
end
parsed = parseRss(feed, m)
end
return unless feed.items
- m.reply "using old data" unless fetched and parsed
+ m.reply "using old data" unless fetched and parsed and parsed > 0
title = feed.title
items = feed.items
m.reply "Channel : #{title}"
disp.each do |item|
- printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true})
+ printFormattedRss(feed, item, {
+ :places => [m.replyto],
+ :handle => nil,
+ :date => true,
+ :announce_method => :say
+ })
end
end
def list_rss(m, params)
wanted = params[:handle]
- reply = String.new
- @feeds.each { |handle, feed|
- next if wanted and !handle.match(/#{wanted}/i)
- reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
- (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
- (reply << " (watched)") if feed.watched_by?(m.replyto)
- reply << "\n"
- }
- if reply.empty?
+ listed = @feeds.keys
+ if wanted
+ wanted_rx = Regexp.new(wanted, true)
+ listed.reject! { |handle| !handle.match(wanted_rx) }
+ end
+ listed.sort!
+ debug listed
+ if @bot.config['send.max_lines'] > 0 and listed.size > @bot.config['send.max_lines']
+ reply = listed.inject([]) do |ar, handle|
+ feed = @feeds[handle]
+ string = handle.dup
+ (string << " (#{feed.type})") if feed.type
+ (string << " (watched)") if feed.watched_by?(m.replyto)
+ ar << string
+ end.join(', ')
+ elsif listed.size > 0
+ reply = listed.inject([]) do |ar, handle|
+ feed = @feeds[handle]
+ string = "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
+ (string << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
+ (string << " (watched)") if feed.watched_by?(m.replyto)
+ ar << string
+ end.join("\n")
+ else
reply = "no feeds found"
reply << " matching #{wanted}" if wanted
end
- m.reply reply, :max_lines => reply.length
+ m.reply reply, :max_lines => 0
end
def watched_rss(m, params)
if params and handle = params[:handle]
feed = @feeds.fetch(handle.downcase, nil)
if feed
+ feed.http_cache = false
@bot.timer.reschedule(@watch[feed.handle], (params[:delay] || 0).to_f)
m.okay if m
else
private
def watchRss(feed, m=nil)
if @watch.has_key?(feed.handle)
- report_problem("watcher thread for #{feed.handle} is already running", nil, m)
+ # report_problem("watcher thread for #{feed.handle} is already running", nil, m)
return
end
status = Hash.new
failures = status[:failures]
begin
debug "fetching #{feed}"
- first_run = !feed.last_fetched
+
+ first_run = !feed.last_success
+ if (@bot.config['rss.announce_timeout'] > 0 &&
+ (Time.now - feed.last_success > @bot.config['rss.announce_timeout']))
+ debug "#{feed} wasn't polled for too long, supressing output"
+ first_run = true
+ end
oldxml = feed.xml ? feed.xml.dup : nil
- unless fetchRss(feed)
+ unless fetchRss(feed, nil, feed.http_cache)
failures += 1
else
+ feed.http_cache = true
if first_run
debug "first run for #{feed}, getting items"
parseRss(feed)
debug "xml for #{feed} didn't change"
failures -= 1 if failures > 0
else
- if not feed.items
- debug "no previous items in feed #{feed}"
- parseRss(feed)
- failures -= 1 if failures > 0
- else
- # This one is used for debugging
- otxt = []
+ # This one is used for debugging
+ otxt = []
+ if feed.items.nil?
+ oids = []
+ else
# These are used for checking new items vs old ones
oids = Set.new feed.items.map { |item|
uid = make_uid item
debug [uid, otxt.last].inspect
uid
}
+ end
- unless parseRss(feed)
- debug "no items in feed #{feed}"
+ nitems = parseRss(feed)
+ if nitems.nil?
failures += 1
+ elsif nitems == 0
+ debug "no items in feed #{feed}"
else
debug "Checking if new items are available for #{feed}"
failures -= 1 if failures > 0
}
if dispItems.length > 0
+ max = @bot.config['rss.announce_max']
debug "Found #{dispItems.length} new items in #{feed}"
+ if max > 0 and dispItems.length > max
+ debug "showing only the latest #{dispItems.length}"
+ feed.watchers.each do |loc|
+ @bot.say loc, (_("feed %{feed} had %{num} updates, showing the latest %{max}") % {
+ :feed => feed.handle,
+ :num => dispItems.length,
+ :max => max
+ })
+ end
+ dispItems.slice!(max..-1)
+ end
# When displaying watched feeds, publish them from older to newer
dispItems.reverse.each { |item|
printFormattedRss(feed, item)
debug "No new items found in #{feed}"
end
end
- end
end
end
rescue Exception => e
return seconds
end
- def select_nonempty(*ar)
- # debug ar
- ar.each { |i| return i unless i.nil_or_empty? }
- return nil
+ def make_date(obj)
+ if obj.kind_of? Time
+ obj.strftime("%Y/%m/%d %H:%M")
+ else
+ obj.to_s
+ end
end
- def printFormattedRss(feed, item, opts=nil)
+ def printFormattedRss(feed, item, options={})
# debug item
- places = feed.watchers
- handle = feed.handle.empty? ? "" : "::#{feed.handle}:: "
- date = String.new
- if opts
- places = opts[:places] if opts.key?(:places)
- handle = opts[:handle].to_s if opts.key?(:handle)
- if opts.key?(:date) && opts[:date]
- if item.respond_to?(:updated)
- if item.updated.content.class <= Time
- date = item.updated.content.strftime("%Y/%m/%d %H:%M")
- else
- date = item.updated.content.to_s
- end
- elsif item.respond_to?(:source) and item.source.respond_to?(:updated)
- if item.source.updated.content.class <= Time
- date = item.source.updated.content.strftime("%Y/%m/%d %H:%M")
- else
- date = item.source.updated.content.to_s
- end
- elsif item.respond_to?(:pubDate)
- if item.pubDate.class <= Time
- date = item.pubDate.strftime("%Y/%m/%d %H:%M")
- else
- date = item.pubDate.to_s
- end
- elsif item.respond_to?(:date)
- if item.date.class <= Time
- date = item.date.strftime("%Y/%m/%d %H:%M")
- else
- date = item.date.to_s
- end
- else
- date = "(no date)"
- end
- date += " :: "
+ opts = {
+ :places => feed.watchers,
+ :handle => feed.handle,
+ :date => false,
+ :announce_method => @bot.config['rss.announce_method']
+ }.merge options
+
+ places = opts[:places]
+ announce_method = opts[:announce_method]
+
+ handle = opts[:handle].to_s
+
+ date = \
+ if opts[:date]
+ if item.respond_to?(:updated)
+ make_date(item.updated.content)
+ elsif item.respond_to?(:source) and item.source.respond_to?(:updated)
+ make_date(item.source.updated.content)
+ elsif item.respond_to?(:pubDate)
+ make_date(item.pubDate)
+ elsif item.respond_to?(:date)
+ make_date(item.date)
+ else
+ "(no date)"
end
+ else
+ String.new
end
tit_opt = {}
# visible in the URL anyway
# TODO make this optional?
base_title.sub!(/^Changeset \[([\da-f]{40})\]:/) { |c| "(git commit)"} if feed.type == 'trac'
- title = "#{Bold}#{base_title.ircify_html(tit_opt)}#{Bold}"
+ title = base_title.ircify_html(tit_opt)
end
desc_opt = {}
desc_opt[:limit] = @bot.config['rss.text_max']
desc_opt[:a_href] = :link_out if @bot.config['rss.show_links']
- # We prefer content_encoded here as it tends to provide more html formatting
+ # We prefer content_encoded here as it tends to provide more html formatting
# for use with ircify_html.
if item.respond_to?(:content_encoded) && item.content_encoded
desc = item.content_encoded.ircify_html(desc_opt)
desc = "(?)"
end
- link = item.link.href rescue item.link rescue nil
+ link = item.link!
link.strip! if link
- category = select_nonempty((item.category.content rescue nil), (item.dc_subject rescue nil))
+ categories = item.categories!
+ category = item.category! || item.dc_subject!
category.strip! if category
- author = select_nonempty((item.author.name.content rescue nil), (item.dc_creator rescue nil), (item.author rescue nil))
+ author = item.dc_creator! || item.author!
author.strip! if author
line1 = nil
key = @bot.global_filter_name(feed.type, @outkey)
key = @bot.global_filter_name(:default, @outkey) unless @bot.has_filter?(key)
- output = @bot.filter(key, :item => item, :handle => handle, :date => date,
- :title => title, :desc => desc, :link => link,
- :category => category, :author => author, :at => at)
+ stream_hash = {
+ :item => item,
+ :handle => handle,
+ :handle_wrap => ['::', ':: '],
+ :date => date,
+ :date_wrap => [nil, ' :: '],
+ :title => title,
+ :title_wrap => Bold,
+ :desc => desc, :link => link,
+ :categories => categories,
+ :category => category, :author => author, :at => at
+ }
+ output = @bot.filter(key, stream_hash)
return output if places.empty?
places.each { |loc|
output.to_s.each_line { |line|
- @bot.say loc, line, :overlong => :truncate
+ @bot.__send__(announce_method, loc, line, :overlong => :truncate)
}
}
end
# reassign the 0.9 RDFs to 1.0, and hope it goes right.
xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"",
"xmlns=\"http://purl.org/rss/1.0/\"")
+ # make sure the parser doesn't double-convert in case the feed is not UTF-8
+ xml.sub!(/<\?xml (.*?)\?>/) do |match|
+ if /\bencoding=(['"])(.*?)\1/.match(match)
+ match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"')
+ end
+ match
+ end
feed.mutex.synchronize do
feed.xml = xml
+ feed.last_success = Time.now
end
return true
end
return nil unless feed.xml
feed.mutex.synchronize do
xml = feed.xml
- begin
- ## do validate parse
- rss = RSS::Parser.parse(xml)
- debug "parsed and validated #{feed}"
- rescue RSS::InvalidRSSError
- ## do non validate parse for invalid RSS 1.0
+ rss = nil
+ errors = []
+ RSS::AVAILABLE_PARSERS.each do |parser|
begin
- rss = RSS::Parser.parse(xml, false)
- debug "parsed but not validated #{feed}"
+ ## do validate parse
+ rss = RSS::Parser.parse(xml, true, true, parser)
+ debug "parsed and validated #{feed} with #{parser}"
+ break
+ rescue RSS::InvalidRSSError
+ begin
+ ## do non validate parse for invalid RSS 1.0
+ rss = RSS::Parser.parse(xml, false, true, parser)
+ debug "parsed but not validated #{feed} with #{parser}"
+ break
+ rescue RSS::Error => e
+ errors << [parser, e, "parsing rss stream failed, whoops =("]
+ end
rescue RSS::Error => e
- report_problem("parsing rss stream failed, whoops =(", e, m)
- return nil
+ errors << [parser, e, "parsing rss stream failed, oioi"]
+ rescue => e
+ errors << [parser, e, "processing error occured, sorry =("]
end
- rescue RSS::Error => e
- report_problem("parsing rss stream failed, oioi", e, m)
- return nil
- rescue => e
- report_problem("processing error occured, sorry =(", e, m)
- return nil
+ end
+ unless errors.empty?
+ debug errors
+ self.send(:report_problem, errors.last[2], errors.last[1], m)
+ return nil unless rss
end
items = []
if rss.nil?
if items.empty?
report_problem("no items found in the feed, maybe try weed?", e, m)
- return nil
+ else
+ feed.title = title.strip
+ feed.items = items
end
- feed.title = title.strip
- feed.items = items
- return true
+ return items.length
end
end
end