require 'rss'
-# Try to load rss/content/2.0 so we can access the data in <content:encoded>
+# Try to load rss/content/2.0 so we can access the data in <content:encoded>
# tags.
begin
require 'rss/content/2.0'
module ::RSS
- # Make an 'unique' ID for a given item, based on appropriate bot options
- # Currently only suppored is bot.config['rss.show_updated']: when true, the
- # description is included in the uid hashing, otherwise it's not
- #
- def RSS.item_uid_for_bot(item, opts={})
- options = { :show_updated => true}.merge(opts)
- desc = nil
- if options[:show_updated]
- desc = item.content.content rescue item.description rescue nil
- end
- [(item.title.content rescue item.title rescue nil),
- (item.link.href rescue item.link),
- desc].hash
- end
-
# Add support for Slashdot namespace in RDF. The code is just an adaptation
# of the DublinCore code.
unless defined?(SLASH_PREFIX)
SlashModel::ELEMENTS.collect! {|name| "#{SLASH_PREFIX}_#{name}"}
end
+
+ class Element
+ class << self
+ def def_bang(name, chain)
+ class_eval %<
+ def #{name}!
+ blank2nil { #{chain.join(' rescue ')} rescue nil }
+ end
+ >, *get_file_and_line_from_caller(0)
+ end
+ end
+
+ {
+ :link => %w{link.href link},
+ :guid => %w{guid.content guid},
+ :content => %w{content.content content},
+ :description => %w{description.content description},
+ :title => %w{title.content title},
+ :category => %w{category.content category},
+ :dc_subject => %w{dc_subject},
+ :author => %w{author.name.content author.name author},
+ :dc_creator => %w{dc_creator}
+ }.each { |name, chain| def_bang name, chain }
+
+ protected
+ def blank2nil(&block)
+ x = yield
+ (x && !x.empty?) ? x : nil
+ end
+ end
end
class ::RssBlob
attr_accessor :url, :handle, :type, :refresh_rate, :xml, :title, :items,
- :mutex, :watchers, :last_fetched
+ :mutex, :watchers, :last_fetched, :http_cache, :last_success
def initialize(url,handle=nil,type=nil,watchers=[], xml=nil, lf = nil)
@url = url
@type = type
@watchers=[]
@refresh_rate = nil
+ @http_cache = false
@xml = xml
@title = nil
@items = nil
@mutex = Mutex.new
@last_fetched = lf
+ @last_success = nil
sanitize_watchers(watchers)
end
:default => 300, :validate => Proc.new{|v| v > 30},
:desc => "How many seconds to sleep before checking RSS feeds again")
+ Config.register Config::IntegerValue.new('rss.announce_timeout',
+ :default => 0,
+ :desc => "Don't announce watched feed if these many seconds elapsed since the last successful update")
+
+ Config.register Config::IntegerValue.new('rss.announce_max',
+ :default => 3,
+ :desc => "Maximum number of new items to announce when a watched feed is updated")
+
Config.register Config::BooleanValue.new('rss.show_updated',
:default => true,
:desc => "Whether feed items for which the description was changed should be shown as new")
:default => true,
:desc => "Whether to display links from the text of a feed item.")
+ # Make a unique ID for a given item, based on appropriate bot options.
+ # Currently the only supported option is bot.config['rss.show_updated']: when false,
+ # only the guid/link is accounted for.
+
+ def make_uid(item)
+ uid = [item.guid! || item.link!]
+ if @bot.config['rss.show_updated']
+ uid.push(item.content! || item.description!)
+ uid.unshift item.title!
+ end
+ # debug "taking hash of #{uid.inspect}"
+ uid.hash
+ end
+
+
# We used to save the Mutex with the RssBlob, which was idiotic. And
- # since Mutexes dumped in one version might not be resotrable in another,
+ # since Mutexes dumped in one version might not be restorable in another,
# we need a few tricks to be able to restore data from other versions of Ruby
#
# When migrating 1.8.6 => 1.8.5, all we need to do is define an empty
# Auxiliary method used to collect two lines for rss output filters,
# running substitutions against DataStream _s_ optionally joined
# with hash _h_
- def make_stream(line1, line2, s, h)
- DataStream.new([line1, line2].compact.join("\n") % s.merge(h))
+ def make_stream(line1, line2, s, h={})
+ ss = s.merge(h)
+ DataStream.new([line1, line2].compact.join("\n") % ss, ss)
end
# Define default RSS filters
# TODO: load personal ones
def define_filters
@outkey = :"rss.out"
+ @bot.register_filter(:headlines, @outkey) { |s|
+ line1 = (s[:handle].empty? ? "%{date}" : "%{handle}") << "%{title}"
+ make_stream(line1, nil, s)
+ }
@bot.register_filter(:blog, @outkey) { |s|
author = s[:author] ? (s[:author] + " ") : ""
abt = s[:category] ? "about #{s[:category]} " : ""
make_stream(line1, line2, s, :author => author, :abt => abt)
}
@bot.register_filter(:news, @outkey) { |s|
- line1 = "%{handle}%{date}%{title} @ %{link}" % s
+ line1 = "%{handle}%{date}%{title}%{at}%{link}" % s
line2 = "%{handle}%{date}%{desc}" % s
make_stream(line1, line2, s)
}
@bot.register_filter(:git, @outkey) { |s|
- author = s[:author] ? (s[:author] + " ") : ""
- line1 = "%{handle}%{date}%{author}committed %{title} @ %{link}"
+ author = s[:author].sub(/@\S+?\s*>/, "@...>") + " " if s[:author]
+ line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}"
make_stream(line1, nil, s, :author => author)
}
@bot.register_filter(:forum, @outkey) { |s|
}
@bot.register_filter(:trac, @outkey) { |s|
author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author]
- line1 = "%{handle}%{date}%{author}%{title} @ %{link}"
+ line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}"
line2 = nil
unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/
line2 = "%{handle}%{date}%{desc}"
line1 << " (by %{author})" if s[:author]
make_stream(line1, nil, s)
}
+
+ # Define an HTML info filter too
+ @bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) }
+
+ # This is the output format used by the input filter
+ @bot.register_filter(:htmlinfo, @outkey) { |s|
+ line1 = "%{title}%{at}%{link}"
+ make_stream(line1, nil, s)
+ }
+ end
+
+ FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/1999/02/22-rdf)}
+ def htmlinfo_filter(s)
+ return nil unless s[:headers] and s[:headers]['x-rbot-location']
+ return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or
+ (s[:text].include?("<rdf:RDF") and s[:text].include?("<channel")) or
+ s[:text].include?("<rss") or s[:text].include?("<feed") or
+ s[:text].match(FEED_NS)
+ blob = RssBlob.new(s[:headers]['x-rbot-location'],"", :htmlinfo)
+ unless (fetchRss(blob, nil) and parseRss(blob, nil) rescue nil)
+ debug "#{s.pretty_inspect} is not an RSS feed, despite the appearances"
+ return nil
+ end
+ output = []
+ blob.items.each { |it|
+ output << printFormattedRss(blob, it)[:text]
+ }
+ return {:title => blob.title, :content => output.join(" | ")}
+ end
+
+ # Display the known rss types
+ def rss_types(m, params)
+ ar = @bot.filter_names(@outkey)
+ ar.delete(:default)
+ m.reply ar.map { |k| k.to_s }.sort!.join(", ")
end
attr_reader :feeds
}
@feeds = @registry[:feeds]
- raise unless @feeds
+ raise LoadError, "corrupted feed database" unless @feeds
@registry.recovery = nil
"rss who watches #{Bold}handle#{Bold}: lists watches for rss #{Bold}handle#{Bold}"
when "rewatch"
"rss rewatch : restart threads that watch for changes in watched rss"
+ when "types"
+ "rss types : show the rss types for which an output format exist (all other types will use the default one)"
else
- "manage RSS feeds: rss show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches"
+ "manage RSS feeds: rss types|show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches"
end
end
fetched = fetchRss(feed, m, false)
end
return unless fetched or feed.xml
- if not fetched and feed.items
- m.reply "using old data"
- else
+ if fetched or not feed.items
parsed = parseRss(feed, m)
- m.reply "using old data" unless parsed
end
return unless feed.items
+ m.reply "using old data" unless fetched and parsed and parsed > 0
+
title = feed.title
items = feed.items
def list_rss(m, params)
wanted = params[:handle]
- reply = String.new
- @feeds.each { |handle, feed|
- next if wanted and !handle.match(/#{wanted}/i)
- reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
- (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
- (reply << " (watched)") if feed.watched_by?(m.replyto)
- reply << "\n"
- }
- if reply.empty?
+ listed = @feeds.keys
+ if wanted
+ wanted_rx = Regexp.new(wanted, true)
+ listed.reject! { |handle| !handle.match(wanted_rx) }
+ end
+ listed.sort!
+ debug listed
+ if @bot.config['send.max_lines'] > 0 and listed.size > @bot.config['send.max_lines']
+ reply = listed.inject([]) do |ar, handle|
+ feed = @feeds[handle]
+ string = handle.dup
+ (string << " (#{feed.type})") if feed.type
+ (string << " (watched)") if feed.watched_by?(m.replyto)
+ ar << string
+ end.join(', ')
+ elsif listed.size > 0
+ reply = listed.inject([]) do |ar, handle|
+ feed = @feeds[handle]
+ string = "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
+ (string << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
+ (string << " (watched)") if feed.watched_by?(m.replyto)
+ ar << string
+ end.join("\n")
+ else
reply = "no feeds found"
reply << " matching #{wanted}" if wanted
end
- m.reply reply, :max_lines => reply.length
+ m.reply reply, :max_lines => 0
end
def watched_rss(m, params)
if params and handle = params[:handle]
feed = @feeds.fetch(handle.downcase, nil)
if feed
- @bot.timer.reschedule(@watch[feed.handle], 0)
+ feed.http_cache = false
+ @bot.timer.reschedule(@watch[feed.handle], (params[:delay] || 0).to_f)
m.okay if m
else
m.reply _("no such feed %{handle}") % { :handle => handle } if m
private
def watchRss(feed, m=nil)
if @watch.has_key?(feed.handle)
- report_problem("watcher thread for #{feed.handle} is already running", nil, m)
+ # report_problem("watcher thread for #{feed.handle} is already running", nil, m)
return
end
status = Hash.new
failures = status[:failures]
begin
debug "fetching #{feed}"
- first_run = !feed.last_fetched
+
+ first_run = !feed.last_success
+ if (@bot.config['rss.announce_timeout'] > 0 &&
+ (Time.now - feed.last_success > @bot.config['rss.announce_timeout']))
+ debug "#{feed} wasn't polled for too long, supressing output"
+ first_run = true
+ end
oldxml = feed.xml ? feed.xml.dup : nil
- unless fetchRss(feed)
+ unless fetchRss(feed, nil, feed.http_cache)
failures += 1
else
+ feed.http_cache = true
if first_run
debug "first run for #{feed}, getting items"
parseRss(feed)
debug "xml for #{feed} didn't change"
failures -= 1 if failures > 0
else
- if not feed.items
- debug "no previous items in feed #{feed}"
- parseRss(feed)
- failures -= 1 if failures > 0
- else
- # This one is used for debugging
- otxt = []
+ # This one is used for debugging
+ otxt = []
+ if feed.items.nil?
+ oids = []
+ else
# These are used for checking new items vs old ones
- uid_opts = { :show_updated => @bot.config['rss.show_updated'] }
oids = Set.new feed.items.map { |item|
- uid = RSS.item_uid_for_bot(item, uid_opts)
+ uid = make_uid item
otxt << item.to_s
debug [uid, item].inspect
debug [uid, otxt.last].inspect
uid
}
+ end
- unless parseRss(feed)
- debug "no items in feed #{feed}"
+ nitems = parseRss(feed)
+ if nitems.nil?
failures += 1
+ elsif nitems == 0
+ debug "no items in feed #{feed}"
else
debug "Checking if new items are available for #{feed}"
failures -= 1 if failures > 0
# debug feed.xml
dispItems = feed.items.reject { |item|
- uid = RSS.item_uid_for_bot(item, uid_opts)
+ uid = make_uid item
txt = item.to_s
if oids.include?(uid)
debug "rejecting old #{uid} #{item.inspect}"
}
if dispItems.length > 0
+ max = @bot.config['rss.announce_max']
debug "Found #{dispItems.length} new items in #{feed}"
+ if max > 0 and dispItems.length > max
+ debug "showing only the latest #{dispItems.length}"
+ feed.watchers.each do |loc|
+ @bot.say loc, (_("feed %{feed} had %{num} updates, showing the latest %{max}") % {
+ :feed => feed.handle,
+ :num => dispItems.length,
+ :max => max
+ })
+ end
+ dispItems.slice!(max..-1)
+ end
# When displaying watched feeds, publish them from older to newer
dispItems.reverse.each { |item|
printFormattedRss(feed, item)
debug "No new items found in #{feed}"
end
end
- end
end
end
rescue Exception => e
return seconds
end
- def select_nonempty(*ar)
- debug ar
- ret = ar.map { |i| (i && i.empty?) ? nil : i }.compact.first
- (ret && ret.empty?) ? nil : ret
- end
-
def printFormattedRss(feed, item, opts=nil)
- debug item
+ # debug item
places = feed.watchers
- handle = "::#{feed.handle}:: "
+ handle = feed.handle.empty? ? "" : "::#{feed.handle}:: "
date = String.new
if opts
places = opts[:places] if opts.key?(:places)
else
date = item.source.updated.content.to_s
end
- elsif item.respond_to?(:pubDate)
+ elsif item.respond_to?(:pubDate)
if item.pubDate.class <= Time
date = item.pubDate.strftime("%Y/%m/%d %H:%M")
else
desc_opt[:limit] = @bot.config['rss.text_max']
desc_opt[:a_href] = :link_out if @bot.config['rss.show_links']
- # We prefer content_encoded here as it tends to provide more html formatting
+ # We prefer content_encoded here as it tends to provide more html formatting
# for use with ircify_html.
if item.respond_to?(:content_encoded) && item.content_encoded
desc = item.content_encoded.ircify_html(desc_opt)
elsif item.respond_to?(:description) && item.description
desc = item.description.ircify_html(desc_opt)
- else
+ elsif item.respond_to?(:content) && item.content
if item.content.type == "html"
desc = item.content.content.ircify_html(desc_opt)
else
desc = desc.slice(0, desc_opt[:limit]) + "#{Reverse}...#{Reverse}"
end
end
+ else
+ desc = "(?)"
end
- link = item.link.href rescue item.link.chomp rescue nil
+ link = item.link!
+ link.strip! if link
- category = select_nonempty((item.category.content rescue nil), (item.dc_subject rescue nil))
- author = select_nonempty((item.author.name.content rescue nil), (item.dc_creator rescue nil), (item.author rescue nil))
+ category = item.category! || item.dc_subject!
+ category.strip! if category
+ author = item.dc_creator! || item.author!
+ author.strip! if author
line1 = nil
line2 = nil
:title => title, :desc => desc, :link => link,
:category => category, :author => author, :at => at)
+ return output if places.empty?
+
places.each { |loc|
output.to_s.each_line { |line|
@bot.say loc, line, :overlong => :truncate
# reassign the 0.9 RDFs to 1.0, and hope it goes right.
xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"",
"xmlns=\"http://purl.org/rss/1.0/\"")
+ # make sure the parser doesn't double-convert in case the feed is not UTF-8
+ xml.sub!(/<\?xml (.*?)\?>/) do |match|
+ if /\bencoding=(['"])(.*?)\1/.match(match)
+ match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"')
+ end
+ match
+ end
feed.mutex.synchronize do
feed.xml = xml
+ feed.last_success = Time.now
end
return true
end
return nil unless feed.xml
feed.mutex.synchronize do
xml = feed.xml
- begin
- ## do validate parse
- rss = RSS::Parser.parse(xml)
- debug "parsed and validated #{feed}"
- rescue RSS::InvalidRSSError
- ## do non validate parse for invalid RSS 1.0
+ rss = nil
+ errors = []
+ RSS::AVAILABLE_PARSERS.each do |parser|
begin
- rss = RSS::Parser.parse(xml, false)
- debug "parsed but not validated #{feed}"
+ ## do validate parse
+ rss = RSS::Parser.parse(xml, true, true, parser)
+ debug "parsed and validated #{feed} with #{parser}"
+ break
+ rescue RSS::InvalidRSSError
+ begin
+ ## do non validate parse for invalid RSS 1.0
+ rss = RSS::Parser.parse(xml, false, true, parser)
+ debug "parsed but not validated #{feed} with #{parser}"
+ break
+ rescue RSS::Error => e
+ errors << [parser, e, "parsing rss stream failed, whoops =("]
+ end
rescue RSS::Error => e
- report_problem("parsing rss stream failed, whoops =(", e, m)
- return nil
+ errors << [parser, e, "parsing rss stream failed, oioi"]
+ rescue => e
+ errors << [parser, e, "processing error occured, sorry =("]
end
- rescue RSS::Error => e
- report_problem("parsing rss stream failed, oioi", e, m)
- return nil
- rescue => e
- report_problem("processing error occured, sorry =(", e, m)
- return nil
+ end
+ unless errors.empty?
+ debug errors
+ self.send(:report_problem, errors.last[2], errors.last[1], m)
+ return nil unless rss
end
items = []
if rss.nil?
- report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m)
+ if xml.match(/xmlns\s*=\s*(['"])http:\/\/www.w3.org\/2005\/Atom\1/) and not defined?(RSS::Atom)
+ report_problem("#{feed.handle} @ #{feed.url} looks like an Atom feed, but your Ruby/RSS library doesn't seem to support it. Consider getting the latest version from http://raa.ruby-lang.org/project/rss/", nil, m)
+ else
+ report_problem("#{feed.handle} @ #{feed.url} doesn't seem to contain an RSS or Atom feed I can read", nil, m)
+ end
+ return nil
else
begin
rss.output_encoding = 'UTF-8'
return nil
end
if rss.respond_to? :channel
- rss.channel.title ||= "Unknown"
+ rss.channel.title ||= "(?)"
title = rss.channel.title
else
title = rss.title.content
end
rss.items.each do |item|
- item.title ||= "Unknown"
+ item.title ||= "(?)"
items << item
end
end
if items.empty?
report_problem("no items found in the feed, maybe try weed?", e, m)
- return nil
+ else
+ feed.title = title.strip
+ feed.items = items
end
- feed.title = title
- feed.items = items
- return true
+ return items.length
end
end
end
:action => 'unwatch_rss'
plugin.map 'rss rmwatch :handle [in :chan]',
:action => 'unwatch_rss'
-plugin.map 'rss rewatch [:handle]',
+plugin.map 'rss rewatch [:handle] [:delay]',
:action => 'rewatch_rss'
+plugin.map 'rss types',
+ :action => 'rss_types'