module ::RSS
- # Make an 'unique' ID for a given item, based on appropriate bot options
- # Currently only suppored is bot.config['rss.show_updated']: when true, the
- # description is included in the uid hashing, otherwise it's not
- #
- def RSS.item_uid_for_bot(item, opts={})
- options = { :show_updated => true}.merge(opts)
- desc = nil
- if options[:show_updated]
- desc = item.content.content rescue item.description rescue nil
- end
- [(item.title.content rescue item.title rescue nil),
- (item.link.href rescue item.link),
- desc].hash
- end
-
# Add support for Slashdot namespace in RDF. The code is just an adaptation
# of the DublinCore code.
unless defined?(SLASH_PREFIX)
:default => true,
:desc => "Whether to display links from the text of a feed item.")
+ # Make a 'unique' ID for a given item, based on appropriate bot options.
+ # Currently the only supported one is bot.config['rss.show_updated']: when
+ # false, only the guid/link is accounted for.
+
+ def block_rescue(df = nil, &block)
+ v = block.call rescue nil
+ (String === v && '' != v) ? v : nil
+ end
+
+ def make_uid(item)
+ uid = [
+ (block_rescue do item.guid.content end ||
+ block_rescue do item.guid end ||
+ block_rescue do item.link.href end ||
+ block_rescue do item.link end
+ )
+ ]
+ if @bot.config['rss.show_updated']
+ uid.push(
+ block_rescue do item.content.content end ||
+ block_rescue do item.description end
+ )
+ uid.unshift(
+ block_rescue do item.title.content end ||
+ block_rescue do item.title end
+ )
+ end
+ # debug "taking hash of #{uid.inspect}"
+ uid.hash
+ end
+
+
# We used to save the Mutex with the RssBlob, which was idiotic. And
# since Mutexes dumped in one version might not be resotrable in another,
# we need a few tricks to be able to restore data from other versions of Ruby
# Auxiliary method used to collect two lines for rss output filters,
# running substitutions against DataStream _s_ optionally joined
# with hash _h_
- def make_stream(line1, line2, s, h)
- DataStream.new([line1, line2].compact.join("\n") % s.merge(h))
+ def make_stream(line1, line2, s, h={})
+ ss = s.merge(h)
+ DataStream.new([line1, line2].compact.join("\n") % ss, ss)
end
# Define default RSS filters
# TODO: load personal ones
def define_filters
@outkey = :"rss.out"
+ @bot.register_filter(:headlines, @outkey) { |s|
+ line1 = (s[:handle].empty? ? "%{date}" : "%{handle}") << "%{title}"
+ make_stream(line1, nil, s)
+ }
@bot.register_filter(:blog, @outkey) { |s|
author = s[:author] ? (s[:author] + " ") : ""
abt = s[:category] ? "about #{s[:category]} " : ""
make_stream(line1, line2, s, :author => author, :abt => abt)
}
@bot.register_filter(:news, @outkey) { |s|
- line1 = "%{handle}%{date}%{title} @ %{link}" % s
+ line1 = "%{handle}%{date}%{title}%{at}%{link}" % s
line2 = "%{handle}%{date}%{desc}" % s
make_stream(line1, line2, s)
}
@bot.register_filter(:git, @outkey) { |s|
author = s[:author] ? (s[:author] + " ") : ""
- line1 = "%{handle}%{date}%{author}committed %{title} @ %{link}"
+ line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}"
make_stream(line1, nil, s, :author => author)
}
@bot.register_filter(:forum, @outkey) { |s|
}
@bot.register_filter(:trac, @outkey) { |s|
author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author]
- line1 = "%{handle}%{date}%{author}%{title} @ %{link}"
+ line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}"
line2 = nil
unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/
line2 = "%{handle}%{date}%{desc}"
line1 << " (by %{author})" if s[:author]
make_stream(line1, nil, s)
}
+
+ # Define an HTML info filter too
+ @bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) }
+
+ # This is the output format used by the input filter
+ @bot.register_filter(:htmlinfo, @outkey) { |s|
+ line1 = "%{title}%{at}%{link}"
+ make_stream(line1, nil, s)
+ }
+ end
+
+ FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/1999/02/22-rdf)}
+ def htmlinfo_filter(s)
+ return nil unless s[:headers] and s[:headers]['x-rbot-location']
+ return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or
+ (s[:text].include?("<rdf:RDF") and s[:text].include?("<channel")) or
+ s[:text].include?("<rss") or s[:text].include?("<feed") or
+ s[:text].match(FEED_NS)
+ blob = RssBlob.new(s[:headers]['x-rbot-location'],"", :htmlinfo)
+ unless (fetchRss(blob, nil) and parseRss(blob, nil) rescue nil)
+ debug "#{s.pretty_inspect} is not an RSS feed, despite the appearances"
+ return nil
+ end
+ output = []
+ blob.items.each { |it|
+ output << printFormattedRss(blob, it)[:text]
+ }
+ return {:title => blob.title, :content => output.join(" | ")}
end
# Display the known rss types
}
@feeds = @registry[:feeds]
- raise unless @feeds
+ raise LoadError, "corrupted feed database" unless @feeds
@registry.recovery = nil
fetched = fetchRss(feed, m, false)
end
return unless fetched or feed.xml
- if not fetched and feed.items
- m.reply "using old data"
- else
+ if fetched or not feed.items
parsed = parseRss(feed, m)
- m.reply "using old data" unless parsed
end
return unless feed.items
+ m.reply "using old data" unless fetched and parsed
+
title = feed.title
items = feed.items
if params and handle = params[:handle]
feed = @feeds.fetch(handle.downcase, nil)
if feed
- @bot.timer.reschedule(@watch[feed.handle], 0)
+ @bot.timer.reschedule(@watch[feed.handle], (params[:delay] || 0).to_f)
m.okay if m
else
m.reply _("no such feed %{handle}") % { :handle => handle } if m
otxt = []
# These are used for checking new items vs old ones
- uid_opts = { :show_updated => @bot.config['rss.show_updated'] }
oids = Set.new feed.items.map { |item|
- uid = RSS.item_uid_for_bot(item, uid_opts)
+ uid = make_uid item
otxt << item.to_s
debug [uid, item].inspect
debug [uid, otxt.last].inspect
# debug feed.xml
dispItems = feed.items.reject { |item|
- uid = RSS.item_uid_for_bot(item, uid_opts)
+ uid = make_uid item
txt = item.to_s
if oids.include?(uid)
debug "rejecting old #{uid} #{item.inspect}"
end
def select_nonempty(*ar)
- debug ar
- ret = ar.map { |i| (i && i.empty?) ? nil : i }.compact.first
- (ret && ret.empty?) ? nil : ret
+ # debug ar
+ ar.each { |i| return i unless i.nil_or_empty? }
+ return nil
end
def printFormattedRss(feed, item, opts=nil)
- debug item
+ # debug item
places = feed.watchers
- handle = "::#{feed.handle}:: "
+ handle = feed.handle.empty? ? "" : "::#{feed.handle}:: "
date = String.new
if opts
places = opts[:places] if opts.key?(:places)
desc = item.content_encoded.ircify_html(desc_opt)
elsif item.respond_to?(:description) && item.description
desc = item.description.ircify_html(desc_opt)
- else
+ elsif item.respond_to?(:content) && item.content
if item.content.type == "html"
desc = item.content.content.ircify_html(desc_opt)
else
desc = desc.slice(0, desc_opt[:limit]) + "#{Reverse}...#{Reverse}"
end
end
+ else
+ desc = "(?)"
end
- link = item.link.href rescue item.link.chomp rescue nil
+ link = item.link.href rescue item.link rescue nil
+ link.strip! if link
category = select_nonempty((item.category.content rescue nil), (item.dc_subject rescue nil))
+ category.strip! if category
author = select_nonempty((item.author.name.content rescue nil), (item.dc_creator rescue nil), (item.author rescue nil))
+ author.strip! if author
line1 = nil
line2 = nil
:title => title, :desc => desc, :link => link,
:category => category, :author => author, :at => at)
+ return output if places.empty?
+
places.each { |loc|
output.to_s.each_line { |line|
@bot.say loc, line, :overlong => :truncate
# reassign the 0.9 RDFs to 1.0, and hope it goes right.
xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"",
"xmlns=\"http://purl.org/rss/1.0/\"")
+ # make sure the parser doesn't double-convert in case the feed is not UTF-8
+ xml.sub!(/<\?xml (.*?)\?>/) do |match|
+ if /\bencoding=(['"])(.*?)\1/.match(match)
+ match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"')
+ end
+ match
+ end
feed.mutex.synchronize do
feed.xml = xml
end
end
items = []
if rss.nil?
- report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m)
+ if xml.match(/xmlns\s*=\s*(['"])http:\/\/www.w3.org\/2005\/Atom\1/) and not defined?(RSS::Atom)
+ report_problem("#{feed.handle} @ #{feed.url} looks like an Atom feed, but your Ruby/RSS library doesn't seem to support it. Consider getting the latest version from http://raa.ruby-lang.org/project/rss/", nil, m)
+ else
+ report_problem("#{feed.handle} @ #{feed.url} doesn't seem to contain an RSS or Atom feed I can read", nil, m)
+ end
+ return nil
else
begin
rss.output_encoding = 'UTF-8'
return nil
end
if rss.respond_to? :channel
- rss.channel.title ||= "Unknown"
+ rss.channel.title ||= "(?)"
title = rss.channel.title
else
title = rss.title.content
end
rss.items.each do |item|
- item.title ||= "Unknown"
+ item.title ||= "(?)"
items << item
end
end
report_problem("no items found in the feed, maybe try weed?", e, m)
return nil
end
- feed.title = title
+ feed.title = title.strip
feed.items = items
return true
end
:action => 'unwatch_rss'
plugin.map 'rss rmwatch :handle [in :chan]',
:action => 'unwatch_rss'
-plugin.map 'rss rewatch [:handle]',
+plugin.map 'rss rewatch [:handle] [:delay]',
:action => 'rewatch_rss'
plugin.map 'rss types',
:action => 'rss_types'