4 # :title: RSS feed plugin for rbot
6 # Author:: Stanislav Karchebny <berkus@madfire.net>
7 # Author:: Ian Monroe <ian@monroe.nu>
8 # Author:: Mark Kretschmann <markey@web.de>
9 # Author:: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
11 # Copyright:: (C) 2004 Stanislav Karchebny
12 # Copyright:: (C) 2005 Ian Monroe, Mark Kretschmann
13 # Copyright:: (C) 2006-2007 Giuseppe Bilotta
15 # License:: MIT license
19 # Try to load rss/content/2.0 so we can access the data in <content:encoded>
22 require 'rss/content/2.0'
28 # Add support for Slashdot namespace in RDF. The code is just an adaptation
29 # of the DublinCore code.
30 unless defined?(SLASH_PREFIX)
31 SLASH_PREFIX = 'slash'
32 SLASH_URI = "http://purl.org/rss/1.0/modules/slash/"
34 RDF.install_ns(SLASH_PREFIX, SLASH_URI)
37 def append_features(klass)
40 return if klass.instance_of?(Module)
41 SlashModel::ELEMENT_NAME_INFOS.each do |name, plural_name|
42 plural = plural_name || "#{name}s"
43 full_name = "#{SLASH_PREFIX}_#{name}"
44 full_plural_name = "#{SLASH_PREFIX}_#{plural}"
45 klass_name = "Slash#{Utils.to_class_name(name)}"
47 # This will fail with older version of the Ruby RSS module
49 klass.install_have_children_element(name, SLASH_URI, "*",
50 full_name, full_plural_name)
51 klass.install_must_call_validator(SLASH_PREFIX, SLASH_URI)
53 klass.module_eval("install_have_children_element(#{full_name.dump}, #{full_plural_name.dump})")
56 klass.module_eval(<<-EOC, *get_file_and_line_from_caller(0))
57 remove_method :#{full_name} if method_defined? :#{full_name}
58 remove_method :#{full_name}= if method_defined? :#{full_name}=
59 remove_method :set_#{full_name} if method_defined? :set_#{full_name}
62 @#{full_name}.first and @#{full_name}.first.value
65 def #{full_name}=(new_value)
66 @#{full_name}[0] = Utils.new_with_value_if_need(#{klass_name}, new_value)
68 alias set_#{full_name} #{full_name}=
85 ELEMENT_NAME_INFOS = SlashModel::TEXT_ELEMENTS.to_a
87 ELEMENTS = TEXT_ELEMENTS.keys
89 ELEMENTS.each do |name, plural_name|
90 module_eval(<<-EOC, *get_file_and_line_from_caller(0))
91 class Slash#{Utils.to_class_name(name)} < Element
106 @tag_name = #{name.dump}
108 alias_method(:value, :content)
109 alias_method(:value=, :content=)
111 def initialize(*args)
113 if Utils.element_initialize_arguments?(args)
117 self.content = args[0]
119 # Older Ruby RSS module
122 self.content = args[0]
127 tag_name_with_prefix(SLASH_PREFIX)
130 def maker_target(target)
134 def setup_maker_attributes(#{name})
135 #{name}.content = content
143 class Item; include SlashModel; end
146 SlashModel::ELEMENTS.each do |name|
147 class_name = Utils.to_class_name(name)
148 BaseListener.install_class_name(SLASH_URI, name, "Slash#{class_name}")
151 SlashModel::ELEMENTS.collect! {|name| "#{SLASH_PREFIX}_#{name}"}
156 def def_bang(name, chain)
159 blank2nil { #{chain.join(' rescue ')} rescue nil }
161 >, *get_file_and_line_from_caller(0)
166 :link => %w{link.href link},
167 :guid => %w{guid.content guid},
168 :content => %w{content.content content},
169 :description => %w{description.content description},
170 :title => %w{title.content title},
171 :category => %w{category.content category},
172 :dc_subject => %w{dc_subject},
173 :author => %w{author.name.content author.name author},
174 :dc_creator => %w{dc_creator}
175 }.each { |name, chain| def_bang name, chain }
178 def blank2nil(&block)
180 (x && !x.empty?) ? x : nil
187 attr_accessor :url, :handle, :type, :refresh_rate, :xml, :title, :items,
188 :mutex, :watchers, :last_fetched
190 def initialize(url,handle=nil,type=nil,watchers=[], xml=nil, lf = nil)
205 sanitize_watchers(watchers)
209 @mutex.synchronize do
212 @type ? @type.dup : nil,
214 @xml ? @xml.dup : nil,
219 # Downcase all watchers, possibly turning them into Strings if they weren't
220 def sanitize_watchers(list=@watchers)
233 @watchers.include?(who.downcase)
240 @mutex.synchronize do
241 @watchers << who.downcase
247 @mutex.synchronize do
248 @watchers.delete(who.downcase)
253 [@handle,@url,@type,@refresh_rate,@watchers]
256 def to_s(watchers=false)
258 a = self.to_a.flatten
262 a.compact.join(" | ")
266 class RSSFeedsPlugin < Plugin
# Limit on how many characters of an item's header are used; the validator
# accepts values strictly between 0 and 200.
Config.register(
  Config::IntegerValue.new(
    'rss.head_max',
    :default => 100,
    :validate => Proc.new { |chars| chars > 0 && chars < 200 },
    :desc => "How many characters to use of a RSS item header"
  )
)

# Limit on how many characters of an item's text are used; the validator
# accepts values strictly between 0 and 400.
Config.register(
  Config::IntegerValue.new(
    'rss.text_max',
    :default => 200,
    :validate => Proc.new { |chars| chars > 0 && chars < 400 },
    :desc => "How many characters to use of a RSS item text"
  )
)

# Base delay, in seconds, between two checks of a feed; the validator
# requires more than 30 seconds.
Config.register(
  Config::IntegerValue.new(
    'rss.thread_sleep',
    :default => 300,
    :validate => Proc.new { |secs| secs > 30 },
    :desc => "How many seconds to sleep before checking RSS feeds again"
  )
)
279 Config.register Config::BooleanValue.new('rss.show_updated',
281 :desc => "Whether feed items for which the description was changed should be shown as new")
283 Config.register Config::BooleanValue.new('rss.show_links',
285 :desc => "Whether to display links from the text of a feed item.")
287 # Make an 'unique' ID for a given item, based on appropriate bot options
288 # Currently the only supported option is bot.config['rss.show_updated']: when false,
289 # only the guid/link is accounted for.
292 uid = [item.guid! || item.link!]
293 if @bot.config['rss.show_updated']
294 uid.push(item.content! || item.description!)
295 uid.unshift item.title!
297 # debug "taking hash of #{uid.inspect}"
302 # We used to save the Mutex with the RssBlob, which was idiotic. And
303 # since Mutexes dumped in one version might not be restorable in another,
304 # we need a few tricks to be able to restore data from other versions of Ruby
306 # When migrating 1.8.6 => 1.8.5, all we need to do is define an empty
307 # #marshal_load() method for Mutex. For 1.8.5 => 1.8.6 we need something
308 # dirtier, as seen later on in the initialization code.
309 unless Mutex.new.respond_to?(:marshal_load)
311 def marshal_load(str)
317 # Auxiliary method used to collect two lines for rss output filters,
318 # running substitutions against DataStream _s_ optionally joined
320 def make_stream(line1, line2, s, h={})
322 DataStream.new([line1, line2].compact.join("\n") % ss, ss)
325 # Define default RSS filters
327 # TODO: load personal ones
330 @bot.register_filter(:headlines, @outkey) { |s|
331 line1 = (s[:handle].empty? ? "%{date}" : "%{handle}") << "%{title}"
332 make_stream(line1, nil, s)
334 @bot.register_filter(:blog, @outkey) { |s|
335 author = s[:author] ? (s[:author] + " ") : ""
336 abt = s[:category] ? "about #{s[:category]} " : ""
337 line1 = "%{handle}%{date}%{author}blogged %{abt}at %{link}"
338 line2 = "%{handle}%{title} - %{desc}"
339 make_stream(line1, line2, s, :author => author, :abt => abt)
341 @bot.register_filter(:photoblog, @outkey) { |s|
342 author = s[:author] ? (s[:author] + " ") : ""
343 abt = s[:category] ? "under #{s[:category]} " : ""
344 line1 = "%{handle}%{date}%{author}added an image %{abt}at %{link}"
345 line2 = "%{handle}%{title} - %{desc}"
346 make_stream(line1, line2, s, :author => author, :abt => abt)
348 @bot.register_filter(:news, @outkey) { |s|
349 line1 = "%{handle}%{date}%{title}%{at}%{link}" % s
350 line2 = "%{handle}%{date}%{desc}" % s
351 make_stream(line1, line2, s)
353 @bot.register_filter(:git, @outkey) { |s|
354 author = s[:author].sub(/@\S+?\s*>/, "@...>") + " " if s[:author]
355 line1 = "%{handle}%{date}%{author}committed %{title}%{at}%{link}"
356 make_stream(line1, nil, s, :author => author)
358 @bot.register_filter(:forum, @outkey) { |s|
359 line1 = "%{handle}%{date}%{title}%{at}%{link}"
360 make_stream(line1, nil, s)
362 @bot.register_filter(:wiki, @outkey) { |s|
363 line1 = "%{handle}%{date}%{title}%{at}%{link}"
364 line1 << "has been edited by %{author}. %{desc}"
365 make_stream(line1, nil, s)
367 @bot.register_filter(:gmane, @outkey) { |s|
368 line1 = "%{handle}%{date}Message %{title} sent by %{author}. %{desc}"
369 make_stream(line1, nil, s)
371 @bot.register_filter(:trac, @outkey) { |s|
372 author = s[:author].sub(/@\S+?\s*>/, "@...>") + ": " if s[:author]
373 line1 = "%{handle}%{date}%{author}%{title}%{at}%{link}"
375 unless s[:item].title =~ /^(?:Changeset \[(?:[\da-f]+)\]|\(git commit\))/
376 line2 = "%{handle}%{date}%{desc}"
378 make_stream(line1, line2, s, :author => author)
380 @bot.register_filter(:"/.", @outkey) { |s|
381 dept = "(from the #{s[:item].slash_department} dept) " rescue nil
382 sec = " in section #{s[:item].slash_section}" rescue nil
383 line1 = "%{handle}%{date}%{dept}%{title}%{at}%{link} "
384 line1 << "(posted by %{author}%{sec})"
385 make_stream(line1, nil, s, :dept => dept, :sec => sec)
387 @bot.register_filter(:default, @outkey) { |s|
388 line1 = "%{handle}%{date}%{title}%{at}%{link}"
389 line1 << " (by %{author})" if s[:author]
390 make_stream(line1, nil, s)
393 # Define an HTML info filter too
394 @bot.register_filter(:rss, :htmlinfo) { |s| htmlinfo_filter(s) }
396 # This is the output format used by the input filter
397 @bot.register_filter(:htmlinfo, @outkey) { |s|
398 line1 = "%{title}%{at}%{link}"
399 make_stream(line1, nil, s)
# Matches an xmlns declaration that points at the RSS 1.0 namespace
# (purl.org/rss) or at the RDF syntax namespace
# (http://www.w3.org/1999/02/22-rdf-syntax-ns#). htmlinfo_filter uses it as
# one of the hints that a fetched document is a feed.
#
# The RDF namespace is hosted on www.w3.org; the historical "www.w3c.org"
# spelling is still accepted so nothing that matched before stops matching.
# Dots in the host names are escaped so they only match a literal ".".
FEED_NS = %r{xmlns.*http://(purl\.org/rss|www\.w3c?\.org/1999/02/22-rdf)}
404 def htmlinfo_filter(s)
405 return nil unless s[:headers] and s[:headers]['x-rbot-location']
406 return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or
407 (s[:text].include?("<rdf:RDF") and s[:text].include?("<channel")) or
408 s[:text].include?("<rss") or s[:text].include?("<feed") or
409 s[:text].match(FEED_NS)
410 blob = RssBlob.new(s[:headers]['x-rbot-location'],"", :htmlinfo)
411 unless (fetchRss(blob, nil) and parseRss(blob, nil) rescue nil)
412 debug "#{s.pretty_inspect} is not an RSS feed, despite the appearances"
416 blob.items.each { |it|
417 output << printFormattedRss(blob, it)[:text]
419 return {:title => blob.title, :content => output.join(" | ")}
422 # Display the known rss types
423 def rss_types(m, params)
424 ar = @bot.filter_names(@outkey)
426 m.reply ar.map { |k| k.to_s }.sort!.join(", ")
436 if @registry.has_key?(:feeds)
437 # When migrating from Ruby 1.8.5 to 1.8.6, dumped Mutexes may render the
438 # data unrestorable. If this happens, we patch the data, thus allowing
439 # the restore to work.
441 # This is actually pretty safe for a number of reasons:
442 # * the code is only called if standard marshalling fails
443 # * the string we look for is quite unlikely to appear randomly
444 # * if the string appears somewhere and the patched string isn't recoverable
445 # either, we'll get another (unrecoverable) error, which makes the rss
446 # plugin unusable, just like it was if no recovery was attempted
447 # * if the string appears somewhere and the patched string is recoverable,
448 # we may get a b0rked feed, which is eventually overwritten by a clean
449 # one, so the worst thing that can happen is that a feed update spams
451 @registry.recovery = Proc.new { |val|
452 patched = val.sub(":\v@mutexo:\nMutex", ":\v@mutexo:\vObject")
453 ret = Marshal.restore(patched)
454 ret.each_value { |blob|
460 @feeds = @registry[:feeds]
461 raise LoadError, "corrupted feed database" unless @feeds
463 @registry.recovery = nil
465 @feeds.keys.grep(/[A-Z]/) { |k|
466 @feeds[k.downcase] = @feeds[k]
486 @feeds.select { |h, f| f.watched? }
495 unparsed = Hash.new()
498 # we don't want to save the mutex
499 unparsed[k].mutex = nil
501 @registry[:feeds] = unparsed
504 def stop_watch(handle)
505 if @watch.has_key?(handle)
507 debug "Stopping watch #{handle}"
508 @bot.timer.remove(@watch[handle])
509 @watch.delete(handle)
510 rescue Exception => e
511 report_problem("Failed to stop watch for #{handle}", e, nil)
517 @watch.each_key { |k|
522 def help(plugin,topic="")
525 "rss show #{Bold}handle#{Bold} [#{Bold}limit#{Bold}] : show #{Bold}limit#{Bold} (default: 5, max: 15) entries from rss #{Bold}handle#{Bold}; #{Bold}limit#{Bold} can also be in the form a..b, to display a specific range of items"
527 "rss list [#{Bold}handle#{Bold}] : list all rss feeds (matching #{Bold}handle#{Bold})"
529 "rss watched [#{Bold}handle#{Bold}] [in #{Bold}chan#{Bold}]: list all watched rss feeds (matching #{Bold}handle#{Bold}) (in channel #{Bold}chan#{Bold})"
530 when "who", "watches", "who watches"
531 "rss who watches [#{Bold}handle#{Bold}]]: list all watchers for rss feeds (matching #{Bold}handle#{Bold})"
533 "rss add #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : add a new rss called #{Bold}handle#{Bold} from url #{Bold}url#{Bold} (of type #{Bold}type#{Bold})"
535 "rss change #{Bold}what#{Bold} of #{Bold}handle#{Bold} to #{Bold}new#{Bold} : change the #{Underline}handle#{Underline}, #{Underline}url#{Underline}, #{Underline}type#{Underline} or #{Underline}refresh#{Underline} rate of rss called #{Bold}handle#{Bold} to value #{Bold}new#{Bold}"
536 when /^(del(ete)?|rm)$/
537 "rss del(ete)|rm #{Bold}handle#{Bold} : delete rss feed #{Bold}handle#{Bold}"
539 "rss replace #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : try to replace the url of rss called #{Bold}handle#{Bold} with #{Bold}url#{Bold} (of type #{Bold}type#{Bold}); only works if nobody else is watching it"
541 "rss forcereplace #{Bold}handle#{Bold} #{Bold}url#{Bold} [#{Bold}type#{Bold}] : replace the url of rss called #{Bold}handle#{Bold} with #{Bold}url#{Bold} (of type #{Bold}type#{Bold})"
543 "rss watch #{Bold}handle#{Bold} [#{Bold}url#{Bold} [#{Bold}type#{Bold}]] [in #{Bold}chan#{Bold}]: watch rss #{Bold}handle#{Bold} for changes (in channel #{Bold}chan#{Bold}); when the other parameters are present, the feed will be created if it doesn't exist yet"
545 "rss unwatch|rmwatch #{Bold}handle#{Bold} [in #{Bold}chan#{Bold}]: stop watching rss #{Bold}handle#{Bold} (in channel #{Bold}chan#{Bold}) for changes"
546 when /who(?: watche?s?)?/
547 "rss who watches #{Bold}handle#{Bold}: lists watches for rss #{Bold}handle#{Bold}"
549 "rss rewatch : restart threads that watch for changes in watched rss"
551 "rss types : show the rss types for which an output format exist (all other types will use the default one)"
553 "manage RSS feeds: rss types|show|list|watched|add|change|del(ete)|rm|(force)replace|watch|unwatch|rmwatch|rewatch|who watches"
557 def report_problem(report, e=nil, m=nil)
558 if m && m.respond_to?(:reply)
565 debug e.backtrace.join("\n") if e.respond_to?(:backtrace)
569 def show_rss(m, params)
570 handle = params[:handle]
571 lims = params[:limit].to_s.match(/(\d+)(?:..(\d+))?/)
572 debug lims.to_a.inspect
574 ll = [[lims[1].to_i-1,lims[2].to_i-1].min, 0].max
575 ul = [[lims[1].to_i-1,lims[2].to_i-1].max, 14].min
576 rev = lims[1].to_i > lims[2].to_i
579 ul = [[lims[1].to_i-1, 0].max, 14].min
583 feed = @feeds.fetch(handle.downcase, nil)
585 m.reply "I don't know any feeds named #{handle}"
589 m.reply "lemme fetch it..."
591 we_were_watching = false
593 if @watch.key?(feed.handle)
594 # If a feed is being watched, we run the watcher thread
595 # so that all watchers can be informed of changes to
596 # the feed. Before we do that, though, we remove the
597 # show requester from the watchlist, if present, lest
598 # he gets the update twice.
599 if feed.watched_by?(m.replyto)
600 we_were_watching = true
601 feed.rm_watch(m.replyto)
603 @bot.timer.reschedule(@watch[feed.handle], 0)
605 feed.add_watch(m.replyto)
608 fetched = fetchRss(feed, m, false)
610 return unless fetched or feed.xml
611 if fetched or not feed.items
612 parsed = parseRss(feed, m)
614 return unless feed.items
615 m.reply "using old data" unless fetched and parsed and parsed > 0
620 # We sort the feeds in freshness order (newer ones first)
621 items = freshness_sort(items)
625 m.reply "Channel : #{title}"
627 printFormattedRss(feed, item, {:places=>[m.replyto],:handle=>nil,:date=>true})
631 def itemDate(item,ex=nil)
632 return item.pubDate if item.respond_to?(:pubDate) and item.pubDate
633 return item.date if item.respond_to?(:date) and item.date
637 def freshness_sort(items)
640 itemDate(b, notime) <=> itemDate(a, notime)
644 def list_rss(m, params)
645 wanted = params[:handle]
647 @feeds.each { |handle, feed|
648 next if wanted and !handle.match(/#{wanted}/i)
649 reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
650 (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
651 (reply << " (watched)") if feed.watched_by?(m.replyto)
655 reply = "no feeds found"
656 reply << " matching #{wanted}" if wanted
658 m.reply reply, :max_lines => reply.length
661 def watched_rss(m, params)
662 wanted = params[:handle]
663 chan = params[:chan] || m.replyto
665 watchlist.each { |handle, feed|
666 next if wanted and !handle.match(/#{wanted}/i)
667 next unless feed.watched_by?(chan)
668 reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
669 (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
673 reply = "no watched feeds"
674 reply << " matching #{wanted}" if wanted
679 def who_watches(m, params)
680 wanted = params[:handle]
682 watchlist.each { |handle, feed|
683 next if wanted and !handle.match(/#{wanted}/i)
684 reply << "#{feed.handle}: #{feed.url} (in format: #{feed.type ? feed.type : 'default'})"
685 (reply << " refreshing every #{Utils.secs_to_string(feed.refresh_rate)}") if feed.refresh_rate
686 reply << ": watched by #{feed.watchers.join(', ')}"
690 reply = "no watched feeds"
691 reply << " matching #{wanted}" if wanted
696 def add_rss(m, params, force=false)
697 handle = params[:handle]
699 unless url.match(/https?/)
700 m.reply "I only deal with feeds from HTTP sources, so I can't use #{url} (maybe you forgot the handle?)"
704 if @feeds.fetch(handle.downcase, nil) && !force
705 m.reply "There is already a feed named #{handle} (URL: #{@feeds[handle.downcase].url})"
709 m.reply "You must specify both a handle and an url to add an RSS feed"
712 @feeds[handle.downcase] = RssBlob.new(url,handle,type)
713 reply = "Added RSS #{url} named #{handle}"
715 reply << " (format: #{type})"
721 def change_rss(m, params)
722 handle = params[:handle].downcase
723 feed = @feeds.fetch(handle, nil)
725 m.reply "No such feed with handle #{handle}"
728 case params[:what].intern
730 new = params[:new].downcase
731 if @feeds.key?(new) and @feeds[new]
732 m.reply "There already is a feed with handle #{new}"
735 feed.mutex.synchronize do
737 @feeds.delete(handle)
744 feed.mutex.synchronize do
749 new = nil if new == 'default'
750 feed.mutex.synchronize do
754 new = params[:new].to_i
755 new = nil if new == 0
756 feed.mutex.synchronize do
757 feed.refresh_rate = new
760 m.reply "Don't know how to change #{params[:what]} for feeds"
763 m.reply "Feed changed:"
764 list_rss(m, {:handle => handle})
767 def del_rss(m, params, pass=false)
768 feed = unwatch_rss(m, params, true)
771 m.reply "someone else is watching #{feed.handle}, I won't remove it from my list"
774 @feeds.delete(feed.handle.downcase)
779 def replace_rss(m, params)
780 handle = params[:handle]
781 if @feeds.key?(handle.downcase)
782 del_rss(m, {:handle => handle}, true)
784 if @feeds.key?(handle.downcase)
785 m.reply "can't replace #{feed.handle}"
787 add_rss(m, params, true)
791 def forcereplace_rss(m, params)
792 add_rss(m, params, true)
795 def watch_rss(m, params)
796 handle = params[:handle]
797 chan = params[:chan] || m.replyto
803 feed = @feeds.fetch(handle.downcase, nil)
805 if feed.add_watch(chan)
809 m.reply "Already watching #{feed.handle} in #{chan}"
812 m.reply "Couldn't watch feed #{handle} (no such feed found)"
816 def unwatch_rss(m, params, pass=false)
817 handle = params[:handle].downcase
818 chan = params[:chan] || m.replyto
819 unless @feeds.has_key?(handle)
820 m.reply("dunno that feed")
823 feed = @feeds[handle]
824 if feed.rm_watch(chan)
825 m.reply "#{chan} has been removed from the watchlist for #{feed.handle}"
827 m.reply("#{chan} wasn't watching #{feed.handle}") unless pass
835 def rewatch_rss(m=nil, params=nil)
836 if params and handle = params[:handle]
837 feed = @feeds.fetch(handle.downcase, nil)
839 @bot.timer.reschedule(@watch[feed.handle], (params[:delay] || 0).to_f)
842 m.reply _("no such feed %{handle}") % { :handle => handle } if m
847 # Read watches from list.
848 watchlist.each{ |handle, feed|
856 def watchRss(feed, m=nil)
857 if @watch.has_key?(feed.handle)
858 report_problem("watcher thread for #{feed.handle} is already running", nil, m)
862 status[:failures] = 0
865 tmout = feed.last_fetched + calculate_timeout(feed) - Time.now
866 tmout = 0 if tmout < 0
868 debug "scheduling a watcher for #{feed} in #{tmout} seconds"
869 @watch[feed.handle] = @bot.timer.add(tmout) {
870 debug "watcher for #{feed} wakes up"
871 failures = status[:failures]
873 debug "fetching #{feed}"
874 first_run = !feed.last_fetched
875 oldxml = feed.xml ? feed.xml.dup : nil
876 unless fetchRss(feed)
880 debug "first run for #{feed}, getting items"
882 elsif oldxml and oldxml == feed.xml
883 debug "xml for #{feed} didn't change"
884 failures -= 1 if failures > 0
887 debug "no previous items in feed #{feed}"
889 failures -= 1 if failures > 0
891 # This one is used for debugging
894 # These are used for checking new items vs old ones
895 oids = Set.new feed.items.map { |item|
898 debug [uid, item].inspect
899 debug [uid, otxt.last].inspect
903 nitems = parseRss(feed)
907 debug "no items in feed #{feed}"
909 debug "Checking if new items are available for #{feed}"
910 failures -= 1 if failures > 0
916 dispItems = feed.items.reject { |item|
919 if oids.include?(uid)
920 debug "rejecting old #{uid} #{item.inspect}"
921 debug [uid, txt].inspect
924 debug "accepting new #{uid} #{item.inspect}"
925 debug [uid, txt].inspect
926 warning "same text! #{txt}" if otxt.include?(txt)
931 if dispItems.length > 0
932 debug "Found #{dispItems.length} new items in #{feed}"
933 # When displaying watched feeds, publish them from older to newer
934 dispItems.reverse.each { |item|
935 printFormattedRss(feed, item)
938 debug "No new items found in #{feed}"
944 rescue Exception => e
945 error "Error watching #{feed}: #{e.inspect}"
946 debug e.backtrace.join("\n")
950 status[:failures] = failures
952 seconds = calculate_timeout(feed, failures)
953 debug "watcher for #{feed} going to sleep #{seconds} seconds.."
955 @bot.timer.reschedule(@watch[feed.handle], seconds)
957 warning "watcher for #{feed} failed to reschedule: #{$!.inspect}"
960 debug "watcher for #{feed} added"
963 def calculate_timeout(feed, failures = 0)
964 seconds = @bot.config['rss.thread_sleep']
965 feed.mutex.synchronize do
966 seconds = feed.refresh_rate if feed.refresh_rate
968 seconds *= failures + 1
969 seconds += seconds * (rand(100)-50)/100
973 def printFormattedRss(feed, item, opts=nil)
975 places = feed.watchers
976 handle = feed.handle.empty? ? "" : "::#{feed.handle}:: "
979 places = opts[:places] if opts.key?(:places)
980 handle = opts[:handle].to_s if opts.key?(:handle)
981 if opts.key?(:date) && opts[:date]
982 if item.respond_to?(:updated)
983 if item.updated.content.class <= Time
984 date = item.updated.content.strftime("%Y/%m/%d %H:%M")
986 date = item.updated.content.to_s
988 elsif item.respond_to?(:source) and item.source.respond_to?(:updated)
989 if item.source.updated.content.class <= Time
990 date = item.source.updated.content.strftime("%Y/%m/%d %H:%M")
992 date = item.source.updated.content.to_s
994 elsif item.respond_to?(:pubDate)
995 if item.pubDate.class <= Time
996 date = item.pubDate.strftime("%Y/%m/%d %H:%M")
998 date = item.pubDate.to_s
1000 elsif item.respond_to?(:date)
1001 if item.date.class <= Time
1002 date = item.date.strftime("%Y/%m/%d %H:%M")
1004 date = item.date.to_s
1014 # Twitters don't need a cap on the title length since they have a hard
1015 # limit to 160 characters, and most of them are under 140 characters
1016 tit_opt[:limit] = @bot.config['rss.head_max'] unless feed.type == 'twitter'
1019 base_title = item.title.to_s.dup
1020 # git changesets are SHA1 hashes (40 hex digits), way too long, get rid of them, as they are
1021 # visible in the URL anyway
1022 # TODO make this optional?
1023 base_title.sub!(/^Changeset \[([\da-f]{40})\]:/) { |c| "(git commit)"} if feed.type == 'trac'
1024 title = "#{Bold}#{base_title.ircify_html(tit_opt)}#{Bold}"
1028 desc_opt[:limit] = @bot.config['rss.text_max']
1029 desc_opt[:a_href] = :link_out if @bot.config['rss.show_links']
1031 # We prefer content_encoded here as it tends to provide more html formatting
1032 # for use with ircify_html.
1033 if item.respond_to?(:content_encoded) && item.content_encoded
1034 desc = item.content_encoded.ircify_html(desc_opt)
1035 elsif item.respond_to?(:description) && item.description
1036 desc = item.description.ircify_html(desc_opt)
1037 elsif item.respond_to?(:content) && item.content
1038 if item.content.type == "html"
1039 desc = item.content.content.ircify_html(desc_opt)
1041 desc = item.content.content
1042 if desc.size > desc_opt[:limit]
1043 desc = desc.slice(0, desc_opt[:limit]) + "#{Reverse}...#{Reverse}"
1053 category = item.category! || item.dc_subject!
1054 category.strip! if category
1055 author = item.dc_creator! || item.author!
1056 author.strip! if author
1061 at = ((item.title && item.link) ? ' @ ' : '')
1063 key = @bot.global_filter_name(feed.type, @outkey)
1064 key = @bot.global_filter_name(:default, @outkey) unless @bot.has_filter?(key)
1066 output = @bot.filter(key, :item => item, :handle => handle, :date => date,
1067 :title => title, :desc => desc, :link => link,
1068 :category => category, :author => author, :at => at)
1070 return output if places.empty?
1073 output.to_s.each_line { |line|
1074 @bot.say loc, line, :overlong => :truncate
1079 def fetchRss(feed, m=nil, cache=true)
1080 feed.last_fetched = Time.now
1082 # Use 60 sec timeout, cause the default is too low
1083 xml = @bot.httputil.get(feed.url,
1084 :read_timeout => 60,
1085 :open_timeout => 60,
1087 rescue URI::InvalidURIError, URI::BadURIError => e
1088 report_problem("invalid rss feed #{feed.url}", e, m)
1091 report_problem("error getting #{feed.url}", e, m)
1094 debug "fetched #{feed}"
1096 report_problem("reading feed #{feed} failed", nil, m)
1099 # Ok, 0.9 feeds are not supported, maybe because
1100 # Netscape happily removed the DTD. So what we do is just to
1101 # reassign the 0.9 RDFs to 1.0, and hope it goes right.
1102 xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"",
1103 "xmlns=\"http://purl.org/rss/1.0/\"")
1104 # make sure the parser doesn't double-convert in case the feed is not UTF-8
1105 xml.sub!(/<\?xml (.*?)\?>/) do |match|
1106 if /\bencoding=(['"])(.*?)\1/.match(match)
1107 match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"')
1111 feed.mutex.synchronize do
1117 def parseRss(feed, m=nil)
1118 return nil unless feed.xml
1119 feed.mutex.synchronize do
1122 ## do validate parse
1123 rss = RSS::Parser.parse(xml)
1124 debug "parsed and validated #{feed}"
1125 rescue RSS::InvalidRSSError
1126 ## do non validate parse for invalid RSS 1.0
1128 rss = RSS::Parser.parse(xml, false)
1129 debug "parsed but not validated #{feed}"
1130 rescue RSS::Error => e
1131 report_problem("parsing rss stream failed, whoops =(", e, m)
1134 rescue RSS::Error => e
1135 report_problem("parsing rss stream failed, oioi", e, m)
1138 report_problem("processing error occured, sorry =(", e, m)
1143 if xml.match(/xmlns\s*=\s*(['"])http:\/\/www.w3.org\/2005\/Atom\1/) and not defined?(RSS::Atom)
1144 report_problem("#{feed.handle} @ #{feed.url} looks like an Atom feed, but your Ruby/RSS library doesn't seem to support it. Consider getting the latest version from http://raa.ruby-lang.org/project/rss/", nil, m)
1146 report_problem("#{feed.handle} @ #{feed.url} doesn't seem to contain an RSS or Atom feed I can read", nil, m)
1151 rss.output_encoding = 'UTF-8'
1152 rescue RSS::UnknownConvertMethod => e
1153 report_problem("bah! something went wrong =(", e, m)
1156 if rss.respond_to? :channel
1157 rss.channel.title ||= "(?)"
1158 title = rss.channel.title
1160 title = rss.title.content
1162 rss.items.each do |item|
1163 item.title ||= "(?)"
1169 report_problem("no items found in the feed, maybe try weed?", e, m)
1171 feed.title = title.strip
# Create the plugin instance that all command mappings below attach to.
plugin = RSSFeedsPlugin.new

# Default authorization values for the 'edit' and 'edit:add' auth paths.
plugin.default_auth('edit', false)
plugin.default_auth('edit:add', true)

# rss show <handle> [<limit>] -- limit is either "N" or a range "A..B".
plugin.map('rss show :handle :limit',
           :action => 'show_rss',
           :requirements => { :limit => /^\d+(?:\.\.\d+)?$/ },
           :defaults => { :limit => 5 })

# rss list [<handle>]
plugin.map('rss list :handle',
           :action => 'list_rss',
           :defaults => { :handle => nil })

# rss watched [<handle>] [in <chan>]
plugin.map('rss watched :handle [in :chan]',
           :action => 'watched_rss',
           :defaults => { :handle => nil })

# rss who watches [<handle>]
plugin.map('rss who watches :handle',
           :action => 'who_watches',
           :defaults => { :handle => nil })

# rss add <handle> <url> [<type>]
plugin.map('rss add :handle :url :type',
           :action => 'add_rss',
           :auth_path => 'edit',
           :defaults => { :type => nil })
# rss change <what> of|for <handle> to <new>
#
# Both prepositions route to change_rss and must accept the same set of
# changeable properties. They are generated from a single definition so the
# two requirement patterns cannot drift apart: the "for" variant used to
# spell the last alternative "refesh", which made
# "rss change refresh for <handle> to <n>" fail to match.
%w{of for}.each do |preposition|
  plugin.map "rss change :what #{preposition} :handle to :new",
    :action => 'change_rss',
    :auth_path => 'edit',
    :requirements => { :what => /handle|url|format|type|refresh/ }
end
# rss del|delete|rm <handle> -- three spellings of the same command, all
# handled by del_rss under the 'edit:rm!' auth path.
%w{del delete rm}.each do |verb|
  plugin.map("rss #{verb} :handle",
             :auth_path => 'edit:rm!',
             :action => 'del_rss')
end
# rss replace <handle> <url> [<type>]
plugin.map('rss replace :handle :url :type',
           :auth_path => 'edit',
           :action => 'replace_rss',
           :defaults => { :type => nil })

# rss forcereplace <handle> <url> [<type>]
plugin.map('rss forcereplace :handle :url :type',
           :auth_path => 'edit',
           :action => 'forcereplace_rss',
           :defaults => { :type => nil })

# rss watch <handle> [in <chan>]
plugin.map('rss watch :handle [in :chan]',
           :action => 'watch_rss',
           :defaults => { :url => nil, :type => nil })

# rss watch <handle> <url> <type> [in <chan>]
plugin.map('rss watch :handle :url :type [in :chan]',
           :action => 'watch_rss',
           :defaults => { :url => nil, :type => nil })
# rss unwatch|rmwatch <handle> [in <chan>] -- two spellings, both handled
# by unwatch_rss.
%w{unwatch rmwatch}.each do |verb|
  plugin.map("rss #{verb} :handle [in :chan]",
             :action => 'unwatch_rss')
end

# rss rewatch [<handle>] [<delay>]
plugin.map('rss rewatch [:handle] [:delay]',
           :action => 'rewatch_rss')

# rss types
plugin.map('rss types',
           :action => 'rss_types')