summaryrefslogtreecommitdiff
path: root/data/rbot/plugins/rss.rb
diff options
context:
space:
mode:
authorGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-02-05 15:19:11 +0000
committerGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-02-05 15:19:11 +0000
commite27c1b84c73110f65752f933e3070a1704b84538 (patch)
tree6e2eee045be942dabc0172eafbdf12b191f5e6c6 /data/rbot/plugins/rss.rb
parent55b032137fa453dd00dde9fc5df055a3d064917d (diff)
rss plugin: only parse feeds when xml changed. also, the xml is now saved to the registry
Diffstat (limited to 'data/rbot/plugins/rss.rb')
-rw-r--r--data/rbot/plugins/rss.rb191
1 files changed, 121 insertions, 70 deletions
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb
index ff326d52..03b6406d 100644
--- a/data/rbot/plugins/rss.rb
+++ b/data/rbot/plugins/rss.rb
@@ -30,8 +30,12 @@ class ::RssBlob
attr :handle
attr :type
attr :watchers
+ attr_accessor :xml
+ attr_accessor :title
+ attr_accessor :items
+ attr_accessor :mutex
- def initialize(url,handle=nil,type=nil,watchers=[])
+ def initialize(url,handle=nil,type=nil,watchers=[], xml=nil)
@url = url
if handle
@handle = handle
@@ -40,9 +44,21 @@ class ::RssBlob
end
@type = type
@watchers=[]
+ @xml = xml
+ @title = nil
+ @items = nil
+ @mutex = Mutex.new
sanitize_watchers(watchers)
end
+ def dup
+ self.class.new(@url,
+ @handle,
+ @type ? @type.dup : nil,
+ @watchers.dup,
+ @xml ? @xml.dup : nil)
+ end
+
# Downcase all watchers, possibly turning them into Strings if they weren't
def sanitize_watchers(list=@watchers)
ls = list.dup
@@ -108,7 +124,9 @@ class RSSFeedsPlugin < Plugin
@feeds.delete(k)
}
@feeds.each { |k, f|
+ f.mutex = Mutex.new unless f.mutex
f.sanitize_watchers
+ parseRss(f) if f.xml
}
else
@feeds = Hash.new
@@ -130,7 +148,16 @@ class RSSFeedsPlugin < Plugin
end
def save
- @registry[:feeds] = @feeds
+ unparsed = Hash.new()
+ @feeds.each { |k, f|
+ f.mutex.synchronize do
+ unparsed[k] = f.dup
+ end
+ }
+ unparsed.each { |k, f|
+ debug f.inspect
+ }
+ @registry[:feeds] = unparsed
end
def stop_watch(handle)
@@ -212,8 +239,17 @@ class RSSFeedsPlugin < Plugin
m.reply "lemme fetch it..."
title = items = nil
- title, items = fetchRss(feed, m)
- return unless items
+ fetched = fetchRss(feed, m)
+ return unless fetched or feed.xml
+ if not fetched and feed.items
+ m.reply "using old data"
+ else
+ parsed = parseRss(feed, m)
+ m.reply "using old data" unless parsed
+ end
+ return unless feed.items
+ title = feed.title
+ items = feed.items
# We sort the feeds in freshness order (newer ones first)
items = freshness_sort(items)
@@ -377,42 +413,47 @@ class RSSFeedsPlugin < Plugin
return
end
status = Hash.new
- status[:oldItems] = []
- status[:firstRun] = true
status[:failures] = 0
@watch[feed.handle] = @bot.timer.add(0, status) {
debug "watcher for #{feed} started"
- oldItems = status[:oldItems]
- firstRun = status[:firstRun]
failures = status[:failures]
begin
debug "fetching #{feed}"
- title = newItems = nil
- title, newItems = fetchRss(feed)
- unless newItems
- debug "no items in feed #{feed}"
- failures +=1
+ oldxml = feed.xml ? feed.xml.dup : nil
+ unless fetchRss(feed)
+ failures += 1
else
- debug "Checking if new items are available for #{feed}"
- if firstRun
- debug "First run, we'll see next time"
- firstRun = false
+ if oldxml and oldxml == feed.xml
+ debug "xml for #{feed} didn't change"
+ failures -= 1 if failures > 0
else
- otxt = oldItems.map { |item| item.to_s }
- dispItems = newItems.reject { |item|
- otxt.include?(item.to_s)
- }
- if dispItems.length > 0
- debug "Found #{dispItems.length} new items in #{feed}"
- # When displaying watched feeds, publish them from older to newer
- dispItems.reverse.each { |item|
- printFormattedRss(feed, item)
- }
+ if not feed.items
+ debug "no previous items in feed #{feed}"
+ parseRss(feed)
+ failures -= 1 if failures > 0
else
- debug "No new items found in #{feed}"
+ otxt = feed.items.map { |item| item.to_s }
+ unless parseRss(feed)
+ debug "no items in feed #{feed}"
+ failures += 1
+ else
+ debug "Checking if new items are available for #{feed}"
+ failures -= 1 if failures > 0
+ dispItems = feed.items.reject { |item|
+ otxt.include?(item.to_s)
+ }
+ if dispItems.length > 0
+ debug "Found #{dispItems.length} new items in #{feed}"
+ # When displaying watched feeds, publish them from older to newer
+ dispItems.reverse.each { |item|
+ printFormattedRss(feed, item)
+ }
+ else
+ debug "No new items found in #{feed}"
+ end
+ end
end
end
- oldItems = newItems.dup
end
rescue Exception => e
error "Error watching #{feed}: #{e.inspect}"
@@ -420,8 +461,6 @@ class RSSFeedsPlugin < Plugin
failures += 1
end
- status[:oldItems] = oldItems
- status[:firstRun] = firstRun
status[:failures] = failures
seconds = @bot.config['rss.thread_sleep'] * (failures + 1)
@@ -494,59 +533,71 @@ class RSSFeedsPlugin < Plugin
xml = @bot.httputil.get_cached(feed.url, 60, 60)
rescue URI::InvalidURIError, URI::BadURIError => e
report_problem("invalid rss feed #{feed.url}", e, m)
- return
+ return nil
rescue => e
report_problem("error getting #{feed.url}", e, m)
- return
+ return nil
end
debug "fetched #{feed}"
unless xml
report_problem("reading feed #{feed} failed", nil, m)
- return
+ return nil
+ end
+ feed.mutex.synchronize do
+ feed.xml = xml
end
+ return true
+ end
- begin
- ## do validate parse
- rss = RSS::Parser.parse(xml)
- debug "parsed #{feed}"
- rescue RSS::InvalidRSSError
- ## do non validate parse for invalid RSS 1.0
+ def parseRss(feed, m=nil)
+ return nil unless feed.xml
+ feed.mutex.synchronize do
+ xml = feed.xml
begin
- rss = RSS::Parser.parse(xml, false)
+ ## do validate parse
+ rss = RSS::Parser.parse(xml)
+ debug "parsed #{feed}"
+ rescue RSS::InvalidRSSError
+ ## do non validate parse for invalid RSS 1.0
+ begin
+ rss = RSS::Parser.parse(xml, false)
+ rescue RSS::Error => e
+ report_problem("parsing rss stream failed, whoops =(", e, m)
+ return nil
+ end
rescue RSS::Error => e
- report_problem("parsing rss stream failed, whoops =(", e, m)
- return
- end
- rescue RSS::Error => e
- report_problem("parsing rss stream failed, oioi", e, m)
- return
- rescue => e
- report_problem("processing error occured, sorry =(", e, m)
- return
- end
- items = []
- if rss.nil?
- report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m)
- else
- begin
- rss.output_encoding = 'UTF-8'
- rescue RSS::UnknownConvertMethod => e
- report_problem("bah! something went wrong =(", e, m)
- return
+ report_problem("parsing rss stream failed, oioi", e, m)
+ return nil
+ rescue => e
+ report_problem("processing error occured, sorry =(", e, m)
+ return nil
end
- rss.channel.title ||= "Unknown"
- title = rss.channel.title
- rss.items.each do |item|
- item.title ||= "Unknown"
- items << item
+ items = []
+ if rss.nil?
+ report_problem("#{feed} does not include RSS 1.0 or 0.9x/2.0", nil, m)
+ else
+ begin
+ rss.output_encoding = 'UTF-8'
+ rescue RSS::UnknownConvertMethod => e
+ report_problem("bah! something went wrong =(", e, m)
+ return nil
+ end
+ rss.channel.title ||= "Unknown"
+ title = rss.channel.title
+ rss.items.each do |item|
+ item.title ||= "Unknown"
+ items << item
+ end
end
- end
- if items.empty?
- report_problem("no items found in the feed, maybe try weed?", e, m)
- return
+ if items.empty?
+ report_problem("no items found in the feed, maybe try weed?", e, m)
+ return nil
+ end
+ feed.title = title
+ feed.items = items
+ return true
end
- return [title, items]
end
end