diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-03-23 01:15:27 +0100 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-03-23 01:15:27 +0100 |
commit | 9d19d55d6492b45392890dbe7cbb11ab1bfb4ee2 (patch) | |
tree | a10f788b744570bc0e758d4bafda524fb1675045 | |
parent | 55a13ec9c487860975f0fe491fbc1a7c2357c6ac (diff) |
rss plugin: make htmlinfo input filter less greedy
-rw-r--r-- | data/rbot/plugins/rss.rb | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb
index 8f16509e..226579dd 100644
--- a/data/rbot/plugins/rss.rb
+++ b/data/rbot/plugins/rss.rb
@@ -366,11 +366,17 @@ class RSSFeedsPlugin < Plugin
     }
   end
 
+  FEED_NS = %r{xmlns.*http://(purl\.org/rss|www.w3c.org/199/02/22-rdf)}
   def htmlinfo_filter(s)
     return nil unless s[:headers] and s[:headers]['x-rbot-location']
+    return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or
+      s[:text].include?("<rdf:RDF") or s[:text].include?("<rss") or s[:text].include?("<feed") or
+      s[:text].match(FEED_NS)
     blob = RssBlob.new(s[:headers]['x-rbot-location'],"", :htmlinfo)
-    return nil unless fetchRss(blob, nil)
-    return nil unless parseRss(blob, nil)
+    unless fetchRss(blob, nil) and parseRss(blob, nil)
+      debug "tried to filter #{s.inspect} which is not an RSS feed"
+      return nil
+    end
     output = []
     blob.items.each { |it|
       output << printFormattedRss(blob, it)[:text]
```