From ac9eb1b02d4200006566ccd630dd678345008963 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Thu, 20 Nov 2008 15:17:27 +0100 Subject: rss plugin: prevent double UTF-8 decoding The rss parser looks at the encoding specified in the XML file and converts everything to UTF-8. Since we do the UTF-8 conversion ourselves, monkey-patch the XML 'encoding' declaration to claim it's UTF-8 already (as it actually is). --- data/rbot/plugins/rss.rb | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'data/rbot/plugins') diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 9e85b416..45ee4a23 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1092,6 +1092,13 @@ class RSSFeedsPlugin < Plugin # reassign the 0.9 RDFs to 1.0, and hope it goes right. xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"", "xmlns=\"http://purl.org/rss/1.0/\"") + # make sure the parser doesn't double-convert in case the feed is not UTF-8 + xml.sub!(/<\?xml (.*?)\?>/) do |match| + if /\bencoding=(['"])(.*?)\1/.match(match) + match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"') + end + match + end feed.mutex.synchronize do feed.xml = xml end -- cgit v1.2.3