diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-11-20 15:17:27 +0100 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2008-11-20 15:27:24 +0100 |
commit | ac9eb1b02d4200006566ccd630dd678345008963 (patch) | |
tree | 4d5e11f76d1123987c9a251ab0a20e82f408f6a1 /data/rbot | |
parent | 62ca2eda0b2d42f79d06f85e0ce041be64055ad2 (diff) |
rss plugin: prevent double UTF-8 decoding
The rss parser looks at the encoding specified in the XML file and
converts everything to UTF-8. Since we do the UTF-8 conversion
ourselves, monkey-patch the XML 'encoding' declaration to claim it's
UTF-8 already (as it actually is).
Diffstat (limited to 'data/rbot')
-rw-r--r-- | data/rbot/plugins/rss.rb | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 9e85b416..45ee4a23 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1092,6 +1092,13 @@ class RSSFeedsPlugin < Plugin # reassign the 0.9 RDFs to 1.0, and hope it goes right. xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"", "xmlns=\"http://purl.org/rss/1.0/\"") + # make sure the parser doesn't double-convert in case the feed is not UTF-8 + xml.sub!(/<\?xml (.*?)\?>/) do |match| + if /\bencoding=(['"])(.*?)\1/.match(match) + match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"') + end + match + end feed.mutex.synchronize do feed.xml = xml end |