From: Giuseppe Bilotta Date: Thu, 20 Nov 2008 14:17:27 +0000 (+0100) Subject: rss plugin: prevent double UTF-8 decoding X-Git-Url: https://git.netwichtig.de/gitweb/?a=commitdiff_plain;h=ac9eb1b02d4200006566ccd630dd678345008963;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git rss plugin: prevent double UTF-8 decoding The rss parser looks at the encoding specified in the XML file and converts everything to UTF-8. Since we do the UTF-8 conversion ourselves, monkey-patch the XML 'encoding' declaration to claim it's UTF-8 already (as it actually is). --- diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb index 9e85b416..45ee4a23 100644 --- a/data/rbot/plugins/rss.rb +++ b/data/rbot/plugins/rss.rb @@ -1092,6 +1092,13 @@ class RSSFeedsPlugin < Plugin # reassign the 0.9 RDFs to 1.0, and hope it goes right. xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"", "xmlns=\"http://purl.org/rss/1.0/\"") + # make sure the parser doesn't double-convert in case the feed is not UTF-8 + xml.sub!(/<\?xml (.*?)\?>/) do |match| + if /\bencoding=(['"])(.*?)\1/.match(match) + match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"') + end + match + end feed.mutex.synchronize do feed.xml = xml end