summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2008-11-20 15:17:27 +0100
committerGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2008-11-20 15:27:24 +0100
commitac9eb1b02d4200006566ccd630dd678345008963 (patch)
tree4d5e11f76d1123987c9a251ab0a20e82f408f6a1
parent62ca2eda0b2d42f79d06f85e0ce041be64055ad2 (diff)
rss plugin: prevent double UTF-8 decoding
The rss parser looks at the encoding specified in the XML file and converts everything to UTF-8. Since we do the UTF-8 conversion ourselves, monkey-patch the XML 'encoding' declaration to claim the content is UTF-8 already (as it actually is by that point).
-rw-r--r--data/rbot/plugins/rss.rb7
1 files changed, 7 insertions, 0 deletions
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb
index 9e85b416..45ee4a23 100644
--- a/data/rbot/plugins/rss.rb
+++ b/data/rbot/plugins/rss.rb
@@ -1092,6 +1092,13 @@ class RSSFeedsPlugin < Plugin
# reassign the 0.9 RDFs to 1.0, and hope it goes right.
xml.gsub!("xmlns=\"http://my.netscape.com/rdf/simple/0.9/\"",
"xmlns=\"http://purl.org/rss/1.0/\"")
+ # make sure the parser doesn't double-convert in case the feed is not UTF-8
+ xml.sub!(/<\?xml (.*?)\?>/) do |match|
+ if /\bencoding=(['"])(.*?)\1/.match(match)
+ match.sub!(/\bencoding=(['"])(?:.*?)\1/,'encoding="UTF-8"')
+ end
+ match
+ end
feed.mutex.synchronize do
feed.xml = xml
end