git.netwichtig.de Git - user/henk/code/ruby/rbot.git/commitdiff
+ (httputil) bruteforce truncated chars when guessing the encoding
author: Dmitry Kim <dmitry point kim at gmail point com>
Thu, 26 Apr 2007 22:56:14 +0000 (22:56 +0000)
committer: Dmitry Kim <dmitry point kim at gmail point com>
Thu, 26 Apr 2007 22:56:14 +0000 (22:56 +0000)
lib/rbot/core/utils/httputil.rb

index 3c94968658f75d7b28910bb36487ecbf4c933a0c..448c8da1cc0e14ed8e963844d2703e71e39e2c88 100644 (file)
@@ -61,13 +61,21 @@ module ::Net
     def body_to_utf(str)
       charsets = self.body_charset(str) or return str
 
-      charsets.reverse_each { |charset|
-        begin
-          return Iconv.iconv('utf-8//ignore', charset, str).first
-        rescue
-          debug "conversion failed for #{charset}"
+      charsets.reverse_each do |charset|
+        # XXX: this one is really ugly, but i don't know how to make it better
+        #  -jsn
+
+        0.upto(5) do |off|
+          begin
+            debug "trying #{charset} / offset #{off}"
+            return Iconv.iconv('utf-8//ignore',
+                               charset,
+                               str.slice(0 .. (-1 - off))).first
+          rescue
+            debug "conversion failed for #{charset} / offset #{off}"
+          end
         end
-      }
+      end
       return str
     end