diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb
index 15586dc1..2328dc26 100644
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -13,11 +13,12 @@
 
 require 'resolv'
 require 'net/http'
+require 'cgi'
 require 'iconv'
 begin
   require 'net/https'
 rescue LoadError => e
-  error "Couldn't load 'net/https': #{e.inspect}"
+  error "Couldn't load 'net/https': #{e.pretty_inspect}"
   error "Secured HTTP connections will fail"
 end
 
@@ -25,9 +26,9 @@ end
 require 'stringio'
 require 'zlib'
 
-module ::Net
-  class HTTPResponse
-    attr_accessor :no_cache
+module ::Net
+  class HTTPResponse
+    attr_accessor :no_cache
     if !instance_methods.include?('raw_body')
       alias :raw_body :body
     end
@@ -60,13 +61,21 @@ module ::Net
 
     def body_to_utf(str)
       charsets = self.body_charset(str) or return str
-      charsets.reverse_each { |charset|
-        begin
-          return Iconv.iconv('utf-8//ignore', charset, str).first
-        rescue
-          debug "conversion failed for #{charset}"
+      charsets.reverse_each do |charset|
+        # XXX: this one is really ugly, but i don't know how to make it better
+        #      -jsn
+
+        0.upto(5) do |off|
+          begin
+            debug "trying #{charset} / offset #{off}"
+            return Iconv.iconv('utf-8//ignore',
+                               charset,
+                               str.slice(0 .. (-1 - off))).first
+          rescue
+            debug "conversion failed for #{charset} / offset #{off}"
+          end
         end
-      }
+      end
       return str
     end
 
@@ -75,34 +84,48 @@ module ::Net
       case method
       when nil
         return str
-      when 'gzip', 'x-gzip'
+      when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
        debug "gunzipping body"
-        return Zlib::GzipReader.new(StringIO.new(str)).read
+        begin
+          return Zlib::GzipReader.new(StringIO.new(str)).read
+        rescue Zlib::Error => e
+          # If we can't unpack the whole stream (e.g. because we're doing a
+          # partial read), salvage as much of it as we can
+          debug "full gunzipping failed (#{e}), trying to recover as much as possible"
+          ret = ""
+          begin
+            Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
+              ret << byte
+            }
+          rescue
+          end
+          return ret
+        end
       else
         raise "Unhandled content encoding #{method}"
       end
     end
 
-    def body
+    def cooked_body
       return self.body_to_utf(self.decompress_body(self.raw_body))
     end
 
-    # Read chunks from the body until we have at least _size_ bytes, yielding
-    # the partial text at each chunk. Return the partial body.
-    def partial_body(size=0, &block)
+    # Read chunks from the body until we have at least _size_ bytes, yielding
+    # the partial text at each chunk. Return the partial body.
+    def partial_body(size=0, &block)
       self.no_cache = true
-      partial = String.new
+      partial = String.new
 
-      self.read_body { |chunk|
-        partial << chunk
-        yield self.body_to_utf(partial) if block_given?
-        break if size and size > 0 and partial.length >= size
-      }
+      self.read_body { |chunk|
+        partial << chunk
+        yield self.body_to_utf(self.decompress_body(partial)) if block_given?
+ break if size and size > 0 and partial.length >= size + } - return self.body_to_utf(partial) - end - end + return self.body_to_utf(self.decompress_body(partial)) + end + end end Net::HTTP.version_1_2 @@ -229,8 +252,7 @@ class HttpUtil self.revalidate self.response.raw_body rescue Exception => e - error e.message - error e.backtrace.join("\n") + error e raise e end end @@ -241,14 +263,15 @@ class HttpUtil @cache = Hash.new @headers = { 'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8', + 'Accept-Encoding' => 'gzip;q=1, identity;q=0.8, *;q=0.2', 'User-Agent' => "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)" - } + } debug "starting http cache cleanup timer" @timer = @bot.timer.add(300) { self.remove_stale_cache unless @bot.config['http.no_expire_cache'] } - end + end def cleanup debug 'stopping http cache cleanup timer' @@ -304,7 +327,7 @@ class HttpUtil # proxying based on the bot's proxy configuration. # This will include per-url proxy configuration based on the bot config # +http_proxy_include/exclude+ options. - + def get_proxy(uri, options = {}) opts = { :read_timeout => 10, @@ -362,6 +385,10 @@ class HttpUtil warning ":| redirect w/o location?" end end + class << resp + undef_method :body + alias :body :cooked_body + end if block_given? yield(resp) else @@ -387,7 +414,7 @@ class HttpUtil # # Generic http transaction method # - # It will return a HTTP::Response object or raise an exception + # It will return a Net::HTTPResponse object or raise an exception # # If a block is given, it will yield the response (see :yield option) @@ -443,7 +470,7 @@ class HttpUtil return handle_response(uri, cached.response, opts, &block) end end - + headers = @headers.dup.merge(opts[:headers] || {}) headers['Range'] = opts[:range] if opts[:range] @@ -461,8 +488,7 @@ class HttpUtil begin cached.revalidate(resp) rescue Exception => e - error e.message - error e.backtrace.join("\n") + error e end debug "reusing cached" resp = cached.response @@ -496,8 +522,7 @@ class HttpUtil Net::HTTPPartialContent === resp return resp.body rescue Exception => e - error e.message - error e.backtrace.join("\n") + error e end return nil end @@ -510,8 +535,7 @@ class HttpUtil Net::HTTPServerError == resp return resp rescue Exception => e - error e.message - error e.backtrace.join("\n") + error e end return nil end @@ -523,8 +547,7 @@ class HttpUtil raise 'http error' unless Net::HTTPOK === resp return resp rescue Exception => e - error e.message - error e.backtrace.join("\n") + error e end return nil end @@ -545,7 +568,7 @@ class HttpUtil (now - val.last_used > max_last) || (now - val.first_used > max_first) } rescue => e - error "Failed to remove stale cache: #{e.inspect}" + error "Failed to remove stale cache: #{e.pretty_inspect}" end debug "#{@cache.size} pages after" end @@ -565,6 +588,7 @@ class HttpUtilPlugin < CoreBotModule debug 'shutting down httputil' @bot.httputil.cleanup @bot.httputil = nil + super end end
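A note on the reworked body_to_utf above: when partial_body stops mid-transfer, the buffered body can end inside a multibyte character, and Iconv then refuses to convert the string at all. The 0.upto(5) loop retries the conversion with up to five trailing bytes trimmed off until Iconv accepts the input. The sketch below restates that strategy on its own, assuming the same legacy iconv library the file already requires; to_utf8_with_trim is only an illustrative name, not part of the patch.

    require 'iconv'

    # Convert str from charset to UTF-8, dropping up to five trailing bytes
    # so that a multibyte sequence cut short by a partial read does not make
    # the whole conversion fail. Falls back to returning str unchanged.
    def to_utf8_with_trim(str, charset)
      0.upto(5) do |off|
        begin
          return Iconv.iconv('utf-8//ignore', charset, str.slice(0 .. (-1 - off))).first
        rescue Iconv::Failure
          # the tail probably ends inside a multibyte character; drop one more byte
        end
      end
      str
    end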
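The begin/rescue block added to decompress_body handles truncated gzip data: on a partial read, GzipReader#read aborts with a Zlib error, but reading the stream byte by byte still yields whatever could be inflated before the cut. A minimal standalone sketch of that recovery path follows; gunzip_salvaging is an illustrative name and the buffer handling is an assumption, not code from the patch.

    require 'zlib'
    require 'stringio'

    # Gunzip data; if the stream turns out to be truncated, fall back to
    # byte-by-byte reading and keep whatever inflated cleanly.
    def gunzip_salvaging(data)
      Zlib::GzipReader.new(StringIO.new(data)).read
    rescue Zlib::Error
      salvaged = String.new   # String.new gives a binary buffer we can append raw bytes to
      begin
        Zlib::GzipReader.new(StringIO.new(data)).each_byte { |b| salvaged << b.chr }
      rescue Zlib::Error
        # stop at the point where the stream can no longer be inflated
      end
      salvaged
    end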
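Finally, handle_response now rebinds body on each response object it hands out, so callers keep writing resp.body but get the decompressed, UTF-8 cooked_body, while raw_body (and the cache) keep the original bytes. The override lives in the response's singleton class, so only that one object is affected. A small illustration with a made-up DemoResponse class:

    class DemoResponse
      def raw_body;    'raw bytes';          end
      def body;        raw_body;             end
      def cooked_body; 'cooked ' + raw_body; end
    end

    resp = DemoResponse.new
    class << resp               # open the singleton class of this one object
      undef_method :body        # drop the class-level #body for resp only
      alias :body :cooked_body  # and make #body point at #cooked_body instead
    end

    resp.body              # => "cooked raw bytes"
    DemoResponse.new.body  # => "raw bytes" (other instances are unaffected)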