Utils: fix ircify_html (the final stripsvn diff lib/rbot/core/utils/extends.rb could...

[user/henk/code/ruby/rbot.git] / lib / rbot / core / utils / httputil.rb
diff --git a/lib/rbot/core/utils/httputil.rb b/lib/rbot/core/utils/httputil.rb

index ff2fb6c6c55e9f53f7c5b345c14fbbe653318797..904e194114ece4fbd71500d6c9aedf64401a20e1 100644 (file)
--- a/lib/rbot/core/utils/httputil.rb
+++ b/lib/rbot/core/utils/httputil.rb
@@ -10,9 +10,6 @@
  # Copyright:: (C) 2006 Tom Gilbert, Giuseppe Bilotta
  # Copyright:: (C) 2006,2007 Giuseppe Bilotta
  
-module ::Irc
-module Utils
-
  require 'resolv'
  require 'net/http'
  begin
@@ -22,8 +19,30 @@ rescue LoadError => e
    error "Secured HTTP connections will fail"
  end
  
+module ::Net
+  class HTTPResponse
+    # Read chunks from the body until we have at least _size_ bytes, yielding
+    # the partial text at each chunk if a block is given. When _size_ is 0 or
+    # nil the whole body is read. Return the partial body.
+    def partial_body(size=0, &block)
+      partial = String.new
+
+      self.read_body { |chunk|
+        partial << chunk
+        yield partial if block_given?
+        break if size and size > 0 and partial.length >= size
+      }
+
+      return partial
+    end
+  end
+end
+
  Net::HTTP.version_1_2
  
+module ::Irc
+module Utils
+
  # class for making http requests easier (mainly for plugins to use)
  # this class can check the bot proxy configuration to determine if a proxy
  # needs to be used, which includes support for per-url proxy configuration.
@@ -56,6 +75,9 @@ class HttpUtil
      BotConfig.register BotConfigIntegerValue.new('http.no_expire_cache',
        :default => false,
        :desc => "Set this to true if you want the bot to never expire the cached pages")
+    BotConfig.register BotConfigIntegerValue.new('http.info_bytes',
+      :default => 4096,
+      :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.")
  
    def initialize(bot)
      @bot = bot
@@ -264,6 +286,90 @@ class HttpUtil
      return nil
    end
  
+  # uri_or_str::  uri to query (URI object or String)
+  # opts::        options. Currently used:
+  # :open_timeout::     open timeout for the proxy
+  # :read_timeout::     read timeout for the proxy
+  # :max_redir::        redirections to follow (default: http.max_redir)
+  # :cache::            should we cache results?
+  #
+  # This method is used to get responses following redirections.
+  #
+  # It will return either a Net::HTTPResponse or, on failure, the rescued
+  # exception (extended with a #body method returning nil). If a block is
+  # given, the response or error is yielded to it instead, and the value of
+  # the block is returned.
+  def get_response(uri_or_str, opts={}, &block)
+    if uri_or_str.kind_of?(URI)
+      uri = uri_or_str
+    else
+      uri = URI.parse(uri_or_str.to_s)
+    end
+    debug "Getting #{uri}"
+
+    options = {
+      :read_timeout => 10,
+      :open_timeout => 5,
+      :max_redir => @bot.config["http.max_redir"],
+      :cache => false,
+      :yield => :none
+    }.merge(opts)
+    cache = options[:cache]
+
+    proxy = get_proxy(uri)
+    proxy.open_timeout = options[:open_timeout]
+    proxy.read_timeout = options[:read_timeout]
+
+    begin
+      proxy.start() {|http|
+        req = Net::HTTP::Get.new(uri.request_uri(), @headers)
+        if uri.user and uri.password
+          req.basic_auth(uri.user, uri.password)
+        end
+        http.request(req) { |resp|
+          case resp
+          when Net::HTTPSuccess
+            if cache
+              debug "Caching #{uri.to_s}"
+              cache_response(uri.to_s, resp)
+            end
+          when Net::HTTPRedirection
+            if resp.key?('location')
+              new_loc = URI.join(uri, resp['location']) rescue URI.parse(resp['location'])
+              debug "Redirecting #{uri} to #{new_loc}"
+              if options[:max_redir] > 0
+                new_opts = options.dup
+                new_opts[:max_redir] -= 1
+                return get_response(new_loc, new_opts, &block)
+              else
+                raise "Too many redirections"
+              end
+            end
+          end
+          # return in both forms: falling out would reach the raise at the end
+          if block_given?
+            return yield(resp)
+          else
+            return resp
+          end
+        }
+      }
+    rescue StandardError, Timeout::Error => e
+      error "HttpUtil.get_response exception: #{e.inspect}, while trying to get #{uri}"
+      debug e.backtrace.join("\n")
+      def e.body
+        nil
+      end
+      if block_given?
+        return yield(e)
+      else
+        return e
+      end
+    end
+    # not reached: every path above either returns or is rescued and returns
+    raise "This shouldn't happen"
+  end
+
    def cache_response(k, resp)
      begin
        if resp.key?('pragma') and resp['pragma'] == 'no-cache'