Minor tweaks to httputil: make last response available in @last_resp for get and...

[user/henk/code/ruby/rbot.git] / lib / rbot / httputil.rb
diff --git a/lib/rbot/httputil.rb b/lib/rbot/httputil.rb

index 7edc286dbe32d7aca663dc80b18b47ddb1fa409c..ddbd8227392016f0b18ed9858ac49234b2cb4ad9 100644 (file)
--- a/lib/rbot/httputil.rb
+++ b/lib/rbot/httputil.rb
@@ -45,7 +45,9 @@ class HttpUtil
      @headers = {
        'User-Agent' => "rbot http util #{$version} (http://linuxbrit.co.uk/rbot/)",
      }
+    @last_response = nil
    end
+  attr_reader :last_response
  
    # if http_proxy_include or http_proxy_exclude are set, then examine the
    # uri to see if this is a proxied uri
@@ -132,18 +134,26 @@ class HttpUtil
    #
    # simple get request, returns (if possible) response body following redirs
    # and caching if requested
-  # it yields the urls it gets redirected to, for future uses
-  def get(uri, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
+  # if a block is given, it yields the request uri first, then each url it gets redirected to
+  # TODO we really need something to implement proper caching
+  def get(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
+    if uri_or_str.kind_of?(URI)
+      uri = uri_or_str
+    else
+      uri = URI.parse(uri_or_str.to_s)
+    end
+
      proxy = get_proxy(uri)
      proxy.open_timeout = opentimeout
      proxy.read_timeout = readtimeout
  
      begin
        proxy.start() {|http|
+        yield uri.request_uri() if block_given?
          resp = http.get(uri.request_uri(), @headers)
          case resp
          when Net::HTTPSuccess
-          if cache
+          if cache && !(resp.key?('cache-control') && resp['cache-control']=='must-revalidate')
              k = uri.to_s
              @cache[k] = Hash.new
              @cache[k][:body] = resp.body
@@ -161,7 +171,7 @@ class HttpUtil
            return resp.body
          when Net::HTTPRedirection
            debug "Redirecting #{uri} to #{resp['location']}"
-          yield resp['location']
+          yield resp['location'] if block_given?
            if max_redir > 0
              return get( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1, cache)
            else
@@ -170,30 +180,39 @@ class HttpUtil
          else
            debug "HttpUtil.get return code #{resp.code} #{resp.body}"
          end
+        @last_response = resp
          return nil
        }
      rescue StandardError, Timeout::Error => e
        error "HttpUtil.get exception: #{e.inspect}, while trying to get #{uri}"
        debug e.backtrace.join("\n")
      end
+    @last_response = nil
      return nil
    end
  
    # just like the above, but only gets the head
-  def head(uri, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
+  def head(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
+    if uri_or_str.kind_of?(URI)
+      uri = uri_or_str
+    else
+      uri = URI.parse(uri_or_str.to_s)
+    end
+
      proxy = get_proxy(uri)
      proxy.open_timeout = opentimeout
      proxy.read_timeout = readtimeout
  
      begin
        proxy.start() {|http|
-        resp = http.head(uri.request_uri(), @headers)
+        yield uri.request_uri() if block_given?
+        resp = http.request_head(uri.request_uri(), @headers)
          case resp
          when Net::HTTPSuccess
            return resp
          when Net::HTTPRedirection
            debug "Redirecting #{uri} to #{resp['location']}"
-          yield resp['location']
+          yield resp['location'] if block_given?
            if max_redir > 0
              return head( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1)
            else
@@ -202,20 +221,28 @@ class HttpUtil
          else
            debug "HttpUtil.head return code #{resp.code}"
          end
+        @last_response = resp
          return nil
        }
      rescue StandardError, Timeout::Error => e
        error "HttpUtil.head exception: #{e.inspect}, while trying to get #{uri}"
        debug e.backtrace.join("\n")
      end
+    @last_response = nil
      return nil
    end
  
    # gets a page from the cache if it's still (assumed to be) valid
    # TODO remove stale cached pages, except when called with noexpire=true
-  def get_cached(uri, readtimeout=10, opentimeout=5,
+  def get_cached(uri_or_str, readtimeout=10, opentimeout=5,
                   max_redir=@bot.config['http.max_redir'],
                   noexpire=@bot.config['http.no_expire_cache'])
+    if uri_or_str.kind_of?(URI)
+      uri = uri_or_str
+    else
+      uri = URI.parse(uri_or_str.to_s)
+    end
+
      k = uri.to_s
      if !@cache.key?(k)
        remove_stale_cache unless noexpire
@@ -237,8 +264,8 @@ class HttpUtil
          h = head(uri, readtimeout, opentimeout, max_redir)
          if h.key?('last-modified')
            if Time.httpdate(h['last-modified']) == @cache[k][:last_mod]
-            if resp.key?('date')
-              @cache[k][:last_use] = Time.httpdate(resp['date'])
+            if h.key?('date')
+              @cache[k][:last_use] = Time.httpdate(h['date'])
              else
                @cache[k][:last_use] = now
              end
@@ -279,7 +306,7 @@ class HttpUtil
    def remove_stale_cache
      now = Time.new
      @cache.reject! { |k, val|
-      !val.key?[:last_modified] && expired?(val, now)
+      !val.key?(:last_modified) && expired?(val, now)
      }
    end