lib/rbot/httputil.rb

   1 # encoding: UTF-8
   2 #-- vim:sw=2:et
   3 #++
   4 #
   5 # :title: rbot HTTP provider
   6 #
   7 # Author:: Tom Gilbert <tom@linuxbrit.co.uk>
   8 # Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
   9 # Author:: Dmitry "jsn" Kim <dmitry point kim at gmail point com>
  10
  11 require 'resolv'
  12 require 'net/http'
  13 require 'cgi'
  14
  15 begin
  16   require 'nokogiri'
  17 rescue LoadError => e
  18   error "No nokogiri library found, some features might not be available!"
  19 end
  20
  21 # To handle Gzipped pages
  22 require 'stringio'
  23 require 'zlib'
  24
  25 module ::Net
  26   class HTTPResponse
  27     attr_accessor :no_cache
  28     unless method_defined? :raw_body
  29       alias :raw_body :body
  30     end
  31
  32     def body_charset(str=self.raw_body)
  33       ctype = self['content-type'] || 'text/html'
  34       return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i
  35
  36       charsets = ['ISO-8859-1'] # should be in config
  37
  38       if ctype.match(/charset=["']?([^\s"']+)["']?/i)
  39         charsets << $1
  40         debug "charset #{charsets.last} added from header"
  41       end
  42
  43       # str might be invalid utf-8 that will crash on the pattern match:
  44       str.encode!('UTF-8', 'UTF-8', :invalid => :replace)
  45       case str
  46       when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
  47         charsets << $1
  48         debug "xml charset #{charsets.last} added from xml pi"
  49       when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
  50         meta = $1
  51         if meta =~ /charset=['"]?([^\s'";]+)['"]?/
  52           charsets << $1
  53           debug "html charset #{charsets.last} added from meta"
  54         end
  55       end
  56       return charsets.uniq
  57     end
  58
  59     def body_to_utf(str)
  60       charsets = self.body_charset(str) or return str
  61
  62       charsets.reverse_each do |charset|
  63         begin
  64           debug "try decoding using #{charset}"
  65           str.force_encoding(charset)
  66           tmp = str.encode('UTF-16le', :invalid => :replace, :replace => '').encode('UTF-8')
  67           if tmp
  68             str = tmp
  69             break
  70           end
  71         rescue
  72           error 'failed to use encoding'
  73           error $!
  74         end
  75       end
  76
  77       return str
  78     end
  79
  80     def decompress_body(str)
  81       method = self['content-encoding']
  82       case method
  83       when nil
  84         return str
  85       when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
  86         debug "gunzipping body"
  87         begin
  88           return Zlib::GzipReader.new(StringIO.new(str)).read
  89         rescue Zlib::Error => e
  90           # If we can't unpack the whole stream (e.g. because we're doing a
  91           # partial read
  92           debug "full gunzipping failed (#{e}), trying to recover as much as possible"
  93           ret = ''
  94           ret.force_encoding(Encoding::ASCII_8BIT)
  95           begin
  96             Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
  97               ret << byte
  98             }
  99           rescue
 100           end
 101           return ret
 102         end
 103       when 'deflate'
 104         debug "inflating body"
 105         # From http://www.koders.com/ruby/fid927B4382397E5115AC0ABE21181AB5C1CBDD5C17.aspx?s=thread:
 106         # -MAX_WBITS stops zlib from looking for a zlib header
 107         inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
 108         begin
 109           return inflater.inflate(str)
 110         rescue Zlib::Error => e
 111           raise e
 112           # TODO
 113           # debug "full inflation failed (#{e}), trying to recover as much as possible"
 114         end
 115       when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
 116         # B0rked servers (Freshmeat being one of them) sometimes return the charset
 117         # in the content-encoding; in this case we assume that the document has
 118         # a standard content-encoding
 119         old_hsh = self.to_hash
 120         self['content-type']= self['content-type']+"; charset="+method.downcase
 121         warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
 122         return str
 123       else
 124         debug self.to_hash
 125         raise "Unhandled content encoding #{method}"
 126       end
 127     end
 128
 129     def cooked_body
 130       return self.body_to_utf(self.decompress_body(self.raw_body))
 131     end
 132
 133     # Read chunks from the body until we have at least _size_ bytes, yielding
 134     # the partial text at each chunk. Return the partial body.
 135     def partial_body(size=0, &block)
 136
 137       partial = String.new
 138
 139       if @read
 140         debug "using body() as partial"
 141         partial = self.body
 142         yield self.body_to_utf(self.decompress_body(partial)) if block_given?
 143       else
 144         debug "disabling cache"
 145         self.no_cache = true
 146         self.read_body { |chunk|
 147           partial << chunk
 148           yield self.body_to_utf(self.decompress_body(partial)) if block_given?
 149           break if size and size > 0 and partial.length >= size
 150         }
 151       end
 152
 153       return self.body_to_utf(self.decompress_body(partial))
 154     end
 155
 156     def xpath(path)
 157       document = Nokogiri::HTML.parse(self.body)
 158       document.xpath(path)
 159     end
 160
 161     def to_json
 162       JSON::parse(self.body)
 163     end
 164   end
 165 end
 166
 167 module ::Irc
 168 module Utils
 169
 170 # class for making http requests easier (mainly for plugins to use)
 171 # this class can check the bot proxy configuration to determine if a proxy
 172 # needs to be used, which includes support for per-url proxy configuration.
 173 class HttpUtil
 174     Bot::Config.register Bot::Config::IntegerValue.new('http.read_timeout',
 175       :default => 10, :desc => "Default read timeout for HTTP connections")
 176     Bot::Config.register Bot::Config::IntegerValue.new('http.open_timeout',
 177       :default => 20, :desc => "Default open timeout for HTTP connections")
 178     Bot::Config.register Bot::Config::BooleanValue.new('http.use_proxy',
 179       :default => false, :desc => "should a proxy be used for HTTP requests?")
 180     Bot::Config.register Bot::Config::StringValue.new('http.proxy_uri', :default => false,
 181       :desc => "Proxy server to use for HTTP requests (URI, e.g http://proxy.host:port)")
 182     Bot::Config.register Bot::Config::StringValue.new('http.proxy_user',
 183       :default => nil,
 184       :desc => "User for authenticating with the http proxy (if required)")
 185     Bot::Config.register Bot::Config::StringValue.new('http.proxy_pass',
 186       :default => nil,
 187       :desc => "Password for authenticating with the http proxy (if required)")
 188     Bot::Config.register Bot::Config::ArrayValue.new('http.proxy_include',
 189       :default => [],
 190       :desc => "List of regexps to check against a URI's hostname/ip to see if we should use the proxy to access this URI. All URIs are proxied by default if the proxy is set, so this is only required to re-include URIs that might have been excluded by the exclude list. e.g. exclude /.*\.foo\.com/, include bar\.foo\.com")
 191     Bot::Config.register Bot::Config::ArrayValue.new('http.proxy_exclude',
 192       :default => [],
 193       :desc => "List of regexps to check against a URI's hostname/ip to see if we should use avoid the proxy to access this URI and access it directly")
 194     Bot::Config.register Bot::Config::IntegerValue.new('http.max_redir',
 195       :default => 5,
 196       :desc => "Maximum number of redirections to be used when getting a document")
 197     Bot::Config.register Bot::Config::IntegerValue.new('http.expire_time',
 198       :default => 60,
 199       :desc => "After how many minutes since last use a cached document is considered to be expired")
 200     Bot::Config.register Bot::Config::IntegerValue.new('http.max_cache_time',
 201       :default => 60*24,
 202       :desc => "After how many minutes since first use a cached document is considered to be expired")
 203     Bot::Config.register Bot::Config::BooleanValue.new('http.no_expire_cache',
 204       :default => false,
 205       :desc => "Set this to true if you want the bot to never expire the cached pages")
 206     Bot::Config.register Bot::Config::IntegerValue.new('http.info_bytes',
 207       :default => 8192,
 208       :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.")
 209
 210   class CachedObject
 211     attr_accessor :response, :last_used, :first_used, :count, :expires, :date
 212
 213     def self.maybe_new(resp)
 214       debug "maybe new #{resp}"
 215       return nil if resp.no_cache
 216       return nil unless Net::HTTPOK === resp ||
 217       Net::HTTPMovedPermanently === resp ||
 218       Net::HTTPFound === resp ||
 219       Net::HTTPPartialContent === resp
 220
 221       cc = resp['cache-control']
 222       return nil if cc && (cc =~ /no-cache/i)
 223
 224       date = Time.now
 225       if d = resp['date']
 226         date = Time.httpdate(d)
 227       end
 228
 229       return nil if resp['expires'] && (Time.httpdate(resp['expires']) < date)
 230
 231       debug "creating cache obj"
 232
 233       self.new(resp)
 234     end
 235
 236     def use
 237       now = Time.now
 238       @first_used = now if @count == 0
 239       @last_used = now
 240       @count += 1
 241     end
 242
 243     def expired?
 244       debug "checking expired?"
 245       if cc = self.response['cache-control'] && cc =~ /must-revalidate/
 246         return true
 247       end
 248       return self.expires < Time.now
 249     end
 250
 251     def setup_headers(hdr)
 252       hdr['if-modified-since'] = self.date.rfc2822
 253
 254       debug "ims == #{hdr['if-modified-since']}"
 255
 256       if etag = self.response['etag']
 257         hdr['if-none-match'] = etag
 258         debug "etag: #{etag}"
 259       end
 260     end
 261
 262     def revalidate(resp = self.response)
 263       @count = 0
 264       self.use
 265       self.date = resp.key?('date') ? Time.httpdate(resp['date']) : Time.now
 266
 267       cc = resp['cache-control']
 268       if cc && (cc =~ /max-age=(\d+)/)
 269         self.expires = self.date + $1.to_i
 270       elsif resp.key?('expires')
 271         self.expires = Time.httpdate(resp['expires'])
 272       elsif lm = resp['last-modified']
 273         delta = self.date - Time.httpdate(lm)
 274         delta = 10 if delta <= 0
 275         delta /= 5
 276         self.expires = self.date + delta
 277       else
 278         self.expires = self.date + 300
 279       end
 280       # self.expires = Time.now + 10 # DEBUG
 281       debug "expires on #{self.expires}"
 282
 283       return true
 284     end
 285
 286     private
 287     def initialize(resp)
 288       @response = resp
 289       begin
 290         self.revalidate
 291         self.response.raw_body
 292       rescue Exception => e
 293         error e
 294         raise e
 295       end
 296     end
 297   end
 298
 299   # Create the HttpUtil instance, associating it with Bot _bot_
 300   #
 301   def initialize(bot)
 302     @bot = bot
 303     @cache = Hash.new
 304     @headers = {
 305       'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
 306       'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
 307       'User-Agent' =>
 308         "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
 309     }
 310     debug "starting http cache cleanup timer"
 311     @timer = @bot.timer.add(300) {
 312       self.remove_stale_cache unless @bot.config['http.no_expire_cache']
 313     }
 314   end
 315
 316   # Clean up on HttpUtil unloading, by stopping the cache cleanup timer.
 317   def cleanup
 318     debug 'stopping http cache cleanup timer'
 319     @bot.timer.remove(@timer)
 320   end
 321
 322   # This method checks if a proxy is required to access _uri_, by looking at
 323   # the values of config values +http.proxy_include+ and +http.proxy_exclude+.
 324   #
 325   # Each of these config values, if set, should be a Regexp the server name and
 326   # IP address should be checked against.
 327   #
 328   def proxy_required(uri)
 329     use_proxy = true
 330     if @bot.config["http.proxy_exclude"].empty? && @bot.config["http.proxy_include"].empty?
 331       return use_proxy
 332     end
 333
 334     list = [uri.host]
 335     begin
 336       list.concat Resolv.getaddresses(uri.host)
 337     rescue StandardError => err
 338       warning "couldn't resolve host uri.host"
 339     end
 340
 341     unless @bot.config["http.proxy_exclude"].empty?
 342       re = @bot.config["http.proxy_exclude"].collect{|r| Regexp.new(r)}
 343       re.each do |r|
 344         list.each do |item|
 345           if r.match(item)
 346             use_proxy = false
 347             break
 348           end
 349         end
 350       end
 351     end
 352     unless @bot.config["http.proxy_include"].empty?
 353       re = @bot.config["http.proxy_include"].collect{|r| Regexp.new(r)}
 354       re.each do |r|
 355         list.each do |item|
 356           if r.match(item)
 357             use_proxy = true
 358             break
 359           end
 360         end
 361       end
 362     end
 363     debug "using proxy for uri #{uri}?: #{use_proxy}"
 364     return use_proxy
 365   end
 366
 367   # _uri_:: URI to create a proxy for
 368   #
 369   # Return a net/http Proxy object, configured for proxying based on the
 370   # bot's proxy configuration. See proxy_required for more details on this.
 371   #
 372   def get_proxy(uri, options = {})
 373     opts = {
 374       :read_timeout => @bot.config["http.read_timeout"],
 375       :open_timeout => @bot.config["http.open_timeout"]
 376     }.merge(options)
 377
 378     proxy = nil
 379     proxy_host = nil
 380     proxy_port = nil
 381     proxy_user = nil
 382     proxy_pass = nil
 383
 384     if @bot.config["http.use_proxy"]
 385       if (ENV['http_proxy'])
 386         proxy = URI.parse ENV['http_proxy'] rescue nil
 387       end
 388       if (@bot.config["http.proxy_uri"])
 389         proxy = URI.parse @bot.config["http.proxy_uri"] rescue nil
 390       end
 391       if proxy
 392         debug "proxy is set to #{proxy.host} port #{proxy.port}"
 393         if proxy_required(uri)
 394           proxy_host = proxy.host
 395           proxy_port = proxy.port
 396           proxy_user = @bot.config["http.proxy_user"]
 397           proxy_pass = @bot.config["http.proxy_pass"]
 398         end
 399       end
 400     end
 401
 402     h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
 403     h.use_ssl = true if uri.scheme == "https"
 404
 405     h.read_timeout = opts[:read_timeout]
 406     h.open_timeout = opts[:open_timeout]
 407     return h
 408   end
 409
 410   # Internal method used to handle response _resp_ received when making a
 411   # request for URI _uri_.
 412   #
 413   # It follows redirects, optionally yielding them if option :yield is :all.
 414   #
 415   # Also yields and returns the final _resp_.
 416   #
 417   def handle_response(uri, resp, opts, &block) # :yields: resp
 418     if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
 419       if resp.key?('location')
 420         raise 'Too many redirections' if opts[:max_redir] <= 0
 421         yield resp if opts[:yield] == :all && block_given?
 422         # some servers actually provide unescaped location, e.g.
 423         # http://ulysses.soup.io/post/60734021/Image%20curve%20ball
 424         # rediects to something like
 425         # http://ulysses.soup.io/post/60734021/Image curve ball?sessid=8457b2a3752085cca3fb1d79b9965446
 426         # causing the URI parser to (obviously) complain. We cannot just
 427         # escape blindly, as this would make a mess of already-escaped
 428         # locations, so we only do it if the URI.parse fails
 429         loc = resp['location']
 430         escaped = false
 431         debug "redirect location: #{loc.inspect}"
 432         begin
 433           new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
 434         rescue
 435           if escaped
 436             raise $!
 437           else
 438             loc = URI.escape(loc)
 439             escaped = true
 440             debug "escaped redirect location: #{loc.inspect}"
 441             retry
 442           end
 443         end
 444         new_opts = opts.dup
 445         new_opts[:max_redir] -= 1
 446         case opts[:method].to_s.downcase.intern
 447         when :post, :"net::http::post"
 448           new_opts[:method] = :get
 449         end
 450         if resp['set-cookie']
 451           debug "set cookie request for #{resp['set-cookie']}"
 452           cookie, cookie_flags = (resp['set-cookie']+'; ').split('; ', 2)
 453           domain = uri.host
 454           cookie_flags.scan(/(\S+)=(\S+);/) { |key, val|
 455             if key.intern == :domain
 456               domain = val
 457               break
 458             end
 459           }
 460           debug "cookie domain #{domain} / #{new_loc.host}"
 461           if new_loc.host.rindex(domain) == new_loc.host.length - domain.length
 462             debug "setting cookie"
 463             new_opts[:headers] ||= Hash.new
 464             new_opts[:headers]['Cookie'] = cookie
 465           else
 466             debug "cookie is for another domain, ignoring"
 467           end
 468         end
 469         debug "following the redirect to #{new_loc}"
 470         return get_response(new_loc, new_opts, &block)
 471       else
 472         warning ":| redirect w/o location?"
 473       end
 474     end
 475     class << resp
 476       undef_method :body
 477       alias :body :cooked_body
 478     end
 479     unless resp['content-type']
 480       debug "No content type, guessing"
 481       resp['content-type'] =
 482         case resp['x-rbot-location']
 483         when /.html?$/i
 484           'text/html'
 485         when /.xml$/i
 486           'application/xml'
 487         when /.xhtml$/i
 488           'application/xml+xhtml'
 489         when /.(gif|png|jpe?g|jp2|tiff?)$/i
 490           "image/#{$1.sub(/^jpg$/,'jpeg').sub(/^tif$/,'tiff')}"
 491         else
 492           'application/octetstream'
 493         end
 494     end
 495     if block_given?
 496       yield(resp)
 497     else
 498       # Net::HTTP wants us to read the whole body here
 499       resp.raw_body
 500     end
 501     return resp
 502   end
 503
 504   # _uri_::     uri to query (URI object or String)
 505   #
 506   # Generic http transaction method. It will return a Net::HTTPResponse
 507   # object or raise an exception
 508   #
 509   # If a block is given, it will yield the response (see :yield option)
 510   #
 511   # Currently supported _options_:
 512   #
 513   # method::     request method [:get (default), :post or :head]
 514   # open_timeout::     open timeout for the proxy
 515   # read_timeout::     read timeout for the proxy
 516   # cache::            should we cache results?
 517   # yield::      if :final [default], calls the block for the response object;
 518   #              if :all, call the block for all intermediate redirects, too
 519   # max_redir::  how many redirects to follow before raising the exception
 520   #              if -1, don't follow redirects, just return them
 521   # range::      make a ranged request (usually GET). accepts a string
 522   #              for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000")
 523   # body::       request body (usually for POST requests)
 524   # headers::    additional headers to be set for the request. Its value must
 525   #              be a Hash in the form { 'Header' => 'value' }
 526   #
 527   def get_response(uri_or_s, options = {}, &block) # :yields: resp
 528     uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
 529     unless URI::HTTP === uri
 530       if uri.scheme
 531         raise "#{uri.scheme.inspect} URI scheme is not supported"
 532       else
 533         raise "don't know what to do with #{uri.to_s.inspect}"
 534       end
 535     end
 536
 537     opts = {
 538       :max_redir => @bot.config['http.max_redir'],
 539       :yield => :final,
 540       :cache => true,
 541       :method => :GET
 542     }.merge(options)
 543
 544     req_class = case opts[:method].to_s.downcase.intern
 545                 when :head, :"net::http::head"
 546                   opts[:max_redir] = -1
 547                   Net::HTTP::Head
 548                 when :get, :"net::http::get"
 549                   Net::HTTP::Get
 550                 when :post, :"net::http::post"
 551                   opts[:cache] = false
 552                   opts[:body] or raise 'post request w/o a body?'
 553                   warning "refusing to cache POST request" if options[:cache]
 554                   Net::HTTP::Post
 555                 else
 556                   warning "unsupported method #{opts[:method]}, doing GET"
 557                   Net::HTTP::Get
 558                 end
 559
 560     if req_class != Net::HTTP::Get && opts[:range]
 561       warning "can't request ranges for #{req_class}"
 562       opts.delete(:range)
 563     end
 564
 565     cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"
 566
 567     if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
 568       if opts[:cache]
 569         warning "can't cache #{req_class.inspect} requests, working w/o cache"
 570         opts[:cache] = false
 571       end
 572     end
 573
 574     debug "get_response(#{uri}, #{opts.inspect})"
 575
 576     cached = @cache[cache_key]
 577
 578     if opts[:cache] && cached
 579       debug "got cached"
 580       if !cached.expired?
 581         debug "using cached"
 582         cached.use
 583         return handle_response(uri, cached.response, opts, &block)
 584       end
 585     end
 586
 587     headers = @headers.dup.merge(opts[:headers] || {})
 588     headers['Range'] = opts[:range] if opts[:range]
 589     headers['Authorization'] = opts[:auth_head] if opts[:auth_head]
 590
 591     if opts[:cache] && cached && (req_class == Net::HTTP::Get)
 592       cached.setup_headers headers
 593     end
 594
 595     req = req_class.new(uri.request_uri, headers)
 596     if uri.user && uri.password
 597       req.basic_auth(uri.user, uri.password)
 598       opts[:auth_head] = req['Authorization']
 599     end
 600     req.body = opts[:body] if req_class == Net::HTTP::Post
 601     debug "prepared request: #{req.to_hash.inspect}"
 602
 603     begin
 604       get_proxy(uri, opts).start do |http|
 605         http.request(req) do |resp|
 606           resp['x-rbot-location'] = uri.to_s
 607           if Net::HTTPNotModified === resp
 608             debug "not modified"
 609             begin
 610               cached.revalidate(resp)
 611             rescue Exception => e
 612               error e
 613             end
 614             debug "reusing cached"
 615             resp = cached.response
 616           elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
 617             debug "http error, deleting cached obj" if cached
 618             @cache.delete(cache_key)
 619           end
 620
 621           begin
 622             return handle_response(uri, resp, opts, &block)
 623           ensure
 624             if cached = CachedObject.maybe_new(resp) rescue nil
 625               debug "storing to cache"
 626               @cache[cache_key] = cached
 627             end
 628           end
 629         end
 630       end
 631     rescue Exception => e
 632       error e
 633       raise e.message
 634     end
 635   end
 636
 637   # _uri_::     uri to query (URI object or String)
 638   #
 639   # Simple GET request, returns (if possible) response body following redirs
 640   # and caching if requested, yielding the actual response(s) to the optional
 641   # block. See get_response for details on the supported _options_
 642   #
 643   def get(uri, options = {}, &block) # :yields: resp
 644     begin
 645       resp = get_response(uri, options, &block)
 646       raise "http error: #{resp}" unless Net::HTTPOK === resp ||
 647         Net::HTTPPartialContent === resp
 648       if options[:resp]
 649         return resp
 650       else
 651         return resp.body
 652       end
 653     rescue Exception => e
 654       error e
 655     end
 656     return nil
 657   end
 658
 659   # _uri_::     uri to query (URI object or String)
 660   #
 661   # Simple HEAD request, returns (if possible) response head following redirs
 662   # and caching if requested, yielding the actual response(s) to the optional
 663   # block. See get_response for details on the supported _options_
 664   #
 665   def head(uri, options = {}, &block) # :yields: resp
 666     opts = {:method => :head}.merge(options)
 667     begin
 668       resp = get_response(uri, opts, &block)
 669       # raise "http error #{resp}" if Net::HTTPClientError === resp ||
 670       #   Net::HTTPServerError == resp
 671       return resp
 672     rescue Exception => e
 673       error e
 674     end
 675     return nil
 676   end
 677
 678   # _uri_::     uri to query (URI object or String)
 679   # _data_::    body of the POST
 680   #
 681   # Simple POST request, returns (if possible) response following redirs and
 682   # caching if requested, yielding the response(s) to the optional block. See
 683   # get_response for details on the supported _options_
 684   #
 685   def post(uri, data, options = {}, &block) # :yields: resp
 686     opts = {:method => :post, :body => data, :cache => false}.merge(options)
 687     begin
 688       resp = get_response(uri, opts, &block)
 689       raise 'http error' unless Net::HTTPOK === resp or Net::HTTPCreated === resp
 690       return resp
 691     rescue Exception => e
 692       error e
 693     end
 694     return nil
 695   end
 696
 697   # _uri_::     uri to query (URI object or String)
 698   # _nbytes_::  number of bytes to get
 699   #
 700   # Partial GET request, returns (if possible) the first _nbytes_ bytes of the
 701   # response body, following redirs and caching if requested, yielding the
 702   # actual response(s) to the optional block. See get_response for details on
 703   # the supported _options_
 704   #
 705   def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
 706     opts = {:range => "bytes=0-#{nbytes}"}.merge(options)
 707     return get(uri, opts, &block)
 708   end
 709
 710   def remove_stale_cache
 711     debug "Removing stale cache"
 712     now = Time.new
 713     max_last = @bot.config['http.expire_time'] * 60
 714     max_first = @bot.config['http.max_cache_time'] * 60
 715     debug "#{@cache.size} pages before"
 716     begin
 717       @cache.reject! { |k, val|
 718         (now - val.last_used > max_last) || (now - val.first_used > max_first)
 719       }
 720     rescue => e
 721       error "Failed to remove stale cache: #{e.pretty_inspect}"
 722     end
 723     debug "#{@cache.size} pages after"
 724   end
 725
 726 end
 727 end
 728 end