#
# simple GET request: returns the response body (if possible), following
# redirects, and caches the body when caching is requested
- # it yields the urls it gets redirected to, for future uses
- def get(uri, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
+ # if a block is given, it yields the request URI before each request and
+ # the URLs it gets redirected to
+ # TODO we really need something to implement proper caching
+ def get(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"], cache=false)
+ if uri_or_str.kind_of?(URI)
+ uri = uri_or_str
+ else
+ uri = URI.parse(uri_or_str.to_s)
+ end
+
proxy = get_proxy(uri)
proxy.open_timeout = opentimeout
proxy.read_timeout = readtimeout
begin
proxy.start() {|http|
+ yield uri.request_uri() if block_given?
resp = http.get(uri.request_uri(), @headers)
case resp
when Net::HTTPSuccess
- if cache
+ if cache && !(resp.key?('cache-control') && resp['cache-control']=='must-revalidate')
k = uri.to_s
@cache[k] = Hash.new
@cache[k][:body] = resp.body
return resp.body
when Net::HTTPRedirection
debug "Redirecting #{uri} to #{resp['location']}"
- yield resp['location']
+ yield resp['location'] if block_given?
if max_redir > 0
return get( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1, cache)
else
end
# just like the above, but issues a HEAD request and returns the response
# object (headers only) instead of the body
- def head(uri, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
+ def head(uri_or_str, readtimeout=10, opentimeout=5, max_redir=@bot.config["http.max_redir"])
+ if uri_or_str.kind_of?(URI)
+ uri = uri_or_str
+ else
+ uri = URI.parse(uri_or_str.to_s)
+ end
+
proxy = get_proxy(uri)
proxy.open_timeout = opentimeout
proxy.read_timeout = readtimeout
begin
proxy.start() {|http|
+ yield uri.request_uri() if block_given?
resp = http.head(uri.request_uri(), @headers)
case resp
when Net::HTTPSuccess
return resp
when Net::HTTPRedirection
debug "Redirecting #{uri} to #{resp['location']}"
- yield resp['location']
+ yield resp['location'] if block_given?
if max_redir > 0
return head( URI.parse(resp['location']), readtimeout, opentimeout, max_redir-1)
else
# gets a page from the cache if it's still (assumed to be) valid
# TODO remove stale cached pages, except when called with noexpire=true
- def get_cached(uri, readtimeout=10, opentimeout=5,
+ def get_cached(uri_or_str, readtimeout=10, opentimeout=5,
max_redir=@bot.config['http.max_redir'],
noexpire=@bot.config['http.no_expire_cache'])
+ if uri_or_str.kind_of?(URI)
+ uri = uri_or_str
+ else
+ uri = URI.parse(uri_or_str.to_s)
+ end
+
k = uri.to_s
if !@cache.key?(k)
remove_stale_cache unless noexpire
h = head(uri, readtimeout, opentimeout, max_redir)
if h.key?('last-modified')
if Time.httpdate(h['last-modified']) == @cache[k][:last_mod]
- if resp.key?('date')
- @cache[k][:last_use] = Time.httpdate(resp['date'])
+ if h.key?('date')
+ @cache[k][:last_use] = Time.httpdate(h['date'])
else
@cache[k][:last_use] = now
end
# Purges stale entries from @cache.
#
# An entry is dropped only when it carries no :last_mod timestamp AND
# expired?(entry, now) reports it as expired. Entries that do have :last_mod
# are kept even when expired, because get_cached compares that value with
# the Last-Modified header from a HEAD request to revalidate them.
#
# The key tested here must be :last_mod, the key get_cached reads. Testing
# :last_modified never matched, so every expired entry was purged.
#
# Returns whatever Hash#reject! returns (nil when nothing was removed).
def remove_stale_cache
  now = Time.new
  @cache.reject! { |_k, val|
    !val.key?(:last_mod) && expired?(val, now)
  }
end