X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Furl.rb;h=1e72a3a14f4a6be39e980576caa632253c9ed3ed;hb=ac4141a466cc992539ba076d1188cfa15b35ab6f;hp=396c5ef219ccc7db8deb0b61782500ed1a30a4f0;hpb=7bbff3acaa1ea14fcb61dd3166ef62b01c33c01a;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb index 396c5ef2..1e72a3a1 100644 --- a/data/rbot/plugins/url.rb +++ b/data/rbot/plugins/url.rb @@ -312,6 +312,31 @@ class UrlPlugin < Plugin title = title[0..255] if title.length > 255 "[Link Info] title: #{title}" end + + def read_data_from_response(response, amount) + + amount_read = 0 + chunks = [] + + response.read_body do |chunk| # read body now + + amount_read += chunk.length + + if amount_read > amount + amount_of_overflow = amount_read - amount + chunk = chunk[0...-amount_of_overflow] + end + + chunks << chunk + + break if amount_read >= amount + + end + + chunks.join('') + + end + def get_title_for_url(uri_str, depth=10) # This god-awful mess is what the ruby http library has reduced me to. @@ -326,37 +351,41 @@ class UrlPlugin < Plugin return if url.scheme !~ /https?/ puts "+ connecting to #{url.host}:#{url.port}" - http = @bot.httputil.get_proxy(url) - title = http.start do |http| - url.path = '/' if url.path == '' - head = http.request_head(url.path) - case head - when Net::HTTPRedirection then - # call self recursively if this is a redirect - redirect_to = head['location'] - puts "+ redirect location: #{redirect_to}" - url = URI.join url.to_s, redirect_to - puts "+ whee, redirecting to #{url.to_s}!" - title = get_title_for_url(url.to_s, depth-1) - when Net::HTTPSuccess then - if head['content-type'] =~ /^text\// and (not head['content-length'] or head['content-length'].to_i < 400000) - # since the content is 'text/*' and is small enough to - # be a webpage, retrieve the title from the page - puts "+ getting #{url.request_uri}" - response = http.request_get(url.request_uri) - return get_title_from_html(response.body) - else - # content doesn't have title, just display info. - size = head['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') - #lastmod = head['last-modified'] - return "[Link Info] type: #{head['content-type']}#{size ? ", size: #{size} bytes" : ""}" - end - when Net::HTTPClientError then - return "[Link Info] Error getting link (#{head.code} - #{head.message})" - when Net::HTTPServerError then - return "[Link Info] Error getting link (#{head.code} - #{head.message})" - end - end + http = @bot.httputil.get_proxy(url) + title = http.start { |http| + url.path = '/' if url.path == '' + + http.request_get(url.path, "User-Agent" => "rbot-url_plugin/666.666") { |response| + + case response + when Net::HTTPRedirection then + # call self recursively if this is a redirect + redirect_to = response['location'] || './' + puts "+ redirect location: #{redirect_to.inspect}" + url = URI.join url.to_s, redirect_to + puts "+ whee, redirecting to #{url.to_s}!" + title = get_title_for_url(url.to_s, depth-1) + when Net::HTTPSuccess then + if response['content-type'] =~ /^text\// + # since the content is 'text/*' and is small enough to + # be a webpage, retrieve the title from the page + puts "+ getting #{url.request_uri}" + data = read_data_from_response(response, 50000) + return get_title_from_html(data) + else + # content doesn't have title, just display info. + size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') + return "[Link Info] type: #{response['content-type']}#{size ? ", size: #{size} bytes" : ""}" + end + when Net::HTTPClientError then + return "[Link Info] Error getting link (#{response.code} - #{response.message})" + when Net::HTTPServerError then + return "[Link Info] Error getting link (#{response.code} - #{response.message})" + end # end of "case response" + + } # end of request block + } # end of http start block + rescue SocketError => e return "[Link Info] Error connecting to site (#{e.message})" end