Fine tune url plugin

author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2006-10-13 22:52:36 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2006-10-13 22:52:36 +0000
commit: c11bf42caa25fd9c4f96a0f43e0e9976f53696ee (patch)
tree: 2693635ad082638e5c86be3172fd756c1f02ec50
parent: 61f0ebb3a600888439c2f5d81e2b31daa704bf8d (diff)
2 files changed, 16 insertions, 18 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index b04beb87..0b0f87c7 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -338,7 +338,7 @@ class UrlPlugin < Plugin
   end
 
 
-  def get_title_for_url(uri_str, depth=10)
+  def get_title_for_url(uri_str, depth=@bot.config['http.max_redir'])
     # This god-awful mess is what the ruby http library has reduced me to.
     # Python's HTTP lib is so much nicer. :~(
     
@@ -346,8 +346,8 @@ class UrlPlugin < Plugin
         raise "Error: Maximum redirects hit."
     end
     
-    debug "+ Getting #{uri_str}"
-    url = URI.parse(uri_str)
+    debug "+ Getting #{uri_str.to_s}"
+    url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
     return if url.scheme !~ /https?/
 
     title = nil
@@ -355,37 +355,34 @@ class UrlPlugin < Plugin
     debug "+ connecting to #{url.host}:#{url.port}"
     http = @bot.httputil.get_proxy(url)
     http.start { |http|
-      url.path = '/' if url.path == ''
 
-      http.request_get(url.path, "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)") { |response|
+      http.request_get(url.request_uri(), @bot.httputil.headers) { |response|
         
         case response
-          when Net::HTTPRedirection, Net::HTTPMovedPermanently then
+          when Net::HTTPRedirection
             # call self recursively if this is a redirect
-            redirect_to = response['location']  || './'
+            redirect_to = response['location']  || '/'
             debug "+ redirect location: #{redirect_to.inspect}"
-            url = URI.join url.to_s, redirect_to
+            url = URI.join(url.to_s, redirect_to)
             debug "+ whee, redirecting to #{url.to_s}!"
-            return get_title_for_url(url.to_s, depth-1)
-          when Net::HTTPSuccess then
+            return get_title_for_url(url, depth-1)
+          when Net::HTTPSuccess
             if response['content-type'] =~ /^text\//
               # since the content is 'text/*' and is small enough to
               # be a webpage, retrieve the title from the page
               debug "+ getting #{url.request_uri}"
-              data = read_data_from_response(response, 50000)
+              # was 5*10^4 ... seems too much to me ... 4k should be enough for everybody ;)
+              data = read_data_from_response(response, 4096)
               return get_title_from_html(data)
             else
               # content doesn't have title, just display info.
               size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
-              return "[Link Info] type: #{response['content-type']}#{size ? ", size: #{size} bytes" : ""}"
+              size = size ? ", size: #{size} bytes" : ""
+              return "[Link Info] type: #{response['content-type']}#{size}"
             end
-          when Net::HTTPClientError then
-            return "[Link Info] Error getting link (#{response.code} - #{response.message})"
-          when Net::HTTPServerError then
-            return "[Link Info] Error getting link (#{response.code} - #{response.message})"
           else
-            return nil
-        end # end of "case response"
+            return "[Link Info] Error getting link (#{response.code} - #{response.message})"
+          end # end of "case response"
           
       } # end of request block
     } # end of http start block
diff --git a/lib/rbot/httputil.rb b/lib/rbot/httputil.rb
index ddbd8227..bcb05d88 100644
--- a/lib/rbot/httputil.rb
+++ b/lib/rbot/httputil.rb
@@ -48,6 +48,7 @@ class HttpUtil
     @last_response = nil
   end
   attr_reader :last_response
+  attr_reader :headers
 
   # if http_proxy_include or http_proxy_exclude are set, then examine the
   # uri to see if this is a proxied uri
author	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2006-10-13 22:52:36 +0000
committer	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2006-10-13 22:52:36 +0000
commit	c11bf42caa25fd9c4f96a0f43e0e9976f53696ee (patch)
tree	2693635ad082638e5c86be3172fd756c1f02ec50
parent	61f0ebb3a600888439c2f5d81e2b31daa704bf8d (diff)