git.netwichtig.de Git - user/henk/code/ruby/rbot.git/commitdiff
url plugin: customizable max amount of data to retrieve to look for a title. return...
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Sun, 25 Mar 2007 00:22:00 +0000 (00:22 +0000)
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Sun, 25 Mar 2007 00:22:00 +0000 (00:22 +0000)
data/rbot/plugins/url.rb
lib/rbot/core/utils/httputil.rb

index 0d85d473a1e6be42bc4a7be89b05d5d058761758..f9e64efbc55e42e85104881459a21b88dd3d0c6b 100644 (file)
@@ -47,20 +47,18 @@ class UrlPlugin < Plugin
             debug "+ getting #{url.request_uri}"
 
             # we look for the title in the first 4k bytes
-            # TODO make the amount of data configurable
-            response.partial_body(4096) { |part|
+            response.partial_body(@bot.config['http.info_bytes']) { |part|
               title = get_title_from_html(part)
               return title if title
             }
-            # if nothing was found, return nothing
-            return
-          else
-            unless @bot.config['url.titles_only']
-              # content doesn't have title, just display info.
-              size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
-              size = size ? ", size: #{size} bytes" : ""
-              return "type: #{response['content-type']}#{size}"
-            end
+            # if nothing was found, provide more basic info
+          end
+          debug response.to_hash.inspect
+          unless @bot.config['url.titles_only']
+            # content doesn't have title, just display info.
+            size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
+            size = size ? ", size: #{size} bytes" : ""
+            return "type: #{response['content-type']}#{size}"
           end
         when Net::HTTPResponse
           return "Error getting link (#{response.code} - #{response.message})"
index 6ca12d5bd4530a2da3c6d2befbcfcb6f311405f7..904e194114ece4fbd71500d6c9aedf64401a20e1 100644 (file)
@@ -23,14 +23,14 @@ module ::Net
   class HTTPResponse
     # Read chunks from the body until we have at least _size_ bytes, yielding
     # the partial text at each chunk. Return the partial body.
-    def partial_body(size, &block)
+    def partial_body(size=0, &block)
 
       partial = String.new
 
       self.read_body { |chunk|
         partial << chunk
         yield partial
-        break if size and partial.length >= size
+        break if size and size > 0 and partial.length >= size
       }
 
       return partial
@@ -75,6 +75,9 @@ class HttpUtil
     BotConfig.register BotConfigIntegerValue.new('http.no_expire_cache',
       :default => false,
       :desc => "Set this to true if you want the bot to never expire the cached pages")
+    BotConfig.register BotConfigIntegerValue.new('http.info_bytes',
+      :default => 4096,
+      :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page.")
 
   def initialize(bot)
     @bot = bot