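A fix for the case where a crappy webserver labels a giant file (like a .rar) as text/plain and the bot tries to download it: a text/* page is now only fetched for its title when its Content-Length is missing or under 400000 bytes; otherwise the bot just reports the link's type and size.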

author: Chris Gahan <chris@ill-logic.com> 2006-01-30 21:45:54 +0000
committer: Chris Gahan <chris@ill-logic.com> 2006-01-30 21:45:54 +0000
commit: 48fc730b582aebc7f2a8a57e07e1d27914de1e55 (patch)
tree: c36ac4cf7627cd880752e3207322ba999d9e89fc
parent: e4ba96db9bd2c62e13cd1cef31a2aab88b839a1f (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index f46cb205..2b5b468e 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -52,14 +52,14 @@ class UrlPlugin < Plugin
           puts "+ whee, redirecting to #{url.to_s}!"
           title = get_title_for_url(url.to_s)
         when Net::HTTPSuccess then
-          if head['content-type'] =~ /^text\//
-            # content is 'text/*'
-            # retrieve the title from the page
+          if head['content-type'] =~ /^text\// and (not head['content-length'] or head['content-length'].to_i < 400000)
+            # since the content is 'text/*' and is small enough to
+            # be a webpage, retrieve the title from the page
             puts "+ getting #{url.request_uri}"
             response = http.request_get(url.request_uri)
             return get_title_from_html(response.body)
           else
-            # content isn't 'text/*'... display info about the file.
+            # content doesn't have title, just display info.
             size = head['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
             #lastmod = head['last-modified']
             return "[Link Info] type: #{head['content-type']}#{size ? ", size: #{size} bytes" : ""}"
author	Chris Gahan <chris@ill-logic.com>	2006-01-30 21:45:54 +0000
committer	Chris Gahan <chris@ill-logic.com>	2006-01-30 21:45:54 +0000
commit	48fc730b582aebc7f2a8a57e07e1d27914de1e55 (patch)
tree	c36ac4cf7627cd880752e3207322ba999d9e89fc
parent	e4ba96db9bd2c62e13cd1cef31a2aab88b839a1f (diff)