]> git.netwichtig.de Git - user/henk/code/ruby/rbot.git/blobdiff - data/rbot/plugins/url.rb
url plugin: revert to block get_response and partial_body to work around sites which...
[user/henk/code/ruby/rbot.git] / data / rbot / plugins / url.rb
index 23a326384f264aecaca9c66933a4020f3d67caa0..0a5ef74ef8c4b5db8819a4dcf00ee4ba1e0dc6af 100644 (file)
@@ -1,5 +1,3 @@
-require 'uri'
-
 Url = Struct.new("Url", :channel, :nick, :time, :url)
 TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
 LINK_INFO = "[Link Info]"
@@ -38,61 +36,56 @@ class UrlPlugin < Plugin
     return if url.scheme !~ /https?/
 
     title = nil
+    extra = String.new
 
     begin
-      @bot.httputil.get_response(url) { |response|
-        case response
+      debug "+ getting #{url.request_uri}"
+      @bot.httputil.get_response(url) { |resp|
+        case resp
         when Net::HTTPSuccess
-          extra = String.new
-
-          if response['content-type'] =~ /^text\//
-
-            title = String.new
 
-            # since the content is 'text/*' and is small enough to
-            # be a webpage, retrieve the title from the page
-            debug "+ getting #{url.request_uri}"
-
-            # we act differently depending on whether we want the first par or not:
-            # in the first case we download the initial part and the parse it; in the second
-            # case we only download as much as we need to find the title
+          if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
+            # The page is text or HTML, so we can try finding a title and, if
+            # requested, the first par.
+            #
+            # We act differently depending on whether we want the first par or
+            # not: in the first case we download the initial part and then parse
+            # it; in the second case we only download as much as we need to find
+            # the title
+            #
             if @bot.config['url.first_par']
-              partial = response.partial_body(@bot.config['http.info_bytes'])
-              first_par = Utils.ircify_first_html_par(partial)
-              extra << "\n#{LINK_INFO} text: #{first_par}" unless first_par.empty?
+              partial = resp.partial_body(@bot.config['http.info_bytes'])
               title = get_title_from_html(partial)
-              if title
-                return "title: #{title}#{extra}"
-              end
+              first_par = Utils.ircify_first_html_par(partial, :strip => title)
+              extra << ", #{Bold}text#{Bold}: #{first_par}" unless first_par.empty?
+              return "#{Bold}title#{Bold}: #{title}#{extra}" if title
             else
-              response.partial_body(@bot.config['http.info_bytes']) { |part|
+              resp.partial_body(@bot.config['http.info_bytes']) { |part|
                 title = get_title_from_html(part)
-                return "title: #{title}" if title
+                return "#{Bold}title#{Bold}: #{title}" if title
               }
             end
-            # if nothing was found, provide more basic info
+          # if nothing was found, provide more basic info, as for non-html pages
           end
 
-          debug response.to_hash.inspect
+          debug resp.to_hash.inspect
+
+          enc = resp['content-encoding']
+
+          extra << ", #{Bold}encoding#{Bold}: #{enc}" if enc
+
           unless @bot.config['url.titles_only']
             # content doesn't have title, just display info.
-            size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
-            size = size ? ", size: #{size} bytes" : ""
-            return "type: #{response['content-type']}#{size}#{extra}"
+            size = resp['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
+            size = size ? ", #{Bold}size#{Bold}: #{size} bytes" : ""
+            return "#{Bold}type#{Bold}: #{resp['content-type']}#{size}#{extra}"
           end
-        when Net::HTTPResponse
-          return "Error getting link (#{response.code} - #{response.message})"
         else
-          raise response
+          return "Error getting link (#{resp.code} - #{resp.message})"
         end
       }
-    rescue Object => e
-      if e.class <= StandardError
-        error e.inspect
-        debug e.backtrace.join("\n")
-      end
-
-      msg = e.respond_to?(:message) ? e.message : e.to_s
+    rescue Exception => e
+      error e
       return "Error connecting to site (#{e.message})"
     end
   end