summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Gilbert <tom@linuxbrit.co.uk> 2006-01-26 09:18:08 +0000
committer Tom Gilbert <tom@linuxbrit.co.uk> 2006-01-26 09:18:08 +0000
commit 12b6b40826949033597c4f15494078ed94c0ce76 (patch)
tree f10e02e2ec2a8480d1c062c1d91987ccb7edff0a
parent 7b35a6bfcd69da8b308dd1bc8cb0b9218caf4504 (diff)
From Chris:
There was a bit of a glitch in that title grabber extension I wrote to the URL plugin -- namely, that it would just mindlessly download whatever link anybody posted to the channel. Whenever anybody posted a bigass quicktime movie or something, the bot would explode. So, I had to get down and dirty with Net::HTTP to check for mimetypes and redirects and stuff. There is a nice new feature though:
22:37 <@epitron> http://killswitchengage.com/shared/downloads/KillswitchEngage/KsE-RoseOfSharyn_video.mov
22:37 <@pookie> [Link Info] type: video/quicktime, size: 18,244,208 bytes
-rw-r--r-- data/rbot/plugins/url.rb 72
1 files changed, 58 insertions, 14 deletions
diff --git a/data/rbot/plugins/url.rb b/data/rbot/plugins/url.rb
index 286e8c4a..d48639de 100644
--- a/data/rbot/plugins/url.rb
+++ b/data/rbot/plugins/url.rb
@@ -1,4 +1,5 @@
-require 'open-uri'
+require 'net/http'
+require 'uri'
Url = Struct.new("Url", :channel, :nick, :time, :url)
TITLE_RE = /<\s*title\s*>(.+)<\s*\/title\s*>/im
@@ -20,21 +21,60 @@ class UrlPlugin < Plugin
"urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
end
- def get_title_for_url(url)
- begin
- pagedata = open(url).read
- return unless TITLE_RE.match(pagedata)
- title = $1.strip.gsub(/\s*\n\s*/, " ")
- title = title[0..255] if title.length > 255
- "Title: #{title}"
+ def get_title_from_html(pagedata)
+ return unless TITLE_RE.match(pagedata)
+ title = $1.strip.gsub(/\s*\n+\s*/, " ")
+ title = title[0..255] if title.length > 255
+ "[Title] #{title}"
+ end
- rescue SocketError => e
- "Title: ^- Error connecting to site (#{e.message})"
- rescue OpenURI::HTTPError => e
- "Title: ^- Error getting page (#{e.message})"
- rescue Exception => e
- "Title: ^- Error: #{e.inspect}"
+ def get_title_for_url(uri_str)
+ # This god-awful mess is what the ruby http library has reduced me to.
+ # Python's is so much nicer. :~(
+
+ puts "+ Getting #{uri_str}"
+ url = URI.parse(uri_str)
+ return if url.scheme !~ /https?/
+
+ puts "+ connecting to #{url.host}:#{url.port}"
+ title = Net::HTTP.start(url.host, url.port) do |http|
+ url.path = '/' if url.path == ''
+ head = http.request_head(url.path)
+ case head
+ when Net::HTTPRedirection then
+ # call self recursively if this is a redirect
+ redirect_to = head['location']
+ puts "+ redirect location: #{redirect_to}"
+ absolute_uris = URI.extract redirect_to
+ raise "wtf! redirect = #{redirect_to}" if absolute_uris.size > 1
+ if absolute_uris.size == 1
+ url = URI.parse absolute_uris[0]
+ else
+ url.path = redirect_to
+ end
+ puts "+ whee, redirect to #{url.to_s}!"
+ title = get_title_for_url(url.to_s)
+ when Net::HTTPSuccess then
+ if head['content-type'] =~ /^text\//
+ # content is 'text/*'
+ # retrieve the title from the page
+ puts "+ getting #{url.path}"
+ response = http.request_get(url.path)
+ return get_title_from_html(response.body)
+ else
+ # content isn't 'text/*'... display info about the file.
+ size = head['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
+ #lastmod = head['last-modified']
+ return "[Link Info] type: #{head['content-type']}#{size ? ", size: #{size} bytes" : ""}"
+ end
+ when Net::HTTPClientError then
+ return "[Title] Error getting link (#{response.code} - #{response.message})"
+ when Net::HTTPServerError then
+ return "[Title] Error getting link (#{response.code} - #{response.message})"
+ end
end
+ rescue SocketError => e
+ return "[Title] Error connecting to site (#{e.message})"
end
def listen(m)
@@ -122,3 +162,7 @@ plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
plugin.map 'urls :limit', :defaults => {:limit => 4},
:requirements => {:limit => /^\d+$/},
:private => false
+
+
+
+