# Struct with the fields channel, nick, time and url.
# NOTE(review): presumably one record per URL seen in a channel; its usage is
# outside this view — confirm.
Url = Struct.new("Url", :channel, :nick, :time, :url)
# Matches an HTML <title> element, tolerating whitespace inside the tags.
# Capture 1 is the (non-greedy) title text; /i ignores case and /m lets '.'
# span newlines, so titles broken across lines still match.
TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
# Prefix placed in front of the link description when replying.
+LINK_INFO = "[Link Info]"
class UrlPlugin < Plugin
BotConfig.register BotConfigIntegerValue.new('url.max_urls',
# url.titles_only (default false): when set, get_title_for_url does not fall
# back to the type/size summary for resources without a <title>.
BotConfig.register BotConfigBooleanValue.new('url.titles_only',
:default => false,
:desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# url.first_par (default false): when set, get_title_for_url also asks
# Utils.ircify_first_html_par for the first paragraph of text pages.
+ BotConfig.register BotConfigBooleanValue.new('url.first_par',
+ :default => false,
+ :desc => "Also try to get the first paragraph of a web page")
def initialize
super
# Extract the text captured by TITLE_RE from +pagedata+.
#
# Returns nil when TITLE_RE does not match.
#
# The removed ("-") version stripped the capture, collapsed line breaks into
# single spaces, decoded HTML entities and returned it prefixed with
# "title: ". The added ("+") version returns $1.ircify_html and leaves the
# "title" label to the caller.
# NOTE(review): ircify_html is defined outside this view; presumably it
# converts HTML markup/entities to IRC-friendly text — confirm.
def get_title_from_html(pagedata)
return unless TITLE_RE.match(pagedata)
- title = $1.strip.gsub(/\s*\n+\s*/, " ")
- title = Utils.decode_html_entities title
- "title: #{title}"
+ $1.ircify_html
end
# Build a short, one-line description of the resource behind a URL.
#
# NOTE(review): the parameter is +uri_str+ but the body reads +url+; the
# conversion is not visible here — presumably it happens in lines elided
# between the signature and the begin block. Confirm.
#
# Behaviour of the added ("+") implementation, as far as this view shows:
# * asks httputil for the resource with :range => "bytes=0-<http.info_bytes>";
# * returns "Error getting link (code - message)" unless the response code
#   is "200" or "206";
# * for text/* content, returns the title (plus the first paragraph when
#   url.first_par is set) if a title was found;
# * otherwise, unless url.titles_only is set, returns the content type and,
#   when it can be determined, the size;
# * any exception is logged and turned into "Error connecting to site (...)".
# Falls through to nil when none of the above returns (e.g. no title found
# and url.titles_only set).
#
# The removed ("-") implementation did the same work inside a get_response
# block, reading the body through response.partial_body.
def get_title_for_url(uri_str)
title = nil
begin
- @bot.httputil.get_response(url) { |response|
- case response
- when Net::HTTPSuccess
- if response['content-type'] =~ /^text\//
- # since the content is 'text/*' and is small enough to
- # be a webpage, retrieve the title from the page
- debug "+ getting #{url.request_uri}"
-
- # we look for the title in the first 4k bytes
- response.partial_body(@bot.config['http.info_bytes']) { |part|
- title = get_title_from_html(part)
- return title if title
- }
- # if nothing was found, provide more basic info
- end
- debug response.to_hash.inspect
- unless @bot.config['url.titles_only']
- # content doesn't have title, just display info.
- size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
- size = size ? ", size: #{size} bytes" : ""
- return "type: #{response['content-type']}#{size}"
- end
- when Net::HTTPResponse
- return "Error getting link (#{response.code} - #{response.message})"
# Added implementation: request only a leading slice of the resource.
# NOTE(review): if :range is sent as an HTTP Range header, "bytes=0-N" is
# inclusive and asks for N+1 bytes; the body is cut back to N below.
+ range = @bot.config['http.info_bytes']
+ response = @bot.httputil.get_response(url, :range => "bytes=0-#{range}")
+ if response.code != "206" && response.code != "200"
+ return "Error getting link (#{response.code} - #{response.message})"
+ end
# extra collects the optional first-paragraph text; it is appended to the
# title summary or, when no title is found, to the type/size summary.
+ extra = String.new
+
+ if response['content-type'] =~ /^text\//
+
+ body = response.body.slice(0, range)
# NOTE(review): this initial value is overwritten by get_title_from_html
# below before it is ever read.
+ title = String.new
+
+ # since the content is 'text/*' and is small enough to
+ # be a webpage, retrieve the title from the page
+ debug "+ getting #{url.request_uri}"
+
+ title = get_title_from_html(body)
+ if @bot.config['url.first_par']
# NOTE(review): :strip => title is handed to Utils.ircify_first_html_par;
# presumably it keeps the title from being repeated in the paragraph —
# confirm against Utils. title may be nil at this point.
+ first_par = Utils.ircify_first_html_par(body, :strip => title)
+ extra << ", #{Bold}text#{Bold}: #{first_par}" unless first_par.empty?
+ return "#{Bold}title#{Bold}: #{title}#{extra}" if title
else
- raise response
+ return "#{Bold}title#{Bold}: #{title}" if title
end
- }
- rescue Object => e
- if e.class <= StandardError
- error e.inspect
- debug e.backtrace.join("\n")
+
+ # if nothing was found, provide more basic info
end
- msg = e.respond_to?(:message) ? e.message : e.to_s
+ debug response.to_hash.inspect
+ unless @bot.config['url.titles_only']
+ # content doesn't have title, just display info.
# Content-Length formatted with thousands separators; the rescue modifier
# yields nil if the lookup or gsub raises (e.g. the header is missing).
+ size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
# For a 206 response, prefer the total length given after the '/' in
# Content-Range (Content-Length would describe only the partial body).
+ if response.code == '206'
+ if response['content-range'] =~ /bytes\s*[^\/]+\/(\d+)/
+ size = $1.to_s.reverse.scan(/\d{1,3}/).join(',').reverse
+ end
+ end
+ size = size ? ", #{Bold}size#{Bold}: #{size} bytes" : ""
+ return "#{Bold}type#{Bold}: #{response['content-type']}#{size}#{extra}"
+ end
# NOTE(review): rescuing Exception also catches non-StandardError exceptions.
# The removed code rescued Object but only logged StandardError subclasses.
# Confirm the broad rescue is intended (e.g. to cover timeouts).
+ rescue Exception => e
+ error e.inspect
+ debug e.backtrace.join("\n")
return "Error connecting to site (#{e.message})"
end
end
begin
title = get_title_for_url urlstr
if title
- m.reply "[Link Info] #{title}"
+ m.reply "#{LINK_INFO} #{title}", :overlong => :truncate
debug "Title found!"
else
debug "Title not found!"