- @bot.httputil.get_response(url) { |response|
- case response
- when Net::HTTPSuccess
- if response['content-type'] =~ /^text\//
- # since the content is 'text/*' and is small enough to
- # be a webpage, retrieve the title from the page
- debug "+ getting #{url.request_uri}"
-
- # we look for the title in the first 4k bytes
- # TODO make the amount of data configurable
- response.partial_body(4096) { |part|
- title = get_title_from_html(part)
- return title if title
- }
- # if nothing was found, return nothing
- return
- else
- unless @bot.config['url.titles_only']
- # content doesn't have title, just display info.
- size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
- size = size ? ", size: #{size} bytes" : ""
- return "type: #{response['content-type']}#{size}"
- end
- end
- when Net::HTTPResponse
- return "Error getting link (#{response.code} - #{response.message})"
- else
- raise response
+ debug "+ getting info for #{url.request_uri}"
+ info = Utils.get_html_info(url)
+ debug info
+ resp = info[:headers]
+
+ logopts[:title] = title = info[:title]
+
+ if info[:content]
+ logopts[:extra] = info[:content]
+ extra << "#{Bold}text#{Bold}: #{info[:content]}" if @bot.config['url.first_par']
+ else
+ logopts[:extra] = String.new
+ logopts[:extra] << "Content Type: #{resp['content-type']}"
+ extra << "#{Bold}type#{Bold}: #{resp['content-type']}" unless title
+ if enc = resp['content-encoding']
+ logopts[:extra] << ", encoding: #{enc}"
+ extra << "#{Bold}encoding#{Bold}: #{enc}" if @bot.config['url.first_par'] or not title
+ end
+
+ size = resp['content-length'].first.gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
+ if size
+ logopts[:extra] << ", size: #{size} bytes"
+ extra << "#{Bold}size#{Bold}: #{size} bytes" if @bot.config['url.first_par'] or not title
+ end
+ end
+ rescue Exception => e
+ case e
+ when UrlLinkError
+ raise e
+ else
+ error e
+ raise "connecting to site/processing information (#{e.message})"
+ end
+ end
+
+ call_event(:url_added, url.to_s, logopts)
+ if title
+ extra.unshift("#{Bold}title#{Bold}: #{title}")
+ end
+ return extra.join(", ") if title or not @bot.config['url.titles_only']
+ end
+
+ def handle_urls(m, urls, display_info=@bot.config['url.display_link_info'])
+ return if urls.empty?
+ debug "found urls #{urls.inspect}"
+ list = m.public? ? @registry[m.target] : nil
+ debug "display link info: #{display_info}"
+ urls_displayed = 0
+ urls.each do |urlstr|
+ debug "working on #{urlstr}"
+ next unless urlstr =~ /^https?:/
+ title = nil
+ debug "Getting title for #{urlstr}..."
+ reply = nil
+ begin
+ title = get_title_for_url(urlstr,
+ :nick => m.source.nick,
+ :channel => m.channel,
+ :ircline => m.message)
+ debug "Title #{title ? '' : 'not '} found"
+ reply = "#{LINK_INFO} #{title}" if title
+ rescue => e
+ if e.message =~ /\(404 - Not Found\)/i
+ # see if we failed to find the thing because of trailing punctuation
+ # but check that we still have 'something' in the URL
+ retry if urlstr.chop! and urlstr =~ /^https?:\/\/./