- debug "+ connecting to #{url.host}:#{url.port}"
- http = @bot.httputil.get_proxy(url)
- http.start { |http|
-
- http.request_get(url.request_uri(), @bot.httputil.headers) { |response|
-
- case response
- when Net::HTTPRedirection
- # call self recursively if this is a redirect
- redirect_to = response['location'] || '/'
- debug "+ redirect location: #{redirect_to.inspect}"
- url = URI.join(url.to_s, redirect_to)
- debug "+ whee, redirecting to #{url.to_s}!"
- return get_title_for_url(url, depth-1)
- when Net::HTTPSuccess
- if response['content-type'] =~ /^text\//
- # since the content is 'text/*' and is small enough to
- # be a webpage, retrieve the title from the page
- debug "+ getting #{url.request_uri}"
- # was 5*10^4 ... seems to much to me ... 4k should be enough for everybody ;)
- data = read_data_from_response(response, 4096)
- return get_title_from_html(data)
- else
- unless @bot.config['url.titles_only']
- # content doesn't have title, just display info.
- size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
- size = size ? ", size: #{size} bytes" : ""
- return "[Link Info] type: #{response['content-type']}#{size}"
+ if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
+ # The page is text or HTML, so we can try finding a title and, if
+ # requested, the first par.
+ #
+ # We act differently depending on whether we want the first par or
+ # not: in the first case we download the initial part and then parse
+ # it; in the second case we only download as much as we need to find
+ # the title
+ #
+ if @bot.config['url.first_par']
+ partial = resp.partial_body(@bot.config['http.info_bytes'])
+ logopts[:title] = title = get_title_from_html(partial)
+ if url.fragment and not url.fragment.empty?
+ fragreg = /.*?<a\s+[^>]*name=["']?#{url.fragment}["']?.*?>/im
+ partial.sub!(fragreg,'')
+ end
+ first_par = Utils.ircify_first_html_par(partial, :strip => title)
+ unless first_par.empty?
+ logopts[:extra] = first_par
+ extra << ", #{Bold}text#{Bold}: #{first_par}"