- if response['content-type'] =~ /^text\//
- # since the content is 'text/*' and is small enough to
- # be a webpage, retrieve the title from the page
- debug "+ getting #{url.request_uri}"
-
- # we look for the title in the first 4k bytes
- # TODO make the amount of data configurable
- response.partial_body(4096) { |part|
- title = get_title_from_html(part)
- return title if title
- }
- # if nothing was found, return nothing
- return
+
+ debug resp.to_hash
+
+ if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
+ # The page is text or HTML, so we can try finding a title and, if
+ # requested, the first par.
+ #
+ # We act differently depending on whether we want the first par or
+ # not: in the first case we download the initial part and then parse
+ # it; in the second case we only download as much as we need to find
+ # the title
+ #
+ if @bot.config['url.first_par']
+ partial = resp.partial_body(@bot.config['http.info_bytes'])
+ logopts[:title] = title = get_title_from_html(partial)
+ first_par = Utils.ircify_first_html_par(partial, :strip => title)
+ unless first_par.empty?
+ logopts[:extra] = first_par
+ extra << ", #{Bold}text#{Bold}: #{first_par}"
+ end
+ call_event(:url_added, url.to_s, logopts)
+ return "#{Bold}title#{Bold}: #{title}#{extra}" if title
+ else
+ resp.partial_body(@bot.config['http.info_bytes']) { |part|
+ logopts[:title] = title = get_title_from_html(part)
+ call_event(:url_added, url.to_s, logopts)
+ return "#{Bold}title#{Bold}: #{title}" if title
+ }
+ end
+ # if nothing was found, provide more basic info, as for non-html pages