5 Url = Struct.new("Url", :channel, :nick, :time, :url)
6 TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
21 # extra codes, for future use...
35 'otimes' => '⊗',
44 'Epsilon' => 'Ε',
49 'Upsilon' => 'Υ',
51 'there4' => '∴',
56 'rsaquo' => '›',
80 'lfloor' => '⌊',
101 'sbquo' => '‚',
114 'infin' => '∞',
119 'thinsp' => ' ',
121 'bdquo' => '„',
128 'mdash' => '—',
130 'permil' => '‰',
135 'forall' => '∀',
137 'rceil' => '⌉',
140 'lambda' => 'λ',
144 'dagger' => '†',
147 'image' => 'ℑ',
148 'alefsym' => 'ℵ',
154 'frasl' => '⁄',
156 'lowast' => '∗',
167 'oline' => '‾',
174 'empty' => '∅',
181 'weierp' => '℘',
186 'omicron' => 'ο',
187 'upsilon' => 'υ',
189 'Lambda' => 'Λ',
196 'scaron' => 'š',
197 'lsquo' => '‘',
205 'hellip' => '…',
209 'rfloor' => '⌋',
211 'crarr' => '↵',
213 'notin' => '∉',
214 'exist' => '∃',
217 'Dagger' => '‡',
218 'oplus' => '⊕',
224 'lsaquo' => '‹',
226 'Omicron' => 'Ο',
241 'sigmaf' => 'ς',
243 'minus' => '−',
246 'epsilon' => 'ε',
257 'spades' => '♠',
258 'rsquo' => '’',
262 'thetasym' => 'ϑ',
266 'ldquo' => '“',
267 'hearts' => '♥',
273 class UrlPlugin < Plugin
274 BotConfig.register BotConfigIntegerValue.new('url.max_urls',
275 :default => 100, :validate => Proc.new{|v| v > 0},
276 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
277 BotConfig.register BotConfigBooleanValue.new('url.display_link_info',
279 :desc => "Get the title of any links pasted to the channel and display it (also tells if the link is broken or the site is down)")
283 @registry.set_default(Array.new)
286 def help(plugin, topic="")
287 "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
290 def unescape_title(htmldata)
291 # first pass -- let CGI try to attack it...
292 htmldata = CGI::unescapeHTML htmldata
294 # second pass -- destroy the remaining bits...
295 htmldata.gsub(/(&(.+?);)/) {
298 # remove the 0-padding from unicode integers
300 symbol = "##{$1.to_i.to_s}"
303 # output the symbol's irc-translated character, or a * if it's unknown
304 UNESCAPE_TABLE[symbol] || '*'
308 def get_title_from_html(pagedata)
309 return unless TITLE_RE.match(pagedata)
310 title = $1.strip.gsub(/\s*\n+\s*/, " ")
311 title = unescape_title title
312 title = title[0..255] if title.length > 255
313 "[Link Info] title: #{title}"
316 def read_data_from_response(response, amount)
321 response.read_body do |chunk| # read body now
323 amount_read += chunk.length
325 if amount_read > amount
326 amount_of_overflow = amount_read - amount
327 chunk = chunk[0...-amount_of_overflow]
332 break if amount_read >= amount
341 def get_title_for_url(uri_str, depth=@bot.config['http.max_redir'])
342 # This god-awful mess is what the ruby http library has reduced me to.
343 # Python's HTTP lib is so much nicer. :~(
346 raise "Error: Maximum redirects hit."
349 debug "+ Getting #{uri_str.to_s}"
350 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
351 return if url.scheme !~ /https?/
355 debug "+ connecting to #{url.host}:#{url.port}"
356 http = @bot.httputil.get_proxy(url)
359 http.request_get(url.request_uri(), @bot.httputil.headers) { |response|
362 when Net::HTTPRedirection
363 # call self recursively if this is a redirect
364 redirect_to = response['location'] || '/'
365 debug "+ redirect location: #{redirect_to.inspect}"
366 url = URI.join(url.to_s, redirect_to)
367 debug "+ whee, redirecting to #{url.to_s}!"
368 return get_title_for_url(url, depth-1)
369 when Net::HTTPSuccess
370 if response['content-type'] =~ /^text\//
371 # since the content is 'text/*' and is small enough to
372 # be a webpage, retrieve the title from the page
373 debug "+ getting #{url.request_uri}"
374 # was 5*10^4 ... seems too much to me ... 4k should be enough for everybody ;)
375 data = read_data_from_response(response, 4096)
376 return get_title_from_html(data)
378 # content doesn't have title, just display info.
379 size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
380 size = size ? ", size: #{size} bytes" : ""
381 return "[Link Info] type: #{response['content-type']}#{size}"
384 return "[Link Info] Error getting link (#{response.code} - #{response.message})"
385 end # end of "case response"
387 } # end of request block
388 } # end of http start block
392 rescue SocketError => e
393 return "[Link Info] Error connecting to site (#{e.message})"
397 return unless m.kind_of?(PrivMessage)
399 # TODO support multiple urls in one line
400 if m.message =~ /(f|ht)tps?:\/\//
401 if m.message =~ /((f|ht)tps?:\/\/.*?)(?:\s+|$)/
403 list = @registry[m.target]
405 if @bot.config['url.display_link_info']
406 debug "Getting title for #{urlstr}..."
407 title = get_title_for_url urlstr
412 debug "Title not found!"
416 # check to see if this url is already listed
417 return if list.find {|u| u.url == urlstr }
419 url = Url.new(m.target, m.sourcenick, Time.new, urlstr)
420 debug "#{list.length} urls so far"
421 if list.length > @bot.config['url.max_urls']
424 debug "storing url #{url.url}"
426 debug "#{list.length} urls now"
427 @registry[m.target] = list
433 channel = params[:channel] ? params[:channel] : m.target
434 max = params[:limit].to_i
437 list = @registry[channel]
439 m.reply "no urls seen yet for channel #{channel}"
441 list[0..(max-1)].each do |url|
442 m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
447 def search(m, params)
448 channel = params[:channel] ? params[:channel] : m.target
449 max = params[:limit].to_i
450 string = params[:string]
453 regex = Regexp.new(string, Regexp::IGNORECASE)
454 list = @registry[channel].find_all {|url|
455 regex.match(url.url) || regex.match(url.nick)
458 m.reply "no matches for channel #{channel}"
460 list[0..(max-1)].each do |url|
461 m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
466 plugin = UrlPlugin.new
467 plugin.map 'urls search :channel :limit :string', :action => 'search',
468 :defaults => {:limit => 4},
469 :requirements => {:limit => /^\d+$/},
471 plugin.map 'urls search :limit :string', :action => 'search',
472 :defaults => {:limit => 4},
473 :requirements => {:limit => /^\d+$/},
475 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
476 :requirements => {:limit => /^\d+$/},
478 plugin.map 'urls :limit', :defaults => {:limit => 4},
479 :requirements => {:limit => /^\d+$/},