# Record describing one stored link; its fields are channel, nick, time and url.
5 Url = Struct.new("Url", :channel, :nick, :time, :url)
# Captures the text between <title> and </title>, tolerating whitespace inside
# the tags. /i ignores case and /m lets "." match newlines.
6 TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
21 # extra codes, for future use...
35 'otimes' => '⊗',
44 'Epsilon' => 'Ε',
49 'Upsilon' => 'Υ',
51 'there4' => '∴',
56 'rsaquo' => '›',
80 'lfloor' => '⌊',
101 'sbquo' => '‚',
114 'infin' => '∞',
119 'thinsp' => ' ',
121 'bdquo' => '„',
128 'mdash' => '—',
130 'permil' => '‰',
135 'forall' => '∀',
137 'rceil' => '⌉',
140 'lambda' => 'λ',
144 'dagger' => '†',
147 'image' => 'ℑ',
148 'alefsym' => 'ℵ',
154 'frasl' => '⁄',
156 'lowast' => '∗',
167 'oline' => '‾',
174 'empty' => '∅',
181 'weierp' => '℘',
186 'omicron' => 'ο',
187 'upsilon' => 'υ',
189 'Lambda' => 'Λ',
196 'scaron' => 'š',
197 'lsquo' => '‘',
205 'hellip' => '…',
209 'rfloor' => '⌋',
211 'crarr' => '↵',
213 'notin' => '∉',
214 'exist' => '∃',
217 'Dagger' => '‡',
218 'oplus' => '⊕',
224 'lsaquo' => '‹',
226 'Omicron' => 'Ο',
241 'sigmaf' => 'ς',
243 'minus' => '−',
246 'epsilon' => 'ε',
257 'spades' => '♠',
258 'rsquo' => '’',
262 'thetasym' => 'ϑ',
266 'ldquo' => '“',
267 'hearts' => '♥',
# Plugin subclass; the statements below register its url.* configuration keys.
# NOTE(review): some lines of this section are missing from the excerpt,
# including the definition that encloses the @registry call at the end.
273 class UrlPlugin < Plugin
# url.max_urls: integer setting, default 100, accepted only when greater than 0.
274 BotConfig.register BotConfigIntegerValue.new('url.max_urls',
275 :default => 100, :validate => Proc.new{|v| v > 0},
276 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# url.display_link_info: boolean setting read before titles are fetched.
277 BotConfig.register BotConfigBooleanValue.new('url.display_link_info',
279 :desc => "Get the title of any links pasted to the channel and display it (also tells if the link is broken or the site is down)")
# Make a new empty Array the registry's default value.
283 @registry.set_default(Array.new)
# Returns the one-line usage summary for the "urls" and "urls search" commands.
#
# plugin:: not used by this method.
# topic::  not used by this method; the same text is returned for every topic.
def help(plugin, topic = "")
  usage = "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
  usage
end
# Replaces HTML entities in a page title with plain characters.
#
# htmldata:: title text that may still contain "&...;" entities.
#
# NOTE(review): several lines of this method are missing from this excerpt
# (for example where `symbol` is first assigned), so only the visible steps
# are described.
290 def unescape_title(htmldata)
291 # first pass -- let CGI decode the entities it knows about
292 htmldata = CGI::unescapeHTML htmldata
294 # second pass -- deal with whatever "&...;" sequences are left
295 htmldata.gsub(/(&(.+?);)/) {
298 # remove the 0-padding from unicode integers
300 symbol = "##{$1.to_i.to_s}"
303 # output the symbol's irc-translated character, or a * if it's unknown
304 UNESCAPE_TABLE[symbol] || '*'
# Builds the "[Link Info]" title reply from a piece of HTML.
#
# pagedata:: HTML text to scan with TITLE_RE.
#
# Returns nil when no <title> element is found. Otherwise returns the reply
# string: the title is stripped, runs of line breaks (with surrounding
# whitespace) become single spaces, entities go through unescape_title, and
# anything longer than 255 characters is cut to its first 256 characters.
def get_title_from_html(pagedata)
  found = TITLE_RE.match(pagedata)
  return unless found

  single_line = found[1].strip.gsub(/\s*\n+\s*/, " ")
  title = unescape_title(single_line)
  title = title[0, 256] if title.length > 255
  "[Link Info] title: #{title}"
end
# Streams the body of +response+ chunk by chunk and stops once the running
# total of chunk lengths has reached +amount+.
#
# response:: object whose read_body yields successive chunks
#            (presumably a Net::HTTPResponse -- confirm against the caller).
# amount::   cap on the total chunk length to consume.
#
# NOTE(review): the lines that initialise amount_read, collect the chunks and
# produce the return value are not part of this excerpt.
316 def read_data_from_response(response, amount)
\r
321 response.read_body do |chunk| # read body now
\r
323 amount_read += chunk.length
\r
# the chunk that crosses the cap is trimmed so the excess is cut off its tail
325 if amount_read > amount
\r
326 amount_of_overflow = amount_read - amount
\r
327 chunk = chunk[0...-amount_of_overflow]
\r
# stop reading as soon as the cap has been reached
332 break if amount_read >= amount
\r
# Fetches +uri_str+ over HTTP and returns a "[Link Info] ..." line describing it.
#
# uri_str:: the URL to look up.
# depth::   remaining redirect budget; each redirect recurses with depth-1.
#
# Visible outcomes: the page title for text/* responses, a type/size line for
# other successful responses, and an error line for 4xx/5xx responses or a
# SocketError. Returns nil when the scheme does not match /https?/.
#
# NOTE(review): several structural lines (the depth check, "case response",
# "else", closing "end"s) are missing from this excerpt.
341 def get_title_for_url(uri_str, depth=10)
342 # This god-awful mess is what the ruby http library has reduced me to.
343 # Python's HTTP lib is so much nicer. :~(
346 raise "Error: Maximum redirects hit."
349 puts "+ Getting #{uri_str}"
350 url = URI.parse(uri_str)
# NOTE(review): the pattern is unanchored, so any scheme merely containing
# "http" passes; /\Ahttps?\z/ would be stricter -- confirm intent.
351 return if url.scheme !~ /https?/
353 puts "+ connecting to #{url.host}:#{url.port}"
354 http = @bot.httputil.get_proxy(url)
355 title = http.start { |http|
356 url.path = '/' if url.path == ''
\r
# NOTE(review): only url.path is sent, so a query string in the URL is dropped
# from the request even though url.request_uri is what gets logged below.
358 http.request_get(url.path, "User-Agent" => "rbot-url_plugin/666.666") { |response|
\r
361 when Net::HTTPRedirection then
362 # call self recursively if this is a redirect
363 redirect_to = response['location'] || './'
364 puts "+ redirect location: #{redirect_to.inspect}"
365 url = URI.join url.to_s, redirect_to
366 puts "+ whee, redirecting to #{url.to_s}!"
367 title = get_title_for_url(url.to_s, depth-1)
368 when Net::HTTPSuccess then
369 if response['content-type'] =~ /^text\//
370 # since the content is 'text/*' and is small enough to
371 # be a webpage, retrieve the title from the page
372 puts "+ getting #{url.request_uri}"
\r
# read at most 50000 of body data before looking for the title
373 data = read_data_from_response(response, 50000)
\r
374 return get_title_from_html(data)
376 # content doesn't have title, just display info.
# NOTE(review): response['content-length'] is nil when the header is absent,
# and nil.gsub raises NoMethodError; the "size ?" check on the next line
# suggests a missing header was meant to be tolerated.
# The gsub inserts thousands separators into the byte count.
377 size = response['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2')
378 return "[Link Info] type: #{response['content-type']}#{size ? ", size: #{size} bytes" : ""}"
380 when Net::HTTPClientError then
381 return "[Link Info] Error getting link (#{response.code} - #{response.message})"
382 when Net::HTTPServerError then
383 return "[Link Info] Error getting link (#{response.code} - #{response.message})"
384 end # end of "case response"
\r
386 } # end of request block
387 } # end of http start block
\r
# connection-level failures are reported as a link-info line
389 rescue SocketError => e
390 return "[Link Info] Error connecting to site (#{e.message})"
394 return unless m.kind_of?(PrivMessage)
396 # TODO support multiple urls in one line
397 if m.message =~ /(f|ht)tps?:\/\//
398 if m.message =~ /((f|ht)tps?:\/\/.*?)(?:\s+|$)/
400 list = @registry[m.target]
402 if @bot.config['url.display_link_info']
403 debug "Getting title for #{urlstr}..."
404 title = get_title_for_url urlstr
409 debug "Title not found!"
413 # check to see if this url is already listed
414 return if list.find {|u| u.url == urlstr }
416 url = Url.new(m.target, m.sourcenick, Time.new, urlstr)
417 debug "#{list.length} urls so far"
418 if list.length > @bot.config['url.max_urls']
421 debug "storing url #{url.url}"
423 debug "#{list.length} urls now"
424 @registry[m.target] = list
430 channel = params[:channel] ? params[:channel] : m.target
431 max = params[:limit].to_i
434 list = @registry[channel]
436 m.reply "no urls seen yet for channel #{channel}"
438 list[0..(max-1)].each do |url|
439 m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
# Handler for the "urls search" commands: replies with the stored urls of a
# channel whose url or nick matches a case-insensitive regexp.
#
# m::      the incoming message; replies go through m.reply, and m.target is
#          used as the channel when params has no :channel.
# params:: command parameters -- :channel (optional), :limit, :string
#          (the regexp source).
#
# NOTE(review): some lines of this method are missing from this excerpt.
444 def search(m, params)
445 channel = params[:channel] ? params[:channel] : m.target
446 max = params[:limit].to_i
447 string = params[:string]
# NOTE(review): string comes from the command text; an invalid pattern makes
# Regexp.new raise RegexpError -- confirm it is handled in the lines not shown.
450 regex = Regexp.new(string, Regexp::IGNORECASE)
451 list = @registry[channel].find_all {|url|
452 regex.match(url.url) || regex.match(url.nick)
455 m.reply "no matches for channel #{channel}"
# reply with the first max matches, one line per url
457 list[0..(max-1)].each do |url|
458 m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
# Instantiate the plugin and register its command templates. In every template
# :limit defaults to 4 and must consist of digits only.
# NOTE(review): each map call below ends in a trailing comma; the continuation
# lines are not part of this excerpt.
463 plugin = UrlPlugin.new
# search with an explicit channel
464 plugin.map 'urls search :channel :limit :string', :action => 'search',
465 :defaults => {:limit => 4},
466 :requirements => {:limit => /^\d+$/},
# search without a channel
468 plugin.map 'urls search :limit :string', :action => 'search',
469 :defaults => {:limit => 4},
470 :requirements => {:limit => /^\d+$/},
# listing with an explicit channel
472 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
473 :requirements => {:limit => /^\d+$/},
# listing without a channel
475 plugin.map 'urls :limit', :defaults => {:limit => 4},
476 :requirements => {:limit => /^\d+$/},