data/rbot/plugins/url.rb

   1 require 'net/http'
   2 require 'uri'
   3 require 'cgi'
   4
   # One remembered link: the channel it was seen in, the nick that posted it,
   # the time it was seen and the url string itself.
   # The explicit "Url" name makes Struct register the class as Struct::Url.
   # NOTE(review): presumably stored registry entries depend on that class
   # name -- confirm before dropping the string argument.
   Url = Struct.new("Url", :channel, :nick, :time, :url)

   # Captures the text between <title> and </title>. Case-insensitive, lets
   # "." span newlines (/m) and tolerates whitespace inside the tag brackets.
   TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
   7
   # rbot plugin that remembers the urls pasted to each channel, optionally
   # announces the title (or file type/size) of a pasted link, and answers
   # "urls" / "urls search" queries from the remembered list.
   class UrlPlugin < Plugin
     BotConfig.register BotConfigIntegerValue.new('url.max_urls',
       :default => 100, :validate => Proc.new{|v| v > 0},
       :desc => "Maximum number of urls to store. New urls replace oldest ones.")
     BotConfig.register BotConfigBooleanValue.new('url.display_link_info',
       :default => true,
       :desc => "Get the title of any links pasted to the channel and display it (also tells if the link is broken or the site is down)")

     # Redirect hops followed before giving up on a link.
     MAX_REDIRECTS = 10
     # Longest title (in characters) announced to the channel.
     MAX_TITLE_LENGTH = 255
     # Hard cap on the number of lines a "urls" / "urls search" query may produce.
     MAX_LISTED = 10

     # Makes the per-channel registry entry default to an empty list.
     def initialize
       super
       @registry.set_default(Array.new)
     end

     # Returns the usage text for the plugin's commands.
     def help(plugin, topic="")
       "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
     end

     # Extracts the <title> of an HTML document.
     #
     # pagedata:: the page body
     #
     # Returns a "[Link Info] title: ..." string with newlines collapsed, HTML
     # entities unescaped and the title cut to MAX_TITLE_LENGTH characters, or
     # nil when the page has no title element.
     def get_title_from_html(pagedata)
       found = TITLE_RE.match(pagedata)
       return unless found
       title = found[1].strip.gsub(/\s*\n+\s*/, " ")
       title = CGI::unescapeHTML(title)
       title = title[0, MAX_TITLE_LENGTH] if title.length > MAX_TITLE_LENGTH
       "[Link Info] title: #{title}"
     end

     # Builds the "[Link Info] ..." line for a link.
     #
     # uri_str:: the url to inspect
     # depth::   how many more redirects may be followed (defaults to
     #           MAX_REDIRECTS)
     #
     # Issues a HEAD request first; text/* resources are then fetched with GET
     # and their title extracted, other resources are described by type and
     # size, 4xx/5xx answers are reported as errors and redirects are followed.
     #
     # Returns the info string, or nil when the url is not http(s), cannot be
     # parsed, or nothing can be said about the response. Name resolution and
     # similar SocketErrors are reported as an error string.
     def get_title_for_url(uri_str, depth = MAX_REDIRECTS)
       debug "+ Getting #{uri_str}"
       url = URI.parse(uri_str)
       # Only http/https uris (URI::HTTPS is a URI::HTTP) provide request_uri.
       return unless url.kind_of?(URI::HTTP)

       debug "+ connecting to #{url.host}:#{url.port}"
       http = @bot.httputil.get_proxy(url)
       http.start do |conn|
         # request_uri keeps the query string and yields "/" for an empty path.
         request_uri = url.request_uri
         head = conn.request_head(request_uri)
         case head
         when Net::HTTPRedirection
           follow_redirect(url, head['location'], depth)
         when Net::HTTPSuccess
           if head['content-type'] =~ /\Atext\//
             # content is 'text/*': retrieve the title from the page
             debug "+ getting #{request_uri}"
             get_title_from_html(conn.request_get(request_uri).body)
           else
             # content isn't 'text/*': display info about the file
             file_info(head)
           end
         when Net::HTTPClientError, Net::HTTPServerError
           "[Link Info] Error getting link (#{head.code} - #{head.message})"
         end
       end
     rescue SocketError => e
       "[Link Info] Error connecting to site (#{e.message})"
     rescue URI::Error => e
       # Text pasted to a channel is not guaranteed to be a well-formed uri.
       debug "+ not a usable url (#{e.message})"
       nil
     end

     # Watches public messages for a url. When 'url.display_link_info' is set
     # the link info is sent to the channel; the url is then stored at the
     # front of the channel's list unless it is already listed, dropping the
     # oldest entries so the list never exceeds 'url.max_urls'.
     def listen(m)
       return unless m.kind_of?(PrivMessage)
       return if m.address?
       # TODO support multiple urls in one line
       return unless m.message =~ /((f|ht)tps?:\/\/.*?)(?:\s+|$)/
       urlstr = Regexp.last_match(1)
       list = @registry[m.target]

       if @bot.config['url.display_link_info']
         debug "Getting title for #{urlstr}..."
         title = get_title_for_url urlstr
         if title
           m.reply title
           debug "Title found!"
         else
           debug "Title not found!"
         end
       end

       # check to see if this url is already listed
       return if list.find {|u| u.url == urlstr }

       url = Url.new(m.target, m.sourcenick, Time.new, urlstr)
       debug "#{list.length} urls so far"
       # Make room first so the list holds at most max_urls after the insert.
       max_urls = @bot.config['url.max_urls']
       list.pop while list.length >= max_urls
       debug "storing url #{url.url}"
       list.unshift url
       debug "#{list.length} urls now"
       @registry[m.target] = list
     end

     # Handler for "urls": replies with the most recent urls of a channel.
     #
     # params[:channel]:: channel to query (defaults to the message target)
     # params[:limit]::   number of urls wanted, clamped to 1..MAX_LISTED
     def urls(m, params)
       channel = params[:channel] || m.target
       max = clamp_limit(params[:limit])
       list = @registry[channel]
       if list.empty?
         m.reply "no urls seen yet for channel #{channel}"
       else
         reply_with_urls(m, list, max)
       end
     end

     # Handler for "urls search": replies with the stored urls of a channel
     # whose url or poster nick matches a regular expression.
     #
     # params[:channel]:: channel to query (defaults to the message target)
     # params[:limit]::   number of urls wanted, clamped to 1..MAX_LISTED
     # params[:string]::  regular expression source
     def search(m, params)
       channel = params[:channel] || m.target
       max = clamp_limit(params[:limit])
       begin
         regex = Regexp.new(params[:string])
       rescue RegexpError => e
         # The pattern is typed by a user; tell them instead of raising.
         m.reply "invalid regexp: #{e.message}"
         return
       end
       list = @registry[channel].select {|url|
         regex.match(url.url) || regex.match(url.nick)
       }
       if list.empty?
         m.reply "no matches for channel #{channel}"
       else
         reply_with_urls(m, list, max)
       end
     end

     private

     # Resolves a redirect target against the current uri (merge handles both
     # absolute and relative Location values) and looks that target up, unless
     # the redirect budget is used up or no Location was sent.
     def follow_redirect(url, location, depth)
       debug "+ redirect location: #{location}"
       return "[Link Info] Error getting link (too many redirects)" if depth <= 0
       return unless location
       target = url.merge(location)
       debug "+ whee, redirect to #{target}!"
       get_title_for_url(target.to_s, depth - 1)
     end

     # Describes a non-text resource by its content type and, when the server
     # sent a Content-Length, its size with thousands separators.
     def file_info(head)
       size = head['content-length']
       size = size.gsub(/(\d)(?=(?:\d{3})+\z)/, '\1,') if size
       "[Link Info] type: #{head['content-type']}#{size ? ", size: #{size} bytes" : ""}"
     end

     # Converts a requested limit to an integer between 1 and MAX_LISTED.
     def clamp_limit(limit)
       [[limit.to_i, 1].max, MAX_LISTED].min
     end

     # Sends one line (timestamp, nick, url) for each of the first +max+ entries.
     def reply_with_urls(m, list, max)
       list.first(max).each do |url|
         m.reply "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
       end
     end
   end
 153 plugin = UrlPlugin.new
 154 plugin.map 'urls search :channel :limit :string', :action => 'search',
 155                           :defaults => {:limit => 4},
 156                           :requirements => {:limit => /^\d+$/},
 157                           :public => false
 158 plugin.map 'urls search :limit :string', :action => 'search',
 159                           :defaults => {:limit => 4},
 160                           :requirements => {:limit => /^\d+$/},
 161                           :private => false
 162 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
 163                           :requirements => {:limit => /^\d+$/},
 164                           :public => false
 165 plugin.map 'urls :limit', :defaults => {:limit => 4},
 166                           :requirements => {:limit => /^\d+$/},
 167                           :private => false