# Declares the Url record used for stored links, with the fields channel,
# nick, time, url and info (Url.new is later called with values in that order).
# define_structure is provided outside this file; presumably it builds a
# Struct-like class -- confirm against the helper.
6 define_structure :Url, :channel, :nick, :time, :url, :info
# Plugin that picks URLs out of messages, can reply with information about
# the linked page and keeps a list of seen URLs in the registry.
8 class UrlPlugin < Plugin
# Prefix placed in front of link information sent as a reply.
9 LINK_INFO = "[Link Info]"
# Matches every character that is NOT in the URI unreserved/reserved sets and
# is not '%', '#' or a space; handed to URI.escape as the set of characters to
# escape before URLs are extracted from a message.
# The third argument ('N') is the legacy kcode option of Regexp.new --
# presumably "no multibyte encoding"; confirm for the targeted Ruby version.
10 OUR_UNSAFE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}%# ]", false, 'N')
# Configuration keys registered by this plugin.
# NOTE(review): for url.display_link_info, url.titles_only, url.first_par and
# url.info_on_list the line between the key and :desc (presumably :default)
# is not visible in this view -- confirm the defaults against the full file.

# url.max_urls: integer, default 100, validated to be greater than 0.
12 Config.register Config::IntegerValue.new('url.max_urls',
13 :default => 100, :validate => Proc.new{|v| v > 0},
14 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# url.display_link_info: integer; maximum number of links per line for which
# info is displayed automatically (0 disables, per the description).
15 Config.register Config::IntegerValue.new('url.display_link_info',
17 :desc => "Get the title of links pasted to the channel and display it (also tells if the link is broken or the site is down). Do it for at most this many links per line (set to 0 to disable)")
# url.titles_only: boolean; restricts replies to links that yielded a title.
18 Config.register Config::BooleanValue.new('url.titles_only',
20 :desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# url.first_par: boolean; adds page text (and some header details) to replies.
21 Config.register Config::BooleanValue.new('url.first_par',
23 :desc => "Also try to get the first paragraph of a web page")
# url.info_on_list: boolean; adds link info to list/search output.
24 Config.register Config::BooleanValue.new('url.info_on_list',
26 :desc => "Show link info when listing/searching for urls")
# url.no_info_hosts: array of regular-expression source strings. The defaults
# cover localhost, 192.168.*, 10.*, 127.* and 172.16.* through 172.31.*.
# Changing the value calls reset_no_info_hosts on the 'url' plugin so the
# combined regexp is rebuilt.
27 Config.register Config::ArrayValue.new('url.no_info_hosts',
28 :default => ['localhost', '^192\.168\.', '^10\.', '^127\.', '^172\.(1[6-9]|2\d|31)\.'],
29 :on_change => Proc.new { |bot, v| bot.plugins['url'].reset_no_info_hosts },
30 :desc => "A list of regular expressions matching hosts for which no info should be provided")
# NOTE(review): the enclosing `def` line and the closing `end` lines are not
# visible in this view; these statements look like constructor set-up -- confirm.
# Use a new empty Array as the registry default (presumably what is returned
# for channels with no stored URLs -- confirm against the registry API).
35 @registry.set_default(Array.new)
# If the configured url.display_link_info value is not an Integer, feed its
# string form back through set_string (presumably to coerce a value stored in
# an older format -- confirm).
36 unless @bot.config['url.display_link_info'].kind_of?(Integer)
37 @bot.config.items[:'url.display_link_info'].set_string(@bot.config['url.display_link_info'].to_s)
# Rebuilds @no_info_hosts, the case-insensitive regexp used to decide for
# which hosts no link info is fetched, from the 'url.no_info_hosts' setting.
#
# Every entry of the setting is a regular-expression source string; the
# entries are combined as alternatives. Empty entries are dropped, and an
# empty list produces a regexp that never matches: joining nothing would give
# the empty pattern, which matches every host and would silently disable link
# info everywhere.
#
# Returns whatever +debug+ returns; callers use the method for its side effect.
def reset_no_info_hosts
  usable = Array(@bot.config['url.no_info_hosts']).reject { |pat| pat.to_s.empty? }
  # '(?!)' is an empty negative lookahead, i.e. a pattern that cannot match.
  source = usable.empty? ? '(?!)' : usable.join('|')
  @no_info_hosts = Regexp.new(source, Regexp::IGNORECASE)
  debug "no info hosts regexp set to #{@no_info_hosts}"
end
# Returns the usage text for the plugin's commands.
#
# plugin:: not used by this method
# topic::  not used by this method; the same text is returned for every topic
def help(plugin, topic="")
  "url info <url> => display link info for <url> (set url.display_link_info > 0 if you want the bot to do it automatically when someone writes an url), urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
end
# Returns the result of calling ircify_html_title on +pagedata+.
#
# pagedata:: any object responding to #ircify_html_title (presumably a String
#            of HTML extended elsewhere in the project -- confirm)
def get_title_from_html(pagedata)
  pagedata.ircify_html_title
end
# Collects displayable information about the page at +uri_str+.
#
# uri_str:: a URI object, or a String that is handed to URI.parse
# opts::    optional Hash; callers in this file pass :nick, :channel and
#           :ircline
#
# Returns nil when the scheme does not match /https?/, an explanatory String
# when the host lookup fails or the host matches @no_info_hosts, and otherwise
# the collected pieces joined with ", " -- but only if a title was found or
# url.titles_only is off.
# May raise a RuntimeError carrying the message of an exception caught while
# fetching or processing the page.
#
# NOTE(review): several lines of this method (begin/rescue/if/else/end
# keywords and the set-up of +logopts+, +extra+, +title+ and +resp+) are not
# visible in this view; the comments below describe only visible statements.
55 def get_title_for_url(uri_str, opts = {})
# Accept both ready-made URI objects and plain strings.
57 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
58 return if url.scheme !~ /https?/
60 # also check the ip, the canonical name and the aliases
62 checks = TCPSocket.gethostbyname(url.host)
# +e+ comes from a rescue clause that is not visible in this view.
65 return "Unable to retrieve info for #{url.host}: #{e.message}"
# Refuse the request as soon as any collected name matches the exclusion regexp.
71 unless checks.grep(@no_info_hosts).empty?
72 return "Sorry, info retrieval for #{url.host} (#{checks.first}) is disabled"
81 debug "+ getting info for #{url.request_uri}"
82 info = Utils.get_html_info(url)
86 logopts[:title] = title = info[:title]
# Page content always goes into logopts, but is shown only with url.first_par.
89 logopts[:extra] = info[:content]
90 extra << "#{Bold}text#{Bold}: #{info[:content]}" if @bot.config['url.first_par']
# Presumably the branch taken when there is no page content (the else line is
# not visible): describe the resource through its HTTP headers instead.
92 logopts[:extra] = String.new
93 logopts[:extra] << "Content Type: #{resp['content-type']}"
94 extra << "#{Bold}type#{Bold}: #{resp['content-type']}" unless title
95 if enc = resp['content-encoding']
96 logopts[:extra] << ", encoding: #{enc}"
97 extra << "#{Bold}encoding#{Bold}: #{enc}" if @bot.config['url.first_par'] or not title
# Insert comma thousands separators into the content length; any error while
# doing so leaves size nil.
100 size = resp['content-length'].first.gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
102 logopts[:extra] << ", size: #{size} bytes"
103 extra << "#{Bold}size#{Bold}: #{size} bytes" if @bot.config['url.first_par'] or not title
# NOTE(review): rescuing Exception also catches interrupts and exit requests.
106 rescue Exception => e
112 raise "connecting to site/processing information (#{e.message})"
# Fire the :url_added event with the URL string and the gathered details.
116 call_event(:url_added, url.to_s, logopts)
# The title goes in front of the other pieces.
118 extra.unshift("#{Bold}title#{Bold}: #{title}")
120 return extra.join(", ") if title or not @bot.config['url.titles_only']
# Processes the URLs found in message +m+: fetches link info, replies with it
# for a limited number of URLs and updates the stored list for the message
# target.
#
# m::            the message the URLs were found in
# urls::         Array of URL strings; nothing happens when it is empty
# display_info:: compared against +urls_displayed+ to decide whether a reply
#                is sent; defaults to the url.display_link_info setting
#
# NOTE(review): some lines of this method (begin/rescue/end keywords, the
# set-up of +urls_displayed+, the statement that inserts the new record into
# +list+ and possibly a guard for a nil +list+) are not visible in this view.
123 def handle_urls(m, urls, display_info=@bot.config['url.display_link_info'])
124 return if urls.empty?
125 debug "found urls #{urls.inspect}"
# Only public messages have a stored list; otherwise list is nil.
126 list = m.public? ? @registry[m.target] : nil
127 debug "display link info: #{display_info}"
129 urls.each do |urlstr|
130 debug "working on #{urlstr}"
131 next unless urlstr =~ /^https?:/
133 debug "Getting title for #{urlstr}..."
136 title = get_title_for_url(urlstr,
137 :nick => m.source.nick,
138 :channel => m.channel,
139 :ircline => m.message)
140 debug "Title #{title ? '' : 'not '} found"
141 reply = "#{LINK_INFO} #{title}" if title
# +e+ comes from a rescue clause that is not visible in this view.
143 reply = "Error #{e.message}"
146 if display_info > urls_displayed
# Overlong replies are truncated.
148 m.reply(reply, :overlong => :truncate)
155 # check to see if this url is already listed
156 next if list.find {|u| u.url == urlstr }
158 url = Url.new(m.target, m.sourcenick, Time.new, urlstr, title)
159 debug "#{list.length} urls so far"
# Drop the last entry once the list holds more than url.max_urls entries.
160 list.pop if list.length > @bot.config['url.max_urls']
161 debug "storing url #{url.url}"
163 debug "#{list.length} urls now"
# Write the list back to the registry.
165 @registry[m.target] = list
# NOTE(review): the enclosing `def` line is not visible in this view; the
# 'info' action mapped at the bottom of the file presumably lands here.
# Escape the characters matched by OUR_UNSAFE, then pull every URI out of
# the text.
169 escaped = URI.escape(params[:urls].to_s, OUR_UNSAFE)
170 urls = URI.extract(escaped)
# Handle the URLs in a separate thread, passing params[:urls].length as the
# display_info limit.
171 Thread.new { handle_urls(m, urls, params[:urls].length) }
# NOTE(review): the enclosing `def` line is not visible in this view; this
# looks like the handler run for incoming messages -- confirm.
# Only PrivMessage instances are inspected.
175 return unless m.kind_of?(PrivMessage)
# Escape the characters matched by OUR_UNSAFE, then extract only http and
# https URIs from the message text.
178 escaped = URI.escape(m.message, OUR_UNSAFE)
179 urls = URI.extract(escaped, ['http', 'https'])
180 return if urls.empty?
# Handle the URLs in a separate thread, using the default display_info.
181 Thread.new { handle_urls(m, urls) }
# Replies with one line per stored URL: timestamp, nick and URL, optionally
# followed by link info.
#
# opts:: Hash; :channel is read below, and callers in this file also pass
#        :msg, :list and :max
#
# NOTE(review): the assignments of +list+, +max+, +m+ and +title+ as well as
# the closing `end` lines are not visible in this view; presumably they are
# taken from opts[:list], opts[:max] and opts[:msg] -- confirm.
184 def reply_urls(opts={})
187 channel = opts[:channel]
# Nothing is sent unless a list, a maximum and a message are all present.
189 return unless list and max and m
# Only the first +max+ entries are reported.
190 list[0..(max-1)].each do |url|
191 disp = "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
192 if @bot.config['url.info_on_list']
# Any error raised while fetching the info is swallowed and yields nil.
194 get_title_for_url(url.url,
195 :nick => url.nick, :channel => channel) rescue nil
196 # If the url info was missing and we now have some, try to upgrade it
197 if channel and title and not url.info
198 ll = @registry[channel]
200 if el = ll.find { |u| u.url == url.url }
202 @registry[channel] = ll
205 disp << " --> #{title}" if title
# Overlong replies are truncated.
207 m.reply disp, :overlong => :truncate
# NOTE(review): the enclosing `def` line and the conditional around the two
# replies are not visible in this view; the 'urls' routes mapped at the
# bottom of the file presumably land here.
# The channel falls back to the message target when none was given.
212 channel = params[:channel] ? params[:channel] : m.target
213 max = params[:limit].to_i
216 list = @registry[channel]
218 m.reply "no urls seen yet for channel #{channel}"
220 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Handles the 'search' action: looks through the URLs stored for a channel
# and reports the ones matching a regular expression.
#
# m::      the message that triggered the search
# params:: Hash with :channel (falls back to m.target), :limit (converted
#          with to_i) and :string (regexp source)
#
# NOTE(review): the end of the find_all block, the conditional around the two
# replies and the closing `end` are not visible in this view.
224 def search(m, params)
225 channel = params[:channel] ? params[:channel] : m.target
226 max = params[:limit].to_i
227 string = params[:string]
# The search is case-insensitive.
230 regex = Regexp.new(string, Regexp::IGNORECASE)
# A record matches on its URL or nick, and on its info only when
# url.info_on_list is enabled.
231 list = @registry[channel].find_all {|url|
232 regex.match(url.url) || regex.match(url.nick) ||
233 (@bot.config['url.info_on_list'] && regex.match(url.info))
236 m.reply "no matches for channel #{channel}"
238 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Instantiate the plugin and register its command routes.
243 plugin = UrlPlugin.new
# 'urls info ...' and 'url info ...' both go to the 'info' action; everything
# after the command is collected in :urls.
244 plugin.map 'urls info *urls', :action => 'info'
245 plugin.map 'url info *urls', :action => 'info'
# NOTE(review): each of the following map calls ends in a trailing comma; the
# final argument line of every call is not visible in this view.
# In all of them :limit defaults to 4 and must consist of digits only.
# Search with an explicit channel.
246 plugin.map 'urls search :channel :limit :string', :action => 'search',
247 :defaults => {:limit => 4},
248 :requirements => {:limit => /^\d+$/},
# Search without a channel.
250 plugin.map 'urls search :limit :string', :action => 'search',
251 :defaults => {:limit => 4},
252 :requirements => {:limit => /^\d+$/},
# Listing with an explicit channel.
254 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
255 :requirements => {:limit => /^\d+$/},
# Listing without a channel.
257 plugin.map 'urls :limit', :defaults => {:limit => 4},
258 :requirements => {:limit => /^\d+$/},