# Record type for one remembered URL, with the fields channel, nick, time,
# url and info (in that positional order, as used by Url.new further down).
# NOTE(review): define_structure is a helper defined outside this excerpt;
# presumably it creates a Struct-like class -- confirm.
6 define_structure :Url, :channel, :nick, :time, :url, :info
# Plugin that remembers urls mentioned on channels, can list/search them, and
# can look up link info (title, content type, ...) for them.
# NOTE(review): this excerpt omits some original lines (see the gaps in the
# embedded line numbers); comments below describe only what is visible.
8 class UrlPlugin < Plugin
# Prefix put in front of link-info replies (see handle_urls).
9 LINK_INFO = "[Link Info]"
# Matches any character that is NOT a URI unreserved/reserved character, '%',
# '#' or a space; passed as the "unsafe" set to URI.escape below.
# NOTE(review): the third argument 'N' looks like the Ruby 1.8 kcode flag
# ("none") -- confirm against the Ruby version this runs on.
10 OUR_UNSAFE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}%# ]", false, 'N')
# url.max_urls: positive integer, default 100.
12 Config.register Config::IntegerValue.new('url.max_urls',
13 :default => 100, :validate => Proc.new{|v| v > 0},
14 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# url.display_link_info: maximum number of links per line to show info for.
# NOTE(review): the line carrying this setting's other options (original line
# 16) is not visible in this excerpt; the same applies to the three boolean
# settings below (original lines 19, 22 and 25).
15 Config.register Config::IntegerValue.new('url.display_link_info',
17 :desc => "Get the title of links pasted to the channel and display it (also tells if the link is broken or the site is down). Do it for at most this many links per line (set to 0 to disable)")
# url.titles_only: consulted by get_title_for_url when deciding whether to
# return info for a link without a title.
18 Config.register Config::BooleanValue.new('url.titles_only',
20 :desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# url.first_par: adds the page content ("text") part to the link info.
21 Config.register Config::BooleanValue.new('url.first_par',
23 :desc => "Also try to get the first paragraph of a web page")
# url.info_on_list: consulted by reply_urls and search.
24 Config.register Config::BooleanValue.new('url.info_on_list',
26 :desc => "Show link info when listing/searching for urls")
# url.no_info_hosts: host patterns that are never queried. The defaults cover
# localhost, 192.168.*, 10.*, 127.* and 172.16-31.*. Changing the setting
# calls reset_no_info_hosts on the plugin registered as 'url'.
27 Config.register Config::ArrayValue.new('url.no_info_hosts',
28 :default => ['localhost', '^192\.168\.', '^10\.', '^127\.', '^172\.(1[6-9]|2\d|31)\.'],
29 :on_change => Proc.new { |bot, v| bot.plugins['url'].reset_no_info_hosts },
30 :desc => "A list of regular expressions matching hosts for which no info should be provided")
# NOTE(review): fragment -- the enclosing method header is not visible in this
# excerpt (presumably the plugin initializer; confirm).
# Registry lookups fall back to an Array passed as the default value.
35 @registry.set_default(Array.new)
# When the current url.display_link_info value is not an Integer, feed its
# string form back through set_string on the config item.
# NOTE(review): presumably this migrates a value stored under an older,
# non-integer type -- confirm.
36 unless @bot.config['url.display_link_info'].kind_of?(Integer)
37 @bot.config.items[:'url.display_link_info'].set_string(@bot.config['url.display_link_info'].to_s)
# Rebuilds @no_info_hosts from the url.no_info_hosts setting: the configured
# patterns are joined with '|' into one regexp. The second argument `true`
# makes the regexp case-insensitive.
# NOTE(review): with an empty pattern list the joined string is "", and the
# resulting regexp matches every host, which would disable info retrieval
# everywhere -- confirm whether an empty list should instead match nothing.
42 def reset_no_info_hosts
43 @no_info_hosts = Regexp.new(@bot.config['url.no_info_hosts'].join('|'), true)
44 debug "no info hosts regexp set to #{@no_info_hosts}"
# Returns the usage text for the plugin's commands. Neither `plugin` nor
# `topic` is used on the visible lines; the same string is returned for any
# topic.
47 def help(plugin, topic="")
48 "url info <url> => display link info for <url> (set url.display_link_info > 0 if you want the bot to do it automatically when someone writes an url), urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
# Returns the title of `pagedata` by delegating to its ircify_html_title
# method (defined outside this excerpt).
51 def get_title_from_html(pagedata)
52 return pagedata.ircify_html_title
# Looks up link info for `uri_str` (a String or a URI) and returns it as one
# string of ", "-joined parts (title, text, type, encoding, size).
# Returns nil for schemes that fail the http(s) check, and a "Sorry, ..."
# message for hosts matching @no_info_hosts.
# NOTE(review): many original lines of this method are not visible in this
# excerpt (including the begin/rescue structure, the branch conditions and the
# initialisation of `extra`, `logopts` and `resp`); the comments below describe
# the visible lines only.
55 def get_title_for_url(uri_str, opts = {})
57 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
# NOTE(review): the pattern is unanchored, so any scheme merely containing
# "http" passes this check -- confirm whether /\Ahttps?\z/ was intended.
58 return if url.scheme !~ /https?/
60 if url.host =~ @no_info_hosts
61 return "Sorry, info retrieval for #{url.host} is disabled"
70 debug "+ getting info for #{url.request_uri}"
# Utils.get_html_info is defined elsewhere; its result is read below with the
# keys :title and :content.
71 info = Utils.get_html_info(url)
75 logopts[:title] = title = info[:title]
78 logopts[:extra] = info[:content]
# The page text is only shown to the user when url.first_par is enabled.
79 extra << "#{Bold}text#{Bold}: #{info[:content]}" if @bot.config['url.first_par']
# Header-based description: logopts[:extra] always gets the details, while
# `extra` (the user-visible parts) gets them only under the conditions shown.
81 logopts[:extra] = String.new
82 logopts[:extra] << "Content Type: #{resp['content-type']}"
83 extra << "#{Bold}type#{Bold}: #{resp['content-type']}" unless title
84 if enc = resp['content-encoding']
85 logopts[:extra] << ", encoding: #{enc}"
86 extra << "#{Bold}encoding#{Bold}: #{enc}" if @bot.config['url.first_par'] or not title
# Formats the content length with thousands separators; any error in this
# expression leaves `size` nil because of the trailing `rescue nil`.
# NOTE(review): `.first` implies the header value is array-like here -- confirm.
89 size = resp['content-length'].first.gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
91 logopts[:extra] << ", size: #{size} bytes"
92 extra << "#{Bold}size#{Bold}: #{size} bytes" if @bot.config['url.first_par'] or not title
# `e` is bound by a rescue clause that is not visible in this excerpt.
101 raise "connecting to site/processing information (#{e.message})"
# Announces the url together with the collected log options.
105 call_event(:url_added, url.to_s, logopts)
# The title part goes in front of the other parts.
107 extra.unshift("#{Bold}title#{Bold}: #{title}")
# The joined parts are returned when a title was found or when
# url.titles_only is off.
109 return extra.join(", ") if title or not @bot.config['url.titles_only']
# Processes the urls found in message `m`: looks up link info for each
# http(s) url, replies with it for up to `display_info` urls, and records the
# url in the list stored in the registry under m.target.
# `display_info` defaults to the url.display_link_info setting.
# NOTE(review): several original lines of this method are not visible in this
# excerpt (including the begin/rescue around the title lookup, the
# `urls_displayed` counter handling and the insertion into `list`).
112 def handle_urls(m, urls, display_info=@bot.config['url.display_link_info'])
113 return if urls.empty?
114 debug "found urls #{urls.inspect}"
# A stored list is only used for public messages; otherwise `list` is nil.
115 list = m.public? ? @registry[m.target] : nil
116 debug "display link info: #{display_info}"
118 urls.each do |urlstr|
119 debug "working on #{urlstr}"
120 next unless urlstr =~ /^https?:/
122 debug "Getting title for #{urlstr}..."
125 title = get_title_for_url(urlstr,
126 :nick => m.source.nick,
127 :channel => m.channel,
128 :ircline => m.message)
129 debug "Title #{title ? '' : 'not '} found"
130 reply = "#{LINK_INFO} #{title}" if title
# `e` is bound by a rescue clause that is not visible in this excerpt.
132 reply = "Error #{e.message}"
# Replies are limited to `display_info` urls per call.
135 if display_info > urls_displayed
137 m.reply(reply, :overlong => :truncate)
144 # check to see if this url is already listed
145 next if list.find {|u| u.url == urlstr }
147 url = Url.new(m.target, m.sourcenick, Time.new, urlstr, title)
148 debug "#{list.length} urls so far"
# NOTE(review): an entry is dropped only when the list is already longer than
# url.max_urls. If the new url is then added on a line not shown here, the
# list can hold max_urls + 1 entries -- confirm whether `>=` was intended.
149 list.pop if list.length > @bot.config['url.max_urls']
150 debug "storing url #{url.url}"
152 debug "#{list.length} urls now"
# Writes the list back to the registry.
154 @registry[m.target] = list
# NOTE(review): fragment -- the enclosing method header is not visible in this
# excerpt (presumably the 'info' action mapped at the bottom of the file;
# confirm).
# Escapes the given text with OUR_UNSAFE, extracts the urls from it and
# handles them in a new thread, passing params[:urls].length as the
# display_info limit.
158 escaped = URI.escape(params[:urls].to_s, OUR_UNSAFE)
159 urls = URI.extract(escaped)
160 Thread.new { handle_urls(m, urls, params[:urls].length) }
# NOTE(review): fragment -- the enclosing method header and two following
# lines are not visible in this excerpt (presumably the plugin's message
# listener; confirm).
# Only PrivMessage objects are processed.
164 return unless m.kind_of?(PrivMessage)
# Escapes the message text, extracts http/https urls and, when any were
# found, handles them in a new thread with the default display limit.
167 escaped = URI.escape(m.message, OUR_UNSAFE)
168 urls = URI.extract(escaped, ['http', 'https'])
169 return if urls.empty?
170 Thread.new { handle_urls(m, urls) }
# Replies to a message with up to `max` entries of `list`, one reply per url,
# formatted as "[time] <nick> url", with " --> info" appended when info is
# available.
# Callers in this file pass the option keys :msg, :channel, :list and :max.
# NOTE(review): the lines that read `list`, `max` and `m` from opts and the
# line that assigns `title` are not visible in this excerpt.
173 def reply_urls(opts={})
176 channel = opts[:channel]
# Nothing is done unless a list, a limit and a message are all present.
178 return unless list and max and m
179 list[0..(max-1)].each do |url|
180 disp = "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
181 if @bot.config['url.info_on_list']
# Any error raised by the lookup is swallowed by `rescue nil`.
183 get_title_for_url(url.url,
184 :nick => url.nick, :channel => channel) rescue nil
185 # If the url info was missing and we now have some, try to upgrade it
186 if channel and title and not url.info
187 ll = @registry[channel]
189 if el = ll.find { |u| u.url == url.url }
191 @registry[channel] = ll
194 disp << " --> #{title}" if title
196 m.reply disp, :overlong => :truncate
# NOTE(review): fragment -- the enclosing method header and some interior
# lines (including the condition guarding the "no urls" reply) are not
# visible in this excerpt. Presumably this is the handler for the 'urls'
# routes mapped at the bottom of the file; confirm.
# The channel comes from params[:channel] when given, otherwise from m.target.
201 channel = params[:channel] ? params[:channel] : m.target
202 max = params[:limit].to_i
205 list = @registry[channel]
207 m.reply "no urls seen yet for channel #{channel}"
209 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Searches the urls stored for a channel and replies with the matches.
# The channel comes from params[:channel] when given, otherwise from m.target.
# An entry matches when the case-insensitive regexp built from
# params[:string] matches its url or nick, or its info when url.info_on_list
# is enabled.
# NOTE(review): some original lines of this method are not visible in this
# excerpt (including the condition guarding the "no matches" reply).
213 def search(m, params)
214 channel = params[:channel] ? params[:channel] : m.target
215 max = params[:limit].to_i
216 string = params[:string]
# NOTE(review): the pattern comes straight from user input; an invalid pattern
# makes Regexp.new raise RegexpError. Whether that is rescued is not visible
# here -- confirm.
219 regex = Regexp.new(string, Regexp::IGNORECASE)
220 list = @registry[channel].find_all {|url|
221 regex.match(url.url) || regex.match(url.nick) ||
222 (@bot.config['url.info_on_list'] && regex.match(url.info))
225 m.reply "no matches for channel #{channel}"
227 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Instantiates the plugin and registers its command routes.
# NOTE(review): the final continuation line of each multi-line map call below
# is not visible in this excerpt (each visible call ends with a trailing
# comma).
232 plugin = UrlPlugin.new
# Both 'urls info ...' and 'url info ...' go to the 'info' action.
233 plugin.map 'urls info *urls', :action => 'info'
234 plugin.map 'url info *urls', :action => 'info'
# Search routes, with and without an explicit channel. :limit defaults to 4
# and must consist of digits only.
235 plugin.map 'urls search :channel :limit :string', :action => 'search',
236 :defaults => {:limit => 4},
237 :requirements => {:limit => /^\d+$/},
239 plugin.map 'urls search :limit :string', :action => 'search',
240 :defaults => {:limit => 4},
241 :requirements => {:limit => /^\d+$/},
# Listing routes, with and without an explicit channel; same :limit rules.
# No :action is given on the visible lines of these two calls.
243 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
244 :requirements => {:limit => /^\d+$/},
246 plugin.map 'urls :limit', :defaults => {:limit => 4},
247 :requirements => {:limit => /^\d+$/},