# Record describing one remembered URL. Fields, in order: channel, nick, time,
# url, info (handle_urls builds instances with Url.new in that order).
# NOTE(review): define_structure is defined outside this file section;
# presumably it creates a Struct-like class — confirm.
6 define_structure :Url, :channel, :nick, :time, :url, :info
# Plugin that remembers URLs and can report information about them (page
# title, optionally first paragraph, content type, ...), as described by the
# option descriptions registered below.
# NOTE(review): the :default lines of url.display_link_info, url.titles_only,
# url.first_par and url.info_on_list are not visible in this section.
8 class UrlPlugin < Plugin
# Prefix prepended to link-info replies (see handle_urls).
9 LINK_INFO = "[Link Info]"
# Matches any character that is NOT URI-unreserved, URI-reserved, '%', '#' or
# a space. It is handed to URI.escape before URLs are extracted from text.
# NOTE(review): the third Regexp.new argument ('N') looks like the legacy
# kcode option — confirm it is accepted by the targeted Ruby version.
10 OUR_UNSAFE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}%# ]", false, 'N')
# Cap on the number of stored urls; the validator only accepts values > 0.
12 Config.register Config::IntegerValue.new('url.max_urls',
13 :default => 100, :validate => Proc.new{|v| v > 0},
14 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# Integer option: how many links per line get automatic link info (0 disables).
15 Config.register Config::IntegerValue.new('url.display_link_info',
17 :desc => "Get the title of links pasted to the channel and display it (also tells if the link is broken or the site is down). Do it for at most this many links per line (set to 0 to disable)")
# Boolean option: suppress info for links without a <title>.
18 Config.register Config::BooleanValue.new('url.titles_only',
20 :desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# Boolean option: also report the first paragraph of the page.
21 Config.register Config::BooleanValue.new('url.first_par',
23 :desc => "Also try to get the first paragraph of a web page")
# Boolean option: include link info in list/search output.
24 Config.register Config::BooleanValue.new('url.info_on_list',
26 :desc => "Show link info when listing/searching for urls")
# Host patterns for which no info is fetched. The defaults cover localhost and
# the 192.168.*, 10.*, 127.* and 172.16-31.* address prefixes. Changing the
# option triggers reset_no_info_hosts on the loaded 'url' plugin so the
# compiled regexp is rebuilt.
27 Config.register Config::ArrayValue.new('url.no_info_hosts',
28 :default => ['localhost', '^192\.168\.', '^10\.', '^127\.', '^172\.(1[6-9]|2\d|31)\.'],
29 :on_change => Proc.new { |bot, v| bot.plugins['url'].reset_no_info_hosts },
30 :desc => "A list of regular expressions matching hosts for which no info should be provided")
# NOTE(review): the enclosing def line is not visible in this section;
# presumably these statements belong to the plugin constructor — confirm.
# Use an Array as the registry default value (presumably what is returned for
# keys that have never been stored — verify against the registry API).
35 @registry.set_default(Array.new)
# If the configured url.display_link_info value is not an Integer, re-set the
# option from the string form of the current value.
# NOTE(review): presumably a migration from an older non-integer setting.
36 unless @bot.config['url.display_link_info'].kind_of?(Integer)
37 @bot.config.items[:'url.display_link_info'].set_string(@bot.config['url.display_link_info'].to_s)
# Rebuild @no_info_hosts from the url.no_info_hosts option: all configured
# patterns are joined with '|' into one regexp. The truthy second argument to
# Regexp.new makes the match case-insensitive. Invoked by the option's
# :on_change hook.
42 def reset_no_info_hosts
43 @no_info_hosts = Regexp.new(@bot.config['url.no_info_hosts'].join('|'), true)
44 debug "no info hosts regexp set to #{@no_info_hosts}"
# Return the usage text for the plugin's commands.
# plugin, topic:: not used by the visible body; the same text is returned
#                 regardless of their values.
47 def help(plugin, topic="")
48 "url info <url> => display link info for <url> (set url.display_link_info > 0 if you want the bot to do it automatically when someone writes an url), urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
# Return the title of an HTML page by delegating to pagedata.ircify_html_title.
# pagedata:: presumably a String holding the page's HTML; ircify_html_title is
#            defined outside this section — verify.
51 def get_title_from_html(pagedata)
52 return pagedata.ircify_html_title
# Collect displayable information about a URL and return it as a single
# string whose parts are joined with ", ".
# uri_str:: a URI object, or anything URI.parse accepts.
# opts::    caller-supplied details (callers pass :nick, :channel, :ircline);
#           how they are consumed is on lines not visible in this section.
# Returns nil for schemes that do not match http/https, a refusal message for
# hosts matching @no_info_hosts, otherwise the joined info (subject to
# url.titles_only).
# NOTE(review): several lines of this method (the begin/rescue structure and
# the definitions of resp, extra and logopts) are not visible here.
55 def get_title_for_url(uri_str, opts = {})
57 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
# Bare return (nil) unless the scheme matches.
# NOTE(review): the pattern is unanchored, so any scheme merely containing
# "http" passes — confirm whether an anchored match was intended.
58 return if url.scheme !~ /https?/
# Hosts matching the configured no-info regexp get a fixed refusal message.
60 if url.host =~ @no_info_hosts
61 return "Sorry, info retrieval for #{url.host} is disabled"
70 debug "+ getting info for #{url.request_uri}"
# Utils.get_html_info is defined elsewhere; its result is indexed below with
# :title and :content.
71 info = Utils.get_html_info(url)
75 logopts[:title] = title = info[:title]
# The page content goes into the log options and, when url.first_par is set,
# into the displayed parts.
78 logopts[:extra] = info[:content]
79 extra << "#{Bold}text#{Bold}: #{info[:content]}" if @bot.config['url.first_par']
# Header-based description: content type always goes into the log options,
# but is only displayed when there is no title.
81 logopts[:extra] = String.new
82 logopts[:extra] << "Content Type: #{resp['content-type']}"
83 extra << "#{Bold}type#{Bold}: #{resp['content-type']}" unless title
84 if enc = resp['content-encoding']
85 logopts[:extra] << ", encoding: #{enc}"
86 extra << "#{Bold}encoding#{Bold}: #{enc}" if @bot.config['url.first_par'] or not title
# Format the content length with comma group separators. The inline
# `rescue nil` leaves size nil if anything in the expression raises
# (presumably when the header is absent).
89 size = resp['content-length'].first.gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
91 logopts[:extra] << ", size: #{size} bytes"
92 extra << "#{Bold}size#{Bold}: #{size} bytes" if @bot.config['url.first_par'] or not title
# Re-raise with a descriptive message; `e` is bound by a rescue clause that is
# not visible here.
101 raise "connecting to site/processing information (#{e.message})"
# Announce the url together with the collected log options.
105 call_event(:url_added, url.to_s, logopts)
# The title is placed before all other parts.
107 extra.unshift("#{Bold}title#{Bold}: #{title}")
# The joined parts are returned only when a title exists or url.titles_only
# is off.
109 return extra.join(", ") if title or not @bot.config['url.titles_only']
# Process the urls found in a message: fetch link info, optionally reply with
# it, and remember each new url in the registry entry for m.target.
# m::            the message the urls came from.
# urls::         collection of url strings; nothing happens when it is empty.
# display_info:: compared against urls_displayed to decide whether a link-info
#                reply is sent; defaults to the url.display_link_info option.
# NOTE(review): the urls_displayed counter, the rescue clause binding `e`, and
# the line that inserts the new url into the list are not visible here.
112 def handle_urls(m, urls, display_info=@bot.config['url.display_link_info'])
113 return if urls.empty?
114 debug "found urls #{urls.inspect}"
# A stored list is only looked up for public messages; otherwise list is nil.
115 list = m.public? ? @registry[m.target] : nil
116 debug "display link info: #{display_info}"
118 urls.each do |urlstr|
119 debug "working on #{urlstr}"
# Skip anything that does not start with http: or https:.
120 next unless urlstr =~ /^https?:/
122 debug "Getting title for #{urlstr}..."
124 title = get_title_for_url(urlstr,
125 :nick => m.source.nick,
126 :channel => m.channel,
127 :ircline => m.message)
128 debug "Title #{title ? '' : 'not '} found"
130 m.reply "Error #{e.message}"
133 if display_info > urls_displayed
# Over-long replies are truncated.
135 m.reply("#{LINK_INFO} #{title}", :overlong => :truncate)
142 # check to see if this url is already listed
143 next if list.find {|u| u.url == urlstr }
145 url = Url.new(m.target, m.sourcenick, Time.new, urlstr, title)
146 debug "#{list.length} urls so far"
# Drop the last entry once the list is longer than url.max_urls.
# NOTE(review): with a strict `>` check made before the insertion, the list
# can presumably hold max_urls + 1 entries — confirm against the insert line.
147 list.pop if list.length > @bot.config['url.max_urls']
148 debug "storing url #{url.url}"
150 debug "#{list.length} urls now"
# Write the (possibly modified) list back to the registry.
152 @registry[m.target] = list
# NOTE(review): the enclosing def line is not visible in this section;
# presumably this is the body of the 'info' action that the 'url info *urls'
# routes map to — confirm.
# Escape the requested urls using OUR_UNSAFE, extract the urls from the
# escaped text, and process them in a new thread. params[:urls].length is
# passed as display_info.
# NOTE(review): URI.escape and URI.extract are obsolete in current Ruby
# (URI.escape was removed in 3.0) — confirm the targeted Ruby version.
156 escaped = URI.escape(params[:urls].to_s, OUR_UNSAFE)
157 urls = URI.extract(escaped)
158 Thread.new { handle_urls(m, urls, params[:urls].length) }
# NOTE(review): the enclosing def line is not visible in this section;
# presumably this is the plugin's handler for incoming messages — confirm its
# name and signature.
# Only PrivMessage instances are examined.
162 return unless m.kind_of?(PrivMessage)
# Escape the message text using OUR_UNSAFE, then extract http/https urls only.
# NOTE(review): URI.escape and URI.extract are obsolete in current Ruby
# (URI.escape was removed in 3.0) — confirm the targeted Ruby version.
165 escaped = URI.escape(m.message, OUR_UNSAFE)
166 urls = URI.extract(escaped, ['http', 'https'])
167 return if urls.empty?
# Process the urls in a new thread, using the default display_info.
168 Thread.new { handle_urls(m, urls) }
# Reply to a message with one line per stored url, formatted as
# "[timestamp] <nick> url", optionally followed by " --> info".
# opts:: callers in this file pass :msg, :channel, :list and :max.
# NOTE(review): the assignments of list, max, m and title are on lines not
# visible in this section; presumably they are read from opts.
171 def reply_urls(opts={})
174 channel = opts[:channel]
# Nothing is done unless list, max and m are all set.
176 return unless list and max and m
# NOTE(review): when max is 0 the range becomes 0..-1, which selects the whole
# list — confirm whether max is clamped on the lines not visible here.
177 list[0..(max-1)].each do |url|
178 disp = "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
179 if @bot.config['url.info_on_list']
# Any error raised while fetching info is swallowed by the inline
# `rescue nil`.
181 get_title_for_url(url.url,
182 :nick => url.nick, :channel => channel) rescue nil
183 # If the url info was missing and we now have some, try to upgrade it
184 if channel and title and not url.info
185 ll = @registry[channel]
# Locate the stored entry for the same url; the list is then written back to
# the registry.
187 if el = ll.find { |u| u.url == url.url }
189 @registry[channel] = ll
192 disp << " --> #{title}" if title
# Over-long replies are truncated.
194 m.reply disp, :overlong => :truncate
# NOTE(review): the enclosing def line is not visible in this section;
# presumably this is the handler for the 'urls :channel :limit' and
# 'urls :limit' routes — confirm.
# The channel comes from the command parameters, falling back to the
# message's target.
199 channel = params[:channel] ? params[:channel] : m.target
200 max = params[:limit].to_i
203 list = @registry[channel]
# The condition guarding this reply is not visible here; presumably it is sent
# when the list is empty.
205 m.reply "no urls seen yet for channel #{channel}"
207 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Search the urls stored for a channel and reply with the matches.
# m::      the requesting message; its target is the channel used when
#          params[:channel] is not given.
# params:: reads :channel, :limit (converted with to_i) and :string (the
#          pattern to search for).
211 def search(m, params)
212 channel = params[:channel] ? params[:channel] : m.target
213 max = params[:limit].to_i
214 string = params[:string]
# The search string is compiled as a case-insensitive regexp.
# NOTE(review): the pattern comes straight from the command parameters; an
# invalid pattern would make Regexp.new raise — confirm it is handled on the
# lines not visible here.
217 regex = Regexp.new(string, Regexp::IGNORECASE)
# An entry matches on its url or nick, and also on its info when
# url.info_on_list is enabled.
218 list = @registry[channel].find_all {|url|
219 regex.match(url.url) || regex.match(url.nick) ||
220 (@bot.config['url.info_on_list'] && regex.match(url.info))
# The condition guarding this reply is not visible here; presumably it is sent
# when nothing matched.
223 m.reply "no matches for channel #{channel}"
225 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Instantiate the plugin and register its command routes:
# * 'urls info *urls' and 'url info *urls' dispatch to the 'info' action;
# * both 'urls search ...' forms dispatch to the 'search' action;
# * the 'urls :channel :limit' and 'urls :limit' forms name no :action on the
#   visible lines.
# Wherever :limit appears it defaults to 4 and must consist of digits only.
# NOTE(review): each mapping that ends with a trailing comma continues on a
# line not visible in this section.
230 plugin = UrlPlugin.new
231 plugin.map 'urls info *urls', :action => 'info'
232 plugin.map 'url info *urls', :action => 'info'
233 plugin.map 'urls search :channel :limit :string', :action => 'search',
234 :defaults => {:limit => 4},
235 :requirements => {:limit => /^\d+$/},
237 plugin.map 'urls search :limit :string', :action => 'search',
238 :defaults => {:limit => 4},
239 :requirements => {:limit => /^\d+$/},
241 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
242 :requirements => {:limit => /^\d+$/},
244 plugin.map 'urls :limit', :defaults => {:limit => 4},
245 :requirements => {:limit => /^\d+$/},