# Record type for one stored URL, with the fields channel, nick, time, url
# and info (instances are built with Url.new(target, nick, time, url, title)
# further down in this file).
# NOTE(review): define_structure is defined outside this view - presumably a
# Struct-like helper; confirm.
6 define_structure :Url, :channel, :nick, :time, :url, :info
# Error class raised by UrlPlugin when fetching a link fails with an HTTP
# error ("getting link (<code> - <message>)").
# NOTE(review): the class body and its closing `end` are not visible here.
8 class ::UrlLinkError < RuntimeError
# Plugin that watches messages for http(s) URLs, stores them per target in
# the registry, optionally replies with link info, and answers the
# "urls" / "urls search" commands.
11 class UrlPlugin < Plugin
# Matches an HTML <title>...</title> element (case-insensitive, '.' matches
# newlines); capture group 1 is the text between the tags.
12 TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
# Prefix put in front of the link info sent back to the channel.
13 LINK_INFO = "[Link Info]"
# Character class of everything that is NOT a URI unreserved/reserved
# character, '%', '#' or a space; used as the "unsafe" set when escaping a
# message before URLs are extracted from it.
# NOTE(review): the third argument ('N') is the legacy kcode parameter of
# Regexp.new; newer Ruby versions deprecate and then reject it - confirm the
# Ruby version this plugin targets.
14 OUR_UNSAFE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}%# ]", false, 'N')
# Configuration keys used by this plugin.
# NOTE(review): the :default lines of several registrations below are not
# visible in this view.

# Upper bound on stored urls; must be a positive integer, defaults to 100.
16 BotConfig.register BotConfigIntegerValue.new('url.max_urls',
17 :default => 100, :validate => Proc.new{|v| v > 0},
18 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# Maximum number of links per message for which info is displayed (0 disables).
19 BotConfig.register BotConfigIntegerValue.new('url.display_link_info',
21 :desc => "Get the title of links pasted to the channel and display it (also tells if the link is broken or the site is down). Do it for at most this many links per line (set to 0 to disable)")
# When set, the header-based "type/size" fallback is suppressed.
22 BotConfig.register BotConfigBooleanValue.new('url.titles_only',
24 :desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# When set, the first paragraph of the page is fetched in addition to the title.
25 BotConfig.register BotConfigBooleanValue.new('url.first_par',
27 :desc => "Also try to get the first paragraph of a web page")
# When set, listing and searching also show (and search) the link info.
28 BotConfig.register BotConfigBooleanValue.new('url.info_on_list',
30 :desc => "Show link info when listing/searching for urls")
# Hosts for which link info is never fetched. Each entry is regexp source;
# reset_no_info_hosts joins the entries with '|' into one case-insensitive
# pattern, and changing the setting triggers that rebuild via :on_change.
# The defaults cover localhost, 127.0.0.1 and the RFC 1918 private ranges
# 192.168/16, 10/8 and 172.16/12. The last pattern must accept second octets
# 16 through 31 inclusive, hence 3[01] (a bare "31" would let 172.30.x.x
# through).
BotConfig.register BotConfigArrayValue.new('url.no_info_hosts',
  :default => ['localhost', '^192\.168\.', '^10\.', '^127\.0\.0\.1', '^172\.(1[6-9]|2\d|3[01])\.'],
  :on_change => Proc.new { |bot, v| bot.plugins['url'].reset_no_info_hosts },
  :desc => "A list of regular expressions matching hosts for which no info should be provided")
# NOTE(review): the enclosing `def` line is not visible in this view -
# presumably this is the body of the plugin's initializer; confirm.
# Registry lookups for targets with no stored urls presumably fall back to an
# empty array through this default.
39 @registry.set_default(Array.new)
# If the stored 'url.display_link_info' value is not an Integer, feed its
# string form back through the config item's set_string - presumably to
# convert a value saved by an older, non-integer version of the setting.
40 unless @bot.config['url.display_link_info'].kind_of?(Integer)
41 @bot.config.items[:'url.display_link_info'].set_string(@bot.config['url.display_link_info'].to_s)
# Rebuilds @no_info_hosts, the case-insensitive regexp that decides which
# hosts get no link info, from the 'url.no_info_hosts' config list.
#
# Each list entry is treated as regexp source and the entries are OR-ed
# together. Blank entries are dropped, and an empty list yields a pattern
# that can never match: joining them as-is would create an empty
# alternative, which matches every host and would silently disable link info
# for all sites.
def reset_no_info_hosts
  patterns = Array(@bot.config['url.no_info_hosts']).reject { |pat| pat.to_s.strip.empty? }
  # (?!) is an empty negative lookahead; it fails at every position.
  source = patterns.empty? ? '(?!)' : patterns.join('|')
  @no_info_hosts = Regexp.new(source, Regexp::IGNORECASE)
  debug "no info hosts regexp set to #{@no_info_hosts}"
end
# Returns the usage text for the "urls" and "urls search" commands.
# Neither +plugin+ nor +topic+ is consulted in the visible body; the same
# string is returned for every topic.
51 def help(plugin, topic="")
52 "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
# Extracts the page title from a chunk of HTML.
# Returns nil when +pagedata+ contains no <title>...</title> element.
# NOTE(review): the line that builds the return value from the match is not
# visible in this view.
55 def get_title_from_html(pagedata)
56 return unless TITLE_RE.match(pagedata)
# Fetches a link over HTTP and returns a one-line description of it.
#
# uri_str:: a URI object, or a string that is parsed with URI.parse
# nick, channel, ircline:: optional context; each one that is given is copied
#                          into logopts, which is passed to the :url_added
#                          event
#
# Returns nil when the scheme does not match /https?/, a "Sorry, ..." string
# when the host matches @no_info_hosts, a "title: ..." string when a <title>
# is found and, unless 'url.titles_only' is set, a "type: ..." string built
# from the response headers.
# Raises UrlLinkError carrying the response code and message; after the
# rescue clause a RuntimeError with a "connecting to site/processing
# information" message is raised.
# NOTE(review): many lines of this method (branch conditions, `else`/`end`
# lines, the initialisation of logopts and extra) are missing from this view,
# so the control flow described here is partial.
60 def get_title_for_url(uri_str, nick = nil, channel = nil, ircline = nil)
62 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
# NOTE(review): the pattern is unanchored, so any scheme that merely contains
# "http" passes this check.
63 return if url.scheme !~ /https?/
# Hosts matching the configured no-info pattern are never contacted.
65 if url.host =~ @no_info_hosts
66 return "Sorry, info retrieval for #{url.host} is disabled"
70 logopts[:nick] = nick if nick
71 logopts[:channel] = channel if channel
72 logopts[:ircline] = ircline if ircline
78 debug "+ getting #{url.request_uri}"
79 @bot.httputil.get_response(url) { |resp|
85 if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
86 # The page is text or HTML, so we can try finding a title and, if
87 # requested, the first par.
89 # We act differently depending on whether we want the first par or
90 # not: in the first case we download the initial part and then parse
91 # it; in the second case we only download as much as we need to find
94 if @bot.config['url.first_par']
95 partial = resp.partial_body(@bot.config['http.info_bytes'])
96 logopts[:title] = title = get_title_from_html(partial)
# When the URL has a fragment, drop everything up to and including the
# matching <a name=...> anchor so the first paragraph is taken from there.
# NOTE(review): url.fragment is interpolated without Regexp.escape, so regexp
# metacharacters in the fragment are interpreted as pattern syntax.
97 if url.fragment and not url.fragment.empty?
98 fragreg = /.*?<a\s+[^>]*name=["']?#{url.fragment}["']?.*?>/im
99 partial.sub!(fragreg,'')
101 first_par = Utils.ircify_first_html_par(partial, :strip => title)
102 unless first_par.empty?
103 logopts[:extra] = first_par
104 extra << ", #{Bold}text#{Bold}: #{first_par}"
106 call_event(:url_added, url.to_s, logopts)
107 return "#{Bold}title#{Bold}: #{title}#{extra}" if title
# Title-only mode: look for the title in each part yielded by partial_body.
109 resp.partial_body(@bot.config['http.info_bytes']) { |part|
110 logopts[:title] = title = get_title_from_html(part)
111 call_event(:url_added, url.to_s, logopts)
112 return "#{Bold}title#{Bold}: #{title}" if title
115 # if nothing was found, provide more basic info, as for non-html pages
120 enc = resp['content-encoding']
121 logopts[:extra] = String.new
122 logopts[:extra] << "Content Type: #{resp['content-type']}"
124 logopts[:extra] << ", encoding: #{enc}"
125 extra << ", #{Bold}encoding#{Bold}: #{enc}"
128 unless @bot.config['url.titles_only']
129 # content doesn't have title, just display info.
# Insert thousands separators into the Content-Length value; the inline
# rescue turns any error (e.g. a missing header) into nil.
130 size = resp['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
132 logopts[:extra] << ", size: #{size} bytes"
133 size = ", #{Bold}size#{Bold}: #{size} bytes"
135 call_event(:url_added, url.to_s, logopts)
136 return "#{Bold}type#{Bold}: #{resp['content-type']}#{size}#{extra}"
138 call_event(:url_added, url.to_s, logopts)
140 raise UrlLinkError, "getting link (#{resp.code} - #{resp.message})"
# NOTE(review): rescuing Exception also catches non-StandardError exceptions
# such as Interrupt and SystemExit; the handling between this line and the
# raise below is not visible in this view.
144 rescue Exception => e
150 raise "connecting to site/processing information (#{e.message})"
# NOTE(review): the enclosing `def` line is not visible in this view -
# presumably this is the body of the plugin's message handler taking +m+;
# confirm. Loop headers, `begin`/`rescue` and `end` lines are also missing.
#
# Only PrivMessage objects are processed.
156 return unless m.kind_of?(PrivMessage)
# Escape characters outside the allowed URI set before extracting URLs.
# NOTE(review): URI.escape is not available in recent Ruby versions - confirm
# the Ruby version this plugin targets.
159 escaped = URI.escape(m.message, OUR_UNSAFE)
160 urls = URI.extract(escaped)
161 return if urls.empty?
162 debug "found urls #{urls.inspect}"
# Urls already stored for the target this message was sent to.
163 list = @registry[m.target]
166 debug "working on #{urlstr}"
# Only http and https URLs are handled.
167 next unless urlstr =~ /^https?:/
169 debug "display link info: #{@bot.config['url.display_link_info']}"
# Link info is shown only while fewer than 'url.display_link_info' links of
# this message have been displayed.
170 if @bot.config['url.display_link_info'] > urls_displayed
173 debug "Getting title for #{urlstr}..."
175 title = get_title_for_url urlstr, m.source.nick, m.channel, m.message
177 m.reply "#{LINK_INFO} #{title}", :overlong => :truncate
180 debug "Title not found!"
183 m.reply "Error #{e.message}"
188 # check to see if this url is already listed
189 next if list.find {|u| u.url == urlstr }
191 url = Url.new(m.target, m.sourcenick, Time.new, urlstr, title)
192 debug "#{list.length} urls so far"
# NOTE(review): the comparison is `>`, not `>=`, and the statement that trims
# the list is not visible here - confirm the list cannot grow to
# 'url.max_urls' + 1 entries.
193 if list.length > @bot.config['url.max_urls']
196 debug "storing url #{url.url}"
198 debug "#{list.length} urls now"
# Write the list back to the registry under the message target.
200 @registry[m.target] = list
# Replies to a message with up to +max+ entries from a list of stored urls.
#
# opts:: option hash; callers in this file pass :msg, :channel, :list and
#        :max.
# NOTE(review): the lines that read list, max and m from opts are not visible
# in this view - presumably they come from :list, :max and :msg; confirm.
#
# Returns without replying unless list, max and m are all set.
203 def reply_urls(opts={})
206 channel = opts[:channel]
208 return unless list and max and m
# Walk the first max entries of the list.
209 list[0..(max-1)].each do |url|
210 disp = "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
211 if @bot.config['url.info_on_list']
# Use the stored info, or fetch it now; the inline rescue turns any fetch
# error into a nil title.
212 title = url.info || get_title_for_url(url.url, url.nick, channel) rescue nil
213 # If the url info was missing and we now have some, try to upgrade it
214 if channel and title and not url.info
215 ll = @registry[channel]
# Find the stored entry with the same url; the statement that updates it is
# not visible in this view.
217 if el = ll.find { |u| u.url == url.url }
219 @registry[channel] = ll
222 disp << " --> #{title}" if title
224 m.reply disp, :overlong => :truncate
# NOTE(review): the enclosing `def` line is not visible in this view -
# presumably this is the action behind the 'urls' routes, taking +m+ and
# +params+; confirm. The condition guarding the "no urls seen yet" reply is
# also missing.
#
# The channel defaults to the message target when none is given.
229 channel = params[:channel] ? params[:channel] : m.target
230 max = params[:limit].to_i
233 list = @registry[channel]
235 m.reply "no urls seen yet for channel #{channel}"
237 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Handles "urls search": replies with the stored urls of a channel that match
# a pattern.
#
# m::      the message being answered
# params:: route parameters; :channel (optional, defaults to the message
#          target), :limit and :string are read here
#
# An entry matches when the pattern matches its url or nick, or - if
# 'url.info_on_list' is set - its info.
# NOTE(review): some lines (including the condition guarding the "no matches"
# reply) are not visible in this view.
241 def search(m, params)
242 channel = params[:channel] ? params[:channel] : m.target
243 max = params[:limit].to_i
244 string = params[:string]
# NOTE(review): the user-supplied string is compiled as a regexp as-is; no
# handling of an invalid pattern is visible in this view.
247 regex = Regexp.new(string, Regexp::IGNORECASE)
248 list = @registry[channel].find_all {|url|
249 regex.match(url.url) || regex.match(url.nick) ||
250 (@bot.config['url.info_on_list'] && regex.match(url.info))
253 m.reply "no matches for channel #{channel}"
255 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Instantiate the plugin and register its command routes. In every route
# :limit defaults to 4 and must consist of digits only.
# NOTE(review): each map call below ends in a trailing comma; its final
# argument line is not visible in this view.
260 plugin = UrlPlugin.new
# "urls search <channel> [limit] <string>" -> search
261 plugin.map 'urls search :channel :limit :string', :action => 'search',
262 :defaults => {:limit => 4},
263 :requirements => {:limit => /^\d+$/},
# "urls search [limit] <string>" -> search
265 plugin.map 'urls search :limit :string', :action => 'search',
266 :defaults => {:limit => 4},
267 :requirements => {:limit => /^\d+$/},
# "urls <channel> [limit]"
269 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
270 :requirements => {:limit => /^\d+$/},
# "urls [limit]"
272 plugin.map 'urls :limit', :defaults => {:limit => 4},
273 :requirements => {:limit => /^\d+$/},