# Record type for one remembered URL. Judging by how Url.new is called later
# in this file, the fields are, in order: the channel/target the URL was seen
# in, the nick that posted it, the time it was seen, the URL string itself,
# and any link info (title) that was fetched for it.
# NOTE(review): define_structure is defined outside this file; presumably it
# creates a Struct-like class named Url -- confirm.
1 define_structure :Url, :channel, :nick, :time, :url, :info
# Error raised by UrlPlugin#get_title_for_url, carrying the HTTP response
# code and message in its text. The leading `::` places the class in the
# top-level namespace.
# NOTE(review): the exact condition under which it is raised is on lines not
# shown here; presumably a non-success HTTP response -- confirm.
3 class ::UrlLinkError < RuntimeError
# Plugin that remembers URLs mentioned in messages and can display
# information (title, type, size) about them.
6 class UrlPlugin < Plugin
# Captures the text between <title> and </title>. Whitespace is tolerated
# inside the tag brackets; the match is case-insensitive (i) and `.` also
# matches newlines (m), so titles spanning several lines are found.
7 TITLE_RE = /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im
# Prefix put in front of link information replies.
8 LINK_INFO = "[Link Info]"
# Matches every character that is NOT a URI unreserved/reserved character,
# '%', '#' or a space. It is passed to URI.escape so that only those
# "unsafe" characters get percent-encoded before URLs are extracted.
# NOTE(review): the third argument ('N') is the legacy Ruby 1.8 "kcode"
# parameter; newer Ruby versions warn about or ignore it -- confirm the
# Ruby version this plugin targets.
9 OUR_UNSAFE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}%# ]", false, 'N')
# Upper bound on the number of URLs kept per channel (default 100, must be
# strictly positive).
11 BotConfig.register BotConfigIntegerValue.new('url.max_urls',
12 :default => 100, :validate => Proc.new{|v| v > 0},
13 :desc => "Maximum number of urls to store. New urls replace oldest ones.")
# Integer option: how many links per line get their info displayed; 0 turns
# the feature off. (Its :default / :validate options are on a line that is
# not shown here.)
14 BotConfig.register BotConfigIntegerValue.new('url.display_link_info',
16 :desc => "Get the title of links pasted to the channel and display it (also tells if the link is broken or the site is down). Do it for at most this many links per line (set to 0 to disable)")
# Boolean option: when set, info is only shown for links with a <title>.
17 BotConfig.register BotConfigBooleanValue.new('url.titles_only',
19 :desc => "Only show info for links that have <title> tags (in other words, don't display info for jpegs, mpegs, etc.)")
# Boolean option: when set, the first paragraph of the page is fetched too.
20 BotConfig.register BotConfigBooleanValue.new('url.first_par',
22 :desc => "Also try to get the first paragraph of a web page")
# Boolean option: when set, link info is included in list/search output.
23 BotConfig.register BotConfigBooleanValue.new('url.info_on_list',
25 :desc => "Show link info when listing/searching for urls")
# NOTE(review): the enclosing `def` line is not visible here; these lines
# presumably belong to the plugin constructor -- confirm.
# Make registry lookups for unknown keys yield an (empty) Array, so the
# per-channel URL list can be used without a nil check.
30 @registry.set_default(Array.new)
# If the stored value of 'url.display_link_info' is not an Integer, feed its
# string form back through the config item's set_string.
# NOTE(review): presumably a migration for configurations saved when this
# option was not an integer -- confirm.
31 unless @bot.config['url.display_link_info'].kind_of?(Integer)
32 @bot.config.items[:'url.display_link_info'].set_string(@bot.config['url.display_link_info'].to_s)
# Returns the usage text for the `urls` and `urls search` commands.
# Neither +plugin+ nor +topic+ is used by the visible body: the same string
# is returned for every topic.
36 def help(plugin, topic="")
37 "urls [<max>=4] => list <max> last urls mentioned in current channel, urls search [<max>=4] <regexp> => search for matching urls. In a private message, you must specify the channel to query, eg. urls <channel> [max], urls search <channel> [max] <regexp>"
# Extracts the page title from a chunk of HTML.
# +pagedata+:: the HTML text to search.
# Returns nil when no <title>...</title> pair is found (TITLE_RE does not
# match). The code producing the return value for a match is on lines not
# shown here.
40 def get_title_from_html(pagedata)
41 return unless TITLE_RE.match(pagedata)
# Fetches +uri_str+ over HTTP and returns a short, IRC-formatted description
# of what was found there.
#
# +uri_str+:: a URI object or a String that URI.parse can handle.
# +nick+::    optional nick, recorded in the options passed to :url_added.
# +channel+:: optional channel, recorded in the options passed to :url_added.
#
# Visible return values:
# * nil when the URL scheme does not look like http/https;
# * "title: ..." (optionally followed by "text: ..." with the first
#   paragraph) for text/HTML/XML content that has a title;
# * "type: ...", with size and encoding details, when no title was found and
#   'url.titles_only' is off.
#
# Raises UrlLinkError with the response code and message, and a RuntimeError
# whose message wraps the message of an exception caught while connecting or
# processing. A :url_added event is sent via call_event with the URL and the
# collected info before each visible return.
45 def get_title_for_url(uri_str, nick = nil, channel = nil)
47 url = uri_str.kind_of?(URI) ? uri_str : URI.parse(uri_str)
# NOTE(review): this regexp is not anchored, so any scheme merely containing
# "http" passes the check; the caller that scans messages uses the anchored
# /^https?:/ instead -- confirm whether this should be anchored as well.
48 return if url.scheme !~ /https?/
# Only record nick/channel in the event options when they were supplied.
51 logopts[:nick] = nick if nick
52 logopts[:channel] = channel if channel
58 debug "+ getting #{url.request_uri}"
# A `return` inside this block returns from get_title_for_url itself.
59 @bot.httputil.get_response(url) { |resp|
# True when the content type starts with "text/" or contains "xml" or "html"
# anywhere in it.
65 if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
66 # The page is text or HTML, so we can try finding a title and, if
67 # requested, the first par.
69 # We act differently depending on whether we want the first par or
70 # not: in the first case we download the initial part and the parse
71 # it; in the second case we only download as much as we need to find
74 if @bot.config['url.first_par']
# Download up to 'http.info_bytes' bytes in one go and parse that.
75 partial = resp.partial_body(@bot.config['http.info_bytes'])
76 logopts[:title] = title = get_title_from_html(partial)
77 if url.fragment and not url.fragment.empty?
# Drop everything up to and including the anchor named after the URL
# fragment, so the "first paragraph" is taken from the section linked to.
# NOTE(review): url.fragment is interpolated into the regexp without
# Regexp.escape, so a fragment containing regexp metacharacters can change
# the pattern or make it invalid -- confirm and consider escaping.
78 fragreg = /.*?<a\s+[^>]*name=["']?#{url.fragment}["']?.*?>/im
79 partial.sub!(fragreg,'')
# The title is passed as :strip, presumably so that it is not repeated in
# the paragraph text -- confirm against Utils.ircify_first_html_par.
81 first_par = Utils.ircify_first_html_par(partial, :strip => title)
82 unless first_par.empty?
83 logopts[:extra] = first_par
84 extra << ", #{Bold}text#{Bold}: #{first_par}"
86 call_event(:url_added, url.to_s, logopts)
87 return "#{Bold}title#{Bold}: #{title}#{extra}" if title
# Title only: consume the body piece by piece and stop at the first piece
# that contains a title.
89 resp.partial_body(@bot.config['http.info_bytes']) { |part|
90 logopts[:title] = title = get_title_from_html(part)
# NOTE(review): this event is sent for every yielded piece, before the title
# check, so one URL may trigger several :url_added events -- confirm that
# this is intended.
91 call_event(:url_added, url.to_s, logopts)
92 return "#{Bold}title#{Bold}: #{title}" if title
95 # if nothing was found, provide more basic info, as for non-html pages
100 enc = resp['content-encoding']
101 logopts[:extra] = String.new
102 logopts[:extra] << "Content Type: #{resp['content-type']}"
# (The condition guarding the two encoding lines below is on a line that is
# not shown; presumably they only run when an encoding header is present.)
104 logopts[:extra] << ", encoding: #{enc}"
105 extra << ", #{Bold}encoding#{Bold}: #{enc}"
108 unless @bot.config['url.titles_only']
109 # content doesn't have title, just display info.
# Insert thousands separators into the Content-Length value. The trailing
# `rescue nil` leaves size nil when the header is missing (calling gsub on
# nil raises).
110 size = resp['content-length'].gsub(/(\d)(?=\d{3}+(?:\.|$))(\d{3}\..*)?/,'\1,\2') rescue nil
112 logopts[:extra] << ", size: #{size} bytes"
# `size` is reused from here on to hold the formatted reply fragment.
113 size = ", #{Bold}size#{Bold}: #{size} bytes"
115 call_event(:url_added, url.to_s, logopts)
116 return "#{Bold}type#{Bold}: #{resp['content-type']}#{size}#{extra}"
118 call_event(:url_added, url.to_s, logopts)
120 raise UrlLinkError, "getting link (#{resp.code} - #{resp.message})"
# NOTE(review): rescuing Exception also catches non-StandardError exceptions
# (interrupts, exit requests, ...). The handling between this line and the
# raise below is not shown -- confirm whether StandardError would be enough.
124 rescue Exception => e
130 raise "connecting to site/processing information (#{e.message})"
# NOTE(review): the enclosing `def` line is not visible here; this is
# presumably the body of the plugin's message listener, with +m+ being the
# incoming message -- confirm.
#
# Scans a message for http/https URLs, optionally replies with link info for
# the first few of them, and stores each new URL in the registry list kept
# for the message target.
# Only PrivMessage instances are processed.
136 return unless m.kind_of?(PrivMessage)
# Percent-encode characters outside the allowed URI set first, then extract
# URLs from the result.
# NOTE(review): URI.escape and URI.extract are legacy APIs (URI.escape was
# removed in Ruby 3.0) -- confirm the Ruby version this plugin targets.
139 escaped = URI.escape(m.message, OUR_UNSAFE)
140 urls = URI.extract(escaped)
141 return if urls.empty?
142 debug "found urls #{urls.inspect}"
# Stored URLs for this target (channel, or nick for private messages).
143 list = @registry[m.target]
146 debug "working on #{urlstr}"
# Skip anything that is not an http or https URL.
147 next unless urlstr =~ /^https?:/
149 debug "display link info: #{@bot.config['url.display_link_info']}"
# 'url.display_link_info' caps how many links per message get info shown;
# urls_displayed is a counter initialized on a line not shown here.
150 if @bot.config['url.display_link_info'] > urls_displayed
153 debug "Getting title for #{urlstr}..."
155 title = get_title_for_url urlstr, m.source.nick, m.channel
157 m.reply "#{LINK_INFO} #{title}", :overlong => :truncate
160 debug "Title not found!"
# Failures while fetching link info are reported back as a reply.
163 m.reply "Error #{e.message}"
168 # check to see if this url is already listed
169 next if list.find {|u| u.url == urlstr }
171 url = Url.new(m.target, m.sourcenick, Time.new, urlstr, title)
172 debug "#{list.length} urls so far"
# NOTE(review): the comparison is a strict `>`; depending on the trimming
# code on the lines not shown, the list may end up holding one entry more
# than 'url.max_urls' -- confirm.
173 if list.length > @bot.config['url.max_urls']
176 debug "storing url #{url.url}"
178 debug "#{list.length} urls now"
# Write the list back so the registry holds the updated value.
180 @registry[m.target] = list
# Replies with one line per stored URL: timestamp, nick, URL and, when
# 'url.info_on_list' is set, the link info.
#
# +opts+ is an options hash. :channel is read below; the callers in this
# file also pass :msg (the message to reply to), :list (the Url entries to
# show) and :max (how many of them to show), which are read on lines not
# shown here.
# Does nothing unless list, max and m are all set.
183 def reply_urls(opts={})
186 channel = opts[:channel]
188 return unless list and max and m
# Only the first +max+ entries of the list are shown.
189 list[0..(max-1)].each do |url|
190 disp = "[#{url.time.strftime('%Y/%m/%d %H:%M:%S')}] <#{url.nick}> #{url.url}"
191 if @bot.config['url.info_on_list']
# Use the stored info when present, otherwise fetch it now. The modifier
# `rescue nil` makes title nil if the fetch raises.
# NOTE(review): that rescue silently hides every StandardError raised by the
# fetch -- confirm this best-effort behavior is intended.
192 title = url.info || get_title_for_url(url.url, url.nick, channel) rescue nil
193 # If the url info was missing and we now have some, try to upgrade it
194 if channel and title and not url.info
195 ll = @registry[channel]
# Locate the matching entry in the registry copy; the update of that entry
# is on a line not shown here.
197 if el = ll.find { |u| u.url == url.url }
199 @registry[channel] = ll
202 disp << " --> #{title}" if title
204 m.reply disp, :overlong => :truncate
# NOTE(review): the enclosing `def` line is not visible here; this is
# presumably the handler for the `urls` command, taking the message +m+ and
# the parsed +params+ -- confirm.
# Use the channel given in the command, or else the target of the message.
209 channel = params[:channel] ? params[:channel] : m.target
# :limit comes from the command mapping (digits only, default 4).
210 max = params[:limit].to_i
213 list = @registry[channel]
# (The condition guarding this reply is on a line not shown; presumably it
# is sent when the list is empty.)
215 m.reply "no urls seen yet for channel #{channel}"
217 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Handler for `urls search`: replies with the stored URLs of a channel that
# match a case-insensitive regular expression.
#
# +m+::      the message that triggered the command.
# +params+:: parsed command parameters; :channel (optional), :limit and
#            :string are read here.
221 def search(m, params)
# Use the channel given in the command, or else the target of the message.
222 channel = params[:channel] ? params[:channel] : m.target
223 max = params[:limit].to_i
224 string = params[:string]
# NOTE(review): the pattern comes straight from the user; Regexp.new raises
# RegexpError on an invalid pattern, and no handling is visible in the lines
# shown here -- confirm.
227 regex = Regexp.new(string, Regexp::IGNORECASE)
# An entry matches on its URL or on the nick that posted it; the stored link
# info is also searched when 'url.info_on_list' is set.
228 list = @registry[channel].find_all {|url|
229 regex.match(url.url) || regex.match(url.nick) ||
# NOTE(review): url.info can be missing (other code in this file checks for
# that) -- confirm that matching against nil is acceptable here.
230 (@bot.config['url.info_on_list'] && regex.match(url.info))
# (The condition guarding this reply is on a line not shown; presumably it
# is sent when nothing matched.)
233 m.reply "no matches for channel #{channel}"
235 reply_urls :msg => m, :channel => channel, :list => list, :max => max
# Instantiate the plugin and register its command routes. In every route
# :limit defaults to 4 and must consist of digits only. Each map call ends
# with a trailing comma: its remaining options are on lines not shown here.
240 plugin = UrlPlugin.new
# Search with an explicit channel.
241 plugin.map 'urls search :channel :limit :string', :action => 'search',
242 :defaults => {:limit => 4},
243 :requirements => {:limit => /^\d+$/},
# Search without a channel.
245 plugin.map 'urls search :limit :string', :action => 'search',
246 :defaults => {:limit => 4},
247 :requirements => {:limit => /^\d+$/},
# Listing with an explicit channel. No :action is given on the visible
# lines of this route or the next.
249 plugin.map 'urls :channel :limit', :defaults => {:limit => 4},
250 :requirements => {:limit => /^\d+$/},
# Listing without a channel.
252 plugin.map 'urls :limit', :defaults => {:limit => 4},
253 :requirements => {:limit => /^\d+$/},