4 # :title: rbot utilities provider
6 # Author:: Tom Gilbert <tom@linuxbrit.co.uk>
7 # Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
9 # Copyright:: (C) 2002-2006 Tom Gilbert
10 # Copyright:: (C) 2007 Giuseppe Bilotta
12 # TODO some of these Utils should be rewritten as extensions to the appropriate
13 # standard Ruby classes and accordingly be moved to extends.rb
20 $we_have_html_entities_decoder = require 'htmlentities'
22 $we_have_html_entities_decoder = false
45 # extras codes, for future use...
59 'otimes' => '⊗',
68 'Epsilon' => 'Ε',
72 'Upsilon' => 'Υ',
74 'there4' => '∴',
79 'rsaquo' => '›',
101 'lfloor' => '⌊',
108 'clubs' => '♣',
109 'diams' => '♦',
116 'Scaron' => 'Š',
122 'sbquo' => '‚',
135 'infin' => '∞',
140 'thinsp' => ' ',
142 'bdquo' => '„',
149 'mdash' => '—',
151 'permil' => '‰',
156 'forall' => '∀',
158 'rceil' => '⌉',
161 'lambda' => 'λ',
165 'dagger' => '†',
168 'image' => 'ℑ',
169 'alefsym' => 'ℵ',
175 'frasl' => '⁄',
177 'lowast' => '∗',
188 'oline' => '‾',
195 'empty' => '∅',
202 'weierp' => '℘',
207 'omicron' => 'ο',
208 'upsilon' => 'υ',
210 'Lambda' => 'Λ',
217 'scaron' => 'š',
218 'lsquo' => '‘',
226 'hellip' => '…',
230 'rfloor' => '⌋',
232 'crarr' => '↵',
234 'notin' => '∉',
235 'exist' => '∃',
238 'Dagger' => '‡',
239 'oplus' => '⊕',
245 'lsaquo' => '‹',
247 'Omicron' => 'Ο',
262 'sigmaf' => 'ς',
264 'minus' => '−',
267 'epsilon' => 'ε',
278 'spades' => '♠',
279 'rsquo' => '’',
283 'thetasym' => 'ϑ',
287 'ldquo' => '“',
288 'hearts' => '♥',
300 # miscellaneous useful functions
303 SEC_PER_HR = SEC_PER_MIN * 60
304 SEC_PER_DAY = SEC_PER_HR * 24
305 SEC_PER_MNTH = SEC_PER_DAY * 30
306 SEC_PER_YR = SEC_PER_MNTH * 12
308 def Utils.secs_to_string_case(array, var, string, plural)
311 array << "1 #{string}"
313 array << "#{var} #{plural}"
317 # turn a number of seconds into a human readable string, e.g.
318 # 2 days, 3 hours, 18 minutes, 10 seconds
319 def Utils.secs_to_string(secs)
321 years, secs = secs.divmod SEC_PER_YR
322 secs_to_string_case(ret, years, "year", "years") if years > 0
323 months, secs = secs.divmod SEC_PER_MNTH
324 secs_to_string_case(ret, months, "month", "months") if months > 0
325 days, secs = secs.divmod SEC_PER_DAY
326 secs_to_string_case(ret, days, "day", "days") if days > 0
327 hours, secs = secs.divmod SEC_PER_HR
328 secs_to_string_case(ret, hours, "hour", "hours") if hours > 0
329 mins, secs = secs.divmod SEC_PER_MIN
330 secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0
332 secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty?
335 raise "Empty ret array!"
339 return [ret[0, ret.length-1].join(", ") , ret[-1]].join(" and ")
344 def Utils.safe_exec(command, *args)
347 return p.readlines.join("\n")
352 rescue Exception => e
353 puts "exec of #{command} led to exception: #{e.inspect}"
356 puts "exec of #{command} failed"
363 @@safe_save_dir = nil unless defined?(@@safe_save_dir)
364 def Utils.set_safe_save_dir(str)
365 @@safe_save_dir = str.dup
368 def Utils.safe_save(file)
369 raise 'No safe save directory defined!' if @@safe_save_dir.nil?
370 basename = File.basename(file)
371 temp = Tempfile.new(basename,@@safe_save_dir)
373 yield temp if block_given?
375 File.rename(temp.path, file)
379 # returns a string containing the result of an HTTP GET on the uri
380 def Utils.http_get(uristr, readtimeout=8, opentimeout=4)
382 # ruby 1.7 or better needed for this (or 1.6 and debian unstable)
383 Net::HTTP.version_1_2
384 # (so we support the 1_1 api anyway, avoids problems)
386 uri = URI.parse uristr
389 query += "?#{uri.query}"
394 if(ENV['http_proxy'] && proxy_uri = URI.parse(ENV['http_proxy']))
395 proxy_host = proxy_uri.host
396 proxy_port = proxy_uri.port
400 http = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port)
401 http.open_timeout = opentimeout
402 http.read_timeout = readtimeout
405 resp = http.get(query)
406 if resp.code == "200"
412 error "Utils.http_get exception: #{e.inspect}, while trying to get #{uristr}"
417 def Utils.decode_html_entities(str)
418 if $we_have_html_entities_decoder
419 return HTMLEntities.decode_entities(str)
421 str.gsub(/(&(.+?);)/) {
423 # remove the 0-padding from unicode integers
425 symbol = "##{$1.to_i.to_s}"
428 # output the symbol's irc-translated character, or a * if it's unknown
429 UNESCAPE_TABLE[symbol] || '*'
434 H1_REGEX = /<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im
435 PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im
436 # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
437 # If possible, grab the one after the first h1 heading
439 # It is possible to pass some options to determine how the stripping
440 # occurs. Currently, only one option is supported:
441 # * :strip => Regex or String to strip at the beginning of the obtained
444 def Utils.ircify_first_html_par(xml, opts={})
447 strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)
449 header_found = xml.match(H1_REGEX)
452 debug "Found header: #{header_found[1].inspect}"
454 candidate = header_found[PAR_REGEX]
455 break unless candidate
456 txt = candidate.ircify_html
458 txt.sub!(strip, '') if strip
462 # If we haven't found a first par yet, try to get it from the whole
467 candidate = header_found[PAR_REGEX]
468 break unless candidate
469 txt = candidate.ircify_html
471 txt.sub!(strip, '') if strip
477 # Get the first pars of the first _count_ _urls_.
478 # The pages are downloaded using an HttpUtil service passed as _opts_ :http_util,
479 # and echoed as replies to the IRC message passed as _opts_ :message.
481 def Utils.get_first_pars(urls, count, opts={})
484 while count > 0 and urls.length > 0
488 # FIXME what happens if some big file is returned? We should share
489 # code with the url plugin to only retrieve partial file content!
490 xml = opts[:http_util].get_cached(url)
492 debug "Unable to retrieve #{url}"
495 par = Utils.ircify_first_html_par(xml, opts)
497 debug "No first par found\n#{xml}"
498 # FIXME only do this if the 'url' plugin is loaded
499 # TODO even better, put the code here
500 # par = @bot.plugins['url'].get_title_from_html(xml)
503 msg.reply "[#{idx}] #{par}", :overlong => :truncate if msg