# Global flag recording whether the optional 'htmlentities' library is usable.
# It is first set to the return value of the require call; the assignment to
# false is presumably the fallback taken when the require fails (the
# surrounding begin/rescue is not visible in this excerpt -- TODO confirm).
# NOTE(review): Kernel#require returns false when the feature has already been
# loaded, so this flag can end up false even though the library is available.
6 $we_have_html_entities_decoder = require 'htmlentities'
8 $we_have_html_entities_decoder = false
31 # extra codes, for future use...
45 'otimes' => '⊗',
54 'Epsilon' => 'Ε',
58 'Upsilon' => 'Υ',
60 'there4' => '∴',
65 'rsaquo' => '›',
87 'lfloor' => '⌊',
102 'Scaron' => 'Š',
108 'sbquo' => '‚',
121 'infin' => '∞',
126 'thinsp' => ' ',
128 'bdquo' => '„',
135 'mdash' => '—',
137 'permil' => '‰',
142 'forall' => '∀',
144 'rceil' => '⌉',
147 'lambda' => 'λ',
151 'dagger' => '†',
154 'image' => 'ℑ',
155 'alefsym' => 'ℵ',
161 'frasl' => '⁄',
163 'lowast' => '∗',
174 'oline' => '‾',
181 'empty' => '∅',
188 'weierp' => '℘',
193 'omicron' => 'ο',
194 'upsilon' => 'υ',
196 'Lambda' => 'Λ',
203 'scaron' => 'š',
204 'lsquo' => '‘',
212 'hellip' => '…',
216 'rfloor' => '⌋',
218 'crarr' => '↵',
220 'notin' => '∉',
221 'exist' => '∃',
224 'Dagger' => '‡',
225 'oplus' => '⊕',
231 'lsaquo' => '‹',
233 'Omicron' => 'Ο',
248 'sigmaf' => 'ς',
250 'minus' => '−',
253 'epsilon' => 'ε',
264 'spades' => '♠',
265 'rsquo' => '’',
269 'thetasym' => 'ϑ',
273 'ldquo' => '“',
274 'hearts' => '♥',
286 # miscellaneous useful functions
# Duration constants, expressed in seconds, each derived from the previous one
# (SEC_PER_MIN is defined on a line not shown in this excerpt).
# These are approximations: a month is taken as 30 days and a year as
# 12 such months, i.e. 360 days.
289 SEC_PER_HR = SEC_PER_MIN * 60
290 SEC_PER_DAY = SEC_PER_HR * 24
291 SEC_PER_MNTH = SEC_PER_DAY * 30
292 SEC_PER_YR = SEC_PER_MNTH * 12
# Helper for Utils.secs_to_string: appends one "<count> <unit>" fragment to
# _array_. One branch appends "1" followed by the singular _string_, the other
# appends _var_ followed by _plural_.
# The condition choosing between the two branches is on lines not shown in
# this excerpt (presumably var == 1 -- TODO confirm).
294 def Utils.secs_to_string_case(array, var, string, plural)
297 array << "1 #{string}"
299 array << "#{var} #{plural}"
303 # Turn a number of seconds into a human-readable string, e.g.
304 # 2 days, 3 hours, 18 minutes, 10 seconds
#
# The value is split with successive divmod calls from the largest unit
# (years) down to minutes; each non-zero unit is appended to ret through
# secs_to_string_case. The remaining seconds are appended when they are
# positive, or when nothing else was appended, so that the list is not empty.
# Months and years follow SEC_PER_MNTH / SEC_PER_YR (30-day months, 12 of
# them per year).
# ret is initialised on a line not shown in this excerpt, and the branching
# that selects between the raise and the final return is not visible either.
# The visible return joins all entries but the last with ", " and attaches
# the last one with " and ".
305 def Utils.secs_to_string(secs)
307 years, secs = secs.divmod SEC_PER_YR
308 secs_to_string_case(ret, years, "year", "years") if years > 0
309 months, secs = secs.divmod SEC_PER_MNTH
310 secs_to_string_case(ret, months, "month", "months") if months > 0
311 days, secs = secs.divmod SEC_PER_DAY
312 secs_to_string_case(ret, days, "day", "days") if days > 0
313 hours, secs = secs.divmod SEC_PER_HR
314 secs_to_string_case(ret, hours, "hour", "hours") if hours > 0
315 mins, secs = secs.divmod SEC_PER_MIN
316 secs_to_string_case(ret, mins, "minute", "minutes") if mins > 0
317 secs_to_string_case(ret, secs, "second", "seconds") if secs > 0 or ret.empty?
320 raise "Empty ret array!"
324 return [ret[0, ret.length-1].join(", ") , ret[-1]].join(" and ")
# Execute _command_ with _args_ and return what was read from p as a single
# string. p is created on a line not shown in this excerpt (presumably a pipe
# to the child process -- TODO confirm).
# On an exception or a failed exec a diagnostic is printed with puts.
# NOTE(review): if p is an IO, readlines keeps each line's trailing newline,
# so joining with "\n" would double the line breaks -- confirm intent.
# NOTE(review): rescuing Exception also catches SystemExit and Interrupt;
# check whether StandardError would be sufficient here.
329 def Utils.safe_exec(command, *args)
332 return p.readlines.join("\n")
337 rescue Exception => e
338 puts "exec of #{command} led to exception: #{e.inspect}"
341 puts "exec of #{command} failed"
# Directory in which Utils.safe_save creates its temporary files; nil until
# Utils.set_safe_save_dir has been called.
348 @@safe_save_dir = nil
# Set the safe-save directory to a copy of _str_ (dup), so that later
# mutation of the caller's string does not affect the stored value.
349 def Utils.set_safe_save_dir(str)
350 @@safe_save_dir = str.dup
# Save _file_ by way of a temporary file: a Tempfile named after the basename
# of _file_ is created in the safe-save directory, yielded to the caller's
# block (if one is given) so the content can be written, and finally renamed
# to _file_.
# Raises a RuntimeError if no safe-save directory has been set.
# Some lines of this method are not shown in this excerpt (presumably
# including the step that closes the temp file before the rename -- TODO
# confirm).
# NOTE(review): File.rename generally cannot move a file across filesystems,
# so the safe-save directory presumably has to live on the same filesystem
# as _file_ -- confirm.
353 def Utils.safe_save(file)
354 raise 'No safe save directory defined!' if @@safe_save_dir.nil?
355 basename = File.basename(file)
356 temp = Tempfile.new(basename,@@safe_save_dir)
358 yield temp if block_given?
360 File.rename(temp.path, file)
364 # returns a string containing the result of an HTTP GET on the uri
#
# _uristr_ is parsed with URI.parse. _readtimeout_ and _opentimeout_
# (defaults 8 and 4) are assigned to the connection's read_timeout and
# open_timeout (seconds, per Net::HTTP).
# If the http_proxy environment variable is set, its host and port are
# passed to Net::HTTP.new as the proxy.
# The request path is built in query, with "?<uri.query>" appended to it; the
# lines that initialise query and guard the append are not shown in this
# excerpt.
# A response code of "200" is treated as the success case; what is returned
# in each case is on lines not shown here. Exceptions are reported through
# error together with the offending URI.
365 def Utils.http_get(uristr, readtimeout=8, opentimeout=4)
367 # ruby 1.7 or better needed for this (or 1.6 and debian unstable)
368 Net::HTTP.version_1_2
369 # (so we support the 1_1 api anyway, avoids problems)
371 uri = URI.parse uristr
374 query += "?#{uri.query}"
379 if(ENV['http_proxy'] && proxy_uri = URI.parse(ENV['http_proxy']))
380 proxy_host = proxy_uri.host
381 proxy_port = proxy_uri.port
385 http = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port)
386 http.open_timeout = opentimeout
387 http.read_timeout = readtimeout
390 resp = http.get(query)
391 if resp.code == "200"
397 error "Utils.http_get exception: #{e.inspect}, while trying to get #{uristr}"
# Replace the HTML entities in _str_ with the characters they stand for.
# When $we_have_html_entities_decoder is set, decoding is delegated to
# HTMLEntities.decode_entities. Otherwise every "&...;" sequence is handled
# in the gsub block: the entity name is looked up in UNESCAPE_TABLE and
# unknown entities are replaced with '*'.
# The lines that assign symbol from the match, and the inner match that sets
# $1 for the numeric case, are not shown in this excerpt.
402 def Utils.decode_html_entities(str)
403 if $we_have_html_entities_decoder
404 return HTMLEntities.decode_entities(str)
406 str.gsub(/(&(.+?);)/) {
408 # remove the 0-padding from unicode integers
410 symbol = "##{$1.to_i.to_s}"
413 # output the symbol's irc-translated character, or a * if it's unknown
414 UNESCAPE_TABLE[symbol] || '*'
# Matches an <h1> element (optionally with attributes) and captures its
# content in group 1. Case-insensitive; the m flag lets . match newlines,
# so the heading may span several lines.
419 H1_REGEX = /<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im
# Matches a whole <p>...</p> element (optionally with attributes),
# non-greedily, with the same i and m flags.
420 PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im
421 # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
422 # If possible, grab the one after the first h1 heading
#
# _xml_ is first searched for a heading with H1_REGEX. Paragraph candidates
# are then extracted with PAR_REGEX and converted with ircify_html; the
# search stops when no further candidate is found.
# The loop constructs, the initialisation of txt, and the reassignments of
# header_found are on lines not shown in this excerpt. header_found must
# hold a String by the time it is indexed with PAR_REGEX (String#[] with a
# Regexp returns the first match or nil) -- TODO confirm against the full
# source.
423 def Utils.ircify_first_html_par(xml)
424 header_found = xml.match(H1_REGEX)
427 debug "Found header: #{header_found[1].inspect}"
430 candidate = header_found[PAR_REGEX]
431 break unless candidate
432 txt = candidate.ircify_html
435 # If we haven't found a first par yet, try to get it from the whole
440 candidate = header_found[PAR_REGEX]
441 break unless candidate
442 txt = candidate.ircify_html
449 # Get the first pars of the first _count_ _urls_.
450 # The pages are downloaded using an HttpUtil service passed as _opts_ :http_util,
451 # and echoed as replies to the IRC message passed as _opts_ :message.
#
# The loop runs while _count_ is positive and _urls_ is non-empty. Each page
# is fetched with opts[:http_util].get_cached and reduced to its first
# paragraph with Utils.ircify_first_html_par. When msg is set, the result is
# sent with msg.reply, prefixed by "[idx]" and truncated if overlong.
# How url, idx and msg are obtained, and how _count_ and _urls_ are updated,
# is on lines not shown in this excerpt.
453 def Utils.get_first_pars(urls, count, opts={})
456 while count > 0 and urls.length > 0
460 # FIXME what happens if some big file is returned? We should share
461 # code with the url plugin to only retrieve partial file content!
462 xml = opts[:http_util].get_cached(url)
464 debug "Unable to retrieve #{url}"
467 debug "Retrieved #{url}"
469 par = Utils.ircify_first_html_par(xml)
471 debug "No first par found\n#{xml}"
472 # FIXME only do this if the 'url' plugin is loaded
473 # TODO even better, put the code here
474 # par = @bot.plugins['url'].get_title_from_html(xml)
477 msg.reply "[#{idx}] #{par}", :overlong => :truncate if msg