X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=951d0513ddb22974e97580d1b28465abde852998;hb=56e4713c5c0498838ed77a409e44fbc3251acde2;hp=ce5cdea4917cabafa55f88a5b8896accf1054898;hpb=9996da20c88d45c34b8f1267b23b83ae1e1bbea3;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index ce5cdea4..951d0513 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -1,3 +1,4 @@ +# encoding: UTF-8 #-- vim:sw=2:et #++ # @@ -6,9 +7,6 @@ # Author:: Tom Gilbert # Author:: Giuseppe "Oblomov" Bilotta # -# Copyright:: (C) 2002-2006 Tom Gilbert -# Copyright:: (C) 2007 Giuseppe Bilotta -# # TODO some of these Utils should be rewritten as extensions to the approriate # standard Ruby classes and accordingly be moved to extends.rb @@ -19,15 +17,6 @@ require 'set' begin require 'htmlentities' rescue LoadError - gems = nil - begin - gems = require 'rubygems' - rescue LoadError - gems = false - end - if gems - retry - else module ::Irc module Utils UNESCAPE_TABLE = { @@ -35,6 +24,7 @@ rescue LoadError 'raquo' => '»', 'quot' => '"', 'apos' => '\'', + 'deg' => '°', 'micro' => 'µ', 'copy' => '©', 'trade' => '™', @@ -44,6 +34,7 @@ rescue LoadError 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', + 'ndash' => '–', 'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', @@ -111,7 +102,6 @@ rescue LoadError } end end - end end begin @@ -124,15 +114,6 @@ begin end end rescue LoadError - gems = nil - begin - gems = require 'rubygems' - rescue LoadError - gems = false - end - if gems - retry - else module ::Irc module Utils # Some regular expressions to manage HTML data @@ -147,45 +128,30 @@ rescue LoadError # Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class # to mark actual text - AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im + AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im # At worst, we can try stuff which is comprised between two
AFTER_PAR2_REGEX = /]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im end end - end end module ::Irc # Miscellaneous useful functions module Utils - @@bot = nil unless defined? @@bot - @@safe_save_dir = nil unless defined?(@@safe_save_dir) - - # The bot instance - def Utils.bot - @@bot - end - - # Set up some Utils routines which depend on the associated bot. - def Utils.bot=(b) - debug "initializing utils" - @@bot = b - @@safe_save_dir = "#{@@bot.botclass}/safe_save" - end - - # Seconds per minute SEC_PER_MIN = 60 # Seconds per hour SEC_PER_HR = SEC_PER_MIN * 60 # Seconds per day SEC_PER_DAY = SEC_PER_HR * 24 + # Seconds per week + SEC_PER_WK = SEC_PER_DAY * 7 # Seconds per (30-day) month SEC_PER_MNTH = SEC_PER_DAY * 30 - # Second per (30*12 = 360 day) year - SEC_PER_YR = SEC_PER_MNTH * 12 + # Second per (non-leap) year + SEC_PER_YR = SEC_PER_DAY * 365 # Auxiliary method needed by Utils.secs_to_string def Utils.secs_to_string_case(array, var, string, plural) @@ -217,7 +183,7 @@ module ::Irc when 0 raise "Empty ret array!" when 1 - return ret.to_s + return ret[0].to_s else return [ret[0, ret.length-1].join(", ") , ret[-1]].join(_(" and ")) end @@ -239,56 +205,130 @@ module ::Irc end end + # Returns human readable time. + # Like: 5 days ago + # about one hour ago + # options + # :start_date, sets the time to measure against, defaults to now + # :date_format, used with to_formatted_s, default to :default + def Utils.timeago(time, options = {}) + start_date = options.delete(:start_date) || Time.new + date_format = options.delete(:date_format) || "%x" + delta = (start_date - time).round + if delta.abs < 2 + _("right now") + else + distance = Utils.age_string(delta) + if delta < 0 + _("%{d} from now") % {:d => distance} + else + _("%{d} ago") % {:d => distance} + end + end + end + + # Converts age in seconds to "nn units". Inspired by previous attempts + # but also gitweb's age_string() sub + def Utils.age_string(secs) + case + when secs < 0 + Utils.age_string(-secs) + when secs > 2*SEC_PER_YR + _("%{m} years") % { :m => secs/SEC_PER_YR } + when secs > 2*SEC_PER_MNTH + _("%{m} months") % { :m => secs/SEC_PER_MNTH } + when secs > 2*SEC_PER_WK + _("%{m} weeks") % { :m => secs/SEC_PER_WK } + when secs > 2*SEC_PER_DAY + _("%{m} days") % { :m => secs/SEC_PER_DAY } + when secs > 2*SEC_PER_HR + _("%{m} hours") % { :m => secs/SEC_PER_HR } + when (20*SEC_PER_MIN..40*SEC_PER_MIN).include?(secs) + _("half an hour") + when (50*SEC_PER_MIN..70*SEC_PER_MIN).include?(secs) + # _("about one hour") + _("an hour") + when (80*SEC_PER_MIN..100*SEC_PER_MIN).include?(secs) + _("an hour and a half") + when secs > 2*SEC_PER_MIN + _("%{m} minutes") % { :m => secs/SEC_PER_MIN } + when secs > 1 + _("%{m} seconds") % { :m => secs } + else + _("one second") + end + end # Execute an external program, returning a String obtained by redirecting - # the program's standards errors and output + # the program's standards errors and output # + # TODO: find a way to expose some common errors (e.g. Errno::NOENT) + # to the caller def Utils.safe_exec(command, *args) - IO.popen("-") { |p| + output = IO.popen("-") { |p| if p - return p.readlines.join("\n") + break p.readlines.join("\n") else begin $stderr.reopen($stdout) exec(command, *args) rescue Exception => e - puts "exec of #{command} led to exception: #{e.pretty_inspect}" - Kernel::exit! 0 + puts "exception #{e.pretty_inspect} trying to run #{command}" + Kernel::exit! 1 end puts "exec of #{command} failed" - Kernel::exit! 0 + Kernel::exit! 1 end } + raise "safe execution of #{command} returned #{$?}" unless $?.success? + return output end - - # Safely (atomically) save to _file_, by passing a tempfile to the block - # and then moving the tempfile to its final location when done. - # - # call-seq: Utils.safe_save(file, &block) - # - def Utils.safe_save(file) - raise 'No safe save directory defined!' if @@safe_save_dir.nil? - basename = File.basename(file) - temp = Tempfile.new(basename,@@safe_save_dir) - temp.binmode - yield temp if block_given? - temp.close - File.rename(temp.path, file) + # Try executing an external program, returning true if the run was successful + # and false otherwise + def Utils.try_exec(command, *args) + IO.popen("-") { |p| + if p.nil? + begin + $stderr.reopen($stdout) + exec(command, *args) + rescue Exception => e + Kernel::exit! 1 + end + Kernel::exit! 1 + else + debug p.readlines + end + } + debug $? + return $?.success? end # Decode HTML entities in the String _str_, using HTMLEntities if the # package was found, or UNESCAPE_TABLE otherwise. # - def Utils.decode_html_entities(str) - if defined? ::HTMLEntities - return HTMLEntities.decode_entities(str) + + if defined? ::HTMLEntities + if ::HTMLEntities.respond_to? :decode_entities + def Utils.decode_html_entities(str) + return HTMLEntities.decode_entities(str) + end else - str.gsub(/(&(.+?);)/) { + @@html_entities = HTMLEntities.new + def Utils.decode_html_entities(str) + return @@html_entities.decode str + end + end + else + def Utils.decode_html_entities(str) + return str.gsub(/(&(.+?);)/) { symbol = $2 # remove the 0-paddng from unicode integers - if symbol =~ /^#(\d+)$/ + case symbol + when /^#x([0-9a-fA-F]+)$/ + symbol = $1.to_i(16).to_s + when /^#(\d+)$/ symbol = $1.to_i.to_s end @@ -426,7 +466,11 @@ module ::Irc # HTML first par grabber without hpricot def Utils.ircify_first_html_par_woh(xml_org, opts={}) - xml = xml_org.gsub(//m, '').gsub(/]*)?>.*?<\/script>/im, "").gsub(/]*)?>.*?<\/style>/im, "") + xml = xml_org.gsub(//m, + "").gsub(/]*)?>.*?<\/script>/im, + "").gsub(/]*)?>.*?<\/style>/im, + "").gsub(/]*)?>.*?<\/select>/im, + "") strip = opts[:strip] strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String) @@ -514,16 +558,16 @@ module ::Irc # information is retrieved, and special title/summary # extraction routines are used if possible. # - def Utils.get_html_info(doc, opts={}) + def Utils.get_html_info(bot, doc, opts={}) case doc when String Utils.get_string_html_info(doc, opts) when Net::HTTPResponse - Utils.get_resp_html_info(doc, opts) + Utils.get_resp_html_info(bot, doc, opts) when URI ret = DataStream.new - @@bot.httputil.get_response(doc) { |resp| - ret.replace Utils.get_resp_html_info(resp, opts) + bot.httputil.get_response(doc) { |resp| + ret.replace Utils.get_resp_html_info(bot, resp, opts) } return ret else @@ -540,7 +584,7 @@ module ::Irc # Currently, the only accepted options (in _opts_) are # uri_fragment:: the URI fragment of the original request # full_body:: get the whole body instead of - # @@bot.config['http.info_bytes'] bytes only + # bot.config['http.info_bytes'] bytes only # # Returns a DataStream with the following keys: # text:: the (partial) body @@ -551,7 +595,7 @@ module ::Irc # a Hash whose keys are lowercase forms of the HTTP # header fields, and whose values are Arrays. # - def Utils.get_resp_html_info(resp, opts={}) + def Utils.get_resp_html_info(bot, resp, opts={}) case resp when Net::HTTPSuccess loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil @@ -560,9 +604,9 @@ module ::Irc end ret = DataStream.new(opts.dup) ret[:headers] = resp.to_hash - ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes']) + ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(bot.config['http.info_bytes']) - filtered = Utils.try_htmlinfo_filters(ret) + filtered = Utils.try_htmlinfo_filters(bot, ret) if filtered return filtered @@ -580,17 +624,17 @@ module ::Irc # returns non-nil, its results are merged in _ds_ and returned. Otherwise # nil is returned. # - # The input DataStream shuold have the downloaded HTML as primary key + # The input DataStream should have the downloaded HTML as primary key # (:text) and possibly a :headers key holding the resonse headers. # - def Utils.try_htmlinfo_filters(ds) - filters = @@bot.filter_names(:htmlinfo) + def Utils.try_htmlinfo_filters(bot, ds) + filters = bot.filter_names(:htmlinfo) return nil if filters.empty? cur = nil # TODO filter priority filters.each { |n| - debug "testing filter #{n}" - cur = @@bot.filter(@@bot.global_filter_name(n, :htmlinfo), ds) + debug "testing htmlinfo filter #{n}" + cur = bot.filter(bot.global_filter_name(n, :htmlinfo), ds) debug "returned #{cur.pretty_inspect}" break if cur } @@ -623,13 +667,14 @@ module ::Irc title = txt.ircify_html_title debug opts if frag = opts[:uri_fragment] and not frag.empty? - fragreg = /]*name=["']?#{frag}["']?[^>]*>/im + fragreg = /]+\s+)?(?:name|id)=["']?#{frag}["']?[^>]*>/im debug fragreg debug txt if txt.match(fragreg) # grab the post-match txt = $' end + debug txt end c_opts = opts.dup c_opts[:strip] ||= title @@ -644,7 +689,7 @@ module ::Irc # If (optional) _opts_ :message is specified, those paragraphs are # echoed as replies to the IRC message passed as _opts_ :message # - def Utils.get_first_pars(urls, count, opts={}) + def Utils.get_first_pars(bot, urls, count, opts={}) idx = 0 msg = opts[:message] retval = Array.new @@ -653,7 +698,7 @@ module ::Irc idx += 1 begin - info = Utils.get_html_info(URI.parse(url), opts) + info = Utils.get_html_info(bot, URI.parse(url), opts) par = info[:content] retval.push(par) @@ -670,7 +715,19 @@ module ::Irc return retval end + # Returns a comma separated list except for the last element + # which is joined in with specified conjunction + # + def Utils.comma_list(words, options={}) + defaults = { :join_with => ", ", :join_last_with => _(" and ") } + opts = defaults.merge(options) + + if words.size < 2 + words.last + else + [words[0..-2].join(opts[:join_with]), words.last].join(opts[:join_last_with]) + end + end + end end - -Irc::Utils.bot = Irc::Bot::Plugins.manager.bot