X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=951d0513ddb22974e97580d1b28465abde852998;hb=56e4713c5c0498838ed77a409e44fbc3251acde2;hp=f9912ebb0b367b8e1e450b80b0e340644080ad44;hpb=ab0c959bc7305d853e4a52b2a0f25a5fc78f4bfb;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index f9912ebb..951d0513 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -1,3 +1,4 @@ +# encoding: UTF-8 #-- vim:sw=2:et #++ # @@ -16,15 +17,6 @@ require 'set' begin require 'htmlentities' rescue LoadError - gems = nil - begin - gems = require 'rubygems' - rescue LoadError - gems = false - end - if gems - retry - else module ::Irc module Utils UNESCAPE_TABLE = { @@ -32,6 +24,7 @@ rescue LoadError 'raquo' => '»', 'quot' => '"', 'apos' => '\'', + 'deg' => '°', 'micro' => 'µ', 'copy' => '©', 'trade' => '™', @@ -41,6 +34,7 @@ rescue LoadError 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', + 'ndash' => '–', 'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', @@ -108,7 +102,6 @@ rescue LoadError } end end - end end begin @@ -121,15 +114,6 @@ begin end end rescue LoadError - gems = nil - begin - gems = require 'rubygems' - rescue LoadError - gems = false - end - if gems - retry - else module ::Irc module Utils # Some regular expressions to manage HTML data @@ -144,35 +128,18 @@ rescue LoadError # Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class # to mark actual text - AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im + AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im # At worst, we can try stuff which is comprised between two
AFTER_PAR2_REGEX = /]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im end end - end end module ::Irc # Miscellaneous useful functions module Utils - @@bot = nil unless defined? @@bot - @@safe_save_dir = nil unless defined?(@@safe_save_dir) - - # The bot instance - def Utils.bot - @@bot - end - - # Set up some Utils routines which depend on the associated bot. - def Utils.bot=(b) - debug "initializing utils" - @@bot = b - @@safe_save_dir = @@bot.path('safe_save') - end - - # Seconds per minute SEC_PER_MIN = 60 # Seconds per hour @@ -216,7 +183,7 @@ module ::Irc when 0 raise "Empty ret array!" when 1 - return ret.to_s + return ret[0].to_s else return [ret[0, ret.length-1].join(", ") , ret[-1]].join(_(" and ")) end @@ -295,52 +262,73 @@ module ::Irc # Execute an external program, returning a String obtained by redirecting # the program's standards errors and output # + # TODO: find a way to expose some common errors (e.g. Errno::NOENT) + # to the caller def Utils.safe_exec(command, *args) - IO.popen("-") { |p| + output = IO.popen("-") { |p| if p - return p.readlines.join("\n") + break p.readlines.join("\n") else begin $stderr.reopen($stdout) exec(command, *args) rescue Exception => e - puts "exec of #{command} led to exception: #{e.pretty_inspect}" - Kernel::exit! 0 + puts "exception #{e.pretty_inspect} trying to run #{command}" + Kernel::exit! 1 end puts "exec of #{command} failed" - Kernel::exit! 0 + Kernel::exit! 1 end } + raise "safe execution of #{command} returned #{$?}" unless $?.success? + return output end - - # Safely (atomically) save to _file_, by passing a tempfile to the block - # and then moving the tempfile to its final location when done. - # - # call-seq: Utils.safe_save(file, &block) - # - def Utils.safe_save(file) - raise 'No safe save directory defined!' if @@safe_save_dir.nil? - basename = File.basename(file) - temp = Tempfile.new(basename,@@safe_save_dir) - temp.binmode - yield temp if block_given? - temp.close - File.rename(temp.path, file) + # Try executing an external program, returning true if the run was successful + # and false otherwise + def Utils.try_exec(command, *args) + IO.popen("-") { |p| + if p.nil? + begin + $stderr.reopen($stdout) + exec(command, *args) + rescue Exception => e + Kernel::exit! 1 + end + Kernel::exit! 1 + else + debug p.readlines + end + } + debug $? + return $?.success? end # Decode HTML entities in the String _str_, using HTMLEntities if the # package was found, or UNESCAPE_TABLE otherwise. # - def Utils.decode_html_entities(str) - if defined? ::HTMLEntities - return HTMLEntities.decode_entities(str) + + if defined? ::HTMLEntities + if ::HTMLEntities.respond_to? :decode_entities + def Utils.decode_html_entities(str) + return HTMLEntities.decode_entities(str) + end else - str.gsub(/(&(.+?);)/) { + @@html_entities = HTMLEntities.new + def Utils.decode_html_entities(str) + return @@html_entities.decode str + end + end + else + def Utils.decode_html_entities(str) + return str.gsub(/(&(.+?);)/) { symbol = $2 # remove the 0-paddng from unicode integers - if symbol =~ /^#(\d+)$/ + case symbol + when /^#x([0-9a-fA-F]+)$/ + symbol = $1.to_i(16).to_s + when /^#(\d+)$/ symbol = $1.to_i.to_s end @@ -478,7 +466,11 @@ module ::Irc # HTML first par grabber without hpricot def Utils.ircify_first_html_par_woh(xml_org, opts={}) - xml = xml_org.gsub(//m, '').gsub(/]*)?>.*?<\/script>/im, "").gsub(/]*)?>.*?<\/style>/im, "") + xml = xml_org.gsub(//m, + "").gsub(/]*)?>.*?<\/script>/im, + "").gsub(/]*)?>.*?<\/style>/im, + "").gsub(/]*)?>.*?<\/select>/im, + "") strip = opts[:strip] strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String) @@ -566,16 +558,16 @@ module ::Irc # information is retrieved, and special title/summary # extraction routines are used if possible. # - def Utils.get_html_info(doc, opts={}) + def Utils.get_html_info(bot, doc, opts={}) case doc when String Utils.get_string_html_info(doc, opts) when Net::HTTPResponse - Utils.get_resp_html_info(doc, opts) + Utils.get_resp_html_info(bot, doc, opts) when URI ret = DataStream.new - @@bot.httputil.get_response(doc) { |resp| - ret.replace Utils.get_resp_html_info(resp, opts) + bot.httputil.get_response(doc) { |resp| + ret.replace Utils.get_resp_html_info(bot, resp, opts) } return ret else @@ -592,7 +584,7 @@ module ::Irc # Currently, the only accepted options (in _opts_) are # uri_fragment:: the URI fragment of the original request # full_body:: get the whole body instead of - # @@bot.config['http.info_bytes'] bytes only + # bot.config['http.info_bytes'] bytes only # # Returns a DataStream with the following keys: # text:: the (partial) body @@ -603,7 +595,7 @@ module ::Irc # a Hash whose keys are lowercase forms of the HTTP # header fields, and whose values are Arrays. # - def Utils.get_resp_html_info(resp, opts={}) + def Utils.get_resp_html_info(bot, resp, opts={}) case resp when Net::HTTPSuccess loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil @@ -612,9 +604,9 @@ module ::Irc end ret = DataStream.new(opts.dup) ret[:headers] = resp.to_hash - ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes']) + ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(bot.config['http.info_bytes']) - filtered = Utils.try_htmlinfo_filters(ret) + filtered = Utils.try_htmlinfo_filters(bot, ret) if filtered return filtered @@ -635,14 +627,14 @@ module ::Irc # The input DataStream should have the downloaded HTML as primary key # (:text) and possibly a :headers key holding the resonse headers. # - def Utils.try_htmlinfo_filters(ds) - filters = @@bot.filter_names(:htmlinfo) + def Utils.try_htmlinfo_filters(bot, ds) + filters = bot.filter_names(:htmlinfo) return nil if filters.empty? cur = nil # TODO filter priority filters.each { |n| debug "testing htmlinfo filter #{n}" - cur = @@bot.filter(@@bot.global_filter_name(n, :htmlinfo), ds) + cur = bot.filter(bot.global_filter_name(n, :htmlinfo), ds) debug "returned #{cur.pretty_inspect}" break if cur } @@ -697,7 +689,7 @@ module ::Irc # If (optional) _opts_ :message is specified, those paragraphs are # echoed as replies to the IRC message passed as _opts_ :message # - def Utils.get_first_pars(urls, count, opts={}) + def Utils.get_first_pars(bot, urls, count, opts={}) idx = 0 msg = opts[:message] retval = Array.new @@ -706,7 +698,7 @@ module ::Irc idx += 1 begin - info = Utils.get_html_info(URI.parse(url), opts) + info = Utils.get_html_info(bot, URI.parse(url), opts) par = info[:content] retval.push(par) @@ -739,5 +731,3 @@ module ::Irc end end - -Irc::Utils.bot = Irc::Bot::Plugins.manager.bot