X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=7b316ffe28cd3df6ffc6ecd0d1707063142dd1bb;hb=bf9734ff89a238c5a63015b68eabd8d0ef9d1308;hp=c47601c1baf89386d0801fb5d5458670866e7e24;hpb=00f2fa9d625b19d6c8b33c62ea41460924e43634;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index c47601c1..7b316ffe 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -23,6 +23,7 @@ rescue LoadError 'raquo' => '»', 'quot' => '"', 'apos' => '\'', + 'deg' => '°', 'micro' => 'µ', 'copy' => '©', 'trade' => '™', @@ -32,6 +33,7 @@ rescue LoadError 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', + 'ndash' => '–', 'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', @@ -125,7 +127,7 @@ rescue LoadError # Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class # to mark actual text - AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im + AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im # At worst, we can try stuff which is comprised between two
AFTER_PAR2_REGEX = /]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im @@ -275,24 +277,47 @@ module ::Irc # Execute an external program, returning a String obtained by redirecting # the program's standards errors and output # + # TODO: find a way to expose some common errors (e.g. Errno::NOENT) + # to the caller def Utils.safe_exec(command, *args) - IO.popen("-") { |p| + output = IO.popen("-") { |p| if p - return p.readlines.join("\n") + break p.readlines.join("\n") else begin $stderr.reopen($stdout) exec(command, *args) rescue Exception => e - puts "exec of #{command} led to exception: #{e.pretty_inspect}" - Kernel::exit! 0 + puts "exception #{e.pretty_inspect} trying to run #{command}" + Kernel::exit! 1 end puts "exec of #{command} failed" - Kernel::exit! 0 + Kernel::exit! 1 end } + raise "safe execution of #{command} returned #{$?}" unless $?.success? + return output end + # Try executing an external program, returning true if the run was successful + # and false otherwise + def Utils.try_exec(command, *args) + IO.popen("-") { |p| + if p.nil? + begin + $stderr.reopen($stdout) + exec(command, *args) + rescue Exception => e + Kernel::exit! 1 + end + Kernel::exit! 1 + else + debug p.readlines + end + } + debug $? + return $?.success? + end # Safely (atomically) save to _file_, by passing a tempfile to the block # and then moving the tempfile to its final location when done. @@ -313,14 +338,27 @@ module ::Irc # Decode HTML entities in the String _str_, using HTMLEntities if the # package was found, or UNESCAPE_TABLE otherwise. # - def Utils.decode_html_entities(str) - if defined? ::HTMLEntities - return HTMLEntities.decode_entities(str) + + if defined? ::HTMLEntities + if ::HTMLEntities.respond_to? :decode_entities + def Utils.decode_html_entities(str) + return HTMLEntities.decode_entities(str) + end else - str.gsub(/(&(.+?);)/) { + @@html_entities = HTMLEntities.new + def Utils.decode_html_entities(str) + return @@html_entities.decode str + end + end + else + def Utils.decode_html_entities(str) + return str.gsub(/(&(.+?);)/) { symbol = $2 # remove the 0-paddng from unicode integers - if symbol =~ /^#(\d+)$/ + case symbol + when /^#x([0-9a-fA-F]+)$/ + symbol = $1.to_i(16).to_s + when /^#(\d+)$/ symbol = $1.to_i.to_s end @@ -458,7 +496,11 @@ module ::Irc # HTML first par grabber without hpricot def Utils.ircify_first_html_par_woh(xml_org, opts={}) - xml = xml_org.gsub(//m, '').gsub(/]*)?>.*?<\/script>/im, "").gsub(/]*)?>.*?<\/style>/im, "") + xml = xml_org.gsub(//m, + "").gsub(/]*)?>.*?<\/script>/im, + "").gsub(/]*)?>.*?<\/style>/im, + "").gsub(/]*)?>.*?<\/select>/im, + "") strip = opts[:strip] strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)