X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=7b316ffe28cd3df6ffc6ecd0d1707063142dd1bb;hb=bf9734ff89a238c5a63015b68eabd8d0ef9d1308;hp=c47601c1baf89386d0801fb5d5458670866e7e24;hpb=00f2fa9d625b19d6c8b33c62ea41460924e43634;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index c47601c1..7b316ffe 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -23,6 +23,7 @@ rescue LoadError
'raquo' => '»',
'quot' => '"',
'apos' => '\'',
+ 'deg' => '°',
'micro' => 'µ',
'copy' => '©',
'trade' => '™',
@@ -32,6 +33,7 @@ rescue LoadError
'gt' => '>',
'hellip' => '…',
'nbsp' => ' ',
+ 'ndash' => '–',
'Agrave' => 'À',
'Aacute' => 'Á',
'Acirc' => 'Â',
@@ -125,7 +127,7 @@ rescue LoadError
# Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class
# to mark actual text
- AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+ AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
# At worst, we can try stuff which is comprised between two <br>
AFTER_PAR2_REGEX = /<br(?:\s+[^>]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im
@@ -275,24 +277,47 @@ module ::Irc
# Execute an external program, returning a String obtained by redirecting
# the program's standards errors and output
#
+ # TODO: find a way to expose some common errors (e.g. Errno::ENOENT)
+ # to the caller
def Utils.safe_exec(command, *args)
- IO.popen("-") { |p|
+ output = IO.popen("-") { |p|
if p
- return p.readlines.join("\n")
+ break p.readlines.join("\n")
else
begin
$stderr.reopen($stdout)
exec(command, *args)
rescue Exception => e
- puts "exec of #{command} led to exception: #{e.pretty_inspect}"
- Kernel::exit! 0
+ puts "exception #{e.pretty_inspect} trying to run #{command}"
+ Kernel::exit! 1
end
puts "exec of #{command} failed"
- Kernel::exit! 0
+ Kernel::exit! 1
end
}
+ raise "safe execution of #{command} returned #{$?}" unless $?.success?
+ return output
end
+ # Try executing an external program, returning true if the run was successful
+ # and false otherwise
+ def Utils.try_exec(command, *args)
+ IO.popen("-") { |p|
+ if p.nil?
+ begin
+ $stderr.reopen($stdout)
+ exec(command, *args)
+ rescue Exception => e
+ Kernel::exit! 1
+ end
+ Kernel::exit! 1
+ else
+ debug p.readlines
+ end
+ }
+ debug $?
+ return $?.success?
+ end
# Safely (atomically) save to _file_, by passing a tempfile to the block
# and then moving the tempfile to its final location when done.
@@ -313,14 +338,27 @@ module ::Irc
# Decode HTML entities in the String _str_, using HTMLEntities if the
# package was found, or UNESCAPE_TABLE otherwise.
#
- def Utils.decode_html_entities(str)
- if defined? ::HTMLEntities
- return HTMLEntities.decode_entities(str)
+
+ if defined? ::HTMLEntities
+ if ::HTMLEntities.respond_to? :decode_entities
+ def Utils.decode_html_entities(str)
+ return HTMLEntities.decode_entities(str)
+ end
else
- str.gsub(/(&(.+?);)/) {
+ @@html_entities = HTMLEntities.new
+ def Utils.decode_html_entities(str)
+ return @@html_entities.decode str
+ end
+ end
+ else
+ def Utils.decode_html_entities(str)
+ return str.gsub(/(&(.+?);)/) {
symbol = $2
# remove the 0-paddng from unicode integers
- if symbol =~ /^#(\d+)$/
+ case symbol
+ when /^#x([0-9a-fA-F]+)$/
+ symbol = $1.to_i(16).to_s
+ when /^#(\d+)$/
symbol = $1.to_i.to_s
end
@@ -458,7 +496,11 @@ module ::Irc
# HTML first par grabber without hpricot
def Utils.ircify_first_html_par_woh(xml_org, opts={})
- xml = xml_org.gsub(//m, '').gsub(/