X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=417622e43ea1cf65e13235971d66a228b4f4bcd6;hb=c7c670947b9ec9129412e05fc7934531c9d132ba;hp=1ea69df3414ea32bcb3fa979661277fcdbaf5864;hpb=5b6e6a93dc620315f69b3507bc86d8a806cdb968;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index 1ea69df3..417622e4 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
#-- vim:sw=2:et
#++
#
@@ -33,6 +34,7 @@ rescue LoadError
'gt' => '>',
'hellip' => 'â¦',
'nbsp' => 'Â ',
+ 'ndash' => 'â',
'Agrave' => 'Ã',
'Aacute' => 'Ã',
'Acirc' => 'Ã',
@@ -126,7 +128,7 @@ rescue LoadError
# Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class
# to mark actual text
- AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+ AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
# At worst, we can try stuff which is comprised between two
AFTER_PAR2_REGEX = /
]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im
@@ -197,7 +199,7 @@ module ::Irc
when 0
raise "Empty ret array!"
when 1
- return ret.to_s
+ return ret[0].to_s
else
return [ret[0, ret.length-1].join(", ") , ret[-1]].join(_(" and "))
end
@@ -337,14 +339,27 @@ module ::Irc
# Decode HTML entities in the String _str_, using HTMLEntities if the
# package was found, or UNESCAPE_TABLE otherwise.
#
- def Utils.decode_html_entities(str)
- if defined? ::HTMLEntities
- return HTMLEntities.decode_entities(str)
+
+ if defined? ::HTMLEntities
+ if ::HTMLEntities.respond_to? :decode_entities
+ def Utils.decode_html_entities(str)
+ return HTMLEntities.decode_entities(str)
+ end
else
- str.gsub(/(&(.+?);)/) {
+ @@html_entities = HTMLEntities.new
+ def Utils.decode_html_entities(str)
+ return @@html_entities.decode str
+ end
+ end
+ else
+ def Utils.decode_html_entities(str)
+ return str.gsub(/(&(.+?);)/) {
symbol = $2
# remove the 0-paddng from unicode integers
- if symbol =~ /^#(\d+)$/
+ case symbol
+ when /^#x([0-9a-fA-F]+)$/
+ symbol = $1.to_i(16).to_s
+ when /^#(\d+)$/
symbol = $1.to_i.to_s
end
@@ -482,7 +497,11 @@ module ::Irc
# HTML first par grabber without hpricot
def Utils.ircify_first_html_par_woh(xml_org, opts={})
- xml = xml_org.gsub(//m, '').gsub(/