X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Fcore%2Futils%2Futils.rb;h=7b316ffe28cd3df6ffc6ecd0d1707063142dd1bb;hb=bf9734ff89a238c5a63015b68eabd8d0ef9d1308;hp=7fe83410c817cc15b45fbf1cb79bc65c5aa37dc9;hpb=979dfca5faff9e9ea52588220a862bed19a8c731;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index 7fe83410..7b316ffe 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -127,7 +127,7 @@ rescue LoadError
# Some blogging and forum platforms use spans or divs with a 'body', 'message', 'text' or 'post' in their class
# to mark actual text
- AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+ AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
# At worst, we can try stuff which is comprised between two <br> tags.
# The match starts at a <br> (optionally with attributes and/or self-closing)
# and lazily extends to the next <br> or block-level tag (p, div, html, body,
# table, td, tr), opening or closing. Case-insensitive; '.' also matches
# newlines.
# NOTE(review): the leading `<br(?:\s+[^>` was lost to tag stripping in this
# copy and has been restored -- confirm against the upstream file.
AFTER_PAR2_REGEX = /<br(?:\s+[^>]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im
@@ -355,7 +355,10 @@ module ::Irc
return str.gsub(/(&(.+?);)/) {
symbol = $2
# remove the 0-padding from unicode integers
- if symbol =~ /^#(\d+)$/
+ case symbol
+ when /^#x([0-9a-fA-F]+)$/
+ symbol = $1.to_i(16).to_s
+ when /^#(\d+)$/
symbol = $1.to_i.to_s
end
@@ -493,7 +496,11 @@ module ::Irc
# HTML first par grabber without hpricot
def Utils.ircify_first_html_par_woh(xml_org, opts={})
- xml = xml_org.gsub(//m, '').gsub(/