summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>, 2007-03-25 18:04:12 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>, 2007-03-25 18:04:12 +0000
commit: bc9e991b8665fdd8f77a257c5381cf70d015a6ec (patch)
tree: ba197242a79d7ea477ac7588e8fcb6f24c24cbb7
parent: 8b45e3f06184a66bd5bfa578c194059a8cf8ceb4 (diff)
Utils: fix ircify_html (the final strip! could cause it to return nil) and improve whitespace handling
-rw-r--r-- lib/rbot/core/utils/extends.rb | 14
1 files changed, 12 insertions, 2 deletions
diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb
index c43f3f3b..7022fb91 100644
--- a/lib/rbot/core/utils/extends.rb
+++ b/lib/rbot/core/utils/extends.rb
@@ -51,22 +51,32 @@ class ::String
## Maybe make it configurable?
# txt.gsub!(/<\/?a( [^>]*)?>/, "#{Reverse}")
- # Paragraph and br tags are converted to whitespace.
+ # Paragraph and br tags are converted to whitespace
txt.gsub!(/<\/?(p|br)\s*\/?\s*>/, ' ')
txt.gsub!("\n", ' ')
+ txt.gsub!("\r", ' ')
# All other tags are just removed
txt.gsub!(/<[^>]+>/, '')
+ # Convert HTML entities. We do it now to be able to handle stuff
+ # such as &nbsp;
+ txt = Utils.decode_html_entities(txt)
+
# Remove double formatting options, since they only waste bytes
txt.gsub!(/#{Bold}(\s*)#{Bold}/, '\1')
txt.gsub!(/#{Underline}(\s*)#{Underline}/, '\1')
+ # Simplify whitespace that appears on both sides of a formatting option
+ txt.gsub!(/\s+(#{Bold}|#{Underline})\s+/, ' \1')
+ txt.sub!(/\s+(#{Bold}|#{Underline})\z/, '\1')
+ txt.sub!(/\A(#{Bold}|#{Underline})\s+/, '\1')
+
# And finally whitespace is squeezed
txt.gsub!(/\s+/, ' ')
# Decode entities and strip whitespace
- return Utils.decode_html_entities(txt).strip!
+ return txt.strip
end
# This method will strip all HTML crud from the receiver