From c671bf49230781ed80d9fa80577fed9b1b655a99 Mon Sep 17 00:00:00 2001
From: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Date: Sun, 25 Mar 2007 19:30:29 +0000
Subject: Utils: try non-paragraphs if no paragraphs were found

---
 lib/rbot/core/utils/utils.rb | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'lib')
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index cf16b601..bbdd462b 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -432,7 +432,8 @@ module ::Irc
     end
 
     HX_REGEX = /<h(\d)(?:\s+[^>]*)?>.*?<\/h\1>/im
-    PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/?(?:p|(?:div|html|body|table|td|tr)(?:\s+[^>]*)?)>/im
+    PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+    AFTER_PAR1_REGEX = /<\w+\s+[^>]*body[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
     # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
     # If possible, grab the one after the first heading
     #
@@ -471,6 +472,19 @@ module ::Irc
 	  txt.sub!(strip, '') if strip
         end
       end
+
+      # Nothing yet ... let's get drastic: we can try non-paragraph elements whose tag attributes mention "body"
+      if txt.empty?
+	header_found = xml
+        while txt.empty? 
+          candidate = header_found[AFTER_PAR1_REGEX]
+          break unless candidate
+          txt = candidate.ircify_html
+          header_found = $'
+	  txt.sub!(strip, '') if strip
+        end
+      end
+
       return txt
     end
 
-- 
cgit v1.2.3