Move code to find and ircify first par from search plugin to utils

author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-06 15:08:25 +0000
committer: Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2007-02-06 15:08:25 +0000
commit: 059f917a709673d1d88f7056b45e86916de29ad4 (patch)
tree: c8e88dfb8611cf0132d36065fcbd9ae935eb6846
parent: 64689811ca5ee4e190d1837463675c68f9a094ff (diff)
2 files changed, 33 insertions, 28 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index 6fb1959a..e94661b0 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -82,37 +82,14 @@ class SearchPlugin < Plugin
         debug "Unable to retrieve #{url}"
         next
       end
-      # We get the first par after the first main heading, if possible
-      header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
-      txt = String.new
-      if header_found
-        debug "Found header: #{header_found[1].inspect}"
-        while txt.empty? 
-          header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
-          break unless candidate
-          txt.replace candidate.ircify_html
-        end
-      end
-      # If we haven't found a first par yet, try to get it from the whole
-      # document
-      if txt.empty?
-	header_found = xml
-        while txt.empty? 
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
-          break unless candidate
-          txt.replace candidate.ircify_html
-          header_found = $'
-        end
-      end
-      # Nothing yet, try title
-      if txt.empty?
+      par = Utils.ircify_first_html_par(xml)
+      if par.empty?
         debug "No first par found\n#{xml}"
 	# FIXME only do this if the 'url' plugin is loaded
-	txt.replace @bot.plugins['url'].get_title_from_html(xml)
-        next if txt.empty?
+	par = @bot.plugins['url'].get_title_from_html(xml)
+        next if par.empty?
       end
-      m.reply "[#{idx}] #{txt}", :overlong => :truncate
+      m.reply "[#{idx}] #{par}", :overlong => :truncate
       first_pars -=1
     end
   end
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index fc89e1c3..52375334 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -415,5 +415,33 @@ module ::Irc
         }
       end
     end
+
+    # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
+    # If possible, grab the one after the first h1 heading
+    def Utils.ircify_first_html_par(xml)
+      header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
+      txt = String.new
+      if header_found
+        debug "Found header: #{header_found[1].inspect}"
+        while txt.empty? 
+          header_found = $'
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
+          break unless candidate
+          txt = candidate.ircify_html
+        end
+      end
+      # If we haven't found a first par yet, try to get it from the whole
+      # document
+      if txt.empty?
+	header_found = xml
+        while txt.empty? 
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
+          break unless candidate
+          txt = candidate.ircify_html
+          header_found = $'
+        end
+      end
+      return txt
+    end
   end
 end
author	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2007-02-06 15:08:25 +0000
committer	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>	2007-02-06 15:08:25 +0000
commit	059f917a709673d1d88f7056b45e86916de29ad4 (patch)
tree	c8e88dfb8611cf0132d36065fcbd9ae935eb6846
parent	64689811ca5ee4e190d1837463675c68f9a094ff (diff)