Move code to find and ircify first par from search plugin to utils

author Giuseppe Bilotta <giuseppe.bilotta@gmail.com>

Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)

committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com>

Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb

index 6fb1959aeec31b76f87083c78260cbf02269519b..e94661b014483c2f94b66384ddc73a89de8ee79c 100644 (file)
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -82,37 +82,14 @@ class SearchPlugin < Plugin
          debug "Unable to retrieve #{url}"
          next
        end
-      # We get the first par after the first main heading, if possible
-      header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
-      txt = String.new
-      if header_found
-        debug "Found header: #{header_found[1].inspect}"
-        while txt.empty? 
-          header_found = $'
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
-          break unless candidate
-          txt.replace candidate.ircify_html
-        end
-      end
-      # If we haven't found a first par yet, try to get it from the whole
-      # document
-      if txt.empty?
-       header_found = xml
-        while txt.empty? 
-          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
-          break unless candidate
-          txt.replace candidate.ircify_html
-          header_found = $'
-        end
-      end
-      # Nothing yet, try title
-      if txt.empty?
+      par = Utils.ircify_first_html_par(xml)
+      if par.empty?
          debug "No first par found\n#{xml}"
         # FIXME only do this if the 'url' plugin is loaded
-       txt.replace @bot.plugins['url'].get_title_from_html(xml)
-        next if txt.empty?
+       par = @bot.plugins['url'].get_title_from_html(xml)
+        next if par.empty?
        end
-      m.reply "[#{idx}] #{txt}", :overlong => :truncate
+      m.reply "[#{idx}] #{par}", :overlong => :truncate
        first_pars -=1
      end
    end
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb

index fc89e1c3f7146077945e5794de02981b93d2b2d5..52375334459c8c64fea43742ae88bb588b7eadbd 100644 (file)
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -415,5 +415,33 @@ module ::Irc
          }
        end
      end
+
+    # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
+    # If possible, grab the one after the first h1 heading
+    def Utils.ircify_first_html_par(xml)
+      header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
+      txt = String.new
+      if header_found
+        debug "Found header: #{header_found[1].inspect}"
+        while txt.empty? 
+          header_found = $'
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
+          break unless candidate
+          txt = candidate.ircify_html
+        end
+      end
+      # If we haven't found a first par yet, try to get it from the whole
+      # document
+      if txt.empty?
+       header_found = xml
+        while txt.empty? 
+          candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
+          break unless candidate
+          txt = candidate.ircify_html
+          header_found = $'
+        end
+      end
+      return txt
+    end
    end
  end
author	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
	Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)
committer	Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
	Tue, 6 Feb 2007 15:08:25 +0000 (15:08 +0000)
data/rbot/plugins/search.rb		patch \| blob \| history
lib/rbot/core/utils/utils.rb		patch \| blob \| history