diff options
author | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-06 17:36:43 +0000 |
---|---|---|
committer | Giuseppe Bilotta <giuseppe.bilotta@gmail.com> | 2007-02-06 17:36:43 +0000 |
commit | bf8325c9a59667474940566065590b8da3dec85d (patch) | |
tree | adaf23c4299f882002cfa1790c59a622793bba60 | |
parent | f5abcb7eff07f436904ad5d88c0651e197c5914c (diff) |
Enhance Utils tools to get first pars, allowing an option to strip an initial part of the paragraphs extracted
-rw-r--r-- | data/rbot/plugins/dict.rb | 5 | ||||
-rw-r--r-- | lib/rbot/core/utils/utils.rb | 23 |
2 files changed, 20 insertions, 8 deletions
diff --git a/data/rbot/plugins/dict.rb b/data/rbot/plugins/dict.rb index d23df81f..4e1bb721 100644 --- a/data/rbot/plugins/dict.rb +++ b/data/rbot/plugins/dict.rb @@ -87,7 +87,8 @@ class DictPlugin < Plugin return unless first_pars > 0
- Utils.get_first_pars urls, first_pars, :http_util => @bot.httputil, :message => m
+ Utils.get_first_pars urls, first_pars, :http_util => @bot.httputil, :message => m,
+ :strip => /^\S+\s+-\s+/
end
@@ -138,7 +139,7 @@ class DictPlugin < Plugin return false if justcheck
m.reply "Nothing found for #{word}, but see #{url} for possible suggestions"
else
- return false if justcheck
+ return true if justcheck
m.reply "#{word}: #{url}"
end
end
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb index f5a6c1db..4613dada 100644 --- a/lib/rbot/core/utils/utils.rb +++ b/lib/rbot/core/utils/utils.rb @@ -420,18 +420,30 @@ module ::Irc PAR_REGEX = /<p(?:\s+[^>]*)?>.*?<\/p>/im # Try to grab and IRCify the first HTML par (<p> tag) in the given string. # If possible, grab the one after the first h1 heading - def Utils.ircify_first_html_par(xml) - header_found = xml.match(H1_REGEX) + # + # It is possible to pass some options to determine how the stripping + # occurs. Currently, only one option is supported: + # * :strip => Regex or String to strip at the beginning of the obtained + # text + # + def Utils.ircify_first_html_par(xml, opts={}) txt = String.new + strip = opts[:strip] + strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String) + + header_found = xml.match(H1_REGEX) if header_found + header_found = $' debug "Found header: #{header_found[1].inspect}" while txt.empty? - header_found = $' candidate = header_found[PAR_REGEX] break unless candidate txt = candidate.ircify_html + header_found = $' + txt.sub!(strip, '') if strip end end + # If we haven't found a first par yet, try to get it from the whole # document if txt.empty? @@ -441,6 +453,7 @@ module ::Irc break unless candidate txt = candidate.ircify_html header_found = $' + txt.sub!(strip, '') if strip end end return txt @@ -464,9 +477,7 @@ module ::Irc debug "Unable to retrieve #{url}" next end - debug "Retrieved #{url}" - debug "\t#{xml}" - par = Utils.ircify_first_html_par(xml) + par = Utils.ircify_first_html_par(xml, opts) if par.empty? debug "No first par found\n#{xml}" # FIXME only do this if the 'url' plugin is loaded |