summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--data/rbot/plugins/search.rb33
-rw-r--r--lib/rbot/core/utils/utils.rb28
2 files changed, 33 insertions, 28 deletions
diff --git a/data/rbot/plugins/search.rb b/data/rbot/plugins/search.rb
index 6fb1959a..e94661b0 100644
--- a/data/rbot/plugins/search.rb
+++ b/data/rbot/plugins/search.rb
@@ -82,37 +82,14 @@ class SearchPlugin < Plugin
debug "Unable to retrieve #{url}"
next
end
- # We get the first par after the first main heading, if possible
- header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im)
- txt = String.new
- if header_found
- debug "Found header: #{header_found[1].inspect}"
- while txt.empty?
- header_found = $'
- candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
- break unless candidate
- txt.replace candidate.ircify_html
- end
- end
- # If we haven't found a first par yet, try to get it from the whole
- # document
- if txt.empty?
- header_found = xml
- while txt.empty?
- candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im]
- break unless candidate
- txt.replace candidate.ircify_html
- header_found = $'
- end
- end
- # Nothing yet, try title
- if txt.empty?
+ par = Utils.ircify_first_html_par(xml)
+ if par.empty?
debug "No first par found\n#{xml}"
# FIXME only do this if the 'url' plugin is loaded
- txt.replace @bot.plugins['url'].get_title_from_html(xml)
- next if txt.empty?
+ par = @bot.plugins['url'].get_title_from_html(xml)
+ next if par.empty?
end
- m.reply "[#{idx}] #{txt}", :overlong => :truncate
+ m.reply "[#{idx}] #{par}", :overlong => :truncate
first_pars -=1
end
end
diff --git a/lib/rbot/core/utils/utils.rb b/lib/rbot/core/utils/utils.rb
index fc89e1c3..52375334 100644
--- a/lib/rbot/core/utils/utils.rb
+++ b/lib/rbot/core/utils/utils.rb
@@ -415,5 +415,33 @@ module ::Irc
}
end
end
+
+ # Try to grab and IRCify the first HTML par (<p> tag) in the given string.
+ # If possible, grab the one after the first h1 heading
+ def Utils.ircify_first_html_par(xml)
+ header_found = xml.match(/<h1(?:\s+[^>]*)?>(.*?)<\/h1>/im) # first <h1> element, or nil
+ txt = String.new
+ if header_found
+ debug "Found header: #{header_found[1].inspect}"
+ while txt.empty?
+ header_found = $' # text following the most recent regexp match (the <h1>, then each <p>)
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] # next <p>...</p> element, or nil
+ break unless candidate
+ txt = candidate.ircify_html # the loop repeats while this result is empty
+ end
+ end
+ # If we haven't found a first par yet, try to get it from the whole
+ # document
+ if txt.empty?
+ header_found = xml # restart the scan from the beginning of the input
+ while txt.empty?
+ candidate = header_found[/<p(?:\s+[^>]*)?>.*?<\/p>/im] # next <p>...</p> element, or nil
+ break unless candidate
+ txt = candidate.ircify_html
+ header_found = $' # continue after the <p> matched above
+ end
+ end
+ return txt # empty string when no <p> yielded any text
+ end
end
end