1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
require 'uri'
# Select the "1.2" flavour of the net/http API.
# NOTE(review): per the Ruby documentation this mode has been the default since
# Ruby 1.8, so this call is presumably a leftover compatibility guard -- confirm
# before removing it.
Net::HTTP.version_1_2
# Matches one result link in the markup returned by Google's WML search page.
# Captures, in order:
#   1. the single-digit accesskey of the link,
#   2. everything following "u=" inside the href, up to the closing quote,
#   3. the inner text of the anchor.
# SearchPlugin#google uses them as the result number, the (still escaped)
# target URL and the result title respectively.
GOOGLE_WAP_LINK = /<a accesskey="(\d)" href=".*?u=(.*?)">(.*?)<\/a>/im
# Extensions to the core String class used to format text for IRC output.
class ::String
  # Shortens the string to roughly +len+ characters.
  #
  # Strings no longer than +len+ are returned untouched (the receiver itself).
  # Otherwise the first +len+ characters are taken and the first run of
  # whitespace followed by non-whitespace up to a line end (for single-line
  # text: the last, possibly cut, word) is replaced with "...". When the
  # truncated part contains no whitespace it is returned without an ellipsis.
  #
  # len:: maximum number of characters to keep before adding the ellipsis
  def omissis_after(len)
    return self unless length > len

    self[0...len].sub(/\s+\S*$/, "...")
  end

  # Returns a copy of the string with HTML markup converted for IRC:
  # <b> and <i> tags become the Bold and Underline control codes, <p>/<br>
  # tags and newlines become spaces, every other tag is dropped, whitespace
  # runs are collapsed, HTML entities are decoded through
  # Utils.decode_html_entities and the result is stripped.
  #
  # The receiver is left unmodified and a String is always returned.
  def ircify_html
    # The non-bang gsub yields a fresh String, so the in-place edits below
    # never touch the receiver (which may be frozen or shared with a caller).
    txt = gsub(/<\/?b\s*>/, "#{Bold}")
    txt.gsub!(/<\/?i\s*>/, "#{Underline}")
    ## This would be a nice addition, but the results are horrible
    ## Maybe make it configurable?
    # txt.gsub!(/<\/?a( [^>]*)?>/, "#{Reverse}")
    txt.gsub!(/<\/?(p|br)>/, ' ')
    txt.gsub!("\n", ' ')
    txt.gsub!(/<[^>]+>/, '')
    txt.gsub!(/\s+/, ' ')
    # strip (not strip!): the bang form returns nil when there is nothing to
    # remove, which made this method return nil for already-trimmed text.
    Utils.decode_html_entities(txt).strip
  end
end
# Plugin providing the "search"/"google" commands and the "wp" (Wikipedia)
# command; the latter is implemented as a site-restricted Google search.
class SearchPlugin < Plugin
  BotConfig.register BotConfigIntegerValue.new('google.hits',
    :default => 3,
    :desc => "Number of hits to return from Google searches")
  BotConfig.register BotConfigIntegerValue.new('google.first_par',
    :default => 0,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n search hits")
  BotConfig.register BotConfigIntegerValue.new('wikipedia.hits',
    :default => 3,
    :desc => "Number of hits to return from Wikipedia searches")
  BotConfig.register BotConfigIntegerValue.new('wikipedia.first_par',
    :default => 1,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n wikipedia search hits")

  # Returns the help string for +topic+ ("search", "google" or "wp"); any
  # other topic yields a summary of all commands. +plugin+ is not used here.
  def help(plugin, topic="")
    case topic
    when "search", "google"
      "#{topic} <string> => search google for <string>"
    when "wp"
      "wp [<code>] <string> => search for <string> on Wikipedia. You can select a national <code> to only search the national Wikipedia"
    else
      "search <string> (or: google <string>) => search google for <string> | wp <string> => search for <string> on Wikipedia"
    end
  end

  # Searches Google for params[:words] and replies to +m+ with the hits.
  #
  # Recognised keys in +params+:
  # :words::    the search terms (converted with to_s)
  # :site::     optional; restricts the search to this site
  # :filter::   optional; pattern removed from every result title
  # :hits::     optional; number of hits to list (default: 'google.hits')
  # :firstpar:: optional; number of hits for which the first paragraph of the
  #             linked page is also replied (default: 'google.first_par')
  def google(m, params)
    what = params[:words].to_s
    searchfor = escape_query(what)
    # This method is also called by other methods to restrict searching to some sites
    site = params[:site] ? "site:#{params[:site]}+" : ""
    # It is also possible to choose a filter to remove constant parts from the titles
    # e.g.: "Wikipedia, the free encyclopedia" when doing Wikipedia searches
    filter = params[:filter] || ""
    url = "http://www.google.com/wml/search?q=#{site}#{searchfor}"
    hits = params[:hits] || @bot.config['google.hits']

    wml = begin
      @bot.httputil.get_cached(url)
    rescue => e
      debug "error googling for #{what}: #{e.inspect}"
      nil
    end
    # A nil page is treated like a failed request: the paragraph loop below
    # already expects get_cached to be able to return nil.
    unless wml
      m.reply "error googling for #{what}"
      return
    end

    results = wml.scan(GOOGLE_WAP_LINK)
    if results.empty?
      m.reply "no results found for #{what}"
      return
    end

    urls = []
    summary = results[0...hits].map { |number, target, title|
      clean_title = Utils.decode_html_entities(title.gsub(filter, '').strip)
      link = unescape_url(target)
      urls << link
      "#{number}. #{Bold}#{clean_title}#{Bold}: #{link}"
    }.join(" | ")
    m.reply "Results for #{what}: #{summary}"

    first_pars = params[:firstpar] || @bot.config['google.first_par']
    idx = 0
    while first_pars > 0 && !urls.empty?
      # Use a separate local instead of overwriting the search URL in place.
      page_url = urls.shift
      idx += 1
      xml = begin
        @bot.httputil.get_cached(page_url)
      rescue => e
        # Same best-effort policy as for the search request: a failing page
        # must not abort the remaining ones.
        debug "Error retrieving #{page_url}: #{e.inspect}"
        nil
      end
      if xml.nil?
        debug "Unable to retrieve #{page_url}"
        next
      end
      txt = first_paragraph(xml)
      # Nothing found, give up on this page
      unless txt
        debug "No first par found\n#{xml}"
        next
      end
      m.reply("[#{idx}] #{txt.ircify_html}".omissis_after(400))
      first_pars -= 1
    end
  end

  # Searches Wikipedia through #google, restricted to
  # "<lang>.wikipedia.org" when params[:lang] is given and to "wikipedia.org"
  # otherwise. Overwrites :site, :filter, :hits and :firstpar in +params+
  # with the Wikipedia-specific settings.
  def wikipedia(m, params)
    lang = params[:lang]
    site = lang.nil? ? "wikipedia.org" : "#{lang}.wikipedia.org"
    debug "Looking up things on #{site}"
    params[:site] = site
    params[:filter] = / - Wikipedia.*$/
    params[:hits] = @bot.config['wikipedia.hits']
    params[:firstpar] = @bot.config['wikipedia.first_par']
    google(m, params)
  end

  private

  # Encodes +text+ for use as a query-string value. URI.escape was removed in
  # Ruby 3.0 and leaves reserved characters such as "&" unencoded, so the
  # www-form encoder is preferred whenever the running Ruby provides it.
  def escape_query(text)
    if URI.respond_to?(:encode_www_form_component)
      URI.encode_www_form_component(text)
    else
      URI.escape(text)
    end
  end

  # Decodes a percent-encoded URL taken from a result link. Falls back to
  # URI.unescape on Rubies without the www-form decoder, and returns +text+
  # unchanged when it contains malformed escapes.
  def unescape_url(text)
    if URI.respond_to?(:decode_www_form_component)
      URI.decode_www_form_component(text)
    else
      URI.unescape(text)
    end
  rescue ArgumentError
    text
  end

  # Returns the first <p>...</p> element following the first <h1> heading of
  # +html+, or the first <p>...</p> of the whole document when there is no
  # heading or no paragraph after it. Returns nil if no paragraph is found.
  def first_paragraph(html)
    heading = html.match(/<h1( [^>]*)?>.*?<\/h1>/im)
    par = heading && heading.post_match[/<p( [^>]*)?>.*?<\/p>/im]
    par || html[/<p( [^>]*)?>.*?<\/p>/im]
  end
end
# Create the plugin instance and bind its chat commands to handler methods.
plugin = SearchPlugin.new

# "search" and "google" are synonyms, both handled by SearchPlugin#google.
%w[search google].each do |command|
  plugin.map "#{command} *words", :action => 'google'
end

# The language-qualified form (two or three word characters) is registered
# before the catch-all "wp" form.
# NOTE(review): presumably templates are tried in registration order -- keep
# this ordering unless that is confirmed not to matter.
plugin.map "wp :lang *words",
           :action => 'wikipedia',
           :requirements => { :lang => /^\w\w\w?$/ }
plugin.map "wp *words", :action => 'wikipedia'
|