4 # :title: Markov plugin
6 # Author:: Tom Gilbert <tom@linuxbrit.co.uk>
7 # Copyright:: (C) 2005 Tom Gilbert
9 # Contribute to chat with random phrases built from word sequences learned
10 # by listening to chat
12 class MarkovPlugin < Plugin
# Configuration keys this plugin registers with the bot's Config.

# markov.enabled: boolean switch for the plugin
13 Config.register Config::BooleanValue.new('markov.enabled',
15 :desc => "Enable and disable the plugin")
# markov.probability: integer percentage, validated to lie within 0..100
16 Config.register Config::IntegerValue.new('markov.probability',
18 :validate => Proc.new { |v| (0..100).include? v },
19 :desc => "Percentage chance of markov plugin chipping in")
# markov.ignore: array of hostmasks and channel names not to learn from
20 Config.register Config::ArrayValue.new('markov.ignore',
22 :desc => "Hostmasks and channel names markov should NOT learn from (e.g. idiot*!*@*, #privchan).")
# markov.max_words: integer cap on sentence length, validated to lie
# within 0..100
23 Config.register Config::IntegerValue.new('markov.max_words',
25 :validate => Proc.new { |v| (0..100).include? v },
26 :desc => "Maximum number of words the bot should put in a sentence")
# markov.learn_delay: non-negative sleep time used by the learning and
# conversion threads between items (zero disables the sleep)
27 Config.register Config::IntegerValue.new('markov.learn_delay',
29 :validate => Proc.new { |v| v >= 0 },
30 :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.")
34 # upgrade a registry entry from 0.9.14 and earlier, converting the Arrays
35 # into Hashes of weights
#
# k::       the old-format registry key to convert
# logfile:: object the conversion is traced to; every step is written to it
#           with puts, and entries that cannot be converted are reported as
#           SKIPPED
#
# The key is re-encoded and cleaned up word by word, merged with any entry
# already stored under the cleaned-up key, and the result is stored in
# @chains as a [total, hash] pair while holding @chains_mutex.
36 def upgrade_entry(k, logfile)
37 logfile.puts "\t#{k.inspect}"
43 # wipe the current key
48 logfile.puts "\tEMPTY"
53 logfile.puts "\t#{ar.inspect}"
55 # re-encode key to UTF-8 and cleanup as needed
56 words = k.split.map do |w|
57 BasicUserMessage.strip_formatting(
58 @bot.socket.filter.in(w)
62 # old import that failed to split properly?
63 if words.length == 1 and words.first.include? '/'
# split on the LAST '/' so that earlier slashes stay in the first word
66 at = unsplit.rindex('/')
67 words = [unsplit[0,at], unsplit[at+1..-1]]
70 # if any of the re-split/re-encoded words have spaces,
71 # or are empty, we would get a chain we can't convert,
73 if words.first.empty? or words.first.include?(' ') or
74 words.last.empty? or words.last.include?(' ')
75 logfile.puts "\tSKIPPED"
79 # former unclean CTCP, we can't convert this
# NOTE(review): String#[] with an Integer index returns a one-character
# String on Ruby >= 1.9, so comparing it with the Integer 1 looks like a
# Ruby 1.8-era check -- confirm it still matches "\001" on current Rubies
80 if words.first[0] == 1
81 logfile.puts "\tSKIPPED"
85 # nonword CTCP => SKIP
86 # someword CTCP => nonword someword
88 if words.first == "nonword"
89 logfile.puts "\tSKIPPED"
# the old 'nonword' placeholder and MARKER both map to MARKER; any other
# word only loses a trailing "\001"
98 ['nonword', MARKER].include?(w) ? MARKER : w.chomp("\001")
101 newkey = words.join(' ')
102 logfile.puts "\t#{newkey.inspect}"
104 # the new key exists already, so we want to merge
105 if k != newkey and @registry.key? newkey
106 ar2 = @registry[newkey]
107 logfile.puts "\tMERGE"
108 logfile.puts "\t\t#{ar2.inspect}"
110 # and get rid of the key
111 @registry.delete(newkey)
# @chains is only read and written while holding @chains_mutex
117 @chains_mutex.synchronize do
118 if @chains.key? newkey
119 ar2 = @chains[newkey]
130 # we convert old words into UTF-8, cleanup, resplit if needed,
131 # and only get the first word. we may lose some data for old
132 # mis-splits, but this is the best we can do
133 w = BasicUserMessage.strip_formatting(
134 @bot.socket.filter.in(word).split.first
137 when /^\001\S+$/, "\001", ""
138 # former unclean CTCP or end of CTCP
141 # intern after clearing leftover end-of-actions if present
142 sym = w.chomp("\001").intern
149 logfile.puts "\tSKIPPED"
152 logfile.puts "\t#{[total, hash].inspect}"
153 @chains[newkey] = [total, hash]
158 # we load all the keys and then iterate over this array because
159 # running each() on the registry and updating it at the same time
161 keys = @registry.keys
162 # no registry, nothing to do
163 return if keys.empty?
166 log "starting markov database conversion thread (v1 to v2, #{keys.length} keys)"
168 keys.each { |k| @upgrade_queue.push k }
169 @upgrade_queue.push nil
171 @upgrade_thread = Thread.new do
172 logfile = File.open(@bot.path('markov-conversion.log'), 'a')
173 logfile.puts "=== conversion thread started #{Time.now} ==="
174 while k = @upgrade_queue.pop
176 logfile.puts "Key #{ki} (#{@upgrade_queue.length} in queue):"
178 upgrade_entry(k, logfile)
179 rescue Exception => e
180 logfile.puts "=== ERROR ==="
181 logfile.puts e.pretty_inspect
182 logfile.puts "=== EREND ==="
184 sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
186 logfile.puts "=== conversion thread stopped #{Time.now} ==="
189 @upgrade_thread.priority = -1
192 attr_accessor :chains
196 @registry.set_default([])
197 if @registry.has_key?('enabled')
198 @bot.config['markov.enabled'] = @registry['enabled']
199 @registry.delete('enabled')
201 if @registry.has_key?('probability')
202 @bot.config['markov.probability'] = @registry['probability']
203 @registry.delete('probability')
205 if @bot.config['markov.ignore_users']
206 debug "moving markov.ignore_users to markov.ignore"
207 @bot.config['markov.ignore'] = @bot.config['markov.ignore_users'].dup
208 @bot.config.delete('markov.ignore_users'.to_sym)
211 @chains = @registry.sub_registry('v2')
212 @chains.set_default([])
213 @chains_mutex = Mutex.new
215 @upgrade_queue = Queue.new
216 @upgrade_thread = nil
219 @learning_queue = Queue.new
220 @learning_thread = Thread.new do
221 while s = @learning_queue.pop
223 sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
226 @learning_thread.priority = -1
230 if @upgrade_thread and @upgrade_thread.alive?
231 debug 'closing conversion thread'
233 @upgrade_queue.push nil
235 debug 'conversion thread closed'
238 debug 'closing learning thread'
239 @learning_queue.push nil
240 @learning_thread.join
241 debug 'learning thread closed'
244 # if passed a pair, pick a word from the registry using the pair as key.
245 # otherwise, pick a word from a given list
#
# word1:: either the first word of a pair, or an Array to pick from directly
# word2:: second word of the pair; defaults to MARKER
#
# Returns MARKER when the pair has no entry in @chains or when the entry's
# total is zero; when the hash of candidates holds a single key, that key is
# returned directly.
246 def pick_word(word1, word2=MARKER)
247 if word1.kind_of? Array
# chain keys are the two words joined by a single space
250 k = "#{word1} #{word2}"
251 return MARKER unless @chains.key? k
252 wordlist = @chains[k]
# the first element of a chain entry is its total
254 total = wordlist.first
256 return MARKER if total == 0
257 return hash.keys.first if hash.length == 1
# Build a sentence that starts from the given seed words.
#
# word1, word2:: the seed pair; they are joined with a space to form the
#                initial output and the chain key that is looked up
#
# Returns nil when no chain key matches the seed or when the first picked
# word is MARKER. After the first picked word, the sentence is extended by
# at most markov.max_words - 1 further words and stops early at MARKER.
270 def generate_string(word1, word2)
271 # limit to max of markov.max_words words
273 output = "#{word1} #{word2}"
# exact match on the seed pair
278 if @chains.key? output
279 wordlist = @chains[output]
280 wordlist.last.delete(MARKER)
# otherwise look for keys that contain the seed
284 @chains.each_key(output) do |key|
285 if key.downcase.include? output
292 keys = @chains.keys.select { |k| k.downcase.include? output }
294 return nil if keys.empty?
# try the candidate keys one at a time; keys.delete_one removes the key it
# returns, so the loop ends once the list is exhausted
295 while key = keys.delete_one
296 wordlist = @chains[key]
297 wordlist.last.delete(MARKER)
298 unless wordlist.empty?
300 # split using / / so that we can properly catch the marker
301 word1, word2 = output.split(/ /).map {|w| w.intern}
307 word3 = pick_word(wordlist)
308 return nil if word3 == MARKER
310 output << " #{word3}"
# slide the two-word window forward by one word
311 word1, word2 = word2, word3
313 (@bot.config['markov.max_words'] - 1).times do
314 word3 = pick_word(word1, word2)
315 break if word3 == MARKER
316 output << " #{word3}"
317 word1, word2 = word2, word3
# Help text for the plugin.
#
# plugin:: the name help was requested for
# topic::  optional topic string; it is split on whitespace into a topic and
#          a subtopic
#
# NOTE(review): some texts below advertise "markov chat" and "markov chat
# about", while the routes registered at the end of the file are 'chat' and
# 'chat about' -- confirm which form users are expected to type.
322 def help(plugin, topic="")
323 topic, subtopic = topic.split
329 "markov ignore add <hostmask|channel> => ignore a hostmask or a channel"
331 "markov ignore list => show ignored hostmasks and channels"
333 "markov ignore remove <hostmask|channel> => unignore a hostmask or channel"
335 "ignore hostmasks or channels -- topics: add, remove, list"
338 "markov status => show if markov is enabled, probability and amount of messages in queue for learning"
340 "markov probability [<percent>] => set the % chance of rbot responding to input, or display the current probability"
344 "markov chat about <word> [<another word>] => talk about <word> or riff on a word pair (if possible)"
346 "markov chat => try to say something intelligent"
349 "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, status, probability, chat, chat about"
355 str.gsub!(/^\S+[:,;]/, "")
356 str.gsub!(/\s{2,}/, ' ') # fix for two or more spaces
361 return @bot.config['markov.probability']
365 if @bot.config['markov.enabled']
366 reply = _("markov is currently enabled, %{p}% chance of chipping in") % { :p => probability? }
367 l = @learning_queue.length
368 reply << (_(", %{l} messages in queue") % {:l => l}) if l > 0
369 l = @upgrade_queue.length
370 reply << (_(", %{l} chains to upgrade") % {:l => l}) if l > 0
372 reply = _("markov is currently disabled")
378 return false unless m
379 return true if m.address? or m.private?
380 @bot.config['markov.ignore'].each do |mask|
381 return true if m.channel.downcase == mask.downcase
382 return true if m.source.matches?(mask)
# Handler for the 'markov ignore' commands: maintains the 'markov.ignore'
# configuration list of hostmasks and channel names.
#
# m::      the message being answered; every outcome is reported through
#          m.reply
# params:: route parameters. params[:action] selects the operation ('add',
#          'remove' or 'list') and params[:option] is the hostmask or channel
#          to act on. Any other (or missing) action replies with a usage
#          summary.
def ignore(m, params)
  action = params[:action]
  user = params[:option]

  case action
  when 'remove'
    if @bot.config['markov.ignore'].include? user
      # work on a copy and store it back under the key it was read from, so
      # that the removal ends up in 'markov.ignore' and nowhere else
      s = @bot.config['markov.ignore'].dup
      s.delete user
      @bot.config['markov.ignore'] = s
      m.reply _("%{u} removed") % { :u => user }
    else
      m.reply _("not found in list")
    end
  when 'add'
    if user
      if @bot.config['markov.ignore'].include?(user)
        m.reply _("%{u} already in list") % { :u => user }
      else
        @bot.config['markov.ignore'] = @bot.config['markov.ignore'].push user
        m.reply _("%{u} added to markov ignore list") % { :u => user }
      end
    else
      # 'markov ignore add' without a hostmask or channel
      m.reply _("give the name of a person or channel to ignore")
    end
  when 'list'
    m.reply _("I'm ignoring %{ignored}") % { :ignored => @bot.config['markov.ignore'].join(", ") }
  else
    m.reply _("have markov ignore the input from a hostmask or a channel. usage: markov ignore add <mask or channel>; markov ignore remove <mask or channel>; markov ignore list")
  end
end
# Handler for the 'markov enable' route: switches the plugin on by setting
# the markov.enabled configuration key to true.
#
# m::      the message that triggered the command
# params:: route parameters (not read in the line shown below)
418 def enable(m, params)
419 @bot.config['markov.enabled'] = true
# Handler for the 'markov probability [:probability]' route: optionally
# stores a new markov.probability value, and reports the chance of chipping
# in through m.reply.
#
# m::      the message being answered
# params:: route parameters; params[:probability], when present, is the new
#          percentage as text
423 def probability(m, params)
424 if params[:probability]
# String#to_i stops at the first non-digit, so a trailing "%" (which the
# route's :probability requirement allows) is ignored.
# NOTE(review): the route accepts any number of digits while the config
# validator only admits 0..100 -- confirm how out-of-range values are handled
425 @bot.config['markov.probability'] = params[:probability].to_i
428 m.reply _("markov has a %{prob}% chance of chipping in") % { :prob => probability? }
# Handler for the 'markov disable' route: switches the plugin off by setting
# the markov.enabled configuration key to false.
#
# m::      the message that triggered the command
# params:: route parameters (not read in the line shown below)
432 def disable(m, params)
433 @bot.config['markov.enabled'] = false
438 return false unless @bot.config['markov.enabled']
440 return true if prob > rand(100)
# Possibly chip in with a generated line in reply to a channel message.
#
# m::       the message being reacted to; the generated line is sent with
#           m.reply, without nick prefix and to the public target
# message:: the text to seed the generation from
#
# Does nothing unless should_talk allows it and the cleaned-up message yields
# at least two words; the first two words are used as the seed pair.
448 def random_markov(m, message)
449 return unless should_talk
451 word1, word2 = clean_str(message).split(/\s+/)
452 return unless word1 and word2
453 line = generate_string(word1.intern, word2.intern)
455 # we do nothing if the line we return is just an initial substring
456 # of the line we received
# NOTE(review): generate_string can return nil, and String#index raises
# TypeError for a nil argument -- confirm line is checked before this point
457 return if message.index(line) == 0
# the reply is sent from a one-shot timer rather than immediately
458 @bot.timer.add_once(delay) {
459 m.reply line, :nick => false, :to => :public
464 line = generate_string(params[:seed1], params[:seed2])
465 if line and line != [params[:seed1], params[:seed2]].compact.join(" ")
468 m.reply _("I can't :(")
# Handler for the bare 'chat' route: produce a sentence without any seed
# words and send it with m.reply, or reply "I can't :(" instead.
#
# m::      the message being answered
# params:: route parameters (not read in the lines shown below)
472 def rand_chat(m, params)
473 # pick a random pair from the db and go from there
# starting from the MARKER/MARKER pair
474 word1, word2 = MARKER, MARKER
# at most markov.max_words words are picked; MARKER ends the sentence early
476 @bot.config['markov.max_words'].times do
477 word3 = pick_word(word1, word2)
478 break if word3 == MARKER
# slide the two-word window forward by one word
480 word1, word2 = word2, word3
483 m.reply output.join(" ")
485 m.reply _("I can't :(")
490 lines.each { |l| @learning_queue.push l }
496 # in channel message, the kind we are interested in
497 message = m.plainmessage
500 message = "#{m.sourcenick} #{message}"
504 random_markov(m, message) unless m.replied?
# Record that word3 was seen following the pair word1, word2.
#
# The chain entry lives in @chains under the key "word1 word2" and is stored
# as a [total, hash] pair; the update is made while holding @chains_mutex.
507 def learn_triplet(word1, word2, word3)
508 k = "#{word1} #{word2}"
509 @chains_mutex.synchronize do
519 @chains[k] = [total, hash]
# Learn the word sequence of one line of text.
#
# message:: the line to learn from; it is passed through clean_str, split on
#           whitespace and each word is interned
#
# Lines with fewer than two words are ignored. Learning starts from the
# MARKER/MARKER pair and feeds every word to learn_triplet together with the
# two words that precede it.
523 def learn_line(message)
524 # debug "learning #{message.inspect}"
525 wordlist = clean_str(message).split(/\s+/).map { |w| w.intern }
526 return unless wordlist.length >= 2
527 word1, word2 = MARKER, MARKER
529 wordlist.each do |word3|
530 learn_triplet(word1, word2, word3)
# slide the two-word window forward by one word
531 word1, word2 = word2, word3
535 # TODO allow learning from URLs
#
# Handler for the 'markov learn from :file ...' route: feed the lines of a
# file to learn, optionally keeping only what a pattern matches, or -- in
# testing mode -- only show examples of what would be learned.
#
# m::      the message being answered; problems ("no such file", "the file
#          is empty!") and test output are reported with m.reply
# params:: route parameters; params[:pattern] is the optional pattern and
#          params[:lines] the optional line number or "a..b" range used for
#          the examples
#
# NOTE(review): no close of the opened file is visible in these lines --
# confirm the handle is released on every path.
536 def learn_from(m, params)
539 file = File.open(path, "r")
# an empty pattern means "learn whole lines"
540 pattern = params[:pattern].empty? ? nil : Regexp.new(params[:pattern].to_s)
542 m.reply _("no such file")
547 m.reply _("the file is empty!")
# range of line numbers to sample: an explicit "a..b", or 1..n for a single
# number, or otherwise 1 up to the larger of send.max_lines and 3
553 range = case params[:lines]
555 Range.new(*params[:lines].split("..").map { |e| e.to_i })
557 Range.new(1, params[:lines].to_i)
559 Range.new(1, [@bot.config['send.max_lines'], 3].max)
563 next unless file.lineno >= range.begin
565 break if file.lineno == range.end
568 lines = lines.map do |l|
# NOTE(review): String#scan returns an Array, and Array#to_s is inspect-style
# (e.g. ["foo"]) on Ruby >= 1.9 rather than the concatenation Ruby 1.8 gave --
# confirm this is the text that should be shown and learned
569 pattern ? l.scan(pattern).to_s : l
570 end.reject { |e| e.empty? }
574 m.reply _("example matches for that pattern at lines %{range} include: %{lines}") % {
575 :lines => lines.map { |e| Underline+e+Underline }.join(", "),
579 m.reply _("the pattern doesn't match anything at lines %{range}") % {
584 m.reply _("learning from the file without a pattern would learn, for example: ")
585 lines.each { |l| m.reply l }
# with a pattern only the matched text is learned, otherwise the whole line
# without its line terminator
592 file.each { |l| learn(l.scan(pattern).to_s) }
594 file.each { |l| learn(l.chomp) }
# Instantiate the plugin and register its command routes; each route names
# the plugin method that handles it through :action.
601 plugin = MarkovPlugin.new
# 'markov ignore' with zero, one or two arguments all go to the same handler
602 plugin.map 'markov ignore :action :option', :action => "ignore"
603 plugin.map 'markov ignore :action', :action => "ignore"
604 plugin.map 'markov ignore', :action => "ignore"
605 plugin.map 'markov enable', :action => "enable"
606 plugin.map 'markov disable', :action => "disable"
607 plugin.map 'markov status', :action => "status"
# seeded and unseeded chat
608 plugin.map 'chat about :seed1 [:seed2]', :action => "chat"
609 plugin.map 'chat', :action => "rand_chat"
# the probability argument must be digits with an optional trailing "%"
610 plugin.map 'markov probability [:probability]', :action => "probability",
611 :requirements => {:probability => /^\d+%?$/}
# learning from a file is routed with :thread => true; :lines must be a
# single number or an "a..b" range
612 plugin.map 'markov learn from :file [:testing [:lines lines]] [using pattern *pattern]', :action => "learn_from", :thread => true,
614 :testing => /^testing$/,
615 :lines => /^(?:\d+\.\.\d+|\d+)$/ }
# default_auth is called with false for the 'ignore', 'probability' and
# 'learn' permissions
617 plugin.default_auth('ignore', false)
618 plugin.default_auth('probability', false)
619 plugin.default_auth('learn', false)