4 # :title: Markov plugin
6 # Author:: Tom Gilbert <tom@linuxbrit.co.uk>
7 # Copyright:: (C) 2005 Tom Gilbert
9 # Contribute to chat with random phrases built from word sequences learned
10 # by listening to chat
12 class MarkovPlugin < Plugin
# Configuration keys for the markov plugin, registered with the bot's Config.
# NOTE(review): only part of each registration is visible in this excerpt;
# further options may be elided between the lines shown.
#
# On/off switch for the whole plugin.
13 Config.register Config::BooleanValue.new('markov.enabled',
15 :desc => "Enable and disable the plugin")
# Integer percentage; the validator accepts 0..100 inclusive.
16 Config.register Config::IntegerValue.new('markov.probability',
18 :validate => Proc.new { |v| (0..100).include? v },
19 :desc => "Percentage chance of markov plugin chipping in")
# Sources (hostmasks / channel names) whose text is not learned from.
20 Config.register Config::ArrayValue.new('markov.ignore',
22 :desc => "Hostmasks and channel names markov should NOT learn from (e.g. idiot*!*@*, #privchan).")
# Sources (hostmasks / channel names) the plugin does not talk to.
23 Config.register Config::ArrayValue.new('markov.readonly',
25 :desc => "Hostmasks and channel names markov should NOT talk to (e.g. idiot*!*@*, #privchan).")
# Upper bound on generated sentence length; the validator accepts 0..100.
26 Config.register Config::IntegerValue.new('markov.max_words',
28 :validate => Proc.new { |v| (0..100).include? v },
29 :desc => "Maximum number of words the bot should put in a sentence")
# Non-negative Float; used as the argument to `sleep` by the worker threads.
30 Config.register Config::FloatValue.new('markov.learn_delay',
32 :validate => Proc.new { |v| v >= 0 },
33 :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.")
# Non-negative Integer; zero means replies are sent without a timer.
34 Config.register Config::IntegerValue.new('markov.delay',
36 :validate => Proc.new { |v| v >= 0 },
37 :desc => "Wait short time before contributing to conversation.")
# Integer percentage (0..100) used instead of markov.probability when the
# bot is addressed directly.
38 Config.register Config::IntegerValue.new('markov.answer_addressed',
40 :validate => Proc.new { |v| (0..100).include? v },
41 :desc => "Probability of answer when addressed by nick")
# Patterns (each compiled with Regexp.new when learning) for words to drop.
42 Config.register Config::ArrayValue.new('markov.ignore_patterns',
44 :desc => "Ignore these word patterns")
48 # upgrade a registry entry from 0.9.14 and earlier, converting the Arrays
49 # into Hashes of weights
#
# k::       the old registry key to convert
# logfile:: object receiving the conversion trace via +puts+
#
# Every step (original key, old value, new key, merge, result or SKIPPED)
# is written to +logfile+. The converted entry is stored in @chains as
# [total, hash].
# NOTE(review): several lines of this method are not visible in this
# excerpt, so the comments below describe only what is shown.
50 def upgrade_entry(k, logfile)
51 logfile.puts "\t#{k.inspect}"
57 # wipe the current key
62 logfile.puts "\tEMPTY"
67 logfile.puts "\t#{ar.inspect}"
69 # re-encode key to UTF-8 and cleanup as needed
70 words = k.split.map do |w|
71 BasicUserMessage.strip_formatting(
72 @bot.socket.filter.in(w)
76 # old import that failed to split properly?
77 if words.length == 1 and words.first.include? '/'
# split at the LAST '/' so everything before it becomes the first word
80 at = unsplit.rindex('/')
81 words = [unsplit[0,at], unsplit[at+1..-1]]
84 # if any of the re-split/re-encoded words have spaces,
85 # or are empty, we would get a chain we can't convert,
87 if words.first.empty? or words.first.include?(' ') or
88 words.last.empty? or words.last.include?(' ')
89 logfile.puts "\tSKIPPED"
93 # former unclean CTCP, we can't convert this
# NOTE(review): comparing String#[] with the Integer 1 only matches on
# Ruby 1.8, where "str"[0] is a byte value; on Ruby >= 1.9 it is a
# one-character String and this test is never true -- confirm the target
# Ruby version.
94 if words.first[0] == 1
95 logfile.puts "\tSKIPPED"
99 # nonword CTCP => SKIP
100 # someword CTCP => nonword someword
# NOTE(review): same Integer comparison caveat as above.
101 if words.last[0] == 1
102 if words.first == "nonword"
103 logfile.puts "\tSKIPPED"
110 # intern the old keys
# both the legacy "nonword" token and MARKER map to MARKER; other words
# lose a trailing \001
112 ['nonword', MARKER].include?(w) ? MARKER : w.chomp("\001")
115 newkey = words.join(' ')
116 logfile.puts "\t#{newkey.inspect}"
118 # the new key exists already, so we want to merge
119 if k != newkey and @registry.key? newkey
120 ar2 = @registry[newkey]
121 logfile.puts "\tMERGE"
122 logfile.puts "\t\t#{ar2.inspect}"
124 # and get rid of the key
125 @registry.delete(newkey)
# @chains is only touched while holding @chains_mutex
131 @chains_mutex.synchronize do
132 if @chains.key? newkey
133 ar2 = @chains[newkey]
144 # we convert old words into UTF-8, cleanup, resplit if needed,
145 # and only get the first word. we may lose some data for old
146 # missplits, but this is the best we can do
147 w = BasicUserMessage.strip_formatting(
148 @bot.socket.filter.in(word).split.first
151 when /^\001\S+$/, "\001", ""
152 # former unclean CTCP or end of CTCP
155 # intern after clearing leftover end-of-actions if present
156 sym = w.chomp("\001")
163 logfile.puts "\tSKIPPED"
166 logfile.puts "\t#{[total, hash].inspect}"
167 @chains[newkey] = [total, hash]
# Queues every key of the old registry and starts a background thread that
# converts them one at a time with upgrade_entry.
# NOTE(review): the def line of this method is not visible in this excerpt.
172 # we load all the keys and then iterate over this array because
173 # running each() on the registry and updating it at the same time
175 keys = @registry.keys
176 # no registry, nothing to do
177 return if keys.empty?
180 log "starting markov database conversion thread (v1 to v2, #{keys.length} keys)"
# enqueue all keys followed by a nil sentinel; the nil makes the
# `while k = @upgrade_queue.pop` loop below terminate
182 keys.each { |k| @upgrade_queue.push k }
183 @upgrade_queue.push nil
185 @upgrade_thread = Thread.new do
# a recovery proc is installed for the duration of the conversion and
# reset to nil afterwards (its body is not visible here)
186 @registry.recovery = Proc.new { |val|
# the log is opened in append mode
# NOTE(review): no matching close is visible in this excerpt -- confirm
# the handle is closed when the thread finishes.
189 logfile = File.open(@bot.path('markov-conversion.log'), 'a')
190 logfile.puts "=== conversion thread started #{Time.now} ==="
191 while k = @upgrade_queue.pop
193 logfile.puts "Key #{ki} (#{@upgrade_queue.length} in queue):"
195 upgrade_entry(k, logfile)
# NOTE(review): rescuing Exception also traps SignalException/SystemExit;
# the error is written to the log file.
196 rescue Exception => e
197 logfile.puts "=== ERROR ==="
198 logfile.puts e.pretty_inspect
199 logfile.puts "=== EREND ==="
# throttle: sleep markov.learn_delay seconds unless it is zero
201 sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
203 logfile.puts "=== conversion thread stopped #{Time.now} ==="
205 @registry.recovery = nil
# the conversion thread is given priority -1
207 @upgrade_thread.priority = -1
# Exposes @chains for reading and writing.
210 attr_accessor :chains
# Instance setup: migrates legacy settings, opens the v2 sub-registries and
# starts the learning thread.
# NOTE(review): the def line is not visible in this excerpt; presumably
# this is the constructor body -- confirm.
214 @registry.set_default([])
# legacy registry entries 'enabled' / 'probability' are copied into the
# bot configuration and then removed from the registry
215 if @registry.has_key?('enabled')
216 @bot.config['markov.enabled'] = @registry['enabled']
217 @registry.delete('enabled')
219 if @registry.has_key?('probability')
220 @bot.config['markov.probability'] = @registry['probability']
221 @registry.delete('probability')
# the old 'markov.ignore_users' setting is renamed to 'markov.ignore'
223 if @bot.config['markov.ignore_users']
224 debug "moving markov.ignore_users to markov.ignore"
225 @bot.config['markov.ignore'] = @bot.config['markov.ignore_users'].dup
226 @bot.config.delete('markov.ignore_users'.to_sym)
# @chains ('v2') and @rchains ('v2r') are separate sub-registries, each
# with its own mutex; generate_string appends words using @chains and
# prepends words using @rchains
229 @chains = @registry.sub_registry('v2')
230 @chains.set_default([])
231 @rchains = @registry.sub_registry('v2r')
232 @rchains.set_default([])
233 @chains_mutex = Mutex.new
234 @rchains_mutex = Mutex.new
# queue feeding the (optional) database conversion thread
236 @upgrade_queue = Queue.new
237 @upgrade_thread = nil
# the learning worker pops items until it receives nil, sleeping
# markov.learn_delay seconds between items unless that delay is zero
240 @learning_queue = Queue.new
241 @learning_thread = Thread.new do
242 while s = @learning_queue.pop
244 sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
# the learning thread is given priority -1
247 @learning_thread.priority = -1
# Shutdown sequence for the two worker threads.
# NOTE(review): the def line is not visible in this excerpt.
# A nil is pushed to the upgrade queue when the conversion thread is alive;
# nil is the value that ends that thread's pop loop.
251 if @upgrade_thread and @upgrade_thread.alive?
252 debug 'closing conversion thread'
254 @upgrade_queue.push nil
256 debug 'conversion thread closed'
259 debug 'closing learning thread'
# anything still queued for learning is discarded, then the nil sentinel
# is pushed and the learning thread is joined
260 @learning_queue.clear
261 @learning_queue.push nil
262 @learning_thread.join
263 debug 'learning thread closed'
269 # pick a word from the registry using the pair as key.
#
# word1, word2:: the word pair; word2 defaults to MARKER
# chainz::       the chain store to look in; defaults to @chains
#
# The lookup key is the string "word1 word2". Returns MARKER when the store
# has no such key; otherwise returns the result of pick_word_from_list.
# NOTE(review): the line assigning +wordlist+ is not visible in this excerpt.
270 def pick_word(word1, word2=MARKER, chainz=@chains)
271 k = "#{word1} #{word2}"
272 return MARKER unless chainz.key? k
274 pick_word_from_list wordlist
277 # pick a word from weighted hash
#
# wordlist:: its first element is read as the total weight; presumably the
#            [total, hash] pair stored in the chains -- confirm
#
# Returns MARKER when the total is 0, and the only key when the hash has a
# single entry.
# NOTE(review): the assignment of +hash+ and the selection among several
# entries are not visible in this excerpt.
278 def pick_word_from_list(wordlist)
279 total = wordlist.first
281 return MARKER if total == 0
282 return hash.keys.first if hash.length == 1
# Builds a word sequence starting from the seed pair word1/word2 and grows
# it at both ends.
# NOTE(review): several lines (including the final return value) are not
# visible in this excerpt.
295 def generate_string(word1, word2)
296 # limit to max of markov.max_words words
298 output = [word1, word2]
# keys of @chains are scanned for ones containing +output+; when candidate
# keys were collected, one is chosen at random and split into words
302 @chains.each_key(output) do |key|
303 if key.downcase.include? output
# nil is returned when no candidate key was found
309 return nil if keys.empty?
310 output = keys[rand(keys.size)].split(/ /)
312 output = output.split(/ /) unless output.is_a? Array
313 input = [word1, word2]
# grow until markov.max_words is reached or both ends are MARKER
314 while output.length < @bot.config['markov.max_words'] and (output.first != MARKER or output.last != MARKER) do
# extend at the end using the last two words and the default store
315 if output.last != MARKER
316 output << pick_word(output[-2], output[-1])
# extend at the front using the first two words and @rchains
318 if output.first != MARKER
319 output.insert 0, pick_word(output[0], output[1], @rchains)
# Help text for the plugin.
#
# plugin:: not referenced in the lines visible here
# topic::  split on whitespace into a topic and a subtopic
#
# NOTE(review): the branching that selects among the strings below is not
# visible in this excerpt; each string is the help text for one
# topic/subtopic, and the last one is the general overview.
330 def help(plugin, topic="")
331 topic, subtopic = topic.split
335 "markov delay <value> => Set message delay"
339 "markov ignore add <hostmask|channel> => ignore a hostmask or a channel"
341 "markov ignore list => show ignored hostmasks and channels"
343 "markov ignore remove <hostmask|channel> => unignore a hostmask or channel"
345 "ignore hostmasks or channels -- topics: add, remove, list"
350 "markov readonly add <hostmask|channel> => read-only a hostmask or a channel"
352 "markov readonly list => show read-only hostmasks and channels"
354 "markov readonly remove <hostmask|channel> => unreadonly a hostmask or channel"
356 "restrict hostmasks or channels to read only -- topics: add, remove, list"
359 "markov status => show if markov is enabled, probability and amount of messages in queue for learning"
361 "markov probability [<percent>] => set the % chance of rbot responding to input, or display the current probability"
365 "markov chat about <word> [<another word>] => talk about <word> or riff on a word pair (if possible)"
367 "markov chat => try to say something intelligent"
370 ["markov learn from <file> [testing [<num> lines]] [using pattern <pattern>]:",
371 "learn from the text in the specified <file>, optionally using the given <pattern> to filter the text.",
372 "you can sample what would be learned by specifying 'testing <num> lines'"].join(' ')
374 "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, readonly, delay, status, probability, chat, chat about, learn"
# Normalizes the text of message +m+ on a copy of m.plainmessage.
# NOTE(review): the def line is not visible in this excerpt; other methods
# call clean_message(m), which is presumably this one.
379 str = m.plainmessage.dup
# a leading "word:", "word," or "word;" is removed only when the target is
# a channel and the downcased word is one of that channel's user nicks
380 str =~ /^(\S+)([:,;])/
381 if $1 and m.target.is_a? Irc::Channel and m.target.user_nicks.include? $1.downcase
382 str.gsub!(/^(\S+)([:,;])\s+/, "")
384 str.gsub!(/\s{2,}/, ' ') # fix for two or more spaces
# Returns the configured 'markov.probability' value.
# NOTE(review): the def line is not visible in this excerpt; other methods
# call `probability?`, which is presumably this one.
389 return @bot.config['markov.probability']
# Builds the status text: enabled/disabled, the probability, and the sizes
# of the learning and upgrade queues (each only mentioned when non-empty).
# NOTE(review): the def line and the line that sends +reply+ are not
# visible in this excerpt.
393 if @bot.config['markov.enabled']
394 reply = _("markov is currently enabled, %{p}% chance of chipping in") % { :p => probability? }
395 l = @learning_queue.length
396 reply << (_(", %{l} messages in queue") % {:l => l}) if l > 0
397 l = @upgrade_queue.length
398 reply << (_(", %{l} chains to upgrade") % {:l => l}) if l > 0
400 reply = _("markov is currently disabled")
# Predicate over message +m+ driven by the 'markov.ignore' list.
# NOTE(review): the def line and the fall-through return value are not
# visible in this excerpt.
# Returns false for a nil message; true for private or prefixed messages;
# true when the channel name equals a list entry (case-insensitively) or
# the source matches the entry as a mask.
406 return false unless m
407 return true if m.private?
408 return true if m.prefixed?
409 @bot.config['markov.ignore'].each do |mask|
410 return true if m.channel.downcase == mask.downcase
411 return true if m.source.matches?(mask)
# Predicate over message +m+ driven by the 'markov.readonly' list.
# NOTE(review): the def line and the fall-through return value are not
# visible in this excerpt.
# Returns false for a nil message; true when the channel name equals a list
# entry (case-insensitively) or the source matches the entry as a mask.
417 return false unless m
418 @bot.config['markov.readonly'].each do |mask|
419 return true if m.channel.downcase == mask.downcase
420 return true if m.source.matches?(mask)
# Command handler for managing the 'markov.ignore' list.
#
# m::      the message to reply to
# params:: :action selects the operation, :option is the hostmask or
#          channel name
#
# NOTE(review): the branching on +action+ is not visible in this excerpt;
# the reply strings indicate remove, add, list and usage branches.
425 def ignore(m, params)
426 action = params[:action]
427 user = params[:option]
430 if @bot.config['markov.ignore'].include? user
431 s = @bot.config['markov.ignore']
# NOTE(review): the updated list is stored under 'ignore', not
# 'markov.ignore' (the readonly handler writes back to 'markov.readonly');
# this looks like a wrong key -- confirm.
433 @bot.config['ignore'] = s
434 m.reply _("%{u} removed") % { :u => user }
436 m.reply _("not found in list")
440 if @bot.config['markov.ignore'].include?(user)
441 m.reply _("%{u} already in list") % { :u => user }
# push appends to the existing array, which is then assigned back
443 @bot.config['markov.ignore'] = @bot.config['markov.ignore'].push user
444 m.reply _("%{u} added to markov ignore list") % { :u => user }
447 m.reply _("give the name of a person or channel to ignore")
450 m.reply _("I'm ignoring %{ignored}") % { :ignored => @bot.config['markov.ignore'].join(", ") }
452 m.reply _("have markov ignore the input from a hostmask or a channel. usage: markov ignore add <mask or channel>; markov ignore remove <mask or channel>; markov ignore list")
# Command handler for managing the 'markov.readonly' list.
#
# m::      the message to reply to
# params:: :action selects the operation, :option is the hostmask or
#          channel name
#
# NOTE(review): the branching on +action+ is not visible in this excerpt;
# the reply strings indicate remove, add, list and usage branches.
456 def readonly(m, params)
457 action = params[:action]
458 user = params[:option]
461 if @bot.config['markov.readonly'].include? user
462 s = @bot.config['markov.readonly']
# the modified list is written back under the same key
464 @bot.config['markov.readonly'] = s
465 m.reply _("%{u} removed") % { :u => user }
467 m.reply _("not found in list")
471 if @bot.config['markov.readonly'].include?(user)
472 m.reply _("%{u} already in list") % { :u => user }
# push appends to the existing array, which is then assigned back
474 @bot.config['markov.readonly'] = @bot.config['markov.readonly'].push user
475 m.reply _("%{u} added to markov readonly list") % { :u => user }
478 m.reply _("give the name of a person or channel to read only")
481 m.reply _("I'm only reading %{readonly}") % { :readonly => @bot.config['markov.readonly'].join(", ") }
483 m.reply _("have markov not answer to input from a hostmask or a channel. usage: markov readonly add <mask or channel>; markov readonly remove <mask or channel>; markov readonly list")
# Command handler: sets 'markov.enabled' to true.
# NOTE(review): any reply sent to +m+ is not visible in this excerpt.
487 def enable(m, params)
488 @bot.config['markov.enabled'] = true
# Command handler: optionally sets, then reports, the chip-in probability.
#
# params:: :probability, when present, is converted with to_i and stored in
#          'markov.probability'; the command route accepts an optional
#          trailing '%', which to_i ignores
492 def probability(m, params)
493 if params[:probability]
494 @bot.config['markov.probability'] = params[:probability].to_i
497 m.reply _("markov has a %{prob}% chance of chipping in") % { :prob => probability? }
# Command handler: sets 'markov.enabled' to false.
# NOTE(review): any reply sent to +m+ is not visible in this excerpt.
501 def disable(m, params)
502 @bot.config['markov.enabled'] = false
# Decides whether the plugin speaks in response to message +m+.
# NOTE(review): the def line and the fall-through return value are not
# visible in this excerpt; random_markov calls should_talk(m), which is
# presumably this method.
# Always false while 'markov.enabled' is off. Otherwise the percentage is
# 'markov.answer_addressed' when m.address? is true and probability?
# otherwise; true is returned when it exceeds rand(100).
507 return false unless @bot.config['markov.enabled']
508 prob = m.address? ? @bot.config['markov.answer_addressed'] : probability?
509 return true if prob > rand(100)
513 # Generates all sequence pairs from array
514 # seq_pairs [1,2,3,4] == [ [1,2], [2,3], [3,4]]
# NOTE(review): the def line, the initialisation of +res+ and the return
# are not visible in this excerpt.
# i runs over the indexes 0..size-2, pairing each element with the next one
517 0.upto(arr.size-2) do |i|
518 res << [arr[i], arr[i+1]]
# Command handler for 'markov.delay'.
#
# params:: :delay -- "off" stores 0, a missing value reports the current
#          delay, and anything else is stored after to_i
#
# NOTE(review): replies other than the report are not visible in this
# excerpt.
523 def set_delay(m, params)
524 if params[:delay] == "off"
525 @bot.config["markov.delay"] = 0
527 elsif !params[:delay]
# NOTE(review): here the % substitution happens inside _(), i.e. on the
# untranslated string, whereas other replies in this file apply % to the
# result of _() -- confirm whether this is intended.
528 m.reply _("Message delay is %{delay}" % { :delay => @bot.config["markov.delay"]})
530 @bot.config["markov.delay"] = params[:delay].to_i
# Sends +line+ as a public reply to +m+ without a nick prefix, optionally
# after a random delay.
#
# When 'markov.delay' is positive the reply is scheduled once on the bot
# timer after 1 + rand(delay) (presumably seconds -- confirm the timer
# unit); otherwise it is sent immediately.
535 def reply_delay(m, line)
537 if @bot.config['markov.delay'] > 0
538 @bot.timer.add_once(1 + rand(@bot.config['markov.delay'])) {
539 m.reply line, :nick => false, :to => :public
542 m.reply line, :nick => false, :to => :public
# Tries to produce a generated line in response to message +m+.
#
# m::       the incoming message
# message:: text that candidate lines are compared against
#
# Does nothing unless should_talk(m) is true. A candidate is accepted only
# when it is non-nil and +message+ does not start with it
# (message.index(line) != 0).
# NOTE(review): what happens with an accepted line, and the conditions
# separating the three attempts below, are not visible in this excerpt.
546 def random_markov(m, message)
547 return unless should_talk(m)
549 words = clean_message(m).split(/\s+/)
# attempt seeded with the first word only
551 line = generate_string words.first, nil
553 if line and message.index(line) != 0
# attempts seeded with each adjacent word pair, in random order
558 pairs = seq_pairs(words).sort_by { rand }
559 pairs.each do |word1, word2|
560 line = generate_string(word1, word2)
561 if line and message.index(line) != 0
# attempts seeded with single words, in random order
# NOTE(review): +word+ is an element of a String#split result, and core
# String has no #first method; presumably an extension supplies it, or
# +word+ itself was intended -- confirm.
566 words.sort_by { rand }.each do |word|
567 line = generate_string word.first, nil
568 if line and message.index(line) != 0
# Generates a line from the seeds params[:seed1] / params[:seed2].
# NOTE(review): the def line and the success branch are not visible in this
# excerpt; the 'chat about' route maps to an action named "chat", which is
# presumably this method.
# A result equal to the seeds joined by a space counts as a failure.
577 line = generate_string(params[:seed1], params[:seed2])
578 if line and line != [params[:seed1], params[:seed2]].compact.join(" ")
581 m.reply _("I can't :(")
# Command handler: says something starting from the MARKER/MARKER pair.
#
# At most 'markov.max_words' words are picked; picking stops early when
# pick_word returns MARKER. The words are joined with spaces for the reply.
# NOTE(review): the initialisation and filling of +output+, and the
# condition choosing between the two replies, are not visible here.
585 def rand_chat(m, params)
586 # pick a random pair from the db and go from there
587 word1, word2 = MARKER, MARKER
589 @bot.config['markov.max_words'].times do
590 word3 = pick_word(word1, word2)
591 break if word3 == MARKER
# slide the window one word forward
593 word1, word2 = word2, word3
596 m.reply output.join(" ")
598 m.reply _("I can't :(")
# Pushes every element of +lines+ onto @learning_queue, which the learning
# thread pops from.
# NOTE(review): the def line is not visible in this excerpt.
603 lines.each { |l| @learning_queue.push l }
# Handling of an incoming message +m+: maybe reply, then learn.
# NOTE(review): the def line and the surrounding conditions are not visible
# in this excerpt.
609 # in channel message, the kind we are interested in
610 message = m.plainmessage
# in a case not visible here the sender's nick is prepended to the text
613 message = "#{m.sourcenick} #{message}"
# no reply attempt when the source is read-only or m.replied? is true
616 random_markov(m, message) unless readonly? m or m.replied?
617 learn clean_message(m)
# Records one word triplet in both chain stores.
#
# @chains is updated under the key "word1 word2" and @rchains under the key
# "word2 word3"; each store is only touched while holding its own mutex and
# each entry is written as [total, hash].
# NOTE(review): the weight updates themselves are not visible in this
# excerpt.
621 def learn_triplet(word1, word2, word3)
622 k = "#{word1} #{word2}"
623 rk = "#{word2} #{word3}"
624 @chains_mutex.synchronize do
634 @chains[k] = [total, hash]
636 @rchains_mutex.synchronize do
641 t2, h2 = @rchains[rk]
647 @rchains[rk] = [total, hash]
# Learns one line of text by feeding consecutive word triplets to
# learn_triplet.
#
# message:: the text; it is stripped and split on whitespace
#
# Words matching any 'markov.ignore_patterns' entry are dropped, and lines
# with fewer than two remaining words are not learned.
652 def learn_line(message)
653 # debug "learning #{message.inspect}"
# a word is rejected when at least one pattern matches it; a Regexp is
# built from every pattern for every word
654 wordlist = message.strip.split(/\s+/).reject do |w|
655 @bot.config['markov.ignore_patterns'].map do |pat|
656 w =~ Regexp.new(pat.to_s)
657 end.select{|v| v}.size != 0
659 return unless wordlist.length >= 2
# the window starts as MARKER, MARKER so the first word is learned as a
# sentence start
660 word1, word2 = MARKER, MARKER
# NOTE(review): word3 is converted with to_sym for learn_triplet, which
# creates a Symbol for every learned word -- confirm this is acceptable
# for the target Ruby version.
662 wordlist.each do |word3|
663 learn_triplet(word1, word2, word3.to_sym)
664 word1, word2 = word2, word3
668 # TODO allow learning from URLs
#
# Command handler: learns from a text file, or previews what would be
# learned.
#
# m::      the message to reply to
# params:: :pattern (optional filter regexp source) and :lines (a count or
#          an "a..b" range for the preview) are read in the visible lines
#
# NOTE(review): how +path+ is derived, the error handling around the open
# and the test-mode condition are not visible in this excerpt.
669 def learn_from(m, params)
# NOTE(review): the file is opened without a block and no close is visible
# in this excerpt -- confirm the handle is released.
672 file = File.open(path, "r")
# an empty pattern means "no filtering"
673 pattern = params[:pattern].empty? ? nil : Regexp.new(params[:pattern].to_s)
675 m.reply _("no such file")
680 m.reply _("the file is empty!")
# preview range: "a..b" is used as given, a single number n means 1..n,
# and the fallback is 1..max('send.max_lines', 3)
686 range = case params[:lines]
688 Range.new(*params[:lines].split("..").map { |e| e.to_i })
690 Range.new(1, params[:lines].to_i)
692 Range.new(1, [@bot.config['send.max_lines'], 3].max)
# lines before range.begin are skipped; reading stops at range.end
696 next unless file.lineno >= range.begin
698 break if file.lineno == range.end
# NOTE(review): scan returns an Array, and Array#to_s yields the inspect
# form on Ruby >= 1.9 (a plain concatenation on 1.8) -- confirm the
# intended output.
701 lines = lines.map do |l|
702 pattern ? l.scan(pattern).to_s : l
703 end.reject { |e| e.empty? }
707 m.reply _("example matches for that pattern at lines %{range} include: %{lines}") % {
708 :lines => lines.map { |e| Underline+e+Underline }.join(", "),
712 m.reply _("the pattern doesn't match anything at lines %{range}") % {
717 m.reply _("learning from the file without a pattern would learn, for example: ")
718 lines.each { |l| m.reply l }
# real learning: every line (filtered through the pattern, or just
# chomped) is handed to learn
725 file.each { |l| learn(l.scan(pattern).to_s) }
727 file.each { |l| learn(l.chomp) }
# Replies with the number of forward chains, reverse chains and phrases
# still queued for learning.
# NOTE(review): the def line is not visible in this excerpt; the
# 'markov stats' route maps to an action named "stats", which is presumably
# this method.
734 m.reply "Markov status: chains: #{@chains.length} forward, #{@rchains.length} reverse, queued phrases: #{@learning_queue.size}"
# Instantiates the plugin and registers its command routes; each route's
# :action names the handler method.
739 plugin = MarkovPlugin.new
# delay: with a value, or bare
740 plugin.map 'markov delay :delay', :action => "set_delay"
741 plugin.map 'markov delay', :action => "set_delay"
# ignore / readonly: registered with two, one or no arguments, all handled
# by the same method
742 plugin.map 'markov ignore :action :option', :action => "ignore"
743 plugin.map 'markov ignore :action', :action => "ignore"
744 plugin.map 'markov ignore', :action => "ignore"
745 plugin.map 'markov readonly :action :option', :action => "readonly"
746 plugin.map 'markov readonly :action', :action => "readonly"
747 plugin.map 'markov readonly', :action => "readonly"
748 plugin.map 'markov enable', :action => "enable"
749 plugin.map 'markov disable', :action => "disable"
750 plugin.map 'markov status', :action => "status"
751 plugin.map 'markov stats', :action => "stats"
# 'chat about' takes one required and one optional seed word
752 plugin.map 'chat about :seed1 [:seed2]', :action => "chat"
753 plugin.map 'chat', :action => "rand_chat"
# the probability argument must be digits with an optional trailing '%'
754 plugin.map 'markov probability [:probability]', :action => "probability",
755 :requirements => {:probability => /^\d+%?$/}
# learn from: runs with :thread => true; :testing must be the literal word
# "testing" and :lines either a number or an "a..b" range
756 plugin.map 'markov learn from :file [:testing [:lines lines]] [using pattern *pattern]', :action => "learn_from", :thread => true,
758 :testing => /^testing$/,
759 :lines => /^(?:\d+\.\.\d+|\d+)$/ }
# default_auth is set to false for these three command groups (presumably
# denying them by default -- confirm against the bot's auth framework)
761 plugin.default_auth('ignore', false)
762 plugin.default_auth('probability', false)
763 plugin.default_auth('learn', false)