# Validators shared by the settings registered below.
# Accepts values inside the inclusive range 0..100.
zero_to_hundred = Proc.new { |v| (0..100).include?(v) }
# Accepts zero and anything above it.
non_negative = Proc.new { |v| v >= 0 }

# Master switch.
Config.register(
  Config::BooleanValue.new(
    'markov.enabled',
    :default => false,
    :desc => "Enable and disable the plugin"
  )
)

# How often the plugin speaks up unprompted (percentage).
Config.register(
  Config::IntegerValue.new(
    'markov.probability',
    :default => 25,
    :validate => zero_to_hundred,
    :desc => "Percentage chance of markov plugin chipping in"
  )
)

# Sources that must not be learned from.
Config.register(
  Config::ArrayValue.new(
    'markov.ignore',
    :default => [],
    :desc => "Hostmasks and channel names markov should NOT learn from (e.g. idiot*!*@*, #privchan)."
  )
)

# Targets that must not be talked to.
Config.register(
  Config::ArrayValue.new(
    'markov.readonly',
    :default => [],
    :desc => "Hostmasks and channel names markov should NOT talk to (e.g. idiot*!*@*, #privchan)."
  )
)

# Upper bound on generated sentence length.
Config.register(
  Config::IntegerValue.new(
    'markov.max_words',
    :default => 50,
    :validate => zero_to_hundred,
    :desc => "Maximum number of words the bot should put in a sentence"
  )
)

# Pause between learned lines (seconds, fractional values allowed).
Config.register(
  Config::FloatValue.new(
    'markov.learn_delay',
    :default => 0.5,
    :validate => non_negative,
    :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster."
  )
)

# Pause before the bot joins the conversation.
Config.register(
  Config::IntegerValue.new(
    'markov.delay',
    :default => 5,
    :validate => non_negative,
    :desc => "Wait short time before contributing to conversation."
  )
)

# Chance of replying when the bot is addressed by nick (percentage).
Config.register(
  Config::IntegerValue.new(
    'markov.answer_addressed',
    :default => 50,
    :validate => zero_to_hundred,
    :desc => "Probability of answer when addressed by nick"
  )
)

# Word patterns that are left out.
Config.register(
  Config::ArrayValue.new(
    'markov.ignore_patterns',
    :default => [],
    :desc => "Ignore these word patterns")
)

# Symbol the upgrade code substitutes for the old 'nonword' / :nonword
# entries when converting keys and chain members.
MARKER = :"\r\n"
+
# Upgrade a registry entry from 0.9.14 and earlier, converting the Array
# of words stored under key +k+ into a [total, Hash of weights] pair that
# is stored in @chains under the cleaned-up key.
#
# k::       key of the old-format entry in @registry
# logfile:: log receiving a trace of every step; must respond to puts,
#           flush and fsync
#
# The old key is removed from @registry whether or not the entry can be
# converted. Entries that cannot be converted are logged as EMPTY or
# SKIPPED and leave @chains untouched.
def upgrade_entry(k, logfile)
  logfile.puts "\t#{k.inspect}"
  logfile.flush
  logfile.fsync

  ar = @registry[k]

  # wipe the current key
  @registry.delete(k)

  # discard empty keys; the entry may also be gone altogether when an
  # earlier merge (see below) already consumed and deleted it
  if ar.nil? || ar.empty?
    logfile.puts "\tEMPTY"
    return
  end

  # otherwise, proceed
  logfile.puts "\t#{ar.inspect}"

  # re-encode key to UTF-8 and cleanup as needed
  words = k.split.map do |w|
    BasicUserMessage.strip_formatting(
      @bot.socket.filter.in(w)
    ).sub(/\001$/, '')
  end

  # old import that failed to split properly?
  if words.length == 1 && words.first.include?('/')
    # split at the last /
    unsplit = words.first
    at = unsplit.rindex('/')
    words = [unsplit[0, at], unsplit[at + 1..-1]]
  end

  # if the key held no words at all, or any of the re-split/re-encoded
  # words have spaces or are empty, we would get a chain we can't convert,
  # so drop it
  if words.empty? ||
     words.first.empty? || words.first.include?(' ') ||
     words.last.empty? || words.last.include?(' ')
    logfile.puts "\tSKIPPED"
    return
  end

  # former unclean CTCP, we can't convert this.
  # str[0, 1] yields a one-character String on every Ruby version, unlike
  # str[0], which is an Integer on 1.8 and a String afterwards.
  if words.first[0, 1] == "\001"
    logfile.puts "\tSKIPPED"
    return
  end

  # nonword CTCP => SKIP
  # someword CTCP => nonword someword
  if words.last[0, 1] == "\001"
    if words.first == "nonword"
      logfile.puts "\tSKIPPED"
      return
    end
    words.unshift MARKER
    words.pop
  end

  # intern the old keys
  words.map! do |w|
    ['nonword', MARKER].include?(w) ? MARKER : w.chomp("\001")
  end

  newkey = words.join(' ')
  logfile.puts "\t#{newkey.inspect}"

  # the new key exists already, so we want to merge
  if k != newkey && @registry.key?(newkey)
    ar2 = @registry[newkey]
    logfile.puts "\tMERGE"
    logfile.puts "\t\t#{ar2.inspect}"
    ar.push(*ar2)
    # and get rid of the key
    @registry.delete(newkey)
  end

  total = 0
  hash = Hash.new(0)

  @chains_mutex.synchronize do
    # start from whatever has already been converted under the new key
    if @chains.key?(newkey)
      ar2 = @chains[newkey]
      total += ar2.first
      hash.update ar2.last
    end

    ar.each do |word|
      case word
      when :nonword
        # former marker
        sym = MARKER
      else
        # we convert old words into UTF-8, cleanup, resplit if needed,
        # and only get the first word. we may lose some data for old
        # missplits, but this is the best we can do
        first_word = @bot.socket.filter.in(word).split.first
        # a blank word splits into nothing: skip it instead of letting
        # the whole chain fail on a nil
        next if first_word.nil?
        w = BasicUserMessage.strip_formatting(first_word)
        case w
        when /^\001\S+$/, "\001", ""
          # former unclean CTCP or end of CTCP
          next
        else
          # intern after clearing leftover end-of-actions if present
          sym = w.chomp("\001")
        end
      end
      hash[sym] += 1
      total += 1
    end

    if hash.empty?
      logfile.puts "\tSKIPPED"
      return
    end

    logfile.puts "\t#{[total, hash].inspect}"
    @chains[newkey] = [total, hash]
  end
end
+
# Queue every key currently in @registry for conversion (v1 to v2) and
# start a low-priority background thread, stored in @upgrade_thread, that
# converts them one at a time through upgrade_entry.
#
# Progress and per-key errors are appended to 'markov-conversion.log'
# under the bot's path. Returns nil without starting a thread when the
# registry holds no keys.
def upgrade_registry
  # we load all the keys and then iterate over this array because
  # running each() on the registry and updating it at the same time
  # doesn't work
  keys = @registry.keys
  # no registry, nothing to do
  return if keys.empty?

  log "starting markov database conversion thread (v1 to v2, #{keys.length} keys)"

  keys.each { |k| @upgrade_queue.push k }
  # nil marks the end of the queue and stops the loop below
  @upgrade_queue.push nil

  @upgrade_thread = Thread.new do
    # block form: the log is closed even when the thread dies early
    File.open(@bot.path('markov-conversion.log'), 'a') do |logfile|
      logfile.puts "=== conversion thread started #{Time.now} ==="
      ki = 0
      while (k = @upgrade_queue.pop)
        ki += 1
        logfile.puts "Key #{ki} (#{@upgrade_queue.length} in queue):"
        begin
          upgrade_entry(k, logfile)
        rescue StandardError => e
          # a key that fails to convert is logged and the next one is tried
          logfile.puts "=== ERROR ==="
          logfile.puts e.pretty_inspect
          logfile.puts "=== EREND ==="
        end
        # read the setting once per key so check and sleep use the same value
        delay = @bot.config['markov.learn_delay']
        sleep delay unless delay.zero?
      end
      logfile.puts "=== conversion thread stopped #{Time.now} ==="
    end
  end
  @upgrade_thread.priority = -1
end
+
# Reader and writer for @chains.
attr_accessor(:chains)
+