X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Fmarkov.rb;h=1aa1a0b236d0e89610a7ffbfe090858b8cf0bb7f;hb=c3aec9254dd9e48c0fcc7bfd5432d6555b881071;hp=6c51af5179f696a35d8cf749e31432042c236db2;hpb=bd68a53c74b1e8e9877af68743ca382fadf26d9b;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/markov.rb b/data/rbot/plugins/markov.rb old mode 100644 new mode 100755 index 6c51af51..1aa1a0b2 --- a/data/rbot/plugins/markov.rb +++ b/data/rbot/plugins/markov.rb @@ -1,28 +1,353 @@ +#-- vim:sw=2:et +#++ +# +# :title: Markov plugin +# +# Author:: Tom Gilbert +# Copyright:: (C) 2005 Tom Gilbert +# +# Contribute to chat with random phrases built from word sequences learned +# by listening to chat + class MarkovPlugin < Plugin + Config.register Config::BooleanValue.new('markov.enabled', + :default => false, + :desc => "Enable and disable the plugin") + Config.register Config::IntegerValue.new('markov.probability', + :default => 25, + :validate => Proc.new { |v| (0..100).include? v }, + :desc => "Percentage chance of markov plugin chipping in") + Config.register Config::ArrayValue.new('markov.ignore', + :default => [], + :desc => "Hostmasks and channel names markov should NOT learn from (e.g. idiot*!*@*, #privchan).") + Config.register Config::IntegerValue.new('markov.max_words', + :default => 50, + :validate => Proc.new { |v| (0..100).include? v }, + :desc => "Maximum number of words the bot should put in a sentence") + Config.register Config::IntegerValue.new('markov.learn_delay', + :default => 0.5, + :validate => Proc.new { |v| v >= 0 }, + :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.") + + MARKER = :"\r\n" + + # upgrade a registry entry from 0.9.14 and earlier, converting the Arrays + # into Hashes of weights + def upgrade_entry(k, logfile) + logfile.puts "\t#{k.inspect}" + logfile.flush + logfile.fsync + + ar = @registry[k] + + # wipe the current key + @registry.delete(k) + + # discard empty keys + if ar.empty? + logfile.puts "\tEMPTY" + return + end + + # otherwise, proceed + logfile.puts "\t#{ar.inspect}" + + # re-encode key to UTF-8 and cleanup as needed + words = k.split.map do |w| + BasicUserMessage.strip_formatting( + @bot.socket.filter.in(w) + ).sub(/\001$/,'') + end + + # old import that failed to split properly? + if words.length == 1 and words.first.include? '/' + # split at the last / + unsplit = words.first + at = unsplit.rindex('/') + words = [unsplit[0,at], unsplit[at+1..-1]] + end + + # if any of the re-split/re-encoded words have spaces, + # or are empty, we would get a chain we can't convert, + # so drop it + if words.first.empty? or words.first.include?(' ') or + words.last.empty? or words.last.include?(' ') + logfile.puts "\tSKIPPED" + return + end + + # former unclean CTCP, we can't convert this + if words.first[0] == 1 + logfile.puts "\tSKIPPED" + return + end + + # nonword CTCP => SKIP + # someword CTCP => nonword someword + if words.last[0] == 1 + if words.first == "nonword" + logfile.puts "\tSKIPPED" + return + end + words.unshift MARKER + words.pop + end + + # intern the old keys + words.map! do |w| + ['nonword', MARKER].include?(w) ? MARKER : w.chomp("\001") + end + + newkey = words.join(' ') + logfile.puts "\t#{newkey.inspect}" + + # the new key exists already, so we want to merge + if k != newkey and @registry.key? newkey + ar2 = @registry[newkey] + logfile.puts "\tMERGE" + logfile.puts "\t\t#{ar2.inspect}" + ar.push(*ar2) + # and get rid of the key + @registry.delete(newkey) + end + + total = 0 + hash = Hash.new(0) + + @chains_mutex.synchronize do + if @chains.key? newkey + ar2 = @chains[newkey] + total += ar2.first + hash.update ar2.last + end + + ar.each do |word| + case word + when :nonword + # former marker + sym = MARKER + else + # we convert old words into UTF-8, cleanup, resplit if needed, + # and only get the first word. we may lose some data for old + # missplits, but this is the best we can do + w = BasicUserMessage.strip_formatting( + @bot.socket.filter.in(word).split.first + ) + case w + when /^\001\S+$/, "\001", "" + # former unclean CTCP or end of CTCP + next + else + # intern after clearing leftover end-of-actions if present + sym = w.chomp("\001").intern + end + end + hash[sym] += 1 + total += 1 + end + if hash.empty? + logfile.puts "\tSKIPPED" + return + end + logfile.puts "\t#{[total, hash].inspect}" + @chains[newkey] = [total, hash] + end + end + + def upgrade_registry + # we load all the keys and then iterate over this array because + # running each() on the registry and updating it at the same time + # doesn't work + keys = @registry.keys + # no registry, nothing to do + return if keys.empty? + + ki = 0 + log "starting markov database conversion thread (v1 to v2, #{keys.length} keys)" + + keys.each { |k| @upgrade_queue.push k } + @upgrade_queue.push nil + + @upgrade_thread = Thread.new do + logfile = File.open(@bot.path('markov-conversion.log'), 'a') + logfile.puts "=== conversion thread started #{Time.now} ===" + while k = @upgrade_queue.pop + ki += 1 + logfile.puts "Key #{ki} (#{@upgrade_queue.length} in queue):" + begin + upgrade_entry(k, logfile) + rescue Exception => e + logfile.puts "=== ERROR ===" + logfile.puts e.pretty_inspect + logfile.puts "=== EREND ===" + end + sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero? + end + logfile.puts "=== conversion thread stopped #{Time.now} ===" + logfile.close + end + @upgrade_thread.priority = -1 + end + + attr_accessor :chains + def initialize super @registry.set_default([]) - @lastline = false - end - - def generate_string(seedline) - # limit to max of 50 words - return unless seedline - word1, word2 = seedline.split(/\s+/) - output = word1 + " " + word2 - 50.times do - wordlist = @registry["#{word1}/#{word2}"] - break if wordlist.empty? - word3 = wordlist[rand(wordlist.length)] - break if word3 == :nonword - output = output + " " + word3 + if @registry.has_key?('enabled') + @bot.config['markov.enabled'] = @registry['enabled'] + @registry.delete('enabled') + end + if @registry.has_key?('probability') + @bot.config['markov.probability'] = @registry['probability'] + @registry.delete('probability') + end + if @bot.config['markov.ignore_users'] + debug "moving markov.ignore_users to markov.ignore" + @bot.config['markov.ignore'] = @bot.config['markov.ignore_users'].dup + @bot.config.delete('markov.ignore_users'.to_sym) + end + + @chains = @registry.sub_registry('v2') + @chains.set_default([]) + @chains_mutex = Mutex.new + + @upgrade_queue = Queue.new + @upgrade_thread = nil + upgrade_registry + + @learning_queue = Queue.new + @learning_thread = Thread.new do + while s = @learning_queue.pop + learn_line s + sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero? + end + end + @learning_thread.priority = -1 + end + + def cleanup + if @upgrade_thread and @upgrade_thread.alive? + debug 'closing conversion thread' + @upgrade_queue.clear + @upgrade_queue.push nil + @upgrade_thread.join + debug 'conversion thread closed' + end + + debug 'closing learning thread' + @learning_queue.push nil + @learning_thread.join + debug 'learning thread closed' + end + + # if passed a pair, pick a word from the registry using the pair as key. + # otherwise, pick a word from an given list + def pick_word(word1, word2=MARKER) + if word1.kind_of? Array + wordlist = word1 + else + k = "#{word1} #{word2}" + return MARKER unless @chains.key? k + wordlist = @chains[k] + end + total = wordlist.first + hash = wordlist.last + return MARKER if total == 0 + return hash.keys.first if hash.length == 1 + hit = rand(total) + ret = MARKER + hash.each do |k, w| + hit -= w + if hit < 0 + ret = k + break + end + end + return ret + end + + def generate_string(word1, word2) + # limit to max of markov.max_words words + if word2 + output = "#{word1} #{word2}" + else + output = word1.to_s + end + + if @chains.key? output + wordlist = @chains[output] + wordlist.last.delete(MARKER) + else + output.downcase! + keys = [] + @chains.each_key(output) do |key| + if key.downcase.include? output + keys << key + else + break + end + end + if keys.empty? + keys = @chains.keys.select { |k| k.downcase.include? output } + end + return nil if keys.empty? + while key = keys.delete_one + wordlist = @chains[key] + wordlist.last.delete(MARKER) + unless wordlist.empty? + output = key + # split using / / so that we can properly catch the marker + word1, word2 = output.split(/ /).map {|w| w.intern} + break + end + end + end + + word3 = pick_word(wordlist) + return nil if word3 == MARKER + + output << " #{word3}" + word1, word2 = word2, word3 + + (@bot.config['markov.max_words'] - 1).times do + word3 = pick_word(word1, word2) + break if word3 == MARKER + output << " #{word3}" word1, word2 = word2, word3 end return output end def help(plugin, topic="") - "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. usage: 'markov' to attempt to say something relevant to the last line of chat, if it can. other options to markov: 'ignore' => ignore a hostmask (accept no input), 'status' => show current status, 'probability' => set the % chance of rbot responding to input, 'chat' => try and say something intelligent, 'chat about ' => riff on a word pair (if possible)" + topic, subtopic = topic.split + + case topic + when "ignore" + case subtopic + when "add" + "markov ignore add => ignore a hostmask or a channel" + when "list" + "markov ignore list => show ignored hostmasks and channels" + when "remove" + "markov ignore remove => unignore a hostmask or channel" + else + "ignore hostmasks or channels -- topics: add, remove, list" + end + when "status" + "markov status => show if markov is enabled, probability and amount of messages in queue for learning" + when "probability" + "markov probability [] => set the % chance of rbot responding to input, or display the current probability" + when "chat" + case subtopic + when "about" + "markov chat about [] => talk about or riff on a word pair (if possible)" + else + "markov chat => try to say something intelligent" + end + else + "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, status, probability, chat, chat about" + end end def clean_str(s) @@ -33,139 +358,246 @@ class MarkovPlugin < Plugin end def probability? - prob = @registry['probability'] - prob = 25 if prob.kind_of? Array; - prob = 0 if prob < 0 - prob = 100 if prob > 100 - return prob + return @bot.config['markov.probability'] end def status(m,params) - enabled = @registry['enabled'] - if (enabled) - m.reply "markov is currently enabled, #{probability?}% chance of chipping in" + if @bot.config['markov.enabled'] + reply = _("markov is currently enabled, %{p}% chance of chipping in") % { :p => probability? } + l = @learning_queue.length + reply << (_(", %{l} messages in queue") % {:l => l}) if l > 0 + l = @upgrade_queue.length + reply << (_(", %{l} chains to upgrade") % {:l => l}) if l > 0 else - m.reply "markov is currently disabled" + reply = _("markov is currently disabled") end + m.reply reply end - def ignore?(user=nil) - return @registry['ignore_users'].include?(user) + def ignore?(m=nil) + return false unless m + return true if m.address? or m.private? + @bot.config['markov.ignore'].each do |mask| + return true if m.channel.downcase == mask.downcase + return true if m.source.matches?(mask) + end + return false end def ignore(m, params) - if @registry['ignore_users'].nil? - @registry['ignore_users'] = [] - end action = params[:action] user = params[:option] case action when 'remove': - if @registry['ignore_users'].include? user - s = @registry['ignore_users'] + if @bot.config['markov.ignore'].include? user + s = @bot.config['markov.ignore'] s.delete user - @registry['ignore_users'] = s - m.reply "#{user} removed" + @bot.config['ignore'] = s + m.reply _("%{u} removed") % { :u => user } else - m.reply "not found in list" + m.reply _("not found in list") end when 'add': if user - if @registry['ignore_users'].include?(user) - m.reply "#{user} already in list" + if @bot.config['markov.ignore'].include?(user) + m.reply _("%{u} already in list") % { :u => user } else - @registry['ignore_users'] = @registry['ignore_users'].push user - m.reply "#{user} added to markov ignore list" + @bot.config['markov.ignore'] = @bot.config['markov.ignore'].push user + m.reply _("%{u} added to markov ignore list") % { :u => user } end else - m.reply "give the name of a person to ignore" + m.reply _("give the name of a person or channel to ignore") end when 'list': - m.reply "I'm ignoring #{@registry['ignore_users'].join(", ")}" + m.reply _("I'm ignoring %{ignored}") % { :ignored => @bot.config['markov.ignore'].join(", ") } else - m.reply "have markov ignore the input from a hostmask. usage: markov ignore add ; markov ignore remove ; markov ignore list" + m.reply _("have markov ignore the input from a hostmask or a channel. usage: markov ignore add ; markov ignore remove ; markov ignore list") end end def enable(m, params) - @registry['enabled'] = true + @bot.config['markov.enabled'] = true m.okay end def probability(m, params) - @registry['probability'] = params[:probability].to_i - m.okay + if params[:probability] + @bot.config['markov.probability'] = params[:probability].to_i + m.okay + else + m.reply _("markov has a %{prob}% chance of chipping in") % { :prob => probability? } + end end def disable(m, params) - @registry['enabled'] = false + @bot.config['markov.enabled'] = false m.okay end def should_talk - return false unless @registry['enabled'] + return false unless @bot.config['markov.enabled'] prob = probability? return true if prob > rand(100) return false end + def delay + 1 + rand(5) + end + def random_markov(m, message) return unless should_talk - line = generate_string(message) + + word1, word2 = clean_str(message).split(/\s+/) + return unless word1 and word2 + line = generate_string(word1.intern, word2.intern) return unless line - m.reply line unless line == message + # we do nothing if the line we return is just an initial substring + # of the line we received + return if message.index(line) == 0 + @bot.timer.add_once(delay) { + m.reply line, :nick => false, :to => :public + } end def chat(m, params) - seed = "#{params[:seed1]} #{params[:seed2]}" - line = generate_string seed - if line != seed - m.reply line + line = generate_string(params[:seed1], params[:seed2]) + if line and line != [params[:seed1], params[:seed2]].compact.join(" ") + m.reply line else - m.reply "I can't :(" + m.reply _("I can't :(") end end def rand_chat(m, params) # pick a random pair from the db and go from there - word1, word2 = :nonword, :nonword + word1, word2 = MARKER, MARKER output = Array.new - 50.times do - wordlist = @registry["#{word1}/#{word2}"] - break if wordlist.empty? - word3 = wordlist[rand(wordlist.length)] - break if word3 == :nonword + @bot.config['markov.max_words'].times do + word3 = pick_word(word1, word2) + break if word3 == MARKER output << word3 word1, word2 = word2, word3 end if output.length > 1 m.reply output.join(" ") else - m.reply "I can't :(" + m.reply _("I can't :(") end end - - def listen(m) - return unless m.kind_of?(PrivMessage) && m.public? - return if m.address? - return if ignore? m.source + + def learn(*lines) + lines.each { |l| @learning_queue.push l } + end + + def unreplied(m) + return if ignore? m # in channel message, the kind we are interested in - message = clean_str m.message - - wordlist = message.split(/\s+/) - return unless wordlist.length > 2 - @lastline = message - word1, word2 = :nonword, :nonword + message = m.plainmessage + + if m.action? + message = "#{m.sourcenick} #{message}" + end + + learn message + random_markov(m, message) unless m.replied? + end + + def learn_triplet(word1, word2, word3) + k = "#{word1} #{word2}" + @chains_mutex.synchronize do + total = 0 + hash = Hash.new(0) + if @chains.key? k + t2, h2 = @chains[k] + total += t2 + hash.update h2 + end + hash[word3] += 1 + total += 1 + @chains[k] = [total, hash] + end + end + + def learn_line(message) + # debug "learning #{message.inspect}" + wordlist = clean_str(message).split(/\s+/).map { |w| w.intern } + return unless wordlist.length >= 2 + word1, word2 = MARKER, MARKER + wordlist << MARKER wordlist.each do |word3| - @registry["#{word1}/#{word2}"] = @registry["#{word1}/#{word2}"].push(word3) + learn_triplet(word1, word2, word3) word1, word2 = word2, word3 end - @registry["#{word1}/#{word2}"] = [:nonword] + end + + # TODO allow learning from URLs + def learn_from(m, params) + begin + path = params[:file] + file = File.open(path, "r") + pattern = params[:pattern].empty? ? nil : Regexp.new(params[:pattern].to_s) + rescue Errno::ENOENT + m.reply _("no such file") + return + end + + if file.eof? + m.reply _("the file is empty!") + return + end + + if params[:testing] + lines = [] + range = case params[:lines] + when /^\d+\.\.\d+$/ + Range.new(*params[:lines].split("..").map { |e| e.to_i }) + when /^\d+$/ + Range.new(1, params[:lines].to_i) + else + Range.new(1, [@bot.config['send.max_lines'], 3].max) + end + + file.each do |line| + next unless file.lineno >= range.begin + lines << line.chomp + break if file.lineno == range.end + end + + lines = lines.map do |l| + pattern ? l.scan(pattern).to_s : l + end.reject { |e| e.empty? } + + if pattern + unless lines.empty? + m.reply _("example matches for that pattern at lines %{range} include: %{lines}") % { + :lines => lines.map { |e| Underline+e+Underline }.join(", "), + :range => range.to_s + } + else + m.reply _("the pattern doesn't match anything at lines %{range}") % { + :range => range.to_s + } + end + else + m.reply _("learning from the file without a pattern would learn, for example: ") + lines.each { |l| m.reply l } + end + + return + end + + if pattern + file.each { |l| learn(l.scan(pattern).to_s) } + else + file.each { |l| learn(l.chomp) } + end - random_markov(m, message) + m.okay end end + plugin = MarkovPlugin.new plugin.map 'markov ignore :action :option', :action => "ignore" plugin.map 'markov ignore :action', :action => "ignore" @@ -173,7 +605,16 @@ plugin.map 'markov ignore', :action => "ignore" plugin.map 'markov enable', :action => "enable" plugin.map 'markov disable', :action => "disable" plugin.map 'markov status', :action => "status" -plugin.map 'chat about :seed1 :seed2', :action => "chat" +plugin.map 'chat about :seed1 [:seed2]', :action => "chat" plugin.map 'chat', :action => "rand_chat" -plugin.map 'markov probability :probability', :action => "probability", - :requirements => {:probability => /^\d+$/} +plugin.map 'markov probability [:probability]', :action => "probability", + :requirements => {:probability => /^\d+%?$/} +plugin.map 'markov learn from :file [:testing [:lines lines]] [using pattern *pattern]', :action => "learn_from", :thread => true, + :requirements => { + :testing => /^testing$/, + :lines => /^(?:\d+\.\.\d+|\d+)$/ } + +plugin.default_auth('ignore', false) +plugin.default_auth('probability', false) +plugin.default_auth('learn', false) +