X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=data%2Frbot%2Fplugins%2Fmarkov.rb;h=abb10172d639db03f91e98386285c40bfdb6b5ef;hb=90656f4203a0a989b6fb110d4a07598dd186b84c;hp=e6d9d1a87c0bc5ef35650f5b1d8ffa8bdc691b0b;hpb=fa683e65dd0108da044074a66a5068f71a3fb904;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/data/rbot/plugins/markov.rb b/data/rbot/plugins/markov.rb old mode 100755 new mode 100644 index e6d9d1a8..abb10172 --- a/data/rbot/plugins/markov.rb +++ b/data/rbot/plugins/markov.rb @@ -27,18 +27,21 @@ class MarkovPlugin < Plugin :default => 50, :validate => Proc.new { |v| (0..100).include? v }, :desc => "Maximum number of words the bot should put in a sentence") - Config.register Config::IntegerValue.new('markov.learn_delay', + Config.register Config::FloatValue.new('markov.learn_delay', :default => 0.5, :validate => Proc.new { |v| v >= 0 }, :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.") Config.register Config::IntegerValue.new('markov.delay', - :default => true, + :default => 5, :validate => Proc.new { |v| v >= 0 }, :desc => "Wait short time before contributing to conversation.") Config.register Config::IntegerValue.new('markov.answer_addressed', :default => 50, :validate => Proc.new { |v| (0..100).include? v }, :desc => "Probability of answer when addressed by nick") + Config.register Config::ArrayValue.new('markov.ignore_patterns', + :default => [], + :desc => "Ignore these word patterns") MARKER = :"\r\n" @@ -150,7 +153,7 @@ class MarkovPlugin < Plugin next else # intern after clearing leftover end-of-actions if present - sym = w.chomp("\001").intern + sym = w.chomp("\001") end end hash[sym] += 1 @@ -180,6 +183,9 @@ class MarkovPlugin < Plugin @upgrade_queue.push nil @upgrade_thread = Thread.new do + @registry.recovery = Proc.new { |val| + return [val] + } logfile = File.open(@bot.path('markov-conversion.log'), 'a') logfile.puts "=== conversion thread started #{Time.now} ===" while k = @upgrade_queue.pop @@ -196,6 +202,7 @@ class MarkovPlugin < Plugin end logfile.puts "=== conversion thread stopped #{Time.now} ===" logfile.close + @registry.recovery = nil end @upgrade_thread.priority = -1 end @@ -221,7 +228,10 @@ class MarkovPlugin < Plugin @chains = @registry.sub_registry('v2') @chains.set_default([]) + @rchains = @registry.sub_registry('v2r') + @rchains.set_default([]) @chains_mutex = Mutex.new + @rchains_mutex = Mutex.new @upgrade_queue = Queue.new @upgrade_thread = nil @@ -247,21 +257,25 @@ class MarkovPlugin < Plugin end debug 'closing learning thread' + @learning_queue.clear @learning_queue.push nil @learning_thread.join debug 'learning thread closed' + @chains.close + @rchains.close + super end - # if passed a pair, pick a word from the registry using the pair as key. - # otherwise, pick a word from an given list - def pick_word(word1, word2=MARKER) - if word1.kind_of? Array - wordlist = word1 - else - k = "#{word1} #{word2}" - return MARKER unless @chains.key? k - wordlist = @chains[k] - end + # pick a word from the registry using the pair as key. + def pick_word(word1, word2=MARKER, chainz=@chains) + k = "#{word1} #{word2}" + return MARKER unless chainz.key? k + wordlist = chainz[k] + pick_word_from_list wordlist + end + + # pick a word from weighted hash + def pick_word_from_list(wordlist) total = wordlist.first hash = wordlist.last return MARKER if total == 0 @@ -281,16 +295,9 @@ class MarkovPlugin < Plugin def generate_string(word1, word2) # limit to max of markov.max_words words if word2 - output = "#{word1} #{word2}" - else - output = word1.to_s - end - - if @chains.key? output - wordlist = @chains[output] - wordlist.last.delete(MARKER) + output = [word1, word2] else - output.downcase! + output = word1 keys = [] @chains.each_key(output) do |key| if key.downcase.include? output @@ -299,35 +306,25 @@ class MarkovPlugin < Plugin break end end - if keys.empty? - keys = @chains.keys.select { |k| k.downcase.include? output } - end return nil if keys.empty? - while key = keys.delete_one - wordlist = @chains[key] - wordlist.last.delete(MARKER) - unless wordlist.empty? - output = key - # split using / / so that we can properly catch the marker - word1, word2 = output.split(/ /).map {|w| w.intern} - break - end + output = keys[rand(keys.size)].split(/ /) + end + output = output.split(/ /) unless output.is_a? Array + input = [word1, word2] + while output.length < @bot.config['markov.max_words'] and (output.first != MARKER or output.last != MARKER) do + if output.last != MARKER + output << pick_word(output[-2], output[-1]) + end + if output.first != MARKER + output.insert 0, pick_word(output[0], output[1], @rchains) end end - - word3 = pick_word(wordlist) - return nil if word3 == MARKER - - output << " #{word3}" - word1, word2 = word2, word3 - - (@bot.config['markov.max_words'] - 1).times do - word3 = pick_word(word1, word2) - break if word3 == MARKER - output << " #{word3}" - word1, word2 = word2, word3 + output.delete MARKER + if output == input + nil + else + output.join(" ") end - return output end def help(plugin, topic="") @@ -369,14 +366,21 @@ class MarkovPlugin < Plugin else "markov chat => try to say something intelligent" end + when "learn" + ["markov learn from [testing [ lines]] [using pattern ]:", + "learn from the text in the specified , optionally using the given to filter the text.", + "you can sample what would be learned by specifying 'testing lines'"].join(' ') else - "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, readonly, delay, status, probability, chat, chat about" + "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, readonly, delay, status, probability, chat, chat about, learn" end end - def clean_str(s) - str = s.dup - str.gsub!(/^\S+[:,;]/, "") + def clean_message(m) + str = m.plainmessage.dup + str =~ /^(\S+)([:,;])/ + if $1 and m.target.is_a? Irc::Channel and m.target.user_nicks.include? $1.downcase + str.gsub!(/^(\S+)([:,;])\s+/, "") + end str.gsub!(/\s{2,}/, ' ') # fix for two or more spaces return str.strip end @@ -387,7 +391,7 @@ class MarkovPlugin < Plugin def status(m,params) if @bot.config['markov.enabled'] - reply = _("markov is currently enabled, %{p}% chance of chipping in") % { :p => probability? } + reply = _("markov is currently enabled, %{p}%% chance of chipping in") % { :p => probability? } l = @learning_queue.length reply << (_(", %{l} messages in queue") % {:l => l}) if l > 0 l = @upgrade_queue.length @@ -490,7 +494,7 @@ class MarkovPlugin < Plugin @bot.config['markov.probability'] = params[:probability].to_i m.okay else - m.reply _("markov has a %{prob}% chance of chipping in") % { :prob => probability? } + m.reply _("markov has a %{prob}%% chance of chipping in") % { :prob => probability? } end end @@ -499,9 +503,9 @@ class MarkovPlugin < Plugin m.okay end - def should_talk + def should_talk(m) return false unless @bot.config['markov.enabled'] - prob = probability? + prob = m.address? ? @bot.config['markov.answer_addressed'] : probability? return true if prob > rand(100) return false end @@ -531,7 +535,7 @@ class MarkovPlugin < Plugin def reply_delay(m, line) m.replied = true if @bot.config['markov.delay'] > 0 - @bot.timer.add_once(@bot.config['markov.delay']) { + @bot.timer.add_once(1 + rand(@bot.config['markov.delay'])) { m.reply line, :nick => false, :to => :public } else @@ -540,21 +544,20 @@ class MarkovPlugin < Plugin end def random_markov(m, message) - return unless (should_talk or (m.address? and @bot.config['markov.answer_addressed'] > rand(100))) + return unless should_talk(m) - words = clean_str(message).split(/\s+/) + words = clean_message(m).split(/\s+/) if words.length < 2 line = generate_string words.first, nil - if line - return if message.index(line) == 0 + if line and message.index(line) != 0 reply_delay m, line return end else pairs = seq_pairs(words).sort_by { rand } pairs.each do |word1, word2| - line = generate_string(word1.intern, word2.intern) + line = generate_string(word1, word2) if line and message.index(line) != 0 reply_delay m, line return @@ -610,13 +613,14 @@ class MarkovPlugin < Plugin message = "#{m.sourcenick} #{message}" end - learn message random_markov(m, message) unless readonly? m or m.replied? + learn clean_message(m) end def learn_triplet(word1, word2, word3) k = "#{word1} #{word2}" + rk = "#{word2} #{word3}" @chains_mutex.synchronize do total = 0 hash = Hash.new(0) @@ -629,16 +633,34 @@ class MarkovPlugin < Plugin total += 1 @chains[k] = [total, hash] end + @rchains_mutex.synchronize do + # Reverse + total = 0 + hash = Hash.new(0) + if @rchains.key? rk + t2, h2 = @rchains[rk] + total += t2 + hash.update h2 + end + hash[word1] += 1 + total += 1 + @rchains[rk] = [total, hash] + end end + def learn_line(message) # debug "learning #{message.inspect}" - wordlist = clean_str(message).split(/\s+/).map { |w| w.intern } + wordlist = message.strip.split(/\s+/).reject do |w| + @bot.config['markov.ignore_patterns'].map do |pat| + w =~ Regexp.new(pat.to_s) + end.select{|v| v}.size != 0 + end return unless wordlist.length >= 2 word1, word2 = MARKER, MARKER wordlist << MARKER wordlist.each do |word3| - learn_triplet(word1, word2, word3) + learn_triplet(word1, word2, word3.to_sym) word1, word2 = word2, word3 end end @@ -707,6 +729,11 @@ class MarkovPlugin < Plugin m.okay end + + def stats(m, params) + m.reply "Markov status: chains: #{@chains.length} forward, #{@rchains.length} reverse, queued phrases: #{@learning_queue.size}" + end + end plugin = MarkovPlugin.new @@ -721,9 +748,11 @@ plugin.map 'markov readonly', :action => "readonly" plugin.map 'markov enable', :action => "enable" plugin.map 'markov disable', :action => "disable" plugin.map 'markov status', :action => "status" -plugin.map 'chat about :seed1 [:seed2]', :action => "chat" +plugin.map 'markov stats', :action => "stats" +plugin.map 'chat about :seed1 [:seed2]', :action => "chat", :defaults => {:seed2 => nil} plugin.map 'chat', :action => "rand_chat" plugin.map 'markov probability [:probability]', :action => "probability", + :defaults => {:probability => nil}, :requirements => {:probability => /^\d+%?$/} plugin.map 'markov learn from :file [:testing [:lines lines]] [using pattern *pattern]', :action => "learn_from", :thread => true, :requirements => {