:default => 50,
:validate => Proc.new { |v| (0..100).include? v },
:desc => "Maximum number of words the bot should put in a sentence")
- Config.register Config::IntegerValue.new('markov.learn_delay',
+ Config.register Config::FloatValue.new('markov.learn_delay',
:default => 0.5,
:validate => Proc.new { |v| v >= 0 },
:desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.")
:default => 50,
:validate => Proc.new { |v| (0..100).include? v },
:desc => "Probability of answer when addressed by nick")
+ Config.register Config::ArrayValue.new('markov.ignore_patterns',
+ :default => [],
+ :desc => "Ignore these word patterns")
MARKER = :"\r\n"
@chains = @registry.sub_registry('v2')
@chains.set_default([])
+ @rchains = @registry.sub_registry('v2r')
+ @rchains.set_default([])
@chains_mutex = Mutex.new
+ @rchains_mutex = Mutex.new
@upgrade_queue = Queue.new
@upgrade_thread = nil
# if passed a pair, pick a word from the registry using the pair as key.
# otherwise, pick a word from a given list
- def pick_word(word1, word2=MARKER)
+ def pick_word(word1, word2=MARKER, chainz=@chains)
if word1.kind_of? Array
wordlist = word1
else
k = "#{word1} #{word2}"
- return MARKER unless @chains.key? k
- wordlist = @chains[k]
+ return MARKER unless chainz.key? k
+ wordlist = chainz[k]
end
total = wordlist.first
hash = wordlist.last
# Builds a sentence around the given seed word(s) by growing it in both
# directions: forwards through @chains and backwards through @rchains.
#
# word1:: first seed word; when +word2+ is nil it is treated as a search
#         term that is looked up (case-insensitively) inside the chain keys
# word2:: second seed word, or nil for a single-word lookup
#
# Returns the generated sentence as a String, or nil when there is no seed,
# no chain key matches the single word, or nothing could be added to the
# seed pair.
def generate_string(word1, word2)
  return nil unless word1

  if word2
    output = [word1, word2]
  else
    # Keys are compared through key.downcase, so the search term has to be
    # lower case as well or a capitalized word could never match.
    needle = word1.to_s.downcase
    keys = []
    # NOTE(review): each_key presumably starts iterating at the given key
    # and yields keys in order, which is why the scan stops at the first
    # non-matching key -- confirm against the registry implementation.
    @chains.each_key(needle) do |key|
      break unless key.downcase.include? needle
      keys << key
    end
    return nil if keys.empty?
    # Split on a single space and intern each part so that a marker stored
    # inside the key compares equal to MARKER (a Symbol) below.
    output = keys[rand(keys.size)].split(/ /).map { |w| w.intern }
  end

  input = [word1, word2]
  max_words = @bot.config['markov.max_words']

  # Grow until both ends hit a MARKER or the word limit is reached.
  while output.length < max_words && (output.first != MARKER || output.last != MARKER)
    output << pick_word(output[-2], output[-1]) if output.last != MARKER
    output.unshift(pick_word(output[0], output[1], @rchains)) if output.first != MARKER
  end

  output.delete MARKER
  # A result identical to the seed pair adds nothing worth saying.
  output == input ? nil : output.join(" ")
end
def help(plugin, topic="")
return false
end
# Returns every pair of adjacent elements of +arr+, in order.
#
#   seq_pairs([1, 2, 3, 4]) # => [[1, 2], [2, 3], [3, 4]]
#
# An array with fewer than two elements yields an empty list.
def seq_pairs(arr)
  arr.each_cons(2).to_a
end
+
def set_delay(m, params)
if params[:delay] == "off"
@bot.config["markov.delay"] = 0
# Possibly replies to +message+ with a generated sentence.
#
# m::       the incoming message object; queried with address? and handed
#           to reply_delay
# message:: the text of the message, as a String
#
# Nothing is said unless should_talk is true, or the bot was addressed and
# the 'markov.answer_addressed' setting beats a random roll out of 100.
# Seeds are tried in this order until one produces a usable line: every
# adjacent word pair (shuffled), then every single word (shuffled). A line
# that the received message itself starts with is rejected, so the bot does
# not simply echo the speaker.
def random_markov(m, message)
  return unless should_talk || (m.address? && @bot.config['markov.answer_addressed'] > rand(100))

  words = clean_str(message).split(/\s+/)
  return if words.empty?

  seeds = []
  if words.length >= 2
    # Pairs are interned; single words are passed through as plain strings
    # with a nil second word.
    seeds.concat(seq_pairs(words).sort_by { rand }.map { |w1, w2| [w1.intern, w2.intern] })
    seeds.concat(words.sort_by { rand }.map { |word| [word, nil] })
  else
    seeds << [words.first, nil]
  end

  seeds.each do |word1, word2|
    line = generate_string(word1, word2)
    next unless line && message.index(line) != 0
    reply_delay m, line
    return
  end
end
def chat(m, params)
def learn_triplet(word1, word2, word3)
k = "#{word1} #{word2}"
+ rk = "#{word2} #{word3}"
@chains_mutex.synchronize do
total = 0
hash = Hash.new(0)
total += 1
@chains[k] = [total, hash]
end
+ @rchains_mutex.synchronize do
+ # Reverse
+ total = 0
+ hash = Hash.new(0)
+ if @rchains.key? rk
+ t2, h2 = @rchains[rk]
+ total += t2
+ hash.update h2
+ end
+ hash[word1] += 1
+ total += 1
+ @rchains[rk] = [total, hash]
+ end
end
+
def learn_line(message)
# debug "learning #{message.inspect}"
- wordlist = clean_str(message).split(/\s+/).map { |w| w.intern }
+ wordlist = clean_str(message).split(/\s+/).reject do |w|
+ @bot.config['markov.ignore_patterns'].map do |pat|
+ w =~ Regexp.new(pat.to_s)
+ end.select{|v| v}.size != 0
+ end.map { |w| w.intern }
return unless wordlist.length >= 2
word1, word2 = MARKER, MARKER
wordlist << MARKER