:default => 25,
:validate => Proc.new { |v| (0..100).include? v },
:desc => "Percentage chance of markov plugin chipping in")
- Config.register Config::ArrayValue.new('markov.ignore_users',
+ Config.register Config::ArrayValue.new('markov.ignore',
:default => [],
- :desc => "Hostmasks of users to be ignored")
+ :desc => "Hostmasks and channel names markov should NOT learn from (e.g. idiot*!*@*, #privchan).")
+ Config.register Config::IntegerValue.new('markov.max_words',
+ :default => 50,
+ :validate => Proc.new { |v| (0..100).include? v },
+ :desc => "Maximum number of words the bot should put in a sentence")
+ # learn_delay is handed directly to sleep() by the learning and conversion
+ # threads and defaults to a fractional value, so it is registered as a
+ # float rather than an integer setting.
+ Config.register Config::FloatValue.new('markov.learn_delay',
+ :default => 0.5,
+ :validate => Proc.new { |v| v >= 0 },
+ :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.")
+
+ MARKER = :"\r\n"
+
+ # upgrade a registry entry from 0.9.14 and earlier, converting the Arrays
+ # into Hashes of weights
+ def upgrade_entry(k, logfile)
+ logfile.puts "\t#{k.inspect}"
+ logfile.flush
+ logfile.fsync
+
+ ar = @registry[k]
+
+ # wipe the current key
+ @registry.delete(k)
+
+ # discard empty keys
+ if ar.empty?
+ logfile.puts "\tEMPTY"
+ return
+ end
+
+ # otherwise, proceed
+ logfile.puts "\t#{ar.inspect}"
+
+ # re-encode key to UTF-8 and cleanup as needed
+ words = k.split.map do |w|
+ BasicUserMessage.strip_formatting(
+ @bot.socket.filter.in(w)
+ ).sub(/\001$/,'')
+ end
+
+ # old import that failed to split properly?
+ if words.length == 1 and words.first.include? '/'
+ # split at the last /
+ unsplit = words.first
+ at = unsplit.rindex('/')
+ words = [unsplit[0,at], unsplit[at+1..-1]]
+ end
+
+ # if any of the re-split/re-encoded words have spaces,
+ # or are empty, we would get a chain we can't convert,
+ # so drop it
+ if words.first.empty? or words.first.include?(' ') or
+ words.last.empty? or words.last.include?(' ')
+ logfile.puts "\tSKIPPED"
+ return
+ end
+
+ # former unclean CTCP, we can't convert this
+ if words.first[0] == 1
+ logfile.puts "\tSKIPPED"
+ return
+ end
+
+ # nonword CTCP => SKIP
+ # someword CTCP => nonword someword
+ if words.last[0] == 1
+ if words.first == "nonword"
+ logfile.puts "\tSKIPPED"
+ return
+ end
+ words.unshift MARKER
+ words.pop
+ end
+
+ # intern the old keys
+ words.map! do |w|
+ ['nonword', MARKER].include?(w) ? MARKER : w.chomp("\001")
+ end
+
+ newkey = words.join(' ')
+ logfile.puts "\t#{newkey.inspect}"
+
+ # the new key exists already, so we want to merge
+ if k != newkey and @registry.key? newkey
+ ar2 = @registry[newkey]
+ logfile.puts "\tMERGE"
+ logfile.puts "\t\t#{ar2.inspect}"
+ ar.push(*ar2)
+ # and get rid of the key
+ @registry.delete(newkey)
+ end
+
+ total = 0
+ hash = Hash.new(0)
+
+ @chains_mutex.synchronize do
+ if @chains.key? newkey
+ ar2 = @chains[newkey]
+ total += ar2.first
+ hash.update ar2.last
+ end
+
+ ar.each do |word|
+ case word
+ when :nonword
+ # former marker
+ sym = MARKER
+ else
+ # we convert old words into UTF-8, cleanup, resplit if needed,
+ # and only get the first word. we may lose some data for old
+ # missplits, but this is the best we can do
+ w = BasicUserMessage.strip_formatting(
+ @bot.socket.filter.in(word).split.first
+ )
+ case w
+ when /^\001\S+$/, "\001", ""
+ # former unclean CTCP or end of CTCP
+ next
+ else
+ # intern after clearing leftover end-of-actions if present
+ sym = w.chomp("\001").intern
+ end
+ end
+ hash[sym] += 1
+ total += 1
+ end
+ if hash.empty?
+ logfile.puts "\tSKIPPED"
+ return
+ end
+ logfile.puts "\t#{[total, hash].inspect}"
+ @chains[newkey] = [total, hash]
+ end
+ end
+
+ def upgrade_registry
+ # we load all the keys and then iterate over this array because
+ # running each() on the registry and updating it at the same time
+ # doesn't work
+ keys = @registry.keys
+ # no registry, nothing to do
+ return if keys.empty?
+
+ ki = 0
+ log "starting markov database conversion thread (v1 to v2, #{keys.length} keys)"
+
+ keys.each { |k| @upgrade_queue.push k }
+ @upgrade_queue.push nil
+
+ @upgrade_thread = Thread.new do
+ logfile = File.open(@bot.path('markov-conversion.log'), 'a')
+ logfile.puts "=== conversion thread started #{Time.now} ==="
+ while k = @upgrade_queue.pop
+ ki += 1
+ logfile.puts "Key #{ki} (#{@upgrade_queue.length} in queue):"
+ begin
+ upgrade_entry(k, logfile)
+ rescue Exception => e
+ logfile.puts "=== ERROR ==="
+ logfile.puts e.pretty_inspect
+ logfile.puts "=== EREND ==="
+ end
+ sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
+ end
+ logfile.puts "=== conversion thread stopped #{Time.now} ==="
+ logfile.close
+ end
+ @upgrade_thread.priority = -1
+ end
+
+ attr_accessor :chains
def initialize
super
@bot.config['markov.probability'] = @registry['probability']
@registry.delete('probability')
end
+ if @bot.config['markov.ignore_users']
+ debug "moving markov.ignore_users to markov.ignore"
+ @bot.config['markov.ignore'] = @bot.config['markov.ignore_users'].dup
+ @bot.config.delete('markov.ignore_users'.to_sym)
+ end
+
+ @chains = @registry.sub_registry('v2')
+ @chains.set_default([])
+ @chains_mutex = Mutex.new
+
+ @upgrade_queue = Queue.new
+ @upgrade_thread = nil
+ upgrade_registry
+
@learning_queue = Queue.new
@learning_thread = Thread.new do
while s = @learning_queue.pop
- learn s
- sleep 0.5
+ learn_line s
+ sleep @bot.config['markov.learn_delay'] unless @bot.config['markov.learn_delay'].zero?
end
end
@learning_thread.priority = -1
end
def cleanup
+ if @upgrade_thread and @upgrade_thread.alive?
+ debug 'closing conversion thread'
+ @upgrade_queue.clear
+ @upgrade_queue.push nil
+ @upgrade_thread.join
+ debug 'conversion thread closed'
+ end
+
debug 'closing learning thread'
@learning_queue.push nil
@learning_thread.join
debug 'learning thread closed'
end
+ # if passed a pair, pick a word from the registry using the pair as key.
+ # otherwise, pick a word from a given list
+ def pick_word(word1, word2=MARKER)
+ if word1.kind_of? Array
+ wordlist = word1
+ else
+ k = "#{word1} #{word2}"
+ return MARKER unless @chains.key? k
+ wordlist = @chains[k]
+ end
+ total = wordlist.first
+ hash = wordlist.last
+ return MARKER if total == 0
+ return hash.keys.first if hash.length == 1
+ hit = rand(total)
+ ret = MARKER
+ hash.each do |k, w|
+ hit -= w
+ if hit < 0
+ ret = k
+ break
+ end
+ end
+ return ret
+ end
+
def generate_string(word1, word2)
- # limit to max of 50 words
- output = word1 + " " + word2
-
- # try to avoid :nonword in the first iteration
- wordlist = @registry["#{word1} #{word2}"]
- wordlist.delete(:nonword)
- if not wordlist.empty?
- word3 = wordlist[rand(wordlist.length)]
- output = output + " " + word3
- word1, word2 = word2, word3
+ # limit to max of markov.max_words words
+ if word2
+ output = "#{word1} #{word2}"
+ else
+ output = word1.to_s
+ end
+
+ if @chains.key? output
+ wordlist = @chains[output]
+ wordlist.last.delete(MARKER)
+ else
+ output.downcase!
+ keys = []
+ @chains.each_key(output) do |key|
+ if key.downcase.include? output
+ keys << key
+ else
+ break
+ end
+ end
+ if keys.empty?
+ keys = @chains.keys.select { |k| k.downcase.include? output }
+ end
+ return nil if keys.empty?
+ while key = keys.delete_one
+ wordlist = @chains[key]
+ wordlist.last.delete(MARKER)
+ # wordlist is a [total, hash] pair, so the Array itself is never empty;
+ # what matters is whether any candidate word is left in the hash after
+ # the MARKER has been removed
+ unless wordlist.last.empty?
+ output = key
+ # split using / / so that we can properly catch the marker
+ word1, word2 = output.split(/ /).map {|w| w.intern}
+ break
+ end
+ end
end
- 49.times do
- wordlist = @registry["#{word1} #{word2}"]
- break if wordlist.empty?
- word3 = wordlist[rand(wordlist.length)]
- break if word3 == :nonword
- output = output + " " + word3
+ word3 = pick_word(wordlist)
+ return nil if word3 == MARKER
+
+ output << " #{word3}"
+ word1, word2 = word2, word3
+
+ (@bot.config['markov.max_words'] - 1).times do
+ word3 = pick_word(word1, word2)
+ break if word3 == MARKER
+ output << " #{word3}"
word1, word2 = word2, word3
end
return output
end
def help(plugin, topic="")
- "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. usage: 'markov' to attempt to say something relevant to the last line of chat, if it can. other options to markov: 'ignore' => ignore a hostmask (accept no input), 'status' => show current status, 'probability [<chance>]' => set the % chance of rbot responding to input, or display the current probability, 'chat' => try and say something intelligent, 'chat about <foo> <bar>' => riff on a word pair (if possible)"
+ topic, subtopic = topic.split
+
+ case topic
+ when "ignore"
+ case subtopic
+ when "add"
+ "markov ignore add <hostmask|channel> => ignore a hostmask or a channel"
+ when "list"
+ "markov ignore list => show ignored hostmasks and channels"
+ when "remove"
+ "markov ignore remove <hostmask|channel> => unignore a hostmask or channel"
+ else
+ "ignore hostmasks or channels -- topics: add, remove, list"
+ end
+ when "status"
+ "markov status => show if markov is enabled, probability and amount of messages in queue for learning"
+ when "probability"
+ "markov probability [<percent>] => set the % chance of rbot responding to input, or display the current probability"
+ when "chat"
+ case subtopic
+ when "about"
+ "markov chat about <word> [<another word>] => talk about <word> or riff on a word pair (if possible)"
+ else
+ "markov chat => try to say something intelligent"
+ end
+ else
+ "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, status, probability, chat, chat about"
+ end
end
def clean_str(s)
def status(m,params)
if @bot.config['markov.enabled']
- m.reply "markov is currently enabled, #{probability?}% chance of chipping in"
+ reply = _("markov is currently enabled, %{p}% chance of chipping in") % { :p => probability? }
+ l = @learning_queue.length
+ reply << (_(", %{l} messages in queue") % {:l => l}) if l > 0
+ l = @upgrade_queue.length
+ reply << (_(", %{l} chains to upgrade") % {:l => l}) if l > 0
else
- m.reply "markov is currently disabled"
+ reply = _("markov is currently disabled")
end
+ m.reply reply
end
- def ignore?(user=nil)
- return false unless user
- @bot.config['markov.ignore_users'].each do |mask|
- return true if user.matches?(mask)
+ def ignore?(m=nil)
+ return false unless m
+ return true if m.address? or m.private?
+ @bot.config['markov.ignore'].each do |mask|
+ return true if m.channel.downcase == mask.downcase
+ return true if m.source.matches?(mask)
end
return false
end
user = params[:option]
case action
when 'remove':
- if @bot.config['markov.ignore_users'].include? user
- s = @bot.config['markov.ignore_users']
+ if @bot.config['markov.ignore'].include? user
+ s = @bot.config['markov.ignore']
s.delete user
- @bot.config['ignore_users'] = s
- m.reply "#{user} removed"
+ # assign the shortened list back to the same key it was read from
+ @bot.config['markov.ignore'] = s
+ m.reply _("%{u} removed") % { :u => user }
else
- m.reply "not found in list"
+ m.reply _("not found in list")
end
when 'add':
if user
- if @bot.config['markov.ignore_users'].include?(user)
- m.reply "#{user} already in list"
+ if @bot.config['markov.ignore'].include?(user)
+ m.reply _("%{u} already in list") % { :u => user }
else
- @bot.config['markov.ignore_users'] = @bot.config['markov.ignore_users'].push user
- m.reply "#{user} added to markov ignore list"
+ @bot.config['markov.ignore'] = @bot.config['markov.ignore'].push user
+ m.reply _("%{u} added to markov ignore list") % { :u => user }
end
else
- m.reply "give the name of a person to ignore"
+ m.reply _("give the name of a person or channel to ignore")
end
when 'list':
- m.reply "I'm ignoring #{@bot.config['markov.ignore_users'].join(", ")}"
+ m.reply _("I'm ignoring %{ignored}") % { :ignored => @bot.config['markov.ignore'].join(", ") }
else
- m.reply "have markov ignore the input from a hostmask. usage: markov ignore add <mask>; markov ignore remove <mask>; markov ignore list"
+ m.reply _("have markov ignore the input from a hostmask or a channel. usage: markov ignore add <mask or channel>; markov ignore remove <mask or channel>; markov ignore list")
end
end
def random_markov(m, message)
return unless should_talk
- word1, word2 = message.split(/\s+/)
- line = generate_string(word1, word2)
+ word1, word2 = clean_str(message).split(/\s+/)
+ return unless word1 and word2
+ line = generate_string(word1.intern, word2.intern)
return unless line
- return if line == message
+ # we do nothing if the line we return is just an initial substring
+ # of the line we received
+ return if message.index(line) == 0
@bot.timer.add_once(delay) {
- m.reply line
+ m.reply line, :nick => false, :to => :public
}
end
def chat(m, params)
line = generate_string(params[:seed1], params[:seed2])
- if line != "#{params[:seed1]} #{params[:seed2]}"
- m.reply line
+ if line and line != [params[:seed1], params[:seed2]].compact.join(" ")
+ m.reply line
else
- m.reply "I can't :("
+ m.reply _("I can't :(")
end
end
def rand_chat(m, params)
# pick a random pair from the db and go from there
- word1, word2 = :nonword, :nonword
+ word1, word2 = MARKER, MARKER
output = Array.new
- 50.times do
- wordlist = @registry["#{word1} #{word2}"]
- break if wordlist.empty?
- word3 = wordlist[rand(wordlist.length)]
- break if word3 == :nonword
+ @bot.config['markov.max_words'].times do
+ word3 = pick_word(word1, word2)
+ break if word3 == MARKER
output << word3
word1, word2 = word2, word3
end
if output.length > 1
m.reply output.join(" ")
else
- m.reply "I can't :("
+ m.reply _("I can't :(")
end
end
-
- def message(m)
- return unless m.public?
- return if m.address?
- return if ignore? m.source
+
+ def learn(*lines)
+ lines.each { |l| @learning_queue.push l }
+ end
+
+ def unreplied(m)
+ return if ignore? m
# in channel message, the kind we are interested in
- message = clean_str m.plainmessage
+ message = m.plainmessage
if m.action?
message = "#{m.sourcenick} #{message}"
end
-
- @learning_queue.push message
+
+ learn message
random_markov(m, message) unless m.replied?
end
- def learn(message)
- # debug "learning #{message}"
- wordlist = message.split(/\s+/)
+ def learn_triplet(word1, word2, word3)
+ k = "#{word1} #{word2}"
+ @chains_mutex.synchronize do
+ total = 0
+ hash = Hash.new(0)
+ if @chains.key? k
+ t2, h2 = @chains[k]
+ total += t2
+ hash.update h2
+ end
+ hash[word3] += 1
+ total += 1
+ @chains[k] = [total, hash]
+ end
+ end
+
+ def learn_line(message)
+ # debug "learning #{message.inspect}"
+ wordlist = clean_str(message).split(/\s+/).map { |w| w.intern }
return unless wordlist.length >= 2
- word1, word2 = :nonword, :nonword
+ word1, word2 = MARKER, MARKER
+ wordlist << MARKER
wordlist.each do |word3|
- k = "#{word1} #{word2}"
- @registry[k] = @registry[k].push(word3)
+ learn_triplet(word1, word2, word3)
word1, word2 = word2, word3
end
- k = "#{word1} #{word2}"
- @registry[k] = @registry[k].push(:nonword)
+ end
+
+ # TODO allow learning from URLs
+ #
+ # Learn from the lines of a local file.
+ #
+ # params[:file] is the path to open. If params[:pattern] is not empty it
+ # is compiled into a Regexp, and only what the pattern matches on each
+ # line is used instead of the whole line.
+ #
+ # When params[:testing] is set nothing is learned: a sample of lines is
+ # replied back to the user instead. The sample covers the range given in
+ # params[:lines] ("N..M", or "N" meaning 1..N), defaulting to the first
+ # max(send.max_lines, 3) lines.
+ #
+ # Otherwise every line (or its pattern match) is queued with #learn and
+ # the request is acknowledged with m.okay.
+ #
+ # Replies "no such file" when the path does not exist, and
+ # "the file is empty!" when there is nothing to read.
+ def learn_from(m, params)
+ # declared up front so that the ensure clause can tell whether the
+ # file was actually opened
+ file = nil
+ begin
+ path = params[:file]
+ file = File.open(path, "r")
+ pattern = params[:pattern].empty? ? nil : Regexp.new(params[:pattern].to_s)
+ rescue Errno::ENOENT
+ m.reply _("no such file")
+ return
+ end
+
+ if file.eof?
+ m.reply _("the file is empty!")
+ return
+ end
+
+ if params[:testing]
+ lines = []
+ range = case params[:lines]
+ when /^\d+\.\.\d+$/
+ Range.new(*params[:lines].split("..").map { |e| e.to_i })
+ when /^\d+$/
+ Range.new(1, params[:lines].to_i)
+ else
+ Range.new(1, [@bot.config['send.max_lines'], 3].max)
+ end
+
+ file.each do |line|
+ next unless file.lineno >= range.begin
+ lines << line.chomp
+ break if file.lineno == range.end
+ end
+
+ lines = lines.map do |l|
+ pattern ? l.scan(pattern).to_s : l
+ end.reject { |e| e.empty? }
+
+ if pattern
+ unless lines.empty?
+ m.reply _("example matches for that pattern at lines %{range} include: %{lines}") % {
+ :lines => lines.map { |e| Underline+e+Underline }.join(", "),
+ :range => range.to_s
+ }
+ else
+ m.reply _("the pattern doesn't match anything at lines %{range}") % {
+ :range => range.to_s
+ }
+ end
+ else
+ m.reply _("learning from the file without a pattern would learn, for example: ")
+ lines.each { |l| m.reply l }
+ end
+
+ return
+ end
+
+ if pattern
+ file.each { |l| learn(l.scan(pattern).to_s) }
+ else
+ file.each { |l| learn(l.chomp) }
+ end
+
+ m.okay
+ ensure
+ # release the file handle on every exit path, including the early
+ # returns above and any exception raised while reading
+ file.close if file
end
end
plugin.map 'markov enable', :action => "enable"
plugin.map 'markov disable', :action => "disable"
plugin.map 'markov status', :action => "status"
-plugin.map 'chat about :seed1 :seed2', :action => "chat"
+plugin.map 'chat about :seed1 [:seed2]', :action => "chat"
plugin.map 'chat', :action => "rand_chat"
plugin.map 'markov probability [:probability]', :action => "probability",
:requirements => {:probability => /^\d+%?$/}
+plugin.map 'markov learn from :file [:testing [:lines lines]] [using pattern *pattern]', :action => "learn_from", :thread => true,
+ :requirements => {
+ :testing => /^testing$/,
+ :lines => /^(?:\d+\.\.\d+|\d+)$/ }
+
+plugin.default_auth('ignore', false)
+plugin.default_auth('probability', false)
+plugin.default_auth('learn', false)
+