markov: document 'learn from <file>'

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / markov.rb
diff --git a/data/rbot/plugins/markov.rb b/data/rbot/plugins/markov.rb

index ce5a35a2324f9cf93f5fb12c929af90fddafefb8..9e4bbb9247c344f3e07ba939a28df5741ea859b6 100755 (executable)
--- a/data/rbot/plugins/markov.rb
+++ b/data/rbot/plugins/markov.rb
@@ -32,7 +32,7 @@ class MarkovPlugin < Plugin
      :validate => Proc.new { |v| v >= 0 },
      :desc => "Time the learning thread spends sleeping after learning a line. If set to zero, learning from files can be very CPU intensive, but also faster.")
     Config.register Config::IntegerValue.new('markov.delay',
-    :default => true,
+    :default => 5,
      :validate => Proc.new { |v| v >= 0 },
      :desc => "Wait short time before contributing to conversation.")
     Config.register Config::IntegerValue.new('markov.answer_addressed',
@@ -226,6 +226,8 @@ class MarkovPlugin < Plugin
      @chains.set_default([])
      @rchains = @registry.sub_registry('v2r')
      @rchains.set_default([])
+    @chains_mutex = Mutex.new
+    @rchains_mutex = Mutex.new
  
      @upgrade_queue = Queue.new
      @upgrade_thread = nil
@@ -360,14 +362,21 @@ class MarkovPlugin < Plugin
        else
          "markov chat => try to say something intelligent"
        end
+    when "learn"
+      ["markov learn from <file> [testing [<num> lines]] [using pattern <pattern>]:",
+       "learn from the text in the specified <file>, optionally using the given <pattern> to filter the text.",
+       "you can sample what would be learned by specifying 'testing <num> lines'"].join(' ')
      else
        "markov plugin: listens to chat to build a markov chain, with which it can (perhaps) attempt to (inanely) contribute to 'discussion'. Sort of.. Will get a *lot* better after listening to a lot of chat. Usage: 'chat' to attempt to say something relevant to the last line of chat, if it can -- help topics: ignore, readonly, delay, status, probability, chat, chat about"
      end
    end
  
-  def clean_str(s)
-    str = s.dup
-    str.gsub!(/^\S+[:,;]/, "")
+  def clean_message(m)
+    str = m.plainmessage.dup
+    str =~ /^(\S+)([:,;])/
+    if $1 and m.target.is_a? Irc::Channel and m.target.user_nicks.include? $1.downcase
+      str.gsub!(/^(\S+)([:,;])\s+/, "")
+    end
      str.gsub!(/\s{2,}/, ' ') # fix for two or more spaces
      return str.strip
    end
@@ -522,7 +531,7 @@ class MarkovPlugin < Plugin
    def reply_delay(m, line)
      m.replied = true
      if @bot.config['markov.delay'] > 0
-      @bot.timer.add_once(@bot.config['markov.delay']) {
+      @bot.timer.add_once(1 + rand(@bot.config['markov.delay'])) {
          m.reply line, :nick => false, :to => :public
        }
      else
@@ -533,7 +542,7 @@ class MarkovPlugin < Plugin
    def random_markov(m, message)
      return unless should_talk(m)
  
-    words = clean_str(message).split(/\s+/)
+    words = clean_message(m).split(/\s+/)
      if words.length < 2
        line = generate_string words.first, nil
  
@@ -601,40 +610,44 @@ class MarkovPlugin < Plugin
      end
  
      random_markov(m, message) unless readonly? m or m.replied?
-    learn message
+    learn clean_message(m)
    end
  
  
    def learn_triplet(word1, word2, word3)
        k = "#{word1} #{word2}"
        rk = "#{word2} #{word3}"
-      total = 0
-      hash = Hash.new(0)
-      if @chains.key? k
-        t2, h2 = @chains[k]
-        total += t2
-        hash.update h2
+      @chains_mutex.synchronize do
+        total = 0
+        hash = Hash.new(0)
+        if @chains.key? k
+          t2, h2 = @chains[k]
+          total += t2
+          hash.update h2
+        end
+        hash[word3] += 1
+        total += 1
+        @chains[k] = [total, hash]
        end
-      hash[word3] += 1
-      total += 1
-      @chains[k] = [total, hash]
-      # Reverse
-      total = 0
-      hash = Hash.new(0)
-      if @rchains.key? rk
-        t2, h2 = @rchains[rk]
-        total += t2
-        hash.update h2
+      @rchains_mutex.synchronize do
+        # Reverse
+        total = 0
+        hash = Hash.new(0)
+        if @rchains.key? rk
+          t2, h2 = @rchains[rk]
+          total += t2
+          hash.update h2
+        end
+        hash[word1] += 1
+        total += 1
+        @rchains[rk] = [total, hash]
        end
-      hash[word1] += 1
-      total += 1
-      @rchains[rk] = [total, hash]
    end
  
  
    def learn_line(message)
      # debug "learning #{message.inspect}"
-    wordlist = clean_str(message).split(/\s+/).reject do |w|
+    wordlist = message.strip.split(/\s+/).reject do |w|
        @bot.config['markov.ignore_patterns'].map do |pat|
          w =~ Regexp.new(pat.to_s)
        end.select{|v| v}.size != 0