plugin(imdb): fix title regex, closes #44

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / imdb.rb
diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb

index 0df8e2d4433f9d200bc55316e837c24b7d34aeaa..3ca2103572ec3046242bb8cadaafbe73d20e82f1 100644 (file)
--- a/data/rbot/plugins/imdb.rb
+++ b/data/rbot/plugins/imdb.rb
@@ -12,7 +12,7 @@
  # License:: MIT license
  
  class Imdb
-  IMDB = "http://www.imdb.com"
+  IMDB = "https://www.imdb.com"
    TITLE_OR_NAME_MATCH = /<a\s+href="(\/(?:title|name)\/(?:tt|nm)[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
    TITLE_MATCH = /<a\s+href="(\/title\/tt[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
    NAME_MATCH = /<a\s+onclick="[^"]+"\s+href="(\/name\/nm[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
@@ -133,7 +133,7 @@ class Imdb
      resp = nil
      begin
        # movie urls without tailing / trigger a redirect
-      sr += '/' if sr[-1] != '/'
+      sr += '/' if sr[-1,1] != '/'
        resp = @bot.httputil.get_response(IMDB + sr, :max_redir => -1)
      rescue Exception => e
        error e.message
@@ -150,7 +150,7 @@ class Imdb
        debug title_date
        # note that the date dash for series is a - (ndash), not a - (minus sign)
        # also, the second date, if missing, is an no-break space
-      pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)\)\s*(.+)?$/).first
+      pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)[^\)]*\)\s*(.+)?$/).first
        extra.strip! if extra
        pre_title.strip!
        title = fix_article(pre_title)
@@ -191,8 +191,8 @@ class Imdb
        end
  
        genre = Array.new
-      resp.body.scan(/<a\s+onclick="[^"]+"\s+href="\/genre\/[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
-        genre << gnr
+      resp.body.scan(/<a\s+href="\/genre\/[^\?]+\?[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
+        genre << gnr.first.strip
        end
  
        plot = resp.body.match(DESC_MATCH)[3] rescue nil