plugin(imdb): fix title regex, closes #44

[user/henk/code/ruby/rbot.git] / data / rbot / plugins / imdb.rb
diff --git a/data/rbot/plugins/imdb.rb b/data/rbot/plugins/imdb.rb

index 60826299648c3ae2fbf241904ddbbb32e12b2e84..3ca2103572ec3046242bb8cadaafbe73d20e82f1 100644 (file)
--- a/data/rbot/plugins/imdb.rb
+++ b/data/rbot/plugins/imdb.rb
@@ -12,7 +12,7 @@
  # License:: MIT license
  
  class Imdb
-  IMDB = "http://www.imdb.com"
+  IMDB = "https://www.imdb.com"
    TITLE_OR_NAME_MATCH = /<a\s+href="(\/(?:title|name)\/(?:tt|nm)[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
    TITLE_MATCH = /<a\s+href="(\/title\/tt[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
    NAME_MATCH = /<a\s+onclick="[^"]+"\s+href="(\/name\/nm[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
@@ -150,7 +150,7 @@ class Imdb
        debug title_date
        # note that the date dash for series is a – (en dash), not a - (minus sign)
        # also, the second date, if missing, is a no-break space
-      pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)\)\s*(.+)?$/).first
+      pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)[^\)]*\)\s*(.+)?$/).first
        extra.strip! if extra
        pre_title.strip!
        title = fix_article(pre_title)
@@ -191,8 +191,8 @@ class Imdb
        end
  
        genre = Array.new
-      resp.body.scan(/<a\s+onclick="[^"]+"\s+href="\/genre\/[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
-        genre << gnr
+      resp.body.scan(/<a\s+href="\/genre\/[^\?]+\?[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
+        genre << gnr.first.strip
        end
  
        plot = resp.body.match(DESC_MATCH)[3] rescue nil