# License:: MIT license
class Imdb
- IMDB = "http://www.imdb.com"
+ IMDB = "https://www.imdb.com"
TITLE_OR_NAME_MATCH = /<a\s+href="(\/(?:title|name)\/(?:tt|nm)[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
TITLE_MATCH = /<a\s+href="(\/title\/tt[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
NAME_MATCH = /<a\s+onclick="[^"]+"\s+href="(\/name\/nm[0-9]+\/?)[^"]*"(?:[^>]*)>([^<]*)<\/a>/
debug title_date
# note that the date dash for series is a – (en dash), not a - (minus sign)
# also, the second date, if missing, is a no-break space
- pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)\)\s*(.+)?$/).first
+ pre_title, extra, date, junk = title_date.scan(/^(.*)\((.+?\s+)?(\d\d\d\d(?:–(?:\d\d\d\d| )?)?(?:\/[IV]+)?)[^\)]*\)\s*(.+)?$/).first
extra.strip! if extra
pre_title.strip!
title = fix_article(pre_title)
end
genre = Array.new
- resp.body.scan(/<a\s+onclick="[^"]+"\s+href="\/genre\/[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
- genre << gnr
+ resp.body.scan(/<a\s+href="\/genre\/[^\?]+\?[^"]+"\s+>([^<]+)<\/a>/) do |gnr|
+ genre << gnr.first.strip
end
plot = resp.body.match(DESC_MATCH)[3] rescue nil