- if e.message =~ /\(404 - Not Found\)/i
- # see if we failed to find the thing because of trailing punctuation
- # but check that we still have 'something' in the URL
- retry if urlstr.chop! and urlstr =~ /^https?:\/\/./
+ debug e
+ # we might get a 404 because of trailing punctuation, so we try again
+ # with the last character stripped. this might generate invalid URIs
+ # (e.g. because "some.url" gets chopped to some.url%2), so catch that too
+ if e.message =~ /\(404 - Not Found\)/i or e.kind_of?(URI::InvalidURIError)
+ # chop off last non-word character from the unescaped version of
+ # the URL, and retry if we still have enough string to look like a
+ # minimal URL
+ unescaped = URI.unescape(urlstr)
+ debug "Unescaped: #{unescaped}"
+ if unescaped.sub!(/\W$/,'') and unescaped =~ /^https?:\/\/./
+ urlstr.replace URI.escape(unescaped, OUR_UNSAFE)
+ retry
+ else
+ debug "Not retrying #{unescaped}"
+ end