+# encoding: UTF-8
#-- vim:sw=2:et
#++
#
require 'resolv'
require 'net/http'
require 'cgi'
-begin
- require 'iconv'
-rescue LoadError => e
- error "Couldn't load 'iconv': #{e}"
- error "Non-UTF-8 webpages will not be properly supported"
-end
begin
- require 'net/https'
+ require 'nokogiri'
rescue LoadError => e
- error "Couldn't load 'net/https': #{e}"
- error "Secured HTTP connections will fail"
- # give a nicer error than "undefined method `use_ssl='"
- ::Net::HTTP.class_eval <<-EOC
- define_method :use_ssl= do |val|
- # does anybody really set it to false?
- break if !val
- raise _("I can't do secure HTTP, sorry (%{msg})") % {
- :msg => e.message
- }
- end
- EOC
+ error "No nokogiri library found, some features might not be available!"
end
# To handle Gzipped pages
ctype = self['content-type'] || 'text/html'
return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i
- charsets = ['latin1'] # should be in config
+ charsets = ['ISO-8859-1'] # should be in config
if ctype.match(/charset=["']?([^\s"']+)["']?/i)
charsets << $1
debug "charset #{charsets.last} added from header"
end
+ # str might be invalid utf-8 that will crash on the pattern match:
+ str.encode!('UTF-8', 'UTF-8', :invalid => :replace)
case str
when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
charsets << $1
def body_to_utf(str)
charsets = self.body_charset(str) or return str
- return str unless defined? Iconv
charsets.reverse_each do |charset|
- # XXX: this one is really ugly, but i don't know how to make it better
- # -jsn
-
- 0.upto(5) do |off|
- begin
- debug "trying #{charset} / offset #{off}"
- return Iconv.iconv('utf-8//ignore',
- charset,
- str.slice(0 .. (-1 - off))).first
- rescue
- debug "conversion failed for #{charset} / offset #{off}"
+ begin
+ debug "try decoding using #{charset}"
+ str.force_encoding(charset)
+ tmp = str.encode('UTF-16le', :invalid => :replace, :replace => '').encode('UTF-8')
+ if tmp
+ str = tmp
+ break
end
+ rescue
+ error 'failed to use encoding'
+ error $!
end
end
+
return str
end
# If we can't unpack the whole stream (e.g. because we're doing a
# partial read), recover as much of it as we can
debug "full gunzipping failed (#{e}), trying to recover as much as possible"
- ret = ""
+ ret = ''
+ ret.force_encoding(Encoding::ASCII_8BIT)
begin
Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
ret << byte
return self.body_to_utf(self.decompress_body(partial))
end
+
+ def xpath(path)
+ document = Nokogiri::HTML.parse(self.body)
+ document.xpath(path)
+ end
+
+ def to_json
+ JSON::parse(self.body)
+ end
end
end
-Net::HTTP.version_1_2
-
module ::Irc
module Utils
resp = get_response(uri, options, &block)
raise "http error: #{resp}" unless Net::HTTPOK === resp ||
Net::HTTPPartialContent === resp
- return resp.body
+ if options[:resp]
+ return resp
+ else
+ return resp.body
+ end
rescue Exception => e
error e
end