end
end
+ # Turn a number of seconds into an hours:minutes:seconds string, e.g.
+ # 3:18:10 or 5'12" or 7s
+ #
+ def Utils.secs_to_short(seconds)
+ secs = seconds.to_i # make sure it's an integer
+ # split the total into sexagesimal components
+ mins, secs = secs.divmod 60
+ hours, mins = mins.divmod 60
+ # pick the most compact notation for the magnitude
+ if hours > 0
+ # NOTE(review): %s does no zero-padding, so e.g. 3:5:7 rather than
+ # 3:05:07 — confirm this is the intended display format
+ return ("%s:%s:%s" % [hours, mins, secs])
+ elsif mins > 0
+ return ("%s'%s\"" % [mins, secs])
+ else
+ return ("%ss" % [secs])
+ end
+ end
+
# Execute an external program, returning a String obtained by redirecting
# the program's standards errors and output
str.gsub(/(&(.+?);)/) {
symbol = $2
# remove the 0-padding from unicode integers
- if symbol =~ /#(.+)/
- symbol = "##{$1.to_i.to_s}"
+ if symbol =~ /^#(\d+)$/
+ symbol = $1.to_i.to_s
end
# output the symbol's irc-translated character, or a * if it's unknown
- UNESCAPE_TABLE[symbol] || (symbol.match(/^\d+$/) ? [$0.to_i].pack("U") : '*')
+ UNESCAPE_TABLE[symbol] || (symbol.match(/^\d+$/) ? [symbol.to_i].pack("U") : '*')
}
end
end
when Net::HTTPResponse
Utils.get_resp_html_info(doc, opts)
when URI
- ret = Hash.new
+ ret = DataStream.new
@@bot.httputil.get_response(doc) { |resp|
- ret = Utils.get_resp_html_info(resp, opts)
+ ret.replace Utils.get_resp_html_info(resp, opts)
}
return ret
else
# This method extracts title, content (first par) and extra
# information from the given Net::HTTPResponse _resp_.
#
- # Currently, the only accepted option (in _opts_) is
+ # Currently, the only accepted options (in _opts_) are
# uri_fragment:: the URI fragment of the original request
+ # full_body:: get the whole body instead of
+ # @@bot.config['http.info_bytes'] bytes only
#
- # Returns a Hash with the following keys:
+ # Returns a DataStream with the following keys:
+ # text:: the (partial) body
# title:: the title of the document (if any)
# content:: the first paragraph of the document (if any)
# headers::
#   a Hash whose keys are the response header fields, and whose values are Arrays.
#
def Utils.get_resp_html_info(resp, opts={})
- ret = Hash.new
# only successful responses are inspected; anything else falls
# through the case and the method returns nil
case resp
when Net::HTTPSuccess
+ # recover the URI fragment from the (possibly redirected) final
+ # location, so anchors survive redirects; parse errors yield nil
+ loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil
+ if loc and loc.fragment and not loc.fragment.empty?
+ opts[:uri_fragment] ||= loc.fragment
+ end
+ ret = DataStream.new(opts.dup)
ret[:headers] = resp.to_hash
+ # :text gets the whole body only when :full_body was requested,
+ # otherwise just the first http.info_bytes bytes
+ ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes'])
- partial = resp.partial_body(@@bot.config['http.info_bytes'])
- if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
- loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil
- if loc and loc.fragment and not loc.fragment.empty?
- opts[:uri_fragment] ||= loc.fragment
- end
+ # give the :htmlinfo filter group first crack at the stream
+ filtered = Utils.try_htmlinfo_filters(ret)
+
+ if filtered
+ return filtered
+ elsif resp['content-type'] =~ /^text\/|(?:x|ht)ml/
# no filter claimed it: fall back to the generic title/content scrape
ret.merge!(Utils.get_string_html_info(partial, opts))
end
return ret
end
end
+ # This method runs an appropriately-crafted DataStream _ds_ through the
+ # filters in the :htmlinfo filter group, in order. If one of the filters
+ # returns non-nil, its results are merged in _ds_ and returned. Otherwise
+ # nil is returned.
+ #
+ # The input DataStream should have the downloaded HTML as primary key
+ # (:text) and possibly a :headers key holding the response headers.
+ #
+ def Utils.try_htmlinfo_filters(ds)
+ filters = @@bot.filter_names(:htmlinfo)
+ # nothing registered: bail out early
+ return nil if filters.empty?
+ cur = nil
+ # TODO filter priority
+ filters.each { |n|
+ debug "testing filter #{n}"
+ cur = @@bot.filter(@@bot.global_filter_name(n, :htmlinfo), ds)
+ debug "returned #{cur.pretty_inspect}"
+ # first filter that returns non-nil wins
+ break if cur
+ }
+ # merge the winning filter's result into the stream; implicit nil
+ # return when no filter matched
+ return ds.merge(cur) if cur
+ end
+
+ # HTML info filters often need to check if the webpage location
+ # of a passed DataStream _ds_ matches a given Regexp.
+ def Utils.check_location(ds, rx)
+ debug ds[:headers]
+ if h = ds[:headers]
+ # header values may be strings or arrays of strings (see
+ # Net::HTTPResponse#to_hash); flatten before grepping for _rx_
+ loc = [h['x-rbot-location'],h['location']].flatten.grep(rx)
+ end
+ loc ||= []
+ debug loc
+ # nil when no location header matched, otherwise the Array of matches
+ return loc.empty? ? nil : loc
+ end
+
# This method extracts title and content (first par)
# from the given HTML or XML document _text_, using
# standard methods (String#ircify_html_title,
# uri_fragment:: the URI fragment of the original request
#
def Utils.get_string_html_info(text, opts={})
+ debug "getting string html info"
txt = text.dup
title = txt.ircify_html_title
+ debug opts
if frag = opts[:uri_fragment] and not frag.empty?
- fragreg = /.*?<a\s+[^>]*name=["']?#{frag}["']?.*?>/im
- txt.sub!(fragreg,'')
+ fragreg = /<a\s+[^>]*name=["']?#{frag}["']?[^>]*>/im
+ debug fragreg
+ debug txt
+ if txt.match(fragreg)
+ # grab the post-match
+ txt = $'
+ end
end
c_opts = opts.dup
c_opts[:strip] ||= title