+# encoding: UTF-8
#-- vim:sw=2:et
#++
#
'raquo' => '»',
'quot' => '"',
'apos' => '\'',
+ 'deg' => '°',
'micro' => 'µ',
'copy' => '©',
'trade' => '™',
'gt' => '>',
'hellip' => '…',
'nbsp' => ' ',
+ 'ndash' => '–',
'Agrave' => 'À',
'Aacute' => 'Á',
'Acirc' => 'Â',
# Some blogging and forum platforms use spans or divs with a 'body' or 'message' or 'text' in their class
# to mark actual text
- AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
+ AFTER_PAR1_REGEX = /<\w+\s+[^>]*(?:body|message|text|post)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im
# At worst, we can try whatever is enclosed between two <br> tags
AFTER_PAR2_REGEX = /<br(?:\s+[^>]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im
# Miscellaneous useful functions
module Utils
- @@bot = nil unless defined? @@bot
- @@safe_save_dir = nil unless defined?(@@safe_save_dir)
-
- # The bot instance
- def Utils.bot
- @@bot
- end
-
- # Set up some Utils routines which depend on the associated bot.
- def Utils.bot=(b)
- debug "initializing utils"
- @@bot = b
- @@safe_save_dir = @@bot.path('safe_save')
- end
-
-
# Seconds per minute
SEC_PER_MIN = 60
# Seconds per hour
when 0
raise "Empty ret array!"
when 1
- return ret.to_s
+ return ret[0].to_s
else
return [ret[0, ret.length-1].join(", ") , ret[-1]].join(_(" and "))
end
# Execute an external program, returning a String obtained by redirecting
# the program's standards errors and output
#
+ # TODO: find a way to expose some common errors (e.g. Errno::ENOENT)
+ # to the caller
def Utils.safe_exec(command, *args)
- IO.popen("-") { |p|
+ output = IO.popen("-") { |p| # popen("-") forks: p is an IO in the parent, nil in the child
if p
- return p.readlines.join("\n")
+ break p.readlines.join("\n") # parent: break makes this string the value of IO.popen
else
begin
$stderr.reopen($stdout)
exec(command, *args)
rescue Exception => e
- puts "exec of #{command} led to exception: #{e.pretty_inspect}"
- Kernel::exit! 0
+ puts "exception #{e.pretty_inspect} trying to run #{command}"
+ Kernel::exit! 1 # non-zero status so the parent's $?.success? check fails
end
puts "exec of #{command} failed"
- Kernel::exit! 0
+ Kernel::exit! 1 # same failure status on the fall-through path
end
}
+ raise "safe execution of #{command} returned #{$?}" unless $?.success? # $? is the child's exit status
+ return output
end
-
- # Safely (atomically) save to _file_, by passing a tempfile to the block
- # and then moving the tempfile to its final location when done.
- #
- # call-seq: Utils.safe_save(file, &block)
- #
- def Utils.safe_save(file)
- raise 'No safe save directory defined!' if @@safe_save_dir.nil?
- basename = File.basename(file)
- temp = Tempfile.new(basename,@@safe_save_dir)
- temp.binmode
- yield temp if block_given?
- temp.close
- File.rename(temp.path, file)
+ # Try executing an external program in a forked child, returning true only if
+ # the child exited with status 0; the child's output is just passed to debug
+ def Utils.try_exec(command, *args)
+ IO.popen("-") { |p| # popen("-") forks: p is nil in the child, an IO in the parent
+ if p.nil? # child side
+ begin
+ $stderr.reopen($stdout) # send the program's stderr to the same pipe as stdout
+ exec(command, *args) # on success the child is replaced and nothing below runs
+ rescue Exception => e
+ Kernel::exit! 1 # exec raised: report failure through the exit status
+ end
+ Kernel::exit! 1 # fall-through safeguard, also a failure status
+ else
+ debug p.readlines # parent side: read everything the child wrote and log it
+ end
+ }
+ debug $? # exit status of the forked child
+ return $?.success?
end
# Decode HTML entities in the String _str_, using HTMLEntities if the
# package was found, or UNESCAPE_TABLE otherwise.
#
- def Utils.decode_html_entities(str)
- if defined? ::HTMLEntities
- return HTMLEntities.decode_entities(str)
+
+ if defined? ::HTMLEntities
+ if ::HTMLEntities.respond_to? :decode_entities
+ def Utils.decode_html_entities(str)
+ return HTMLEntities.decode_entities(str)
+ end
else
- str.gsub(/(&(.+?);)/) {
+ @@html_entities = HTMLEntities.new
+ def Utils.decode_html_entities(str)
+ return @@html_entities.decode str
+ end
+ end
+ else
+ def Utils.decode_html_entities(str)
+ return str.gsub(/(&(.+?);)/) {
symbol = $2
# remove the 0-padding from unicode integers
- if symbol =~ /^#(\d+)$/
+ case symbol
+ when /^#x([0-9a-fA-F]+)$/
+ symbol = $1.to_i(16).to_s
+ when /^#(\d+)$/
symbol = $1.to_i.to_s
end
# HTML first par grabber without hpricot
def Utils.ircify_first_html_par_woh(xml_org, opts={})
- xml = xml_org.gsub(/<!--.*?-->/m, '').gsub(/<script(?:\s+[^>]*)?>.*?<\/script>/im, "").gsub(/<style(?:\s+[^>]*)?>.*?<\/style>/im, "")
+ xml = xml_org.gsub(/<!--.*?-->/m,
+ "").gsub(/<script(?:\s+[^>]*)?>.*?<\/script>/im,
+ "").gsub(/<style(?:\s+[^>]*)?>.*?<\/style>/im,
+ "").gsub(/<select(?:\s+[^>]*)?>.*?<\/select>/im,
+ "")
strip = opts[:strip]
strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)
# information is retrieved, and special title/summary
# extraction routines are used if possible.
#
- def Utils.get_html_info(doc, opts={})
+ def Utils.get_html_info(bot, doc, opts={})
case doc
when String
Utils.get_string_html_info(doc, opts)
when Net::HTTPResponse
- Utils.get_resp_html_info(doc, opts)
+ Utils.get_resp_html_info(bot, doc, opts)
when URI
ret = DataStream.new
- @@bot.httputil.get_response(doc) { |resp|
- ret.replace Utils.get_resp_html_info(resp, opts)
+ bot.httputil.get_response(doc) { |resp|
+ ret.replace Utils.get_resp_html_info(bot, resp, opts)
}
return ret
else
# Currently, the only accepted options (in _opts_) are
# uri_fragment:: the URI fragment of the original request
# full_body:: get the whole body instead of
- # @@bot.config['http.info_bytes'] bytes only
+ # bot.config['http.info_bytes'] bytes only
#
# Returns a DataStream with the following keys:
# text:: the (partial) body
# a Hash whose keys are lowercase forms of the HTTP
# header fields, and whose values are Arrays.
#
- def Utils.get_resp_html_info(resp, opts={})
+ def Utils.get_resp_html_info(bot, resp, opts={})
case resp
when Net::HTTPSuccess
loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil
end
ret = DataStream.new(opts.dup)
ret[:headers] = resp.to_hash
- ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes'])
+ ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(bot.config['http.info_bytes'])
- filtered = Utils.try_htmlinfo_filters(ret)
+ filtered = Utils.try_htmlinfo_filters(bot, ret)
if filtered
return filtered
# The input DataStream should have the downloaded HTML as primary key
# (:text) and possibly a :headers key holding the response headers.
#
- def Utils.try_htmlinfo_filters(ds)
- filters = @@bot.filter_names(:htmlinfo)
+ def Utils.try_htmlinfo_filters(bot, ds)
+ filters = bot.filter_names(:htmlinfo)
return nil if filters.empty?
cur = nil
# TODO filter priority
filters.each { |n|
debug "testing htmlinfo filter #{n}"
- cur = @@bot.filter(@@bot.global_filter_name(n, :htmlinfo), ds)
+ cur = bot.filter(bot.global_filter_name(n, :htmlinfo), ds)
debug "returned #{cur.pretty_inspect}"
break if cur
}
# If (optional) _opts_ :message is specified, those paragraphs are
# echoed as replies to the IRC message passed as _opts_ :message
#
- def Utils.get_first_pars(urls, count, opts={})
+ def Utils.get_first_pars(bot, urls, count, opts={})
idx = 0
msg = opts[:message]
retval = Array.new
idx += 1
begin
- info = Utils.get_html_info(URI.parse(url), opts)
+ info = Utils.get_html_info(bot, URI.parse(url), opts)
par = info[:content]
retval.push(par)
end
end
-
-Irc::Utils.bot = Irc::Bot::Plugins.manager.bot