lib/rbot/core/utils/extends.rb

   1 #-- vim:sw=2:et
   2 #++
   3 #
   4 # :title: Standard classes extensions
   5 #
   6 # Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
   7 # Copyright:: (C) 2006,2007 Giuseppe Bilotta
   8 # License:: GPL v2
   9 #
  10 # This file collects extensions to standard Ruby classes and to some core rbot
  11 # classes to be used by the various plugins
  12 #
  13 # Please note that global symbols have to be prefixed by :: because this plugin
  14 # will be read into an anonymous module
  15
  16 # Extensions to the Module class
  17 #
  18 class ::Module
  19
  20   # Many plugins define Struct objects to hold their data. On rescans, lots of
  21   # warnings are echoed because of the redefinitions. Using this method solves
  22   # the problem, by checking if the Struct already exists, and if it has the
  23   # same attributes
  24   #
  25   def define_structure(name, *members)
  26     sym = name.to_sym
  27     if Struct.const_defined?(sym)
  28       kl = Struct.const_get(sym)
  29       if kl.new.members.map { |member| member.intern } == members.map
  30         debug "Struct #{sym} previously defined, skipping"
  31         const_set(sym, kl)
  32         return
  33       end
  34     end
  35     debug "Defining struct #{sym} with members #{members.inspect}"
  36     const_set(sym, Struct.new(name.to_s, *members))
  37   end
  38 end
  39
  40
  41 # DottedIndex mixin: extend a Hash or Array class with this module
  42 # to achieve [] and []= methods that automatically split indices
  43 # at dots (indices are automatically converted to symbols, too)
  44 #
  45 # You have to define the single_retrieve(_key_) and
  46 # single_assign(_key_,_value_) methods (usually aliased at the
  47 # original :[] and :[]= methods)
  48 #
  49 module ::DottedIndex
  50   def rbot_index_split(*ar)
  51     keys = ([] << ar).flatten
  52     keys.map! { |k|
  53       k.to_s.split('.').map { |kk| kk.to_sym rescue nil }.compact
  54     }.flatten
  55   end
  56
  57   def [](*ar)
  58     keys = self.rbot_index_split(ar)
  59     return self.single_retrieve(keys.first) if keys.length == 1
  60     h = self
  61     while keys.length > 1
  62       k = keys.shift
  63       h[k] ||= self.class.new
  64       h = h[k]
  65     end
  66     h[keys.last]
  67   end
  68
  69   def []=(*arr)
  70     val = arr.last
  71     ar = arr[0..-2]
  72     keys = self.rbot_index_split(ar)
  73     return self.single_assign(keys.first, val) if keys.length == 1
  74     h = self
  75     while keys.length > 1
  76       k = keys.shift
  77       h[k] ||= self.class.new
  78       h = h[k]
  79     end
  80     h[keys.last] = val
  81   end
  82 end
  83
  84
  85 # Extensions to the Array class
  86 #
  87 class ::Array
  88
  89   # This method returns a random element from the array, or nil if the array is
  90   # empty
  91   #
  92   def pick_one
  93     return nil if self.empty?
  94     self[rand(self.length)]
  95   end
  96
  97   # This method returns a random element from the array, deleting it from the
  98   # array itself. The method returns nil if the array is empty
  99   #
 100   def delete_one
 101     return nil if self.empty?
 102     self.delete_at(rand(self.length))
 103   end
 104 end
 105
 106 # Extensions to the Range class
 107 #
 108 class ::Range
 109
 110   # This method returns a random number between the lower and upper bound
 111   #
 112   def pick_one
 113     len = self.last - self.first
 114     len += 1 unless self.exclude_end?
 115     self.first + Kernel::rand(len)
 116   end
 117   alias :rand :pick_one
 118 end
 119
 120 # Extensions for the Numeric classes
 121 #
 122 class ::Numeric
 123
 124   # This method forces a real number to be not more than a given positive
 125   # number or not less than a given positive number, or between two any given
 126   # numbers
 127   #
 128   def clip(left,right=0)
 129     raise ArgumentError unless left.kind_of?(Numeric) and right.kind_of?(Numeric)
 130     l = [left,right].min
 131     u = [left,right].max
 132     return l if self < l
 133     return u if self > u
 134     return self
 135   end
 136 end
 137
 138 # Extensions to the String class
 139 #
 140 # TODO make riphtml() just call ircify_html() with stronger purify options.
 141 #
 142 class ::String
 143
 144   # This method will return a purified version of the receiver, with all HTML
 145   # stripped off and some of it converted to IRC formatting
 146   #
 147   def ircify_html(opts={})
 148     txt = self.dup
 149
 150     # remove scripts
 151     txt.gsub!(/<script(?:\s+[^>]*)?>.*?<\/script>/im, "")
 152
 153     # remove styles
 154     txt.gsub!(/<style(?:\s+[^>]*)?>.*?<\/style>/im, "")
 155
 156     # bold and strong -> bold
 157     txt.gsub!(/<\/?(?:b|strong)(?:\s+[^>]*)?>/im, "#{Bold}")
 158
 159     # italic, emphasis and underline -> underline
 160     txt.gsub!(/<\/?(?:i|em|u)(?:\s+[^>]*)?>/im, "#{Underline}")
 161
 162     ## This would be a nice addition, but the results are horrible
 163     ## Maybe make it configurable?
 164     # txt.gsub!(/<\/?a( [^>]*)?>/, "#{Reverse}")
 165     case val = opts[:a_href]
 166     when Reverse, Bold, Underline
 167       txt.gsub!(/<(?:\/a\s*|a (?:[^>]*\s+)?href\s*=\s*(?:[^>]*\s*)?)>/, val)
 168     when :link_out
 169       # Not good for nested links, but the best we can do without something like hpricot
 170       txt.gsub!(/<a (?:[^>]*\s+)?href\s*=\s*(?:([^"'>][^\s>]*)\s+|"((?:[^"]|\\")*)"|'((?:[^']|\\')*)')(?:[^>]*\s+)?>(.*?)<\/a>/) { |match|
 171         debug match
 172         debug [$1, $2, $3, $4].inspect
 173         link = $1 || $2 || $3
 174         str = $4
 175         str + ": " + link
 176       }
 177     else
 178       warning "unknown :a_href option #{val} passed to ircify_html" if val
 179     end
 180
 181     # Paragraph and br tags are converted to whitespace
 182     txt.gsub!(/<\/?(p|br)(?:\s+[^>]*)?\s*\/?\s*>/i, ' ')
 183     txt.gsub!("\n", ' ')
 184     txt.gsub!("\r", ' ')
 185
 186     # Superscripts and subscripts are turned into ^{...} and _{...}
 187     # where the {} are omitted for single characters
 188     txt.gsub!(/<sup>(.*?)<\/sup>/, '^{\1}')
 189     txt.gsub!(/<sub>(.*?)<\/sub>/, '_{\1}')
 190     txt.gsub!(/(^|_)\{(.)\}/, '\1\2')
 191
 192     # List items are converted to *). We don't have special support for
 193     # nested or ordered lists.
 194     txt.gsub!(/<li>/, ' *) ')
 195
 196     # All other tags are just removed
 197     txt.gsub!(/<[^>]+>/, '')
 198
 199     # Convert HTML entities. We do it now to be able to handle stuff
 200     # such as &nbsp;
 201     txt = Utils.decode_html_entities(txt)
 202
 203     # Keep unbreakable spaces or conver them to plain spaces?
 204     case val = opts[:nbsp]
 205     when :space, ' '
 206       txt.gsub!([160].pack('U'), ' ')
 207     else
 208       warning "unknown :nbsp option #{val} passed to ircify_html" if val
 209     end
 210
 211     # Remove double formatting options, since they only waste bytes
 212     txt.gsub!(/#{Bold}(\s*)#{Bold}/, '\1')
 213     txt.gsub!(/#{Underline}(\s*)#{Underline}/, '\1')
 214
 215     # Simplify whitespace that appears on both sides of a formatting option
 216     txt.gsub!(/\s+(#{Bold}|#{Underline})\s+/, ' \1')
 217     txt.sub!(/\s+(#{Bold}|#{Underline})\z/, '\1')
 218     txt.sub!(/\A(#{Bold}|#{Underline})\s+/, '\1')
 219
 220     # And finally whitespace is squeezed
 221     txt.gsub!(/\s+/, ' ')
 222     txt.strip!
 223
 224     if opts[:limit] && txt.size > opts[:limit]
 225       txt = txt.slice(0, opts[:limit]) + "#{Reverse}...#{Reverse}"
 226     end
 227
 228     # Decode entities and strip whitespace
 229     return txt
 230   end
 231
 232   # As above, but modify the receiver
 233   #
 234   def ircify_html!(opts={})
 235     old_hash = self.hash
 236     replace self.ircify_html(opts)
 237     return self unless self.hash == old_hash
 238   end
 239
 240   # This method will strip all HTML crud from the receiver
 241   #
 242   def riphtml
 243     self.gsub(/<[^>]+>/, '').gsub(/&amp;/,'&').gsub(/&quot;/,'"').gsub(/&lt;/,'<').gsub(/&gt;/,'>').gsub(/&ellip;/,'...').gsub(/&apos;/, "'").gsub("\n",'')
 244   end
 245
 246   # This method tries to find an HTML title in the string,
 247   # and returns it if found
 248   def get_html_title
 249     if defined? ::Hpricot
 250       Hpricot(self).at("title").inner_html
 251     else
 252       return unless Irc::Utils::TITLE_REGEX.match(self)
 253       $1
 254     end
 255   end
 256
 257   # This method returns the IRC-formatted version of an
 258   # HTML title found in the string
 259   def ircify_html_title
 260     self.get_html_title.ircify_html rescue nil
 261   end
 262 end
 263
 264
 265 # Extensions to the Regexp class, with some common and/or complex regular
 266 # expressions.
 267 #
 268 class ::Regexp
 269
 270   # A method to build a regexp that matches a list of something separated by
 271   # optional commas and/or the word "and", an optionally repeated prefix,
 272   # and whitespace.
 273   def Regexp.new_list(reg, pfx = "")
 274     if pfx.kind_of?(String) and pfx.empty?
 275       return %r(#{reg}(?:,?(?:\s+and)?\s+#{reg})*)
 276     else
 277       return %r(#{reg}(?:,?(?:\s+and)?(?:\s+#{pfx})?\s+#{reg})*)
 278     end
 279   end
 280
 281   IN_ON = /in|on/
 282
 283   module Irc
 284     # Match a list of channel anmes separated by optional commas, whitespace
 285     # and optionally the word "and"
 286     CHAN_LIST = Regexp.new_list(GEN_CHAN)
 287
 288     # Match "in #channel" or "on #channel" and/or "in private" (optionally
 289     # shortened to "in pvt"), returning the channel name or the word 'private'
 290     # or 'pvt' as capture
 291     IN_CHAN = /#{IN_ON}\s+(#{GEN_CHAN})|(here)|/
 292     IN_CHAN_PVT = /#{IN_CHAN}|in\s+(private|pvt)/
 293
 294     # As above, but with channel lists
 295     IN_CHAN_LIST_SFX = Regexp.new_list(/#{GEN_CHAN}|here/, IN_ON)
 296     IN_CHAN_LIST = /#{IN_ON}\s+#{IN_CHAN_LIST_SFX}|anywhere|everywhere/
 297     IN_CHAN_LIST_PVT_SFX = Regexp.new_list(/#{GEN_CHAN}|here|private|pvt/, IN_ON)
 298     IN_CHAN_LIST_PVT = /#{IN_ON}\s+#{IN_CHAN_LIST_PVT_SFX}|anywhere|everywhere/
 299
 300     # Match a list of nicknames separated by optional commas, whitespace and
 301     # optionally the word "and"
 302     NICK_LIST = Regexp.new_list(GEN_NICK)
 303
 304   end
 305
 306 end
 307
 308
 309 module ::Irc
 310
 311
 312   class BasicUserMessage
 313
 314     # We extend the BasicUserMessage class with a method that parses a string
 315     # which is a channel list as matched by IN_CHAN(_LIST) and co. The method
 316     # returns an array of channel names, where 'private' or 'pvt' is replaced
 317     # by the Symbol :"?", 'here' is replaced by the channel of the message or
 318     # by :"?" (depending on whether the message target is the bot or a
 319     # Channel), and 'anywhere' and 'everywhere' are replaced by Symbol :*
 320     #
 321     def parse_channel_list(string)
 322       return [:*] if [:anywhere, :everywhere].include? string.to_sym
 323       string.scan(
 324       /(?:^|,?(?:\s+and)?\s+)(?:in|on\s+)?(#{Regexp::Irc::GEN_CHAN}|here|private|pvt)/
 325                  ).map { |chan_ar|
 326         chan = chan_ar.first
 327         case chan.to_sym
 328         when :private, :pvt
 329           :"?"
 330         when :here
 331           case self.target
 332           when Channel
 333             self.target.name
 334           else
 335             :"?"
 336           end
 337         else
 338           chan
 339         end
 340       }.uniq
 341     end
 342   end
 343 end