X-Git-Url: https://git.netwichtig.de/gitweb/?a=blobdiff_plain;f=lib%2Frbot%2Firc.rb;h=c2c1e82fdd518c30889a1eed9cad0830857ea9de;hb=c705ba5a89cd7b5c19677f4950c9784828ffc5c6;hp=6d46f75fc3500a47730bb30c0d3382378699f900;hpb=82b553fe5a5ad6406045229d5929c6328da6ecc2;p=user%2Fhenk%2Fcode%2Fruby%2Frbot.git diff --git a/lib/rbot/irc.rb b/lib/rbot/irc.rb index 6d46f75f..c2c1e82f 100644 --- a/lib/rbot/irc.rb +++ b/lib/rbot/irc.rb @@ -475,6 +475,103 @@ class ArrayOf < Array end +# We extend the Regexp class with an Irc module which will contain some +# Irc-specific regexps +# +class Regexp + + # We start with some general-purpose ones which will be used in the + # Irc module too, but are useful regardless + DIGITS = /\d+/ + HEX_DIGIT = /[0-9A-Fa-f]/ + HEX_DIGITS = /#{HEX_DIGIT}+/ + HEX_OCTET = /#{HEX_DIGIT}#{HEX_DIGIT}?/ + DEC_OCTET = /[01]?\d?\d|2[0-4]\d|25[0-5]/ + DEC_IP_ADDR = /#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}/ + HEX_IP_ADDR = /#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}/ + IP_ADDR = /#{DEC_IP_ADDR}|#{HEX_IP_ADDR}/ + + # IPv6, from Resolv::IPv6, without the \A..\z anchors + HEX_16BIT = /#{HEX_DIGIT}{1,4}/ + IP6_8Hex = /(?:#{HEX_16BIT}:){7}#{HEX_16BIT}/ + IP6_CompressedHex = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)/ + IP6_6Hex4Dec = /((?:#{HEX_16BIT}:){6,6})#{DEC_IP_ADDR}/ + IP6_CompressedHex4Dec = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}:)*)#{DEC_IP_ADDR}/ + IP6_ADDR = /(?:#{IP6_8Hex})|(?:#{IP6_CompressedHex})|(?:#{IP6_6Hex4Dec})|(?:#{IP6_CompressedHex4Dec})/ + + # We start with some IRC related regular expressions, used to match + # Irc::User nicks and users and Irc::Channel names + # + # For each of them we define two versions of the regular expression: + # * a generic one, which should match for any server but may turn out to + # match more than a specific server would accept + # * an RFC-compliant matcher + # + module Irc + + # Channel-name-matching regexps + CHAN_FIRST = /[#&+]/ + CHAN_SAFE = /![A-Z0-9]{5}/ + CHAN_ANY = /[^\x00\x07\x0A\x0D ,:]/ + GEN_CHAN = /(?:#{CHAN_FIRST}|#{CHAN_SAFE})#{CHAN_ANY}+/ + RFC_CHAN = /#{CHAN_FIRST}#{CHAN_ANY}{1,49}|#{CHAN_SAFE}#{CHAN_ANY}{1,44}/ + + # Nick-matching regexps + SPECIAL_CHAR = /[\x5b-\x60\x7b-\x7d]/ + NICK_FIRST = /#{SPECIAL_CHAR}|[[:alpha:]]/ + NICK_ANY = /#{SPECIAL_CHAR}|[[:alnum:]]|-/ + GEN_NICK = /#{NICK_FIRST}#{NICK_ANY}+/ + RFC_NICK = /#{NICK_FIRST}#{NICK_ANY}{0,8}/ + + USER_CHAR = /[^\x00\x0a\x0d @]/ + GEN_USER = /#{USER_CHAR}+/ + + # Host-matching regexps + HOSTNAME_COMPONENT = /[[:alnum:]](?:[[:alnum:]]|-)*[[:alnum:]]*/ + HOSTNAME = /#{HOSTNAME_COMPONENT}(?:\.#{HOSTNAME_COMPONENT})*/ + HOSTADDR = /#{IP_ADDR}|#{IP6_ADDR}/ + + GEN_HOST = /#{HOSTNAME}|#{HOSTADDR}/ + + # # FreeNode network replaces the host of affiliated users with + # # 'virtual hosts' + # # FIXME we need the true syntax to match it properly ... + # PDPC_HOST_PART = /[0-9A-Za-z.-]+/ + # PDPC_HOST = /#{PDPC_HOST_PART}(?:\/#{PDPC_HOST_PART})+/ + + # # NOTE: the final optional and non-greedy dot is needed because some + # # servers (e.g. FreeNode) send the hostname of the services as "services." + # # which is not RFC compliant, but sadly done. + # GEN_HOST_EXT = /#{PDPC_HOST}|#{GEN_HOST}\.??/ + + # Sadly, different networks have different, RFC-breaking ways of cloaking + # the actualy host address: see above for an example to handle FreeNode. + # Another example would be Azzurra, wich also inserts a "=" in the + # cloacked host. So let's just not care about this and go with the simplest + # thing: + GEN_HOST_EXT = /\S+/ + + # User-matching Regexp + GEN_USER_ID = /(#{GEN_NICK})(?:(?:!(#{GEN_USER}))?@(#{GEN_HOST_EXT}))?/ + + # Things such has the BIP proxy send invalid nicks in a complete netmask, + # so we want to match this, rather: this matches either a compliant nick + # or a a string with a very generic nick, a very generic hostname after an + # @ sign, and an optional user after a ! + BANG_AT = /#{GEN_NICK}|\S+?(?:!\S+?)?@\S+?/ + + # # For Netmask, we want to allow wildcards * and ? in the nick + # # (they are already allowed in the user and host part + # GEN_NICK_MASK = /(?:#{NICK_FIRST}|[?*])?(?:#{NICK_ANY}|[?*])+/ + + # # Netmask-matching Regexp + # GEN_MASK = /(#{GEN_NICK_MASK})(?:(?:!(#{GEN_USER}))?@(#{GEN_HOST_EXT}))?/ + + end + +end + + module Irc @@ -517,7 +614,9 @@ module Irc # Now we can see if the given string _str_ is an actual Netmask if str.respond_to?(:to_str) case str.to_str - when /^(?:(\S+?)(?:!(\S+)@(?:(\S+))?)?)?$/ + # We match a pretty generic string, to work around non-compliant + # servers + when /^(?:(\S+?)(?:(?:!(\S+?))?@(\S+))?)?$/ # We do assignment using our internal methods self.nick = $1 self.user = $2 @@ -1331,8 +1430,8 @@ module Irc :typec => nil, # Type C: needs a parameter when set :typed => nil # Type D: must not have a parameter }, - :channellen => 200, - :chantypes => "#&", + :channellen => 50, + :chantypes => "#&!+", :excepts => nil, :idchan => {}, :invex => nil, @@ -1370,6 +1469,7 @@ module Irc def clear reset_lists reset_capabilities + @hostname = @version = @usermodes = @chanmodes = nil end # This method is used to parse a 004 RPL_MY_INFO line