diff options
-rw-r--r-- | lib/rbot/core/utils/extends.rb | 36 | ||||
-rw-r--r-- | lib/rbot/irc.rb | 78 | ||||
-rw-r--r-- | lib/rbot/rfc2812.rb | 7 |
3 files changed, 84 insertions, 37 deletions
diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb index e882148b..c43f3f3b 100644 --- a/lib/rbot/core/utils/extends.rb +++ b/lib/rbot/core/utils/extends.rb @@ -95,23 +95,9 @@ class ::Regexp IN_ON = /in|on/ - # We start with some IRC related regular expressions, used to match - # Irc::User nicks and Irc::Channel names - # - # For each of them we define three versions of the regular expression: - # * a generic one, which should match for any server but may turn out to - # match more than a specific server would accept - # * an RFC-compliant matcher - # * TODO a server-specific one that uses the Irc::Server#supports method to build - # a matcher valid for a particular server. - # module Irc - CHAN_FIRST = /[#&+]/ - CHAN_SAFE = /![A-Z0-9]{5}/ - CHAN_ANY = /[^\x00\x07\x0A\x0D ,:]/ - GEN_CHAN = /(?:#{CHAN_FIRST}|#{CHAN_SAFE})#{CHAN_ANY}+/ - RFC_CHAN = /#{CHAN_FIRST}#{CHAN_ANY}{1,49}|#{CHAN_SAFE}#{CHAN_ANY}{1,44}/ - + # Match a list of channel names separated by optional commas, whitespace + # and optionally the word "and" CHAN_LIST = Regexp.new_list(GEN_CHAN) # Match "in #channel" or "on #channel" and/or "in private" (optionally @@ -126,28 +112,12 @@ class ::Regexp IN_CHAN_LIST_PVT_SFX = Regexp.new_list(/#{GEN_CHAN}|here|private|pvt/, IN_ON) IN_CHAN_LIST_PVT = /#{IN_ON}\s+#{IN_CHAN_LIST_PVT_SFX}|anywhere|everywhere/ - SPECIAL_CHAR = /[\x5b-\x60\x7b-\x7d]/ - NICK_FIRST = /#{SPECIAL_CHAR}|[[:alpha:]]/ - NICK_ANY = /#{SPECIAL_CHAR}|[[:alnum:]]|-/ - GEN_NICK = /#{NICK_FIRST}#{NICK_ANY}+/ - RFC_NICK = /#{NICK_FIRST}#{NICK_ANY}{0,8}/ - # Match a list of nicknames separated by optional commas, whitespace and # optionally the word "and" - NICK_LIST = Regexp.new_list(GEN_CHAN) + NICK_LIST = Regexp.new_list(GEN_NICK) end - # Next, some general purpose ones - DIGITS = /\d+/ - HEX_DIGIT = /[0-9A-Fa-f]/ - HEX_DIGITS = /#{HEX_DIGIT}+/ - HEX_OCTET = /#{HEX_DIGIT}#{HEX_DIGIT}?/ - DEC_OCTET = /[01]?\d?\d|2[0-4]\d|25[0-5]/ - DEC_IP_ADDR = 
/#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}/ - HEX_IP_ADDR = /#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}/ - IP_ADDR = /#{DEC_IP_ADDR}|#{HEX_IP_ADDR}/ - end diff --git a/lib/rbot/irc.rb b/lib/rbot/irc.rb index a0ffbd91..1c9d4dcd 100644 --- a/lib/rbot/irc.rb +++ b/lib/rbot/irc.rb @@ -475,6 +475,82 @@ class ArrayOf < Array end
+# We extend the Regexp class with some general-purpose constants and with an
+# Irc module which contains the Irc-specific regexps. Definition order
+# matters: later constants interpolate the ones defined before them.
+class Regexp
+
+ # General-purpose patterns, reused by the Irc module below but useful regardless.
+ # NOTE(review): the '.' separators in DEC_IP_ADDR and HEX_IP_ADDR are unescaped, so they match any character -- confirm this is intended.
+ DIGITS = /\d+/
+ HEX_DIGIT = /[0-9A-Fa-f]/
+ HEX_DIGITS = /#{HEX_DIGIT}+/
+ HEX_OCTET = /#{HEX_DIGIT}#{HEX_DIGIT}?/
+ DEC_OCTET = /[01]?\d?\d|2[0-4]\d|25[0-5]/
+ DEC_IP_ADDR = /#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}/
+ HEX_IP_ADDR = /#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}/
+ IP_ADDR = /#{DEC_IP_ADDR}|#{HEX_IP_ADDR}/
+
+ # IPv6, from Resolv::IPv6, without the \A..\z anchors. The compressed and 4Dec variants (and hence IP6_ADDR) contain capturing groups.
+ HEX_16BIT = /#{HEX_DIGIT}{1,4}/
+ IP6_8Hex = /(?:#{HEX_16BIT}:){7}#{HEX_16BIT}/
+ IP6_CompressedHex = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)/
+ IP6_6Hex4Dec = /((?:#{HEX_16BIT}:){6,6})#{DEC_IP_ADDR}/
+ IP6_CompressedHex4Dec = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}:)*)#{DEC_IP_ADDR}/
+ IP6_ADDR = /(?:#{IP6_8Hex})|(?:#{IP6_CompressedHex})|(?:#{IP6_6Hex4Dec})|(?:#{IP6_CompressedHex4Dec})/
+
+ # Next come the IRC related regular expressions, used to match
+ # Irc::User nicks and users and Irc::Channel names
+ #
+ # For channels and nicks we define two versions of the regular expression:
+ # * a generic one (GEN_*), which should match for any server but may turn
+ # out to match more than a specific server would accept
+ # * an RFC-compliant matcher (RFC_*)
+ # Users, hosts and netmasks only get the generic (GEN_*) version here.
+ module Irc
+
+ # Channel-name-matching regexps: a '#', '&' or '+' prefix, or a "safe" prefix made of '!' plus five characters from A-Z0-9
+ CHAN_FIRST = /[#&+]/
+ CHAN_SAFE = /![A-Z0-9]{5}/
+ CHAN_ANY = /[^\x00\x07\x0A\x0D ,:]/
+ GEN_CHAN = /(?:#{CHAN_FIRST}|#{CHAN_SAFE})#{CHAN_ANY}+/
+ RFC_CHAN = /#{CHAN_FIRST}#{CHAN_ANY}{1,49}|#{CHAN_SAFE}#{CHAN_ANY}{1,44}/
+
+ # Nick-matching regexps. NOTE(review): GEN_NICK needs at least two characters (NICK_ANY+) while RFC_NICK accepts a single one -- confirm.
+ SPECIAL_CHAR = /[\x5b-\x60\x7b-\x7d]/
+ NICK_FIRST = /#{SPECIAL_CHAR}|[[:alpha:]]/
+ NICK_ANY = /#{SPECIAL_CHAR}|[[:alnum:]]|-/
+ GEN_NICK = /#{NICK_FIRST}#{NICK_ANY}+/
+ RFC_NICK = /#{NICK_FIRST}#{NICK_ANY}{0,8}/
+
+ USER_CHAR = /[^\x00\x0a\x0d @]/ # user part: anything but NUL, LF, CR, space and '@'
+ GEN_USER = /#{USER_CHAR}+/
+
+ # Host-matching regexps: dot-separated hostname components, or an IPv4/IPv6 address
+ HOSTNAME_COMPONENT = /[[:alnum:]](?:[[:alnum:]]|-)*[[:alnum:]]*/ # NOTE(review): the final [[:alnum:]]* is optional, so a trailing '-' is accepted -- confirm
+ HOSTNAME = /#{HOSTNAME_COMPONENT}(?:\.#{HOSTNAME_COMPONENT})*/
+ HOSTADDR = /#{IP_ADDR}|#{IP6_ADDR}/
+
+ GEN_HOST = /#{HOSTNAME}|#{HOSTADDR}/
+
+ # The FreeNode network replaces the host of affiliated users with
+ # 'virtual hosts', matched here as two or more '/'-separated parts
+ # FIXME we need the true syntax to match it properly ...
+ PDPC_HOST_PART = /[0-9A-Za-z.-]+/
+ PDPC_HOST = /#{PDPC_HOST_PART}(?:\/#{PDPC_HOST_PART})+/
+
+ # NOTE: the final optional and non-greedy dot (GEN_HOST branch only) is needed because some
+ # servers (e.g. FreeNode) send the hostname of the services as "services."
+ # which is not RFC compliant, but sadly done.
+ GEN_MASK_HOST = /#{PDPC_HOST}|#{GEN_HOST}\.??/
+
+ # Netmask-matching Regexp: nick, optionally followed by [!user]@host; captures 1 = nick, 2 = user, 3 = host
+ GEN_MASK = /(#{GEN_NICK})(?:(?:!(#{GEN_USER}))?@(#{GEN_MASK_HOST}))?/
+ end
+
+end
+
+
module Irc
@@ -517,7 +593,7 @@ module Irc # Now we can see if the given string _str_ is an actual Netmask
if str.respond_to?(:to_str)
case str.to_str
- when /^(?:(\S+?)(?:!(\S+)@(?:(\S+))?)?)?$/
+ when /^(?:#{Regexp::Irc::GEN_MASK})?$/
# We do assignment using our internal methods
self.nick = $1
self.user = $2
diff --git a/lib/rbot/rfc2812.rb b/lib/rbot/rfc2812.rb index 97181b03..efa50035 100644 --- a/lib/rbot/rfc2812.rb +++ b/lib/rbot/rfc2812.rb @@ -902,13 +902,14 @@ module Irc # This is not always true, though, since some servers do not send a # full hostmask for user messages. # - if prefix =~ /^(?:\S+)(?:!\S+)?@(?:\S+)$/ + if prefix =~ /^#{Regexp::Irc::GEN_MASK}$/ data[:source] = @server.user(prefix) else if @server.hostname if @server.hostname != prefix - debug "Origin #{prefix} for message\n\t#{serverstring.inspect}\nis neither a user hostmask nor the server hostname, assuming it's a nick" - data[:source] = @server.user(prefix) + # TODO do we want to be able to differentiate messages that are passed on to us from /other/ servers? + debug "Origin #{prefix} for message\n\t#{serverstring.inspect}\nis neither a user hostmask nor the server hostname\nI'll pretend that it's from the server anyway" + data[:source] = @server else data[:source] = @server end |