1 /* +------------------------------------+
2 * | Inspire Internet Relay Chat Daemon |
3 * +------------------------------------+
5 * InspIRCd: (C) 2002-2009 InspIRCd Development Team
6 * See: http://wiki.inspircd.org/Credits
8 * This program is free but copyrighted software; see
9 * the file COPYING for details.
11 * ---------------------------------------------------
20 /*******************************************************
21 * This file contains classes and templates that deal
22 * with the comparison and hashing of 'irc strings'.
23 * An 'irc string' is a string which compares in a
24 * case insensitive manner, and as per RFC 1459 will
25 * treat [ identical to {, ] identical to }, and \
28 * Our hashing functions are designed to accept
29 * std::string and compare/hash them as type irc::string
30 * by converting them internally. This makes them
31 * backwards compatible with other code which is not
32 * aware of irc::string.
33 *******************************************************/
38 /** A mapping of uppercase to lowercase, including scandinavian
 * 'oddities' as specified by RFC1459, e.g. [ -> {, and \ -> |
41 unsigned const char rfc_case_insensitive_map[256] = {
42 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 0-19 */
43 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, /* 20-39 */
44 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, /* 40-59 */
45 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, /* 60-79 */
46 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 94, 95, 96, 97, 98, 99, /* 80-99 */
47 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, /* 100-119 */
48 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, /* 120-139 */
49 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, /* 140-159 */
50 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, /* 160-179 */
51 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, /* 180-199 */
52 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, /* 200-219 */
53 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, /* 220-239 */
54 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 /* 240-255 */
/** Separate from the other casemap tables so that code *can* still exclusively rely on RFC casemapping
60 * This is provided as a pointer so that modules can change it to their custom mapping tables,
61 * e.g. for national character support.
63 CoreExport extern unsigned const char *national_case_insensitive_map;
65 /** Case insensitive map, ASCII rules.
69 unsigned const char ascii_case_insensitive_map[256] = {
70 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 0-19 */
71 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, /* 20-39 */
72 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, /* 40-59 */
73 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, /* 60-79 */
74 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, /* 80-99 */
75 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, /* 100-119 */
76 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, /* 120-139 */
77 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, /* 140-159 */
78 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, /* 160-179 */
79 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, /* 180-199 */
80 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, /* 200-219 */
81 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, /* 220-239 */
82 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 /* 240-255 */
85 /** Case sensitive map.
86 * Can technically also be used for ASCII case sensitive comparisons, as [ != {, etc.
88 unsigned const char rfc_case_sensitive_map[256] = {
89 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
90 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
91 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
92 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
93 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
94 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
95 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
96 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
97 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
98 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
99 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
100 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
101 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
/** Replace every non-overlapping occurrence of a pattern within a string.
 * The text is scanned from left to right; wherever the pattern matches at
 * the current position the replacement is emitted and scanning resumes
 * directly after the match, so text produced by a replacement is never
 * examined again. Matching is done with T's own compare(), i.e. with the
 * comparison rules of T's character traits.
 * An empty pattern or an empty text leaves the text untouched.
 * @param text The string to modify in place
 * @param pattern The substring to look for
 * @param replace The string substituted for each match
 * @return A reference to the (modified) text
 */
template<typename T> const T& SearchAndReplace(T& text, const T& pattern, const T& replace)
{
	/* Nothing can match: hand the text back unmodified. */
	if (pattern.empty() || text.empty())
		return text;

	const typename T::size_type textlen = text.length();
	const typename T::size_type patlen = pattern.length();

	T replacement;
	replacement.reserve(textlen);

	typename T::size_type n = 0;
	while (n < textlen)
	{
		/* compare() clamps the window to the end of the text, so a tail
		 * shorter than the pattern can never be reported as a match.
		 */
		if (text.compare(n, patlen, pattern) == 0)
		{
			/* Found the pattern in the text, replace it, and advance */
			replacement.append(replace);
			n += patlen;
		}
		else
		{
			replacement += text[n];
			++n;
		}
	}

	text.swap(replacement);
	return text;
}
129 /** The irc namespace contains a number of helper classes.
134 /** This class returns true if two strings match.
135 * Case sensitivity is ignored, and the RFC 'character set'
140 /** The operator () does the actual comparison in hash_map
142 bool operator()(const std::string& s1, const std::string& s2) const;
145 /** The irc_char_traits class is used for RFC-style comparison of strings.
146 * This class is used to implement irc::string, a case-insensitive, RFC-
147 * comparing string class.
149 struct irc_char_traits : std::char_traits<char> {
151 /** Check if two chars match.
152 * @param c1st First character
153 * @param c2nd Second character
154 * @return true if the characters are equal
156 static bool eq(char c1st, char c2nd);
158 /** Check if two chars do NOT match.
159 * @param c1st First character
160 * @param c2nd Second character
161 * @return true if the characters are unequal
163 static bool ne(char c1st, char c2nd);
165 /** Check if one char is less than another.
166 * @param c1st First character
167 * @param c2nd Second character
168 * @return true if c1st is less than c2nd
170 static bool lt(char c1st, char c2nd);
172 /** Compare two strings of size n.
173 * @param str1 First string
174 * @param str2 Second string
175 * @param n Length to compare to
176 * @return similar to strcmp, zero for equal, less than zero for str1
177 * being less and greater than zero for str1 being greater than str2.
179 static CoreExport int compare(const char* str1, const char* str2, size_t n);
181 /** Find a char within a string up to position n.
182 * @param s1 String to find in
183 * @param n Position to search up to
184 * @param c Character to search for
 * @return Pointer to the first occurrence of c in s1
187 static CoreExport const char* find(const char* s1, int n, char c);
190 /** Compose a hex string from raw data.
191 * @param raw The raw data to compose hex from
 * @param rawsz The size of the raw data buffer
193 * @return The hex string.
195 CoreExport std::string hex(const unsigned char *raw, size_t rawsz);
197 /** This typedef declares irc::string based upon irc_char_traits.
199 typedef std::basic_string<char, irc_char_traits, std::allocator<char> > string;
201 /** irc::stringjoiner joins string lists into a string, using
202 * the given seperator string.
203 * This class can join a vector of std::string, a deque of
204 * std::string, or a const char* const* array, using overloaded
207 class CoreExport stringjoiner : public classbase
217 /** Join elements of a vector, between (and including) begin and end
218 * @param seperator The string to seperate values with
219 * @param sequence One or more items to seperate
220 * @param begin The starting element in the sequence to be joined
221 * @param end The ending element in the sequence to be joined
223 stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end);
225 /** Join elements of a deque, between (and including) begin and end
226 * @param seperator The string to seperate values with
227 * @param sequence One or more items to seperate
228 * @param begin The starting element in the sequence to be joined
229 * @param end The ending element in the sequence to be joined
231 stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end);
233 /** Join elements of an array of char arrays, between (and including) begin and end
234 * @param seperator The string to seperate values with
235 * @param sequence One or more items to seperate
236 * @param begin The starting element in the sequence to be joined
237 * @param end The ending element in the sequence to be joined
239 stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end);
241 /** Get the joined sequence
242 * @return A reference to the joined string
244 std::string& GetJoined();
247 /** irc::modestacker stacks mode sequences into a list.
248 * It can then reproduce this list, clamped to a maximum of MAXMODES
251 class CoreExport modestacker : public classbase
254 InspIRCd* ServerInstance;
256 /** The mode sequence and its parameters
258 std::deque<std::string> sequence;
260 /** True if the mode sequence is initially adding
261 * characters, false if it is initially removing
267 /** Construct a new modestacker.
268 * @param add True if the stack is adding modes,
269 * false if it is removing them
271 modestacker(InspIRCd* Instance, bool add);
273 /** Push a modeletter and its parameter onto the stack.
274 * No checking is performed as to if this mode actually
275 * requires a parameter. If you stack invalid mode
276 * sequences, they will be tidied if and when they are
277 * passed to a mode parser.
278 * @param modeletter The mode letter to insert
279 * @param parameter The parameter for the mode
281 void Push(char modeletter, const std::string ¶meter);
283 /** Push a modeletter without parameter onto the stack.
284 * No checking is performed as to if this mode actually
285 * requires a parameter. If you stack invalid mode
286 * sequences, they will be tidied if and when they are
287 * passed to a mode parser.
288 * @param modeletter The mode letter to insert
290 void Push(char modeletter);
292 /** Push a '+' symbol onto the stack.
296 /** Push a '-' symbol onto the stack.
300 /** Return zero or more elements which form the
301 * mode line. This will be clamped to a max of
302 * MAXMODES items (MAXMODES-1 mode parameters and
303 * one mode sequence string), and max_line_size
304 * characters. As specified below, this function
305 * should be called in a loop until it returns zero,
306 * indicating there are no more modes to return.
307 * @param result The vector to populate. This will not
308 * be cleared before it is used.
309 * @param max_line_size The maximum size of the line
310 * to build, in characters, seperate to MAXMODES.
311 * @return The number of elements in the deque.
312 * The function should be called repeatedly until it
313 * returns 0, in case there are multiple lines of
314 * mode changes to be obtained.
316 int GetStackedLine(std::vector<std::string> &result, int max_line_size = 360);
/** deprecated compatibility interface - TODO remove */
319 int GetStackedLine(std::deque<std::string> &result, int max_line_size = 360) {
320 std::vector<std::string> r;
321 int n = GetStackedLine(r, max_line_size);
323 result.insert(result.end(), r.begin(), r.end());
328 /** irc::tokenstream reads a string formatted as per RFC1459 and RFC2812.
329 * It will split the string into 'tokens' each containing one parameter
331 * For instance, if it is instantiated with the string:
332 * "PRIVMSG #test :foo bar baz qux"
333 * then each successive call to tokenstream::GetToken() will return
334 * "PRIVMSG", "#test", "foo bar baz qux", "".
335 * Note that if the whole string starts with a colon this is not taken
336 * to mean the string is all one parameter, and the first item in the
337 * list will be ":item". This is to allow for parsing 'source' fields
340 class CoreExport tokenstream : public classbase
348 /** Last position of a seperator token
350 std::string::iterator last_starting_position;
352 /** Current string position
354 std::string::iterator n;
356 /** True if the last value was an ending value
361 /** Create a tokenstream and fill it with the provided data
363 tokenstream(const std::string &source);
369 /** Fetch the next token from the stream as a std::string
370 * @param token The next token available, or an empty string if none remain
371 * @return True if tokens are left to be read, false if the last token was just retrieved.
373 bool GetToken(std::string &token);
375 /** Fetch the next token from the stream as an irc::string
376 * @param token The next token available, or an empty string if none remain
377 * @return True if tokens are left to be read, false if the last token was just retrieved.
379 bool GetToken(irc::string &token);
381 /** Fetch the next token from the stream as an integer
382 * @param token The next token available, or undefined if none remain
383 * @return True if tokens are left to be read, false if the last token was just retrieved.
385 bool GetToken(int &token);
387 /** Fetch the next token from the stream as a long integer
388 * @param token The next token available, or undefined if none remain
389 * @return True if tokens are left to be read, false if the last token was just retrieved.
391 bool GetToken(long &token);
394 /** irc::sepstream allows for splitting token seperated lists.
395 * Each successive call to sepstream::GetToken() returns
396 * the next token, until none remain, at which point the method returns
399 class CoreExport sepstream : public classbase
405 /** Last position of a seperator token
407 std::string::iterator last_starting_position;
408 /** Current string position
410 std::string::iterator n;
415 /** Create a sepstream and fill it with the provided data
417 sepstream(const std::string &source, char seperator);
421 virtual ~sepstream();
423 /** Fetch the next token from the stream
424 * @param token The next token from the stream is placed here
425 * @return True if tokens still remain, false if there are none left
427 virtual bool GetToken(std::string &token);
429 /** Fetch the entire remaining stream, without tokenizing
430 * @return The remaining part of the stream
432 virtual const std::string GetRemaining();
434 /** Returns true if the end of the stream has been reached
435 * @return True if the end of the stream has been reached, otherwise false
437 virtual bool StreamEnd();
440 /** A derived form of sepstream, which seperates on commas
442 class CoreExport commasepstream : public sepstream
445 /** Initialize with comma seperator
447 commasepstream(const std::string &source) : sepstream(source, ',')
452 /** A derived form of sepstream, which seperates on spaces
454 class CoreExport spacesepstream : public sepstream
457 /** Initialize with space seperator
459 spacesepstream(const std::string &source) : sepstream(source, ' ')
464 /** The portparser class seperates out a port range into integers.
465 * A port range may be specified in the input string in the form
466 * "6660,6661,6662-6669,7020". The end of the stream is indicated by
467 * a return value of 0 from portparser::GetToken(). If you attempt
468 * to specify an illegal range (e.g. one where start >= end, or
469 * start or end < 0) then GetToken() will return the first element
470 * of the pair of numbers.
472 class CoreExport portparser : public classbase
476 /** Used to split on commas
480 /** Current position in a range of ports
484 /** Starting port in a range of ports
488 /** Ending port in a range of ports
492 /** Allow overlapped port ranges
496 /** Used to determine overlapping of ports
497 * without O(n) algorithm being used
499 std::map<long, bool> overlap_set;
501 /** Returns true if val overlaps an existing range
503 bool Overlaps(long val);
506 /** Create a portparser and fill it with the provided data
507 * @param source The source text to parse from
508 * @param allow_overlapped Allow overlapped ranges
510 portparser(const std::string &source, bool allow_overlapped = true);
512 /** Frees the internal commasepstream object
516 /** Fetch the next token from the stream
517 * @return The next port number is returned, or 0 if none remain
522 /** Turn _ characters in a string into spaces
523 * @param n String to translate
524 * @return The new value with _ translated to space.
526 CoreExport const char* Spacify(const char* n);
529 /* Define operators for using >> and << with irc::string to an ostream on an istream. */
530 /* This was endless fun. No. Really. */
531 /* It was also the first core change Ommeh made, if anyone cares */
533 /** Operator << for irc::string
535 inline std::ostream& operator<<(std::ostream &os, const irc::string &str) { return os << str.c_str(); }
537 /** Operator >> for irc::string
539 inline std::istream& operator>>(std::istream &is, irc::string &str)
547 /* Define operators for + and == with irc::string to std::string for easy assignment
552 inline std::string operator+ (std::string& leftval, irc::string& rightval)
554 return leftval + std::string(rightval.c_str());
557 /* Define operators for + and == with irc::string to std::string for easy assignment
562 inline irc::string operator+ (irc::string& leftval, std::string& rightval)
564 return leftval + irc::string(rightval.c_str());
567 /* Define operators for + and == with irc::string to std::string for easy assignment
572 inline bool operator== (const std::string& leftval, const irc::string& rightval)
574 return (leftval.c_str() == rightval);
577 /* Define operators for + and == with irc::string to std::string for easy assignment
582 inline bool operator== (const irc::string& leftval, const std::string& rightval)
584 return (leftval == rightval.c_str());
587 /* Define operators != for irc::string to std::string for easy comparison
589 inline bool operator!= (const irc::string& leftval, const std::string& rightval)
591 return !(leftval == rightval.c_str());
594 /* Define operators != for std::string to irc::string for easy comparison
596 inline bool operator!= (const std::string& leftval, const irc::string& rightval)
598 return !(leftval.c_str() == rightval);
/* The array overloads below measure the C string actually stored in the
 * array (up to its first NUL, never beyond its N elements) instead of
 * assuming that it holds exactly N - 1 characters. This makes partly
 * filled fixed-size buffers such as char[MAXBUF] compare by their contents.
 */

/** Compare a std::string with a character array holding a C string.
 * @param lhs The std::string operand
 * @param rhs The character array; its content ends at the first NUL, or
 * after all N elements when the array contains no NUL at all
 * @return True if lhs has the same length and bytes as the content of rhs
 */
template<std::size_t N>
static inline bool operator == (std::string const &lhs, char const (&rhs)[N])
{
	/* Bounded search: never looks beyond the N elements of the array. */
	void const *const terminator = std::memchr(rhs, '\0', N);
	std::size_t const rhslen = terminator
		? static_cast<std::size_t>(static_cast<char const *>(terminator) - rhs)
		: N;
	return lhs.length() == rhslen && !std::memcmp(lhs.data(), rhs, rhslen);
}

/** Compare a std::string with a character array holding a C string.
 * @param lhs The std::string operand
 * @param rhs The character array; its content ends at the first NUL, or
 * after all N elements when the array contains no NUL at all
 * @return True if lhs differs from the content of rhs
 */
template<std::size_t N>
static inline bool operator != (std::string const &lhs, char const (&rhs)[N])
{
	/* Name the array overload explicitly so the call cannot be resolved to
	 * (or be ambiguous with) the std::string == const char* comparison.
	 */
	return !operator==<N>(lhs, rhs);
}
616 /** Assign an irc::string to a std::string.
618 inline std::string assign(const irc::string &other) { return other.c_str(); }
620 /** Assign a std::string to an irc::string.
622 inline irc::string assign(const std::string &other) { return other.c_str(); }
/** Trim the leading and trailing spaces from a std::string.
 * Only the space character (' ') is removed; other whitespace is kept.
 * A string that is empty or consists solely of spaces becomes empty.
 * @param str The string to trim in place
 * @return A reference to str
 */
inline std::string& trim(std::string &str)
{
	const std::string::size_type end = str.find_last_not_of(' ');
	if (end == std::string::npos)
	{
		/* Empty, or nothing but spaces: the trimmed result is the empty string. */
		str.clear();
		return str;
	}

	/* Drop the tail first so the leading offset is unaffected by it. */
	str.erase(end + 1);
	str.erase(0, str.find_first_not_of(' '));
	return str;
}
638 /** Hashing stuff is totally different on vc++'s hash_map implementation, so to save a buttload of
639 * #ifdefs we'll just do it all at once. Except, of course, with TR1, when it's the same as GCC.
641 BEGIN_HASHMAP_NAMESPACE
643 /** Hashing function to hash irc::string
645 #if defined(WINDOWS) && !defined(HAS_TR1_UNORDERED)
646 template<> class CoreExport hash_compare<irc::string, std::less<irc::string> >
649 enum { bucket_size = 4, min_buckets = 8 }; /* Got these numbers from the CRT source, if anyone wants to change them feel free. */
651 /** Compare two irc::string values for hashing in hash_map
653 bool operator()(const irc::string & s1, const irc::string & s2) const
655 if(s1.length() != s2.length()) return true;
656 return (irc::irc_char_traits::compare(s1.c_str(), s2.c_str(), (size_t)s1.length()) < 0);
659 /** Hash an irc::string value for hash_map
661 size_t operator()(const irc::string & s) const;
664 template<> class CoreExport hash_compare<std::string, std::less<std::string> >
667 enum { bucket_size = 4, min_buckets = 8 }; /* Again, from the CRT source */
669 /** Compare two std::string values for hashing in hash_map
671 bool operator()(const std::string & s1, const std::string & s2) const
673 if(s1.length() != s2.length()) return true;
674 return (irc::irc_char_traits::compare(s1.c_str(), s2.c_str(), (size_t)s1.length()) < 0);
677 /** Hash a std::string using RFC1459 case sensitivity rules
678 * @param s A string to hash
679 * @return The hash value
681 size_t operator()(const std::string & s) const;
685 template<> struct hash<irc::string>
687 /** Hash an irc::string using RFC1459 case sensitivity rules
688 * @param s A string to hash
689 * @return The hash value
691 size_t CoreExport operator()(const irc::string &s) const;
694 /* XXX FIXME: Implement a hash function overriding std::string's that works with TR1! */
696 #ifdef HASHMAP_DEPRECATED
699 CoreExport template<> struct hash<std::string>
702 size_t CoreExport operator()(const std::string &s) const;
707 /** Convert a string to lower case respecting RFC1459
708 * @param n A string to lowercase
710 void strlower(char *n);
712 END_HASHMAP_NAMESPACE