1 /* +------------------------------------+
2 * | Inspire Internet Relay Chat Daemon |
3 * +------------------------------------+
5 * InspIRCd: (C) 2002-2009 InspIRCd Development Team
6 * See: http://wiki.inspircd.org/Credits
8 * This program is free but copyrighted software; see
9 * the file COPYING for details.
11 * ---------------------------------------------------
20 /*******************************************************
21 * This file contains classes and templates that deal
22 * with the comparison and hashing of 'irc strings'.
23 * An 'irc string' is a string which compares in a
24 * case insensitive manner, and as per RFC 1459 will
25 * treat [ identical to {, ] identical to }, and \
28 * Our hashing functions are designed to accept
29 * std::string and compare/hash them as type irc::string
30 * by converting them internally. This makes them
31 * backwards compatible with other code which is not
32 * aware of irc::string.
33 *******************************************************/
38 /** A mapping of uppercase to lowercase, including scandinavian
39 * 'oddities' as specified by RFC1459, e.g. [ -> {, and \ -> |
41 unsigned const char rfc_case_insensitive_map[256] = {
42 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 0-19 */
43 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, /* 20-39 */
44 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, /* 40-59 */
45 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, /* 60-79 */
46 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 94, 95, 96, 97, 98, 99, /* 80-99 */
47 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, /* 100-119 */
48 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, /* 120-139 */
49 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, /* 140-159 */
50 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, /* 160-179 */
51 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, /* 180-199 */
52 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, /* 200-219 */
53 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, /* 220-239 */
54 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 /* 240-255 */
57 /** Separate from the other casemap tables so that code *can* still exclusively rely on RFC casemapping
60 * This is provided as a pointer so that modules can change it to their custom mapping tables,
61 * e.g. for national character support.
63 CoreExport extern unsigned const char *national_case_insensitive_map;
65 /** Case insensitive map, ASCII rules.
69 unsigned const char ascii_case_insensitive_map[256] = {
70 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 0-19 */
71 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, /* 20-39 */
72 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, /* 40-59 */
73 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, /* 60-79 */
74 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, /* 80-99 */
75 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, /* 100-119 */
76 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, /* 120-139 */
77 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, /* 140-159 */
78 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, /* 160-179 */
79 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, /* 180-199 */
80 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, /* 200-219 */
81 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, /* 220-239 */
82 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 /* 240-255 */
85 /** Case sensitive map.
86 * Can technically also be used for ASCII case sensitive comparisons, as [ != {, etc.
88 unsigned const char rfc_case_sensitive_map[256] = {
89 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
90 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
91 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
92 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
93 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
94 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
95 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
96 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
97 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
98 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
99 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
100 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
101 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
/** Replace every occurrence of a pattern within a string, in place.
 *
 * The text is scanned from left to right. Matches never overlap, and text
 * inserted as a replacement is never scanned again. If either the text or
 * the pattern is empty, the text is left untouched.
 * @param text The string to modify
 * @param pattern The substring to search for
 * @param replace The text substituted for each match
 * @return A reference to the (possibly modified) text
 */
template<typename T> const T& SearchAndReplace(T& text, const T& pattern, const T& replace)
{
	typedef typename T::size_type size_type;

	if (pattern.empty() || text.empty())
		return text;

	size_type match = text.find(pattern);
	if (match == T::npos)
		return text; /* Nothing to replace, so skip the rebuild entirely */

	/* Build the result separately so that pattern/replace may alias text */
	T replacement;
	replacement.reserve(text.length());

	size_type copied = 0;
	do
	{
		/* Keep the untouched run before the match, then substitute */
		replacement.append(text, copied, match - copied);
		replacement.append(replace);
		copied = match + pattern.length();
		match = text.find(pattern, copied);
	} while (match != T::npos);

	/* Whatever follows the final match is copied verbatim */
	replacement.append(text, copied, T::npos);
	text.swap(replacement);
	return text;
}
129 /** The irc namespace contains a number of helper classes.
134 /** This class returns true if two strings match.
135 * Case sensitivity is ignored, and the RFC 'character set'
140 /** The operator () does the actual comparison in hash_map
142 bool operator()(const std::string& s1, const std::string& s2) const;
145 /** The irc_char_traits class is used for RFC-style comparison of strings.
146 * This class is used to implement irc::string, a case-insensitive, RFC-
147 * comparing string class.
149 struct irc_char_traits : std::char_traits<char> {
151 /** Check if two chars match.
152 * @param c1st First character
153 * @param c2nd Second character
154 * @return true if the characters are equal
156 static bool eq(char c1st, char c2nd);
158 /** Check if two chars do NOT match.
159 * @param c1st First character
160 * @param c2nd Second character
161 * @return true if the characters are unequal
163 static bool ne(char c1st, char c2nd);
165 /** Check if one char is less than another.
166 * @param c1st First character
167 * @param c2nd Second character
168 * @return true if c1st is less than c2nd
170 static bool lt(char c1st, char c2nd);
172 /** Compare two strings of size n.
173 * @param str1 First string
174 * @param str2 Second string
175 * @param n Length to compare to
176 * @return similar to strcmp, zero for equal, less than zero for str1
177 * being less and greater than zero for str1 being greater than str2.
179 static CoreExport int compare(const char* str1, const char* str2, size_t n);
181 /** Find a char within a string up to position n.
182 * @param s1 String to find in
183 * @param n Position to search up to
184 * @param c Character to search for
185 * @return Pointer to the first occurrence of c in s1
187 static CoreExport const char* find(const char* s1, int n, char c);
190 /** Compose a hex string from raw data.
191 * @param raw The raw data to compose hex from
192 * @param rawsz The size of the raw data buffer
193 * @return The hex string.
195 CoreExport std::string hex(const unsigned char *raw, size_t rawsz);
197 /** This typedef declares irc::string based upon irc_char_traits.
199 typedef std::basic_string<char, irc_char_traits, std::allocator<char> > string;
201 /** irc::stringjoiner joins string lists into a string, using
202 * the given seperator string.
203 * This class can join a vector of std::string, a deque of
204 * std::string, or a const char* const* array, using overloaded
207 class CoreExport stringjoiner
217 /** Join elements of a vector, between (and including) begin and end
218 * @param seperator The string to seperate values with
219 * @param sequence One or more items to seperate
220 * @param begin The starting element in the sequence to be joined
221 * @param end The ending element in the sequence to be joined
223 stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end);
225 /** Join elements of a deque, between (and including) begin and end
226 * @param seperator The string to seperate values with
227 * @param sequence One or more items to seperate
228 * @param begin The starting element in the sequence to be joined
229 * @param end The ending element in the sequence to be joined
231 stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end);
233 /** Join elements of an array of char arrays, between (and including) begin and end
234 * @param seperator The string to seperate values with
235 * @param sequence One or more items to seperate
236 * @param begin The starting element in the sequence to be joined
237 * @param end The ending element in the sequence to be joined
239 stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end);
241 /** Get the joined sequence
242 * @return A reference to the joined string
244 std::string& GetJoined();
247 /** irc::modestacker stacks mode sequences into a list.
248 * It can then reproduce this list, clamped to a maximum of MAXMODES
251 class CoreExport modestacker
254 /** The mode sequence and its parameters
256 std::deque<std::string> sequence;
258 /** True if the mode sequence is initially adding
259 * characters, false if it is initially removing
265 /** Construct a new modestacker.
266 * @param add True if the stack is adding modes,
267 * false if it is removing them
269 modestacker(bool add);
/** Push a modeletter and its parameter onto the stack.
 * No checking is performed as to whether this mode actually
 * requires a parameter. If you stack invalid mode
 * sequences, they will be tidied if and when they are
 * passed to a mode parser.
 * @param modeletter The mode letter to insert
 * @param parameter The parameter for the mode
 */
void Push(char modeletter, const std::string &parameter);
281 /** Push a modeletter without parameter onto the stack.
282 * No checking is performed as to if this mode actually
283 * requires a parameter. If you stack invalid mode
284 * sequences, they will be tidied if and when they are
285 * passed to a mode parser.
286 * @param modeletter The mode letter to insert
288 void Push(char modeletter);
290 /** Push a '+' symbol onto the stack.
294 /** Push a '-' symbol onto the stack.
298 /** Return zero or more elements which form the
299 * mode line. This will be clamped to a max of
300 * MAXMODES items (MAXMODES-1 mode parameters and
301 * one mode sequence string), and max_line_size
302 * characters. As specified below, this function
303 * should be called in a loop until it returns zero,
304 * indicating there are no more modes to return.
305 * @param result The vector to populate. This will not
306 * be cleared before it is used.
307 * @param max_line_size The maximum size of the line
308 * to build, in characters, seperate to MAXMODES.
309 * @return The number of elements in the deque.
310 * The function should be called repeatedly until it
311 * returns 0, in case there are multiple lines of
312 * mode changes to be obtained.
314 int GetStackedLine(std::vector<std::string> &result, int max_line_size = 360);
316 /** deprecated compatibility interface - TODO remove */
317 int GetStackedLine(std::deque<std::string> &result, int max_line_size = 360) {
318 std::vector<std::string> r;
319 int n = GetStackedLine(r, max_line_size);
321 result.insert(result.end(), r.begin(), r.end());
326 /** irc::tokenstream reads a string formatted as per RFC1459 and RFC2812.
327 * It will split the string into 'tokens' each containing one parameter
329 * For instance, if it is instantiated with the string:
330 * "PRIVMSG #test :foo bar baz qux"
331 * then each successive call to tokenstream::GetToken() will return
332 * "PRIVMSG", "#test", "foo bar baz qux", "".
333 * Note that if the whole string starts with a colon this is not taken
334 * to mean the string is all one parameter, and the first item in the
335 * list will be ":item". This is to allow for parsing 'source' fields
338 class CoreExport tokenstream
346 /** Last position of a seperator token
348 std::string::iterator last_starting_position;
350 /** Current string position
352 std::string::iterator n;
354 /** True if the last value was an ending value
359 /** Create a tokenstream and fill it with the provided data
361 tokenstream(const std::string &source);
367 /** Fetch the next token from the stream as a std::string
368 * @param token The next token available, or an empty string if none remain
369 * @return True if tokens are left to be read, false if the last token was just retrieved.
371 bool GetToken(std::string &token);
373 /** Fetch the next token from the stream as an irc::string
374 * @param token The next token available, or an empty string if none remain
375 * @return True if tokens are left to be read, false if the last token was just retrieved.
377 bool GetToken(irc::string &token);
379 /** Fetch the next token from the stream as an integer
380 * @param token The next token available, or undefined if none remain
381 * @return True if tokens are left to be read, false if the last token was just retrieved.
383 bool GetToken(int &token);
385 /** Fetch the next token from the stream as a long integer
386 * @param token The next token available, or undefined if none remain
387 * @return True if tokens are left to be read, false if the last token was just retrieved.
389 bool GetToken(long &token);
392 /** irc::sepstream allows for splitting token seperated lists.
393 * Each successive call to sepstream::GetToken() returns
394 * the next token, until none remain, at which point the method returns
397 class CoreExport sepstream
403 /** Last position of a seperator token
405 std::string::iterator last_starting_position;
406 /** Current string position
408 std::string::iterator n;
413 /** Create a sepstream and fill it with the provided data
415 sepstream(const std::string &source, char seperator);
419 virtual ~sepstream();
421 /** Fetch the next token from the stream
422 * @param token The next token from the stream is placed here
423 * @return True if tokens still remain, false if there are none left
425 virtual bool GetToken(std::string &token);
427 /** Fetch the entire remaining stream, without tokenizing
428 * @return The remaining part of the stream
430 virtual const std::string GetRemaining();
432 /** Returns true if the end of the stream has been reached
433 * @return True if the end of the stream has been reached, otherwise false
435 virtual bool StreamEnd();
438 /** A derived form of sepstream, which seperates on commas
440 class CoreExport commasepstream : public sepstream
443 /** Initialize with comma seperator
445 commasepstream(const std::string &source) : sepstream(source, ',')
450 /** A derived form of sepstream, which seperates on spaces
452 class CoreExport spacesepstream : public sepstream
455 /** Initialize with space seperator
457 spacesepstream(const std::string &source) : sepstream(source, ' ')
462 /** The portparser class seperates out a port range into integers.
463 * A port range may be specified in the input string in the form
464 * "6660,6661,6662-6669,7020". The end of the stream is indicated by
465 * a return value of 0 from portparser::GetToken(). If you attempt
466 * to specify an illegal range (e.g. one where start >= end, or
467 * start or end < 0) then GetToken() will return the first element
468 * of the pair of numbers.
470 class CoreExport portparser
474 /** Used to split on commas
478 /** Current position in a range of ports
482 /** Starting port in a range of ports
486 /** Ending port in a range of ports
490 /** Allow overlapped port ranges
494 /** Used to determine overlapping of ports
495 * without O(n) algorithm being used
497 std::map<long, bool> overlap_set;
499 /** Returns true if val overlaps an existing range
501 bool Overlaps(long val);
504 /** Create a portparser and fill it with the provided data
505 * @param source The source text to parse from
506 * @param allow_overlapped Allow overlapped ranges
508 portparser(const std::string &source, bool allow_overlapped = true);
510 /** Frees the internal commasepstream object
514 /** Fetch the next token from the stream
515 * @return The next port number is returned, or 0 if none remain
520 /** Turn _ characters in a string into spaces
521 * @param n String to translate
522 * @return The new value with _ translated to space.
524 CoreExport const char* Spacify(const char* n);
527 /* Define operators for using >> and << with irc::string on an istream or an ostream. */
528 /* This was endless fun. No. Really. */
529 /* It was also the first core change Ommeh made, if anyone cares */
531 /** Operator << for irc::string
533 inline std::ostream& operator<<(std::ostream &os, const irc::string &str) { return os << str.c_str(); }
535 /** Operator >> for irc::string
537 inline std::istream& operator>>(std::istream &is, irc::string &str)
545 /* Define operators for + and == with irc::string to std::string for easy assignment
550 inline std::string operator+ (std::string& leftval, irc::string& rightval)
552 return leftval + std::string(rightval.c_str());
555 /* Define operators for + and == with irc::string to std::string for easy assignment
560 inline irc::string operator+ (irc::string& leftval, std::string& rightval)
562 return leftval + irc::string(rightval.c_str());
565 /* Define operators for + and == with irc::string to std::string for easy assignment
570 inline bool operator== (const std::string& leftval, const irc::string& rightval)
572 return (leftval.c_str() == rightval);
575 /* Define operators for + and == with irc::string to std::string for easy assignment
580 inline bool operator== (const irc::string& leftval, const std::string& rightval)
582 return (leftval == rightval.c_str());
585 /* Define operators != for irc::string to std::string for easy comparison
587 inline bool operator!= (const irc::string& leftval, const std::string& rightval)
589 return !(leftval == rightval.c_str());
592 /* Define operators != for std::string to irc::string for easy comparison
594 inline bool operator!= (const std::string& leftval, const irc::string& rightval)
596 return !(leftval.c_str() == rightval);
/* Comparison of a std::string against a fixed-size char array (a string
 * literal or a character buffer).
 *
 * The array is treated as a C string: its length is measured up to the first
 * NUL found inside the array rather than taken from the array bound N. The
 * bound only equals the text length for string literals; a partly filled
 * buffer such as char buf[MAXBUF] has a bound far larger than its contents.
 */

/** Compare a std::string with the C string stored in an array.
 * @param lhs The string to compare
 * @param rhs First element of the array
 * @param capacity Number of elements in the array
 * @return true if lhs holds exactly the text stored in the array
 */
static inline bool CompareStringToArray(std::string const &lhs, char const *rhs, std::size_t capacity)
{
	/* Bounded search, so an unterminated array is never read past its end */
	void const *terminator = std::memchr(rhs, '\0', capacity);
	std::size_t const used = terminator
		? static_cast<std::size_t>(static_cast<char const *>(terminator) - rhs)
		: capacity;

	return lhs.length() == used && !std::memcmp(lhs.data(), rhs, used);
}

/** Equality of a std::string and a char array.
 * @return true if lhs equals the C string held in rhs
 */
template<std::size_t N>
static inline bool operator == (std::string const &lhs, char const (&rhs)[N])
{
	return CompareStringToArray(lhs, rhs, N);
}

/** Inequality of a std::string and a char array.
 * @return true if lhs differs from the C string held in rhs
 */
template<std::size_t N>
static inline bool operator != (std::string const &lhs, char const (&rhs)[N])
{
	return !CompareStringToArray(lhs, rhs, N);
}
614 /** Assign an irc::string to a std::string.
616 inline std::string assign(const irc::string &other) { return other.c_str(); }
618 /** Assign a std::string to an irc::string.
620 inline irc::string assign(const std::string &other) { return other.c_str(); }
/** Trim the leading and trailing spaces from a std::string.
 * Only the space character (' ') is stripped; tabs and other whitespace are
 * kept. The string is modified in place.
 * @param str The string to trim
 * @return A reference to str
 */
inline std::string& trim(std::string &str)
{
	const std::string::size_type last = str.find_last_not_of(' ');
	if (last == std::string::npos)
	{
		/* Empty, or made up entirely of spaces */
		str.clear();
		return str;
	}

	/* Drop the tail first so that the head erase moves fewer characters */
	str.erase(last + 1);
	str.erase(0, str.find_first_not_of(' '));
	return str;
}
636 /** Hashing stuff is totally different on vc++'s hash_map implementation, so to save a buttload of
637 * #ifdefs we'll just do it all at once. Except, of course, with TR1, when it's the same as GCC.
639 BEGIN_HASHMAP_NAMESPACE
641 /** Hashing function to hash irc::string
643 #if defined(WINDOWS) && !defined(HAS_TR1_UNORDERED)
644 template<> class CoreExport hash_compare<irc::string, std::less<irc::string> >
647 enum { bucket_size = 4, min_buckets = 8 }; /* Got these numbers from the CRT source, if anyone wants to change them feel free. */
649 /** Compare two irc::string values for hashing in hash_map
651 bool operator()(const irc::string & s1, const irc::string & s2) const
653 if(s1.length() != s2.length()) return true;
654 return (irc::irc_char_traits::compare(s1.c_str(), s2.c_str(), (size_t)s1.length()) < 0);
657 /** Hash an irc::string value for hash_map
659 size_t operator()(const irc::string & s) const;
662 template<> class CoreExport hash_compare<std::string, std::less<std::string> >
665 enum { bucket_size = 4, min_buckets = 8 }; /* Again, from the CRT source */
667 /** Compare two std::string values for hashing in hash_map
669 bool operator()(const std::string & s1, const std::string & s2) const
671 if(s1.length() != s2.length()) return true;
672 return (irc::irc_char_traits::compare(s1.c_str(), s2.c_str(), (size_t)s1.length()) < 0);
675 /** Hash a std::string using RFC1459 case sensitivity rules
676 * @param s A string to hash
677 * @return The hash value
679 size_t operator()(const std::string & s) const;
683 template<> struct hash<irc::string>
685 /** Hash an irc::string using RFC1459 case sensitivity rules
686 * @param s A string to hash
687 * @return The hash value
689 size_t CoreExport operator()(const irc::string &s) const;
692 /* XXX FIXME: Implement a hash function overriding std::string's that works with TR1! */
694 #ifdef HASHMAP_DEPRECATED
697 CoreExport template<> struct hash<std::string>
700 size_t CoreExport operator()(const std::string &s) const;
705 /** Convert a string to lower case respecting RFC1459
706 * @param n A string to lowercase
708 void strlower(char *n);
710 END_HASHMAP_NAMESPACE