src/hashcomp.cpp

   1 /*       +------------------------------------+
   2  *       | Inspire Internet Relay Chat Daemon |
   3  *       +------------------------------------+
   4  *
   5  *  InspIRCd: (C) 2002-2010 InspIRCd Development Team
   6  * See: http://wiki.inspircd.org/Credits
   7  *
   8  * This program is free but copyrighted software; see
   9  *          the file COPYING for details.
  10  *
  11  * ---------------------------------------------------
  12  */
  13
  14 /* $Core */
  15
  16 #include "inspircd.h"
  17 #include "hashcomp.h"
  18 #include "hash_map.h"
  19
  20 /******************************************************
  21  *
  22  * The hash functions of InspIRCd are the centrepoint
  23  * of the entire system. If these functions are
  24  * inefficient or wasteful, the whole program suffers
  25  * as a result. A lot of C programmers in the ircd
  26  * scene spend a lot of time debating (arguing) about
  27  * the best way to write hash functions to hash irc
  28  * nicknames, channels etc.
  29  * We are lucky as C++ developers as hash_map does
   30  * a lot of this for us. It does intelligent memory
  31  * requests, bucketing, search functions, insertion
  32  * and deletion etc. All we have to do is write some
  33  * overloaded comparison and hash value operators which
  34  * cause it to act in an irc-like way. The features we
  35  * add to the standard hash_map are:
  36  *
  37  * Case insensitivity: The hash_map will be case
  38  * insensitive.
  39  *
   40  * Scandinavian Comparisons: The characters {, } and | will
   41  * be considered the lowercase of [, ] and \.
  42  *
  43  ******************************************************/
  44
  45 /** A mapping of uppercase to lowercase, including scandinavian
  46  * 'oddities' as specified by RFC1459, e.g. { -> [, and | -> \
  47  */
  48 unsigned const char rfc_case_insensitive_map[256] = {
  49         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
  50         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
  51         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
  52         60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
  53         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 94, 95, 96, 97, 98, 99,           /* 80-99 */
  54         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
  55         120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
  56         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
  57         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
  58         180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
  59         200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
  60         220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
  61         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
  62 };
  63
  64 /** Case insensitive map, ASCII rules.
  65  * That is;
  66  * [ != {, but A == a.
  67  */
  68 unsigned const char ascii_case_insensitive_map[256] = {
  69         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
  70         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
  71         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
  72         60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
  73         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,              /* 80-99 */
  74         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
  75         120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
  76         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
  77         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
  78         180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
  79         200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
  80         220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
  81         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
  82 };
  83
  84 /** Case sensitive map.
  85  * Can technically also be used for ASCII case sensitive comparisons, as [ != {, etc.
  86  */
  87 unsigned const char rfc_case_sensitive_map[256] = {
  88         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
  89         21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  90         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
  91         61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
  92         81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
  93         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
  94         121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
  95         141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
  96         161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
  97         181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
  98         201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
  99         221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
 100         241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
 101 };
 102
 103 /* convert a string to lowercase. Note following special circumstances
 104  * taken from RFC 1459. Many "official" server branches still hold to this
 105  * rule so i will too;
 106  *
 107  *  Because of IRC's scandanavian origin, the characters {}| are
 108  *  considered to be the lower case equivalents of the characters []\,
 109  *  respectively. This is a critical issue when determining the
 110  *  equivalence of two nicknames.
 111  */
 112 void nspace::strlower(char *n)
 113 {
 114         if (n)
 115         {
 116                 for (char* t = n; *t; t++)
 117                         *t = national_case_insensitive_map[(unsigned char)*t];
 118         }
 119 }
 120
 121 #if defined(WINDOWS) && !defined(HASHMAP_DEPRECATED)
 122         size_t nspace::hash_compare<std::string, std::less<std::string> >::operator()(const std::string &s) const
 123 #else
 124         #ifdef HASHMAP_DEPRECATED
 125                 size_t CoreExport nspace::insensitive::operator()(const std::string &s) const
 126         #else
 127                 size_t nspace::hash<std::string>::operator()(const std::string &s) const
 128         #endif
 129 #endif
 130 {
 131         /* XXX: NO DATA COPIES! :)
 132          * The hash function here is practically
 133          * a copy of the one in STL's hash_fun.h,
 134          * only with *x replaced with national_case_insensitive_map[*x].
 135          * This avoids a copy to use hash<const char*>
 136          */
 137         register size_t t = 0;
 138         for (std::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 139                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 140         return t;
 141 }
 142
 143
 144 #if defined(WINDOWS) && !defined(HASHMAP_DEPRECATED)
 145         size_t nspace::hash_compare<irc::string, std::less<irc::string> >::operator()(const irc::string &s) const
 146 #else
 147         size_t CoreExport nspace::hash<irc::string>::operator()(const irc::string &s) const
 148 #endif
 149 {
 150         register size_t t = 0;
 151         for (irc::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 152                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 153         return t;
 154 }
 155
 156 bool irc::StrHashComp::operator()(const std::string& s1, const std::string& s2) const
 157 {
 158         const unsigned char* n1 = (const unsigned char*)s1.c_str();
 159         const unsigned char* n2 = (const unsigned char*)s2.c_str();
 160         for (; *n1 && *n2; n1++, n2++)
 161                 if (national_case_insensitive_map[*n1] != national_case_insensitive_map[*n2])
 162                         return false;
 163         return (national_case_insensitive_map[*n1] == national_case_insensitive_map[*n2]);
 164 }
 165
 166 /******************************************************
 167  *
 168  * This is the implementation of our special irc::string
 169  * class which is a case-insensitive equivalent to
 170  * std::string which is not only case-insensitive but
  171  * can also do Scandinavian comparisons, e.g. { = [, etc.
 172  *
 173  * This class depends on the const array 'national_case_insensitive_map'.
 174  *
 175  ******************************************************/
 176
 177 bool irc::irc_char_traits::eq(char c1st, char c2nd)
 178 {
 179         return national_case_insensitive_map[(unsigned char)c1st] == national_case_insensitive_map[(unsigned char)c2nd];
 180 }
 181
 182 bool irc::irc_char_traits::ne(char c1st, char c2nd)
 183 {
 184         return national_case_insensitive_map[(unsigned char)c1st] != national_case_insensitive_map[(unsigned char)c2nd];
 185 }
 186
 187 bool irc::irc_char_traits::lt(char c1st, char c2nd)
 188 {
 189         return national_case_insensitive_map[(unsigned char)c1st] < national_case_insensitive_map[(unsigned char)c2nd];
 190 }
 191
 192 int irc::irc_char_traits::compare(const char* str1, const char* str2, size_t n)
 193 {
 194         for(unsigned int i = 0; i < n; i++)
 195         {
 196                 if(national_case_insensitive_map[(unsigned char)*str1] > national_case_insensitive_map[(unsigned char)*str2])
 197                         return 1;
 198
 199                 if(national_case_insensitive_map[(unsigned char)*str1] < national_case_insensitive_map[(unsigned char)*str2])
 200                         return -1;
 201
 202                 if(*str1 == 0 || *str2 == 0)
 203                         return 0;
 204
 205                 str1++;
 206                 str2++;
 207         }
 208         return 0;
 209 }
 210
 211 const char* irc::irc_char_traits::find(const char* s1, int  n, char c)
 212 {
 213         while(n-- > 0 && national_case_insensitive_map[(unsigned char)*s1] != national_case_insensitive_map[(unsigned char)c])
 214                 s1++;
 215         return (n >= 0) ? s1 : NULL;
 216 }
 217
 218 irc::tokenstream::tokenstream(const std::string &source) : tokens(source), last_pushed(false)
 219 {
 220         /* Record starting position and current position */
 221         last_starting_position = tokens.begin();
 222         n = tokens.begin();
 223 }
 224
 225 irc::tokenstream::~tokenstream()
 226 {
 227 }
 228
  /** Extract the next token from the stored line.
   * Tokens are separated by spaces. Once a space-terminated token has been
   * seen, a following token that begins with ':' takes the whole remainder
   * of the line (the ':' itself is dropped).
   * @param token Receives the token text; cleared when nothing is left
   * @return True if a token was produced. False when the cursor is already
   * at the end of the line, or when the extracted text is empty once
   * trailing spaces have been stripped.
   */
  bool irc::tokenstream::GetToken(std::string &token)
  {
          /* Start of the token being built: wherever the previous call left off. */
          std::string::iterator lsp = last_starting_position;

          while (n != tokens.end())
          {
                  /** Skip multi space, converting "  " into " "
                   * (the cursor stops on the last space of the run).
                   */
                  while ((n+1 != tokens.end()) && (*n == ' ') && (*(n+1) == ' '))
                          n++;

                  if ((last_pushed) && (*n == ':'))
                  {
                          /* If we find a token that's not the first and starts with :,
                           * this is the last token on the line. Everything after the
                           * colon is returned and the cursor jumps to the end.
                           */
                          std::string::iterator curr = ++n;
                          n = tokens.end();
                          token = std::string(curr, tokens.end());
                          return true;
                  }

                  /* Cleared on every other character, so the ':' rule above only
                   * fires directly after a space-terminated token.
                   */
                  last_pushed = false;

                  if ((*n == ' ') || (n+1 == tokens.end()))
                  {
                          /* If we find a space, or end of string, this is the end of a token.
                           */
                          last_starting_position = n+1;
                          last_pushed = *n == ' ';

                          /* At the end of the string the final character is included and
                           * the cursor is left where it is; otherwise the range stops
                           * before the space and the cursor steps past it (post-increment).
                           */
                          std::string strip(lsp, n+1 == tokens.end() ? n+1  : n++);
                          /* Drop any trailing spaces from the extracted text. */
                          while ((strip.length()) && (strip.find_last_of(' ') == strip.length() - 1))
                                  strip.erase(strip.end() - 1);

                          token = strip;
                          return !token.empty();
                  }

                  n++;
          }
          token.clear();
          return false;
  }
 273
 274 bool irc::tokenstream::GetToken(irc::string &token)
 275 {
 276         std::string stdstring;
 277         bool returnval = GetToken(stdstring);
 278         token = assign(stdstring);
 279         return returnval;
 280 }
 281
 282 bool irc::tokenstream::GetToken(int &token)
 283 {
 284         std::string tok;
 285         bool returnval = GetToken(tok);
 286         token = ConvToInt(tok);
 287         return returnval;
 288 }
 289
 290 bool irc::tokenstream::GetToken(long &token)
 291 {
 292         std::string tok;
 293         bool returnval = GetToken(tok);
 294         token = ConvToInt(tok);
 295         return returnval;
 296 }
 297
 298 irc::sepstream::sepstream(const std::string &source, char seperator) : tokens(source), sep(seperator)
 299 {
 300         last_starting_position = tokens.begin();
 301         n = tokens.begin();
 302 }
 303
 304 bool irc::sepstream::GetToken(std::string &token)
 305 {
 306         std::string::iterator lsp = last_starting_position;
 307
 308         for(; n != tokens.end(); ++n)
 309         {
 310                 if(*n == sep || n+1 == tokens.end())
 311                 {
 312                         last_starting_position = n+1;
 313                         token = std::string(lsp, n+1 == tokens.end() ? ++n : n++);
 314                         return true;
 315                 }
 316         }
 317
 318         token = "";
 319         return false;
 320 }
 321
 322 const std::string irc::sepstream::GetRemaining()
 323 {
 324         return std::string(n, tokens.end());
 325 }
 326
 327 bool irc::sepstream::StreamEnd()
 328 {
 329         return n == tokens.end();
 330 }
 331
 332 irc::sepstream::~sepstream()
 333 {
 334 }
 335
 336 std::string irc::hex(const unsigned char *raw, size_t rawsz)
 337 {
 338         if (!rawsz)
 339                 return "";
 340
 341         /* EWW! This used to be using sprintf, which is WAY inefficient. -Special */
 342
 343         const char *hex = "0123456789abcdef";
 344         static char hexbuf[MAXBUF];
 345
 346         size_t i, j;
 347         for (i = 0, j = 0; j < rawsz; ++j)
 348         {
 349                 hexbuf[i++] = hex[raw[j] / 16];
 350                 hexbuf[i++] = hex[raw[j] % 16];
 351         }
 352         hexbuf[i] = 0;
 353
 354         return hexbuf;
 355 }
 356
 357 CoreExport const char* irc::Spacify(const char* n)
 358 {
 359         static char x[MAXBUF];
 360         strlcpy(x,n,MAXBUF);
 361         for (char* y = x; *y; y++)
 362                 if (*y == '_')
 363                         *y = ' ';
 364         return x;
 365 }
 366
 367
 368 irc::modestacker::modestacker(bool add) : adding(add)
 369 {
 370         sequence.clear();
 371         sequence.push_back("");
 372 }
 373
 374 void irc::modestacker::Push(char modeletter, const std::string &parameter)
 375 {
 376         *(sequence.begin()) += modeletter;
 377         sequence.push_back(parameter);
 378 }
 379
 380 void irc::modestacker::Push(char modeletter)
 381 {
 382         this->Push(modeletter,"");
 383 }
 384
 385 void irc::modestacker::PushPlus()
 386 {
 387         this->Push('+',"");
 388 }
 389
 390 void irc::modestacker::PushMinus()
 391 {
 392         this->Push('-',"");
 393 }
 394
 395 int irc::modestacker::GetStackedLine(std::vector<std::string> &result, int max_line_size)
 396 {
 397         if (sequence.empty())
 398         {
 399                 return 0;
 400         }
 401
 402         unsigned int n = 0;
 403         int size = 1; /* Account for initial +/- char */
 404         int nextsize = 0;
 405         int start = result.size();
 406         std::string modeline = adding ? "+" : "-";
 407         result.push_back(modeline);
 408
 409         if (sequence.size() > 1)
 410                 nextsize = sequence[1].length() + 2;
 411
 412         while (!sequence[0].empty() && (sequence.size() > 1) && (n < ServerInstance->Config->Limits.MaxModes) && ((size + nextsize) < max_line_size))
 413         {
 414                 modeline += *(sequence[0].begin());
 415                 if (!sequence[1].empty())
 416                 {
 417                         result.push_back(sequence[1]);
 418                         size += nextsize; /* Account for mode character and whitespace */
 419                 }
 420                 sequence[0].erase(sequence[0].begin());
 421                 sequence.erase(sequence.begin() + 1);
 422
 423                 if (sequence.size() > 1)
 424                         nextsize = sequence[1].length() + 2;
 425
 426                 n++;
 427         }
 428         result[start] = modeline;
 429
 430         return n;
 431 }
 432
 433 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end)
 434 {
 435         if (end < begin)
 436                 throw "stringjoiner logic error, this causes problems.";
 437
 438         for (int v = begin; v < end; v++)
 439                 joined.append(sequence[v]).append(seperator);
 440         joined.append(sequence[end]);
 441 }
 442
 443 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end)
 444 {
 445         if (end < begin)
 446                 throw "stringjoiner logic error, this causes problems.";
 447
 448         for (int v = begin; v < end; v++)
 449                 joined.append(sequence[v]).append(seperator);
 450         joined.append(sequence[end]);
 451 }
 452
 453 irc::stringjoiner::stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end)
 454 {
 455         if (end < begin)
 456                 throw "stringjoiner logic error, this causes problems.";
 457
 458         for (int v = begin; v < end; v++)
 459                 joined.append(sequence[v]).append(seperator);
 460         joined.append(sequence[end]);
 461 }
 462
 463 std::string& irc::stringjoiner::GetJoined()
 464 {
 465         return joined;
 466 }
 467
 468 irc::portparser::portparser(const std::string &source, bool allow_overlapped) : in_range(0), range_begin(0), range_end(0), overlapped(allow_overlapped)
 469 {
 470         sep = new irc::commasepstream(source);
 471         overlap_set.clear();
 472 }
 473
 474 irc::portparser::~portparser()
 475 {
 476         delete sep;
 477 }
 478
 479 bool irc::portparser::Overlaps(long val)
 480 {
 481         if (!overlapped)
 482                 return false;
 483
 484         if (overlap_set.find(val) == overlap_set.end())
 485         {
 486                 overlap_set[val] = true;
 487                 return false;
 488         }
 489         else
 490                 return true;
 491 }
 492
 493 long irc::portparser::GetToken()
 494 {
 495         if (in_range > 0)
 496         {
 497                 in_range++;
 498                 if (in_range <= range_end)
 499                 {
 500                         if (!Overlaps(in_range))
 501                         {
 502                                 return in_range;
 503                         }
 504                         else
 505                         {
 506                                 while (((Overlaps(in_range)) && (in_range <= range_end)))
 507                                         in_range++;
 508
 509                                 if (in_range <= range_end)
 510                                         return in_range;
 511                         }
 512                 }
 513                 else
 514                         in_range = 0;
 515         }
 516
 517         std::string x;
 518         sep->GetToken(x);
 519
 520         if (x.empty())
 521                 return 0;
 522
 523         while (Overlaps(atoi(x.c_str())))
 524         {
 525                 if (!sep->GetToken(x))
 526                         return 0;
 527         }
 528
 529         std::string::size_type dash = x.rfind('-');
 530         if (dash != std::string::npos)
 531         {
 532                 std::string sbegin = x.substr(0, dash);
 533                 std::string send = x.substr(dash+1, x.length());
 534                 range_begin = atoi(sbegin.c_str());
 535                 range_end = atoi(send.c_str());
 536
 537                 if ((range_begin > 0) && (range_end > 0) && (range_begin < 65536) && (range_end < 65536) && (range_begin < range_end))
 538                 {
 539                         in_range = range_begin;
 540                         return in_range;
 541                 }
 542                 else
 543                 {
 544                         /* Assume its just the one port */
 545                         return atoi(sbegin.c_str());
 546                 }
 547         }
 548         else
 549         {
 550                 return atoi(x.c_str());
 551         }
 552 }
 553
 554 /*const std::basic_string& SearchAndReplace(std::string& text, const std::string& pattern, const std::string& replace)
 555 {
 556         std::string replacement;
 557         if ((!pattern.empty()) && (!text.empty()))
 558         {
 559                 for (std::string::size_type n = 0; n != text.length(); ++n)
 560                 {
 561                         if (text.length() >= pattern.length() && text.substr(n, pattern.length()) == pattern)
 562                         {
 563                                 replacement.append(replace);
 564                                 n = n + pattern.length() - 1;
 565                         }
 566                         else
 567                         {
 568                                 replacement += text[n];
 569                         }
 570                 }
 571         }
 572         text = replacement;
 573         return text;
 574 }*/