src/hashcomp.cpp

   1 /*       +------------------------------------+
   2  *       | Inspire Internet Relay Chat Daemon |
   3  *       +------------------------------------+
   4  *
   5  *  InspIRCd: (C) 2002-2010 InspIRCd Development Team
   6  * See: http://wiki.inspircd.org/Credits
   7  *
   8  * This program is free but copyrighted software; see
   9  *          the file COPYING for details.
  10  *
  11  * ---------------------------------------------------
  12  */
  13
  14 /* $Core */
  15
  16 #include "inspircd.h"
  17 #include "hashcomp.h"
  18 #include "hash_map.h"
  19
  20 /******************************************************
  21  *
  22  * The hash functions of InspIRCd are the centrepoint
  23  * of the entire system. If these functions are
  24  * inefficient or wasteful, the whole program suffers
  25  * as a result. A lot of C programmers in the ircd
  26  * scene spend a lot of time debating (arguing) about
  27  * the best way to write hash functions to hash irc
  28  * nicknames, channels etc.
  29  * We are lucky as C++ developers as hash_map does
  30  * a lot of this for us. It does intelligent memory
  31  * requests, bucketing, search functions, insertion
  32  * and deletion etc. All we have to do is write some
  33  * overloaded comparison and hash value operators which
  34  * cause it to act in an irc-like way. The features we
  35  * add to the standard hash_map are:
  36  *
  37  * Case insensitivity: The hash_map will be case
  38  * insensitive.
  39  *
  40  * Scandinavian Comparisons: The characters {, } and | will
  41  * be considered the lowercase of [, ] and \.
  42  *
  43  ******************************************************/
  44
/** Case-folding lookup table with the RFC 1459 'Scandinavian' oddities.
 * Indexed by an unsigned character value (0-255); each entry is the folded value.
 *  - 'A'-'Z' (65-90) fold to 'a'-'z' (97-122).
 *  - '[', '\' and ']' (91-93) fold to '{', '|' and '}' (123-125).
 *  - Every other value, including '^' (94) and '~' (126), maps to itself.
 */
unsigned const char rfc_case_insensitive_map[256] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
        60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 94, 95, 96, 97, 98, 99,           /* 80-99 */
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
        120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
        180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
        200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
        220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
};
  63
/** Case-folding lookup table using plain ASCII rules.
 * Indexed by an unsigned character value (0-255); each entry is the folded value.
 *  - 'A'-'Z' (65-90) fold to 'a'-'z' (97-122).
 *  - Every other value maps to itself, so '[' (91) stays distinct from '{' (123):
 *    [ != {, but A == a.
 */
unsigned const char ascii_case_insensitive_map[256] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
        60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,              /* 80-99 */
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
        120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
        180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
        200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
        220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
};
  83
/** Case sensitive map: the identity table, every value 0-255 maps to itself.
 * Because nothing is folded ('A' != 'a' and [ != {), it can technically also be
 * used for ASCII case sensitive comparisons.
 * Note the first row holds 21 entries (0-20), the following rows 20 each.
 */
unsigned const char rfc_case_sensitive_map[256] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
        61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
        121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
        141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
        181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
        201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
        221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
        241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
 102
 103 /* convert a string to lowercase. Note following special circumstances
 104  * taken from RFC 1459. Many "official" server branches still hold to this
 105  * rule so i will too;
 106  *
 107  *  Because of IRC's scandanavian origin, the characters {}| are
 108  *  considered to be the lower case equivalents of the characters []\,
 109  *  respectively. This is a critical issue when determining the
 110  *  equivalence of two nicknames.
 111  */
 112 void nspace::strlower(char *n)
 113 {
 114         if (n)
 115         {
 116                 for (char* t = n; *t; t++)
 117                         *t = national_case_insensitive_map[(unsigned char)*t];
 118         }
 119 }
 120
 121 #if defined(WINDOWS) && !defined(HASHMAP_DEPRECATED)
 122         size_t nspace::hash_compare<std::string, std::less<std::string> >::operator()(const std::string &s) const
 123 #else
 124         #ifdef HASHMAP_DEPRECATED
 125                 size_t CoreExport nspace::insensitive::operator()(const std::string &s) const
 126         #else
 127                 size_t nspace::hash<std::string>::operator()(const std::string &s) const
 128         #endif
 129 #endif
 130 {
 131         /* XXX: NO DATA COPIES! :)
 132          * The hash function here is practically
 133          * a copy of the one in STL's hash_fun.h,
 134          * only with *x replaced with national_case_insensitive_map[*x].
 135          * This avoids a copy to use hash<const char*>
 136          */
 137         register size_t t = 0;
 138         for (std::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 139                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 140         return t;
 141 }
 142
 143
 144 #if defined(WINDOWS) && !defined(HASHMAP_DEPRECATED)
 145         size_t nspace::hash_compare<irc::string, std::less<irc::string> >::operator()(const irc::string &s) const
 146 #else
 147         size_t CoreExport nspace::hash<irc::string>::operator()(const irc::string &s) const
 148 #endif
 149 {
 150         register size_t t = 0;
 151         for (irc::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 152                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 153         return t;
 154 }
 155
 156 bool irc::StrHashComp::operator()(const std::string& s1, const std::string& s2) const
 157 {
 158         const unsigned char* n1 = (const unsigned char*)s1.c_str();
 159         const unsigned char* n2 = (const unsigned char*)s2.c_str();
 160         for (; *n1 && *n2; n1++, n2++)
 161                 if (national_case_insensitive_map[*n1] != national_case_insensitive_map[*n2])
 162                         return false;
 163         return (national_case_insensitive_map[*n1] == national_case_insensitive_map[*n2]);
 164 }
 165
 166 /******************************************************
 167  *
 168  * This is the implementation of our special irc::string
 169  * class which is a case-insensitive equivalent to
 170  * std::string which is not only case-insensitive but
 171  * can also do Scandinavian comparisons, e.g. { = [, etc.
 172  *
 173  * This class depends on the const array 'national_case_insensitive_map'.
 174  *
 175  ******************************************************/
 176
 177 bool irc::irc_char_traits::eq(char c1st, char c2nd)
 178 {
 179         return national_case_insensitive_map[(unsigned char)c1st] == national_case_insensitive_map[(unsigned char)c2nd];
 180 }
 181
 182 bool irc::irc_char_traits::ne(char c1st, char c2nd)
 183 {
 184         return national_case_insensitive_map[(unsigned char)c1st] != national_case_insensitive_map[(unsigned char)c2nd];
 185 }
 186
 187 bool irc::irc_char_traits::lt(char c1st, char c2nd)
 188 {
 189         return national_case_insensitive_map[(unsigned char)c1st] < national_case_insensitive_map[(unsigned char)c2nd];
 190 }
 191
 192 int irc::irc_char_traits::compare(const char* str1, const char* str2, size_t n)
 193 {
 194         for(unsigned int i = 0; i < n; i++)
 195         {
 196                 if(national_case_insensitive_map[(unsigned char)*str1] > national_case_insensitive_map[(unsigned char)*str2])
 197                         return 1;
 198
 199                 if(national_case_insensitive_map[(unsigned char)*str1] < national_case_insensitive_map[(unsigned char)*str2])
 200                         return -1;
 201
 202                 if(*str1 == 0 || *str2 == 0)
 203                         return 0;
 204
 205                 str1++;
 206                 str2++;
 207         }
 208         return 0;
 209 }
 210
 211 const char* irc::irc_char_traits::find(const char* s1, int  n, char c)
 212 {
 213         while(n-- > 0 && national_case_insensitive_map[(unsigned char)*s1] != national_case_insensitive_map[(unsigned char)c])
 214                 s1++;
 215         return (n >= 0) ? s1 : NULL;
 216 }
 217
 218 irc::tokenstream::tokenstream(const std::string &source) : tokens(source), last_pushed(false)
 219 {
 220         /* Record starting position and current position */
 221         last_starting_position = tokens.begin();
 222         n = tokens.begin();
 223 }
 224
/* Destructor. The body is intentionally empty: no explicit cleanup is performed here. */
irc::tokenstream::~tokenstream()
{
}
 228
 229 bool irc::tokenstream::GetToken(std::string &token)
 230 {
 231         std::string::iterator lsp = last_starting_position;
 232
 233         while (n != tokens.end())
 234         {
 235                 /** Skip multi space, converting "  " into " "
 236                  */
 237                 while ((n+1 != tokens.end()) && (*n == ' ') && (*(n+1) == ' '))
 238                         n++;
 239
 240                 if ((last_pushed) && (*n == ':'))
 241                 {
 242                         /* If we find a token thats not the first and starts with :,
 243                          * this is the last token on the line
 244                          */
 245                         std::string::iterator curr = ++n;
 246                         n = tokens.end();
 247                         token = std::string(curr, tokens.end());
 248                         return true;
 249                 }
 250
 251                 last_pushed = false;
 252
 253                 if ((*n == ' ') || (n+1 == tokens.end()))
 254                 {
 255                         /* If we find a space, or end of string, this is the end of a token.
 256                          */
 257                         last_starting_position = n+1;
 258                         last_pushed = *n == ' ';
 259
 260                         std::string strip(lsp, n+1 == tokens.end() ? n+1  : n++);
 261                         while ((strip.length()) && (strip.find_last_of(' ') == strip.length() - 1))
 262                                 strip.erase(strip.end() - 1);
 263
 264                         token = strip;
 265                         return !token.empty();
 266                 }
 267
 268                 n++;
 269         }
 270         token.clear();
 271         return false;
 272 }
 273
 274 bool irc::tokenstream::GetToken(irc::string &token)
 275 {
 276         std::string stdstring;
 277         bool returnval = GetToken(stdstring);
 278         token = assign(stdstring);
 279         return returnval;
 280 }
 281
 282 bool irc::tokenstream::GetToken(int &token)
 283 {
 284         std::string tok;
 285         bool returnval = GetToken(tok);
 286         token = ConvToInt(tok);
 287         return returnval;
 288 }
 289
 290 bool irc::tokenstream::GetToken(long &token)
 291 {
 292         std::string tok;
 293         bool returnval = GetToken(tok);
 294         token = ConvToInt(tok);
 295         return returnval;
 296 }
 297
 298 irc::sepstream::sepstream(const std::string &source, char seperator) : tokens(source), sep(seperator)
 299 {
 300         last_starting_position = tokens.begin();
 301         n = tokens.begin();
 302 }
 303
 304 bool irc::sepstream::GetToken(std::string &token)
 305 {
 306         std::string::iterator lsp = last_starting_position;
 307
 308         while (n != tokens.end())
 309         {
 310                 if ((*n == sep) || (n+1 == tokens.end()))
 311                 {
 312                         last_starting_position = n+1;
 313                         token = std::string(lsp, n+1 == tokens.end() ? n+1  : n++);
 314
 315                         while ((token.length()) && (token.find_last_of(sep) == token.length() - 1))
 316                                 token.erase(token.end() - 1);
 317
 318                         if (token.empty())
 319                                 n++;
 320
 321                         return n == tokens.end() ? false : true;
 322                 }
 323
 324                 n++;
 325         }
 326
 327         token = "";
 328         return false;
 329 }
 330
 331 const std::string irc::sepstream::GetRemaining()
 332 {
 333         return std::string(n, tokens.end());
 334 }
 335
 336 bool irc::sepstream::StreamEnd()
 337 {
 338         return ((n + 1) == tokens.end());
 339 }
 340
/* Destructor. The body is intentionally empty: no explicit cleanup is performed here. */
irc::sepstream::~sepstream()
{
}
 344
 345 std::string irc::hex(const unsigned char *raw, size_t rawsz)
 346 {
 347         if (!rawsz)
 348                 return "";
 349
 350         /* EWW! This used to be using sprintf, which is WAY inefficient. -Special */
 351
 352         const char *hex = "0123456789abcdef";
 353         static char hexbuf[MAXBUF];
 354
 355         size_t i, j;
 356         for (i = 0, j = 0; j < rawsz; ++j)
 357         {
 358                 hexbuf[i++] = hex[raw[j] / 16];
 359                 hexbuf[i++] = hex[raw[j] % 16];
 360         }
 361         hexbuf[i] = 0;
 362
 363         return hexbuf;
 364 }
 365
 366 CoreExport const char* irc::Spacify(const char* n)
 367 {
 368         static char x[MAXBUF];
 369         strlcpy(x,n,MAXBUF);
 370         for (char* y = x; *y; y++)
 371                 if (*y == '_')
 372                         *y = ' ';
 373         return x;
 374 }
 375
 376
 377 irc::modestacker::modestacker(bool add) : adding(add)
 378 {
 379         sequence.clear();
 380         sequence.push_back("");
 381 }
 382
 383 void irc::modestacker::Push(char modeletter, const std::string &parameter)
 384 {
 385         *(sequence.begin()) += modeletter;
 386         sequence.push_back(parameter);
 387 }
 388
 389 void irc::modestacker::Push(char modeletter)
 390 {
 391         this->Push(modeletter,"");
 392 }
 393
 394 void irc::modestacker::PushPlus()
 395 {
 396         this->Push('+',"");
 397 }
 398
 399 void irc::modestacker::PushMinus()
 400 {
 401         this->Push('-',"");
 402 }
 403
 404 int irc::modestacker::GetStackedLine(std::vector<std::string> &result, int max_line_size)
 405 {
 406         if (sequence.empty())
 407         {
 408                 return 0;
 409         }
 410
 411         unsigned int n = 0;
 412         int size = 1; /* Account for initial +/- char */
 413         int nextsize = 0;
 414         int start = result.size();
 415         std::string modeline = adding ? "+" : "-";
 416         result.push_back(modeline);
 417
 418         if (sequence.size() > 1)
 419                 nextsize = sequence[1].length() + 2;
 420
 421         while (!sequence[0].empty() && (sequence.size() > 1) && (n < ServerInstance->Config->Limits.MaxModes) && ((size + nextsize) < max_line_size))
 422         {
 423                 modeline += *(sequence[0].begin());
 424                 if (!sequence[1].empty())
 425                 {
 426                         result.push_back(sequence[1]);
 427                         size += nextsize; /* Account for mode character and whitespace */
 428                 }
 429                 sequence[0].erase(sequence[0].begin());
 430                 sequence.erase(sequence.begin() + 1);
 431
 432                 if (sequence.size() > 1)
 433                         nextsize = sequence[1].length() + 2;
 434
 435                 n++;
 436         }
 437         result[start] = modeline;
 438
 439         return n;
 440 }
 441
 442 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end)
 443 {
 444         if (end < begin)
 445                 throw "stringjoiner logic error, this causes problems.";
 446
 447         for (int v = begin; v < end; v++)
 448                 joined.append(sequence[v]).append(seperator);
 449         joined.append(sequence[end]);
 450 }
 451
 452 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end)
 453 {
 454         if (end < begin)
 455                 throw "stringjoiner logic error, this causes problems.";
 456
 457         for (int v = begin; v < end; v++)
 458                 joined.append(sequence[v]).append(seperator);
 459         joined.append(sequence[end]);
 460 }
 461
 462 irc::stringjoiner::stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end)
 463 {
 464         if (end < begin)
 465                 throw "stringjoiner logic error, this causes problems.";
 466
 467         for (int v = begin; v < end; v++)
 468                 joined.append(sequence[v]).append(seperator);
 469         joined.append(sequence[end]);
 470 }
 471
 472 std::string& irc::stringjoiner::GetJoined()
 473 {
 474         return joined;
 475 }
 476
 477 irc::portparser::portparser(const std::string &source, bool allow_overlapped) : in_range(0), range_begin(0), range_end(0), overlapped(allow_overlapped)
 478 {
 479         sep = new irc::commasepstream(source);
 480         overlap_set.clear();
 481 }
 482
 483 irc::portparser::~portparser()
 484 {
 485         delete sep;
 486 }
 487
 488 bool irc::portparser::Overlaps(long val)
 489 {
 490         if (!overlapped)
 491                 return false;
 492
 493         if (overlap_set.find(val) == overlap_set.end())
 494         {
 495                 overlap_set[val] = true;
 496                 return false;
 497         }
 498         else
 499                 return true;
 500 }
 501
 502 long irc::portparser::GetToken()
 503 {
 504         if (in_range > 0)
 505         {
 506                 in_range++;
 507                 if (in_range <= range_end)
 508                 {
 509                         if (!Overlaps(in_range))
 510                         {
 511                                 return in_range;
 512                         }
 513                         else
 514                         {
 515                                 while (((Overlaps(in_range)) && (in_range <= range_end)))
 516                                         in_range++;
 517
 518                                 if (in_range <= range_end)
 519                                         return in_range;
 520                         }
 521                 }
 522                 else
 523                         in_range = 0;
 524         }
 525
 526         std::string x;
 527         sep->GetToken(x);
 528
 529         if (x.empty())
 530                 return 0;
 531
 532         while (Overlaps(atoi(x.c_str())))
 533         {
 534                 if (!sep->GetToken(x))
 535                         return 0;
 536         }
 537
 538         std::string::size_type dash = x.rfind('-');
 539         if (dash != std::string::npos)
 540         {
 541                 std::string sbegin = x.substr(0, dash);
 542                 std::string send = x.substr(dash+1, x.length());
 543                 range_begin = atoi(sbegin.c_str());
 544                 range_end = atoi(send.c_str());
 545
 546                 if ((range_begin > 0) && (range_end > 0) && (range_begin < 65536) && (range_end < 65536) && (range_begin < range_end))
 547                 {
 548                         in_range = range_begin;
 549                         return in_range;
 550                 }
 551                 else
 552                 {
 553                         /* Assume its just the one port */
 554                         return atoi(sbegin.c_str());
 555                 }
 556         }
 557         else
 558         {
 559                 return atoi(x.c_str());
 560         }
 561 }
 562
 563 /*const std::basic_string& SearchAndReplace(std::string& text, const std::string& pattern, const std::string& replace)
 564 {
 565         std::string replacement;
 566         if ((!pattern.empty()) && (!text.empty()))
 567         {
 568                 for (std::string::size_type n = 0; n != text.length(); ++n)
 569                 {
 570                         if (text.length() >= pattern.length() && text.substr(n, pattern.length()) == pattern)
 571                         {
 572                                 replacement.append(replace);
 573                                 n = n + pattern.length() - 1;
 574                         }
 575                         else
 576                         {
 577                                 replacement += text[n];
 578                         }
 579                 }
 580         }
 581         text = replacement;
 582         return text;
 583 }*/