src/hashcomp.cpp

   1 /*
   2  * InspIRCd -- Internet Relay Chat Daemon
   3  *
   4  *   Copyright (C) 2009 Daniel De Graaf <danieldg@inspircd.org>
   5  *   Copyright (C) 2005-2009 Craig Edwards <craigedwards@brainbox.cc>
   6  *   Copyright (C) 2007-2008 Robin Burchell <robin+git@viroteck.net>
   7  *   Copyright (C) 2007 Dennis Friis <peavey@inspircd.org>
   8  *
   9  * This file is part of InspIRCd.  InspIRCd is free software: you can
  10  * redistribute it and/or modify it under the terms of the GNU General Public
  11  * License as published by the Free Software Foundation, version 2.
  12  *
  13  * This program is distributed in the hope that it will be useful, but WITHOUT
  14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  15  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20  */
  21
  22
  23 /* $Core */
  24
  25 #include "inspircd.h"
  26 #include "hashcomp.h"
  27 #include "hash_map.h"
  28
  29 /******************************************************
  30  *
  31  * The hash functions of InspIRCd are the centrepoint
  32  * of the entire system. If these functions are
  33  * inefficient or wasteful, the whole program suffers
  34  * as a result. A lot of C programmers in the ircd
  35  * scene spend a lot of time debating (arguing) about
  36  * the best way to write hash functions to hash irc
  37  * nicknames, channels etc.
  38  * We are lucky as C++ developers as hash_map does
  39  * a lot of this for us. It does intellegent memory
  40  * requests, bucketing, search functions, insertion
  41  * and deletion etc. All we have to do is write some
  42  * overloaded comparison and hash value operators which
  43  * cause it to act in an irc-like way. The features we
  44  * add to the standard hash_map are:
  45  *
  46  * Case insensitivity: The hash_map will be case
  47  * insensitive.
  48  *
  49  * Scandanavian Comparisons: The characters [, ], \ will
  50  * be considered the lowercase of {, } and |.
  51  *
  52  ******************************************************/
  53
  54 /** A mapping of uppercase to lowercase, including scandinavian
  55  * 'oddities' as specified by RFC1459, e.g. { -> [, and | -> \
  56  */
  57 unsigned const char rfc_case_insensitive_map[256] = {
  58         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
  59         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
  60         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
  61         60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
  62         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 94, 95, 96, 97, 98, 99,           /* 80-99 */
  63         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
  64         120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
  65         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
  66         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
  67         180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
  68         200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
  69         220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
  70         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
  71 };
  72
  73 /** Case insensitive map, ASCII rules.
  74  * That is;
  75  * [ != {, but A == a.
  76  */
  77 unsigned const char ascii_case_insensitive_map[256] = {
  78         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,                                   /* 0-19 */
  79         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,                         /* 20-39 */
  80         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,                         /* 40-59 */
  81         60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,             /* 60-79 */
  82         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,              /* 80-99 */
  83         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,     /* 100-119 */
  84         120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,     /* 120-139 */
  85         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,     /* 140-159 */
  86         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,     /* 160-179 */
  87         180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,     /* 180-199 */
  88         200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,     /* 200-219 */
  89         220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,     /* 220-239 */
  90         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255                          /* 240-255 */
  91 };
  92
  93 /** Case sensitive map.
  94  * Can technically also be used for ASCII case sensitive comparisons, as [ != {, etc.
  95  */
  96 unsigned const char rfc_case_sensitive_map[256] = {
  97         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
  98         21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  99         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
 100         61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
 101         81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
 102         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
 103         121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
 104         141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 105         161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
 106         181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
 107         201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
 108         221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
 109         241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
 110 };
 111
 112 /* convert a string to lowercase. Note following special circumstances
 113  * taken from RFC 1459. Many "official" server branches still hold to this
 114  * rule so i will too;
 115  *
 116  *  Because of IRC's scandanavian origin, the characters {}| are
 117  *  considered to be the lower case equivalents of the characters []\,
 118  *  respectively. This is a critical issue when determining the
 119  *  equivalence of two nicknames.
 120  */
 121 void nspace::strlower(char *n)
 122 {
 123         if (n)
 124         {
 125                 for (char* t = n; *t; t++)
 126                         *t = national_case_insensitive_map[(unsigned char)*t];
 127         }
 128 }
 129
 130 #ifdef HASHMAP_DEPRECATED
 131         size_t CoreExport nspace::insensitive::operator()(const std::string &s) const
 132 #else
 133         size_t nspace::hash<std::string>::operator()(const std::string &s) const
 134 #endif
 135
 136 {
 137         /* XXX: NO DATA COPIES! :)
 138          * The hash function here is practically
 139          * a copy of the one in STL's hash_fun.h,
 140          * only with *x replaced with national_case_insensitive_map[*x].
 141          * This avoids a copy to use hash<const char*>
 142          */
 143         register size_t t = 0;
 144         for (std::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 145                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 146         return t;
 147 }
 148
 149
 150 size_t CoreExport irc::hash::operator()(const irc::string &s) const
 151 {
 152         register size_t t = 0;
 153         for (irc::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
 154                 t = 5 * t + national_case_insensitive_map[(unsigned char)*x];
 155         return t;
 156 }
 157
 158 bool irc::StrHashComp::operator()(const std::string& s1, const std::string& s2) const
 159 {
 160         const unsigned char* n1 = (const unsigned char*)s1.c_str();
 161         const unsigned char* n2 = (const unsigned char*)s2.c_str();
 162         for (; *n1 && *n2; n1++, n2++)
 163                 if (national_case_insensitive_map[*n1] != national_case_insensitive_map[*n2])
 164                         return false;
 165         return (national_case_insensitive_map[*n1] == national_case_insensitive_map[*n2]);
 166 }
 167
 168 /******************************************************
 169  *
 170  * This is the implementation of our special irc::string
 171  * class which is a case-insensitive equivalent to
 172  * std::string which is not only case-insensitive but
 173  * can also do scandanavian comparisons, e.g. { = [, etc.
 174  *
 175  * This class depends on the const array 'national_case_insensitive_map'.
 176  *
 177  ******************************************************/
 178
 179 bool irc::irc_char_traits::eq(char c1st, char c2nd)
 180 {
 181         return national_case_insensitive_map[(unsigned char)c1st] == national_case_insensitive_map[(unsigned char)c2nd];
 182 }
 183
 184 bool irc::irc_char_traits::ne(char c1st, char c2nd)
 185 {
 186         return national_case_insensitive_map[(unsigned char)c1st] != national_case_insensitive_map[(unsigned char)c2nd];
 187 }
 188
 189 bool irc::irc_char_traits::lt(char c1st, char c2nd)
 190 {
 191         return national_case_insensitive_map[(unsigned char)c1st] < national_case_insensitive_map[(unsigned char)c2nd];
 192 }
 193
 194 int irc::irc_char_traits::compare(const char* str1, const char* str2, size_t n)
 195 {
 196         for(unsigned int i = 0; i < n; i++)
 197         {
 198                 if(national_case_insensitive_map[(unsigned char)*str1] > national_case_insensitive_map[(unsigned char)*str2])
 199                         return 1;
 200
 201                 if(national_case_insensitive_map[(unsigned char)*str1] < national_case_insensitive_map[(unsigned char)*str2])
 202                         return -1;
 203
 204                 if(*str1 == 0 || *str2 == 0)
 205                         return 0;
 206
 207                 str1++;
 208                 str2++;
 209         }
 210         return 0;
 211 }
 212
 213 const char* irc::irc_char_traits::find(const char* s1, int  n, char c)
 214 {
 215         while(n-- > 0 && national_case_insensitive_map[(unsigned char)*s1] != national_case_insensitive_map[(unsigned char)c])
 216                 s1++;
 217         return (n >= 0) ? s1 : NULL;
 218 }
 219
 220 irc::tokenstream::tokenstream(const std::string &source) : tokens(source), last_pushed(false)
 221 {
 222         /* Record starting position and current position */
 223         last_starting_position = tokens.begin();
 224         n = tokens.begin();
 225 }
 226
 227 irc::tokenstream::~tokenstream()
 228 {
 229 }
 230
 231 bool irc::tokenstream::GetToken(std::string &token)
 232 {
 233         std::string::iterator lsp = last_starting_position;
 234
 235         while (n != tokens.end())
 236         {
 237                 /** Skip multi space, converting "  " into " "
 238                  */
 239                 while ((n+1 != tokens.end()) && (*n == ' ') && (*(n+1) == ' '))
 240                         n++;
 241
 242                 if ((last_pushed) && (*n == ':'))
 243                 {
 244                         /* If we find a token thats not the first and starts with :,
 245                          * this is the last token on the line
 246                          */
 247                         std::string::iterator curr = ++n;
 248                         n = tokens.end();
 249                         token = std::string(curr, tokens.end());
 250                         return true;
 251                 }
 252
 253                 last_pushed = false;
 254
 255                 if ((*n == ' ') || (n+1 == tokens.end()))
 256                 {
 257                         /* If we find a space, or end of string, this is the end of a token.
 258                          */
 259                         last_starting_position = n+1;
 260                         last_pushed = *n == ' ';
 261
 262                         std::string strip(lsp, n+1 == tokens.end() ? n+1  : n++);
 263                         while ((strip.length()) && (strip.find_last_of(' ') == strip.length() - 1))
 264                                 strip.erase(strip.end() - 1);
 265
 266                         token = strip;
 267                         return !token.empty();
 268                 }
 269
 270                 n++;
 271         }
 272         token.clear();
 273         return false;
 274 }
 275
 276 bool irc::tokenstream::GetToken(irc::string &token)
 277 {
 278         std::string stdstring;
 279         bool returnval = GetToken(stdstring);
 280         token = assign(stdstring);
 281         return returnval;
 282 }
 283
 284 bool irc::tokenstream::GetToken(int &token)
 285 {
 286         std::string tok;
 287         bool returnval = GetToken(tok);
 288         token = ConvToInt(tok);
 289         return returnval;
 290 }
 291
 292 bool irc::tokenstream::GetToken(long &token)
 293 {
 294         std::string tok;
 295         bool returnval = GetToken(tok);
 296         token = ConvToInt(tok);
 297         return returnval;
 298 }
 299
 300 irc::sepstream::sepstream(const std::string &source, char seperator) : tokens(source), sep(seperator)
 301 {
 302         last_starting_position = tokens.begin();
 303         n = tokens.begin();
 304 }
 305
 306 bool irc::sepstream::GetToken(std::string &token)
 307 {
 308         std::string::iterator lsp = last_starting_position;
 309
 310         while (n != tokens.end())
 311         {
 312                 if ((*n == sep) || (n+1 == tokens.end()))
 313                 {
 314                         last_starting_position = n+1;
 315                         token = std::string(lsp, n+1 == tokens.end() ? n+1  : n++);
 316
 317                         while ((token.length()) && (token.find_last_of(sep) == token.length() - 1))
 318                                 token.erase(token.end() - 1);
 319
 320                         if (token.empty())
 321                                 n++;
 322
 323                         return n == tokens.end() ? false : true;
 324                 }
 325
 326                 n++;
 327         }
 328
 329         token = "";
 330         return false;
 331 }
 332
 333 const std::string irc::sepstream::GetRemaining()
 334 {
 335         return std::string(n, tokens.end());
 336 }
 337
 338 bool irc::sepstream::StreamEnd()
 339 {
 340         return ((n + 1) == tokens.end());
 341 }
 342
 343 irc::sepstream::~sepstream()
 344 {
 345 }
 346
 347 std::string irc::hex(const unsigned char *raw, size_t rawsz)
 348 {
 349         if (!rawsz)
 350                 return "";
 351
 352         /* EWW! This used to be using sprintf, which is WAY inefficient. -Special */
 353
 354         const char *hex = "0123456789abcdef";
 355         static char hexbuf[MAXBUF];
 356
 357         size_t i, j;
 358         for (i = 0, j = 0; j < rawsz; ++j)
 359         {
 360                 hexbuf[i++] = hex[raw[j] / 16];
 361                 hexbuf[i++] = hex[raw[j] % 16];
 362         }
 363         hexbuf[i] = 0;
 364
 365         return hexbuf;
 366 }
 367
 368 CoreExport const char* irc::Spacify(const char* n)
 369 {
 370         static char x[MAXBUF];
 371         strlcpy(x,n,MAXBUF);
 372         for (char* y = x; *y; y++)
 373                 if (*y == '_')
 374                         *y = ' ';
 375         return x;
 376 }
 377
 378
 379 irc::modestacker::modestacker(bool add) : adding(add)
 380 {
 381         sequence.clear();
 382         sequence.push_back("");
 383 }
 384
 385 void irc::modestacker::Push(char modeletter, const std::string &parameter)
 386 {
 387         *(sequence.begin()) += modeletter;
 388         sequence.push_back(parameter);
 389 }
 390
 391 void irc::modestacker::Push(char modeletter)
 392 {
 393         this->Push(modeletter,"");
 394 }
 395
 396 void irc::modestacker::PushPlus()
 397 {
 398         this->Push('+',"");
 399 }
 400
 401 void irc::modestacker::PushMinus()
 402 {
 403         this->Push('-',"");
 404 }
 405
 406 int irc::modestacker::GetStackedLine(std::vector<std::string> &result, int max_line_size)
 407 {
 408         if (sequence.empty())
 409         {
 410                 return 0;
 411         }
 412
 413         unsigned int n = 0;
 414         int size = 1; /* Account for initial +/- char */
 415         int nextsize = 0;
 416         int start = result.size();
 417         std::string modeline = adding ? "+" : "-";
 418         result.push_back(modeline);
 419
 420         if (sequence.size() > 1)
 421                 nextsize = sequence[1].length() + 2;
 422
 423         while (!sequence[0].empty() && (sequence.size() > 1) && (n < ServerInstance->Config->Limits.MaxModes) && ((size + nextsize) < max_line_size))
 424         {
 425                 modeline += *(sequence[0].begin());
 426                 if (!sequence[1].empty())
 427                 {
 428                         result.push_back(sequence[1]);
 429                         size += nextsize; /* Account for mode character and whitespace */
 430                 }
 431                 sequence[0].erase(sequence[0].begin());
 432                 sequence.erase(sequence.begin() + 1);
 433
 434                 if (sequence.size() > 1)
 435                         nextsize = sequence[1].length() + 2;
 436
 437                 n++;
 438         }
 439         result[start] = modeline;
 440
 441         return n;
 442 }
 443
 444 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end)
 445 {
 446         if (end < begin)
 447                 return; // nothing to do here
 448
 449         for (int v = begin; v < end; v++)
 450                 joined.append(sequence[v]).append(seperator);
 451         joined.append(sequence[end]);
 452 }
 453
 454 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end)
 455 {
 456         if (end < begin)
 457                 return; // nothing to do here
 458
 459         for (int v = begin; v < end; v++)
 460                 joined.append(sequence[v]).append(seperator);
 461         joined.append(sequence[end]);
 462 }
 463
 464 irc::stringjoiner::stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end)
 465 {
 466         if (end < begin)
 467                 return; // nothing to do here
 468
 469         for (int v = begin; v < end; v++)
 470                 joined.append(sequence[v]).append(seperator);
 471         joined.append(sequence[end]);
 472 }
 473
 474 std::string& irc::stringjoiner::GetJoined()
 475 {
 476         return joined;
 477 }
 478
 479 irc::portparser::portparser(const std::string &source, bool allow_overlapped)
 480         : sep(source), in_range(0), range_begin(0), range_end(0), overlapped(allow_overlapped)
 481 {
 482 }
 483
 484 bool irc::portparser::Overlaps(long val)
 485 {
 486         if (overlapped)
 487                 return false;
 488
 489         return (!overlap_set.insert(val).second);
 490 }
 491
 492 long irc::portparser::GetToken()
 493 {
 494         if (in_range > 0)
 495         {
 496                 in_range++;
 497                 if (in_range <= range_end)
 498                 {
 499                         if (!Overlaps(in_range))
 500                         {
 501                                 return in_range;
 502                         }
 503                         else
 504                         {
 505                                 while (((Overlaps(in_range)) && (in_range <= range_end)))
 506                                         in_range++;
 507
 508                                 if (in_range <= range_end)
 509                                         return in_range;
 510                         }
 511                 }
 512                 else
 513                         in_range = 0;
 514         }
 515
 516         std::string x;
 517         sep.GetToken(x);
 518
 519         if (x.empty())
 520                 return 0;
 521
 522         while (Overlaps(atoi(x.c_str())))
 523         {
 524                 if (!sep.GetToken(x))
 525                         return 0;
 526         }
 527
 528         std::string::size_type dash = x.rfind('-');
 529         if (dash != std::string::npos)
 530         {
 531                 std::string sbegin = x.substr(0, dash);
 532                 std::string send = x.substr(dash+1, x.length());
 533                 range_begin = atoi(sbegin.c_str());
 534                 range_end = atoi(send.c_str());
 535
 536                 if ((range_begin > 0) && (range_end > 0) && (range_begin < 65536) && (range_end < 65536) && (range_begin < range_end))
 537                 {
 538                         in_range = range_begin;
 539                         return in_range;
 540                 }
 541                 else
 542                 {
 543                         /* Assume its just the one port */
 544                         return atoi(sbegin.c_str());
 545                 }
 546         }
 547         else
 548         {
 549                 return atoi(x.c_str());
 550         }
 551 }
 552
 553 /*const std::basic_string& SearchAndReplace(std::string& text, const std::string& pattern, const std::string& replace)
 554 {
 555         std::string replacement;
 556         if ((!pattern.empty()) && (!text.empty()))
 557         {
 558                 for (std::string::size_type n = 0; n != text.length(); ++n)
 559                 {
 560                         if (text.length() >= pattern.length() && text.substr(n, pattern.length()) == pattern)
 561                         {
 562                                 replacement.append(replace);
 563                                 n = n + pattern.length() - 1;
 564                         }
 565                         else
 566                         {
 567                                 replacement += text[n];
 568                         }
 569                 }
 570         }
 571         text = replacement;
 572         return text;
 573 }*/