src/modules/m_nationalchars.cpp

   1 /*
   2  * InspIRCd -- Internet Relay Chat Daemon
   3  *
   4  *   Copyright (C) 2019 Matt Schatz <genius3000@g3k.solutions>
   5  *   Copyright (C) 2013, 2015, 2017, 2020 Sadie Powell <sadie@witchery.services>
   6  *   Copyright (C) 2012-2016 Attila Molnar <attilamolnar@hush.com>
   7  *   Copyright (C) 2012, 2019 Robby <robby@chatbelgie.be>
   8  *   Copyright (C) 2011 jackmcbarn <jackmcbarn@inspircd.org>
   9  *   Copyright (C) 2009-2010 Daniel De Graaf <danieldg@inspircd.org>
  10  *   Copyright (C) 2009-2010 Craig Edwards <brain@inspircd.org>
  11  *   Copyright (C) 2009 Uli Schlachter <psychon@inspircd.org>
  12  *   Copyright (C) 2009 Robin Burchell <robin+git@viroteck.net>
  13  *   Copyright (C) 2009 Dennis Friis <peavey@inspircd.org>
  14  *
  15  * This file is part of InspIRCd.  InspIRCd is free software: you can
  16  * redistribute it and/or modify it under the terms of the GNU General Public
  17  * License as published by the Free Software Foundation, version 2.
  18  *
  19  * This program is distributed in the hope that it will be useful, but WITHOUT
  20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  21  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
  22  * details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  26  */
  27
  28
/* Contains code from the UnrealIRCd + Bynets patch (https://www.unrealircd.org and https://bynets.org).
   The original patch was made by Dmitry "Killer{R}" Kononko (http://killprog.com).
   Changed between 2008-06-15 and 2009-02-11
   by Chernov-Phoenix Alexey (Phoenix@RusNet) mailto:phoenix /email address separator/ pravmail.ru */
  33
  34 #include "inspircd.h"
  35 #include <fstream>
  36
  37 class lwbNickHandler
  38 {
  39  public:
  40         static bool Call(const std::string&);
  41 };
  42
  43                                                                  /*,m_reverse_additionalUp[256];*/
  44 static unsigned char m_reverse_additional[256],m_additionalMB[256],m_additionalUtf8[256],m_additionalUtf8range[256],m_additionalUtf8interval[256];
  45
  46 char utf8checkrest(unsigned char * mb, unsigned char cnt)
  47 {
  48         for (unsigned char * tmp=mb; tmp<mb+cnt; tmp++)
  49         {
  50                 /* & is faster! -- Phoenix (char & b11000000 == b10000000) */
  51                 if ((*tmp & 192) != 128)
  52                         return -1;
  53         }
  54         return cnt + 1;
  55 }
  56
  57
  58 char utf8size(unsigned char * mb)
  59 {
  60         if (!*mb)
  61                 return -1;
  62         if (!(*mb & 128))
  63                 return 1;
  64         if ((*mb & 224) == 192)
  65                 return utf8checkrest(mb + 1,1);
  66         if ((*mb & 240) == 224)
  67                 return utf8checkrest(mb + 1,2);
  68         if ((*mb & 248) == 240)
  69                 return utf8checkrest(mb + 1,3);
  70         return -1;
  71 }
  72
  73
  74 /* Conditions added */
  75 bool lwbNickHandler::Call(const std::string& nick)
  76 {
  77         if (nick.empty() || isdigit(nick[0]))
  78                 return false;
  79
  80         const char* n = nick.c_str();
  81         unsigned int p = 0;
  82         for (const char* i = n; *i; i++, p++)
  83         {
  84                 /* 1. Multibyte encodings support:  */
  85                 /* 1.1. 16bit char. areas, e.g. chinese:*/
  86
  87                 /* if current character is the last, we DO NOT check it against multibyte table */
  88                 /* if there are mbtable ranges, use ONLY them. No 8bit at all */
  89                 if (i[1] && m_additionalMB[0])
  90                 {
  91                         /* otherwise let's take a look at the current character and the following one */
  92                         bool found = false;
  93                         for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
  94                         {
  95                                 if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
  96                                 {
  97                                         /* multibyte range character found */
  98                                         i++;
  99                                         p++;
 100                                         found = true;
 101                                         break;
 102                                 }
 103                         }
 104                         if (found)
 105                                 /* next char! */
 106                                 continue;
 107                         else
 108                                 /* there are ranges, but incorrect char (8bit?) given, sorry */
 109                                 return false;
 110                 }
 111
 112                 /* 2. 8bit character support */
 113                 if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
 114                         /* "A"-"}" can occur anywhere in a nickname */
 115                         continue;
 116
 117                 if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
 118                         /* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
 119                         continue;
 120
 121                 /* 3.1. Check against a simple UTF-8 characters enumeration */
 122                 int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
 123                 /* do check only if current multibyte character is valid UTF-8 only */
 124                 if (ncursize != -1)
 125                 {
 126                         bool found = false;
 127                         for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
 128                         {
 129                                 cursize = utf8size(mb);
 130                                 /* Size differs? Pick the next! */
 131                                 if (cursize != ncursize)
 132                                         continue;
 133
 134                                 if (!strncmp(i, (char *)mb, cursize))
 135                                 {
 136                                         i += cursize - 1;
 137                                         p += cursize - 1;
 138                                         found = true;
 139                                         break;
 140                                 }
 141                         }
 142                         if (found)
 143                                 continue;
 144
 145                         /* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
 146                         found = false;
 147                         for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
 148                         {
 149                                 cursize = utf8size(mb);
 150                                 /* Size differs (or lengthbyte is zero)? Pick the next! */
 151                                 if ((cursize != ncursize) || (!mb[cursize]))
 152                                         continue;
 153
 154                                 unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
 155                                 strncpy((char* ) uright, (char *) mb, cursize);
 156
 157                                 for (int temp = cursize - 1; (temp >= 0) && range; --temp)
 158                                 {
 159                                         /* all but the first char are 64-based */
 160                                         if (temp)
 161                                         {
 162                                                 char part64 = range & 63; /* i.e. % 64 */
 163                                                 /* handle carrying over */
 164                                                 if (uright[temp] + part64 - 1 > 191)
 165                                                 {
 166                                                         uright[temp] -= 64;
 167                                                         range += 64;
 168                                                 }
 169                                                 uright[temp] += part64;
 170                                                 range >>= 6; /* divide it on a 64 */
 171                                         }
 172                                         /* the first char of UTF-8 doesn't follow the rule */
 173                                         else
 174                                         {
 175                                                 uright[temp] += range;
 176                                         }
 177                                 }
 178
 179                                 if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
 180                                 {
 181                                         i += cursize - 1;
 182                                         p += cursize - 1;
 183                                         found = true;
 184                                         break;
 185                                 }
 186                         }
 187                         if (found)
 188                                 continue;
 189
 190                         /* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
 191                         found = false;
 192                         for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
 193                                 && (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
 194                         {
 195                                 cursize = utf8size(mb);
 196                                 cursize2= utf8size(mb+cursize);
 197
 198                                 int minlen  = cursize  > ncursize ? ncursize : cursize;
 199                                 int minlen2 = cursize2 > ncursize ? ncursize : cursize2;
 200
 201                                 unsigned char* uright = mb + cursize;
 202
 203                                 if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
 204                                 {
 205                                         i += cursize - 1;
 206                                         p += cursize - 1;
 207                                         found = true;
 208                                         break;
 209                                 }
 210                         }
 211                         if (found)
 212                                 continue;
 213                 }
 214
 215                 /* invalid character! abort */
 216                 return false;
 217         }
 218
 219         /* too long? or not -- pointer arithmetic rocks */
 220         return (p < ServerInstance->Config->Limits.NickMax);
 221 }
 222
 223
 224 class ModuleNationalChars : public Module
 225 {
 226         std::string charset;
 227         unsigned char m_additional[256], m_additionalUp[256], m_lower[256], m_upper[256];
 228         TR1NS::function<bool(const std::string&)> rememberer;
 229         bool forcequit;
 230         const unsigned char * lowermap_rememberer;
 231         std::string casemapping_rememberer;
 232         unsigned char prev_map[256];
 233
 234         template <typename T>
 235         void RehashHashmap(T& hashmap)
 236         {
 237                 T newhash(hashmap.bucket_count());
 238                 for (typename T::const_iterator i = hashmap.begin(); i != hashmap.end(); ++i)
 239                         newhash.insert(std::make_pair(i->first, i->second));
 240                 hashmap.swap(newhash);
 241         }
 242
 243         void CheckRehash()
 244         {
 245                 // See if anything changed
 246                 if (!memcmp(prev_map, national_case_insensitive_map, sizeof(prev_map)))
 247                         return;
 248
 249                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
 250
 251                 RehashHashmap(ServerInstance->Users.clientlist);
 252                 RehashHashmap(ServerInstance->Users.uuidlist);
 253                 RehashHashmap(ServerInstance->chanlist);
 254         }
 255
 256  public:
 257         ModuleNationalChars()
 258                 : rememberer(ServerInstance->IsNick)
 259                 , lowermap_rememberer(national_case_insensitive_map)
 260                 , casemapping_rememberer(ServerInstance->Config->CaseMapping)
 261         {
 262                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
 263         }
 264
 265         void init() CXX11_OVERRIDE
 266         {
 267                 memcpy(m_lower, rfc_case_insensitive_map, 256);
 268                 national_case_insensitive_map = m_lower;
 269
 270                 ServerInstance->IsNick = &lwbNickHandler::Call;
 271         }
 272
 273         void ReadConfig(ConfigStatus& status) CXX11_OVERRIDE
 274         {
 275                 ConfigTag* tag = ServerInstance->Config->ConfValue("nationalchars");
 276                 charset = tag->getString("file");
 277                 std::string casemapping = tag->getString("casemapping", FileSystem::GetFileName(charset), 1);
 278                 if (casemapping.find(' ') != std::string::npos)
 279                         throw ModuleException("<nationalchars:casemapping> must not contain any spaces!");
 280                 ServerInstance->Config->CaseMapping = casemapping;
 281 #if defined _WIN32
 282                 if (!FileSystem::StartsWithWindowsDriveLetter(charset))
 283                         charset.insert(0, "./locales/");
 284 #else
 285                 if(charset[0] != '/')
 286                         charset.insert(0, "../locales/");
 287 #endif
 288                 unsigned char * tables[8] = { m_additional, m_additionalMB, m_additionalUp, m_lower, m_upper, m_additionalUtf8, m_additionalUtf8range, m_additionalUtf8interval };
 289                 if (!loadtables(charset, tables, 8, 5))
 290                         throw ModuleException("The locale file failed to load. Check your log file for more information.");
 291                 forcequit = tag->getBool("forcequit");
 292                 CheckForceQuit("National character set changed");
 293                 CheckRehash();
 294         }
 295
 296         void CheckForceQuit(const char * message)
 297         {
 298                 if (!forcequit)
 299                         return;
 300
 301                 const UserManager::LocalList& list = ServerInstance->Users.GetLocalUsers();
 302                 for (UserManager::LocalList::const_iterator iter = list.begin(); iter != list.end(); )
 303                 {
 304                         /* Fix by Brain: Dont quit UID users */
 305                         // Quitting the user removes it from the list
 306                         User* n = *iter;
 307                         ++iter;
 308                         if (!isdigit(n->nick[0]) && !ServerInstance->IsNick(n->nick))
 309                                 ServerInstance->Users->QuitUser(n, message);
 310                 }
 311         }
 312
 313         ~ModuleNationalChars()
 314         {
 315                 ServerInstance->IsNick = rememberer;
 316                 national_case_insensitive_map = lowermap_rememberer;
 317                 ServerInstance->Config->CaseMapping = casemapping_rememberer;
 318                 // The core rebuilds ISupport on module unload, but before the dtor.
 319                 ServerInstance->ISupport.Build();
 320                 CheckForceQuit("National characters module unloaded");
 321                 CheckRehash();
 322         }
 323
 324         Version GetVersion() CXX11_OVERRIDE
 325         {
 326                 return Version("Allows the server administrator to define what characters are allowed in nicknames and channel names and how those characters should be compared in a case insensitive way.", VF_VENDOR | VF_COMMON);
 327         }
 328
 329         /*make an array to check against it 8bit characters a bit faster. Whether allowed or uppercase (for your needs).*/
 330         void makereverse(unsigned char * from, unsigned  char * to, unsigned int cnt)
 331         {
 332                 memset(to, 0, cnt);
 333                 for(unsigned char * n=from; (*n) && ((*n)<cnt) && (n<from+cnt); n++)
 334                         to[*n] = 1;
 335         }
 336
 337         /*so Bynets Unreal distribution stuff*/
 338         bool loadtables(std::string filename, unsigned char ** tables, unsigned char cnt, char faillimit)
 339         {
 340                 std::ifstream ifs(ServerInstance->Config->Paths.PrependConfig(filename).c_str());
 341                 if (ifs.fail())
 342                 {
 343                         ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for missing file: %s", filename.c_str());
 344                         return false;
 345                 }
 346
 347                 for (unsigned char n=0; n< cnt; n++)
 348                 {
 349                         memset(tables[n], 0, 256);
 350                 }
 351
 352                 memcpy(m_lower, rfc_case_insensitive_map, 256);
 353
 354                 for (unsigned char n = 0; n < cnt; n++)
 355                 {
 356                         if (loadtable(ifs, tables[n], 255) && (n < faillimit))
 357                         {
 358                                 ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for illegal file: %s (line %d)", filename.c_str(), n+1);
 359                                 return false;
 360                         }
 361                 }
 362
 363                 makereverse(m_additional, m_reverse_additional, sizeof(m_additional));
 364                 return true;
 365         }
 366
 367         unsigned char symtoi(const char *t,unsigned char base)
 368         /* base = 16 for hexadecimal, 10 for decimal, 8 for octal ;) */
 369         {
 370                 unsigned char tmp = 0, current;
 371                 while ((*t) && (*t !=' ') && (*t != 13) && (*t != 10) && (*t != ','))
 372                 {
 373                         tmp *= base;
 374                         current = ascii_case_insensitive_map[(unsigned char)*t];
 375                         if (current >= 'a')
 376                                 current = current - 'a' + 10;
 377                         else
 378                                 current = current - '0';
 379                         tmp+=current;
 380                         t++;
 381                 }
 382                 return tmp;
 383         }
 384
 385         int loadtable(std::ifstream &ifs , unsigned char *chartable, unsigned int maxindex)
 386         {
 387                 std::string buf;
 388                 getline(ifs, buf);
 389
 390                 unsigned int i = 0;
 391                 int fail = 0;
 392
 393                 buf.erase(buf.find_last_not_of("\n") + 1);
 394
 395                 if (buf[0] == '.')      /* simple plain-text string after dot */
 396                 {
 397                         i = buf.size() - 1;
 398
 399                         if (i > (maxindex + 1))
 400                                 i = maxindex + 1;
 401
 402                         memcpy(chartable, buf.c_str() + 1, i);
 403                 }
 404                 else
 405                 {
 406                         const char * p = buf.c_str();
 407                         while (*p)
 408                         {
 409                                 if (i > maxindex)
 410                                 {
 411                                         fail = 1;
 412                                         break;
 413                                 }
 414
 415                                 if (*p != '\'')         /* decimal or hexadecimal char code */
 416                                 {
 417                                         if (*p == '0')
 418                                         {
 419                                                 if (p[1] == 'x')
 420                                                          /* hex with the leading "0x" */
 421                                                         chartable[i] = symtoi(p + 2, 16);
 422                                                 else
 423                                                         chartable[i] = symtoi(p + 1, 8);
 424                                         }
 425                                         /* hex form */
 426                                         else if (*p == 'x')
 427                                         {
 428                                                 chartable[i] = symtoi(p + 1, 16);
 429                                         }else    /* decimal form */
 430                                         {
 431                                                 chartable[i] = symtoi(p, 10);
 432                                         }
 433                                 }
 434                                 else             /* plain-text char between '' */
 435                                 {
 436                                         if (*(p + 1) == '\\')
 437                                         {
 438                                                 chartable[i] = *(p + 2);
 439                                                 p += 3;
 440                                         }else
 441                                         {
 442                                                 chartable[i] = *(p + 1);
 443                                                 p += 2;
 444                                         }
 445                                 }
 446                                 while (*p && (*p != ',') && (*p != ' ') && (*p != 13) && (*p != 10))
 447                                         p++;
 448                                 while (*p && ((*p == ',') || (*p == ' ') || (*p == 13) || (*p == 10)))
 449                                         p++;
 450                                 i++;
 451                         }
 452                 }
 453                 return fail;
 454         }
 455 };
 456
 457 MODULE_INIT(ModuleNationalChars)