]> git.netwichtig.de Git - user/henk/code/inspircd.git/blob - src/modules/m_nationalchars.cpp
91293573fab2a4df6a3d93a810682e009dda5970
[user/henk/code/inspircd.git] / src / modules / m_nationalchars.cpp
1 /*
2  * InspIRCd -- Internet Relay Chat Daemon
3  *
4  *   Copyright (C) 2019 Matt Schatz <genius3000@g3k.solutions>
5  *   Copyright (C) 2013, 2015, 2017, 2020 Sadie Powell <sadie@witchery.services>
6  *   Copyright (C) 2012-2016 Attila Molnar <attilamolnar@hush.com>
7  *   Copyright (C) 2012, 2019 Robby <robby@chatbelgie.be>
8  *   Copyright (C) 2011 jackmcbarn <jackmcbarn@inspircd.org>
9  *   Copyright (C) 2009-2010 Daniel De Graaf <danieldg@inspircd.org>
10  *   Copyright (C) 2009-2010 Craig Edwards <brain@inspircd.org>
11  *   Copyright (C) 2009 Uli Schlachter <psychon@inspircd.org>
12  *   Copyright (C) 2009 Robin Burchell <robin+git@viroteck.net>
13  *   Copyright (C) 2009 Dennis Friis <peavey@inspircd.org>
14  *
15  * This file is part of InspIRCd.  InspIRCd is free software: you can
16  * redistribute it and/or modify it under the terms of the GNU General Public
17  * License as published by the Free Software Foundation, version 2.
18  *
19  * This program is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
26  */
27
28
29 /* Contains a code of Unreal IRCd + Bynets patch (https://www.unrealircd.org and https://bynets.org)
30    Original patch is made by Dmitry "Killer{R}" Kononko. (http://killprog.com)
31    Changed at 2008-06-15 - 2009-02-11
32    by Chernov-Phoenix Alexey (Phoenix@RusNet) mailto:phoenix /email address separator/ pravmail.ru */
33
34 #include "inspircd.h"
35 #include <fstream>
36
37 class lwbNickHandler
38 {
39  public:
40         static bool Call(const std::string&);
41 };
42
/* Locale tables shared by the module (which fills them from the locale file)
 * and the nickname validator (which only reads them).
 */

/* Indexed by byte value; non-zero when that byte is listed as an additional allowed character. */
static unsigned char m_reverse_additional[256];
/* Two-byte character ranges, four bytes per record: first-byte low/high, second-byte low/high. */
static unsigned char m_additionalMB[256];
/* Back-to-back UTF-8 characters that are allowed individually. */
static unsigned char m_additionalUtf8[256];
/* Records of <UTF-8 start character><one count byte>. */
static unsigned char m_additionalUtf8range[256];
/* Records of <UTF-8 start character><UTF-8 end character>. */
static unsigned char m_additionalUtf8interval[256];
44
/** Verifies that a run of bytes consists solely of UTF-8 continuation bytes.
 * @param mb  First byte to inspect (the byte after the lead byte).
 * @param cnt Number of bytes to inspect.
 * @return cnt + 1 (the full length of the character including its lead byte)
 *         when every inspected byte has the form 10xxxxxx, otherwise -1.
 * NOTE: the -1 sentinel is returned through plain char, so callers comparing
 *       against -1 rely on char being signed.
 */
char utf8checkrest(unsigned char * mb, unsigned char cnt)
{
        unsigned int idx = 0;
        while (idx < cnt)
        {
                /* top two bits must be exactly 10 */
                const bool continuation = ((mb[idx] & 0xC0) == 0x80);
                if (!continuation)
                        return -1;
                ++idx;
        }
        return cnt + 1;
}
55
56
57 char utf8size(unsigned char * mb)
58 {
59         if (!*mb)
60                 return -1;
61         if (!(*mb & 128))
62                 return 1;
63         if ((*mb & 224) == 192)
64                 return utf8checkrest(mb + 1,1);
65         if ((*mb & 240) == 224)
66                 return utf8checkrest(mb + 1,2);
67         if ((*mb & 248) == 240)
68                 return utf8checkrest(mb + 1,3);
69         return -1;
70 }
71
72
73 /* Conditions added */
74 bool lwbNickHandler::Call(const std::string& nick)
75 {
76         if (nick.empty() || isdigit(nick[0]))
77                 return false;
78
79         const char* n = nick.c_str();
80         unsigned int p = 0;
81         for (const char* i = n; *i; i++, p++)
82         {
83                 /* 1. Multibyte encodings support:  */
84                 /* 1.1. 16bit char. areas, e.g. chinese:*/
85
86                 /* if current character is the last, we DO NOT check it against multibyte table */
87                 /* if there are mbtable ranges, use ONLY them. No 8bit at all */
88                 if (i[1] && m_additionalMB[0])
89                 {
90                         /* otherwise let's take a look at the current character and the following one */
91                         bool found = false;
92                         for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
93                         {
94                                 if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
95                                 {
96                                         /* multibyte range character found */
97                                         i++;
98                                         p++;
99                                         found = true;
100                                         break;
101                                 }
102                         }
103                         if (found)
104                                 /* next char! */
105                                 continue;
106                         else
107                                 /* there are ranges, but incorrect char (8bit?) given, sorry */
108                                 return false;
109                 }
110
111                 /* 2. 8bit character support */
112                 if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
113                         /* "A"-"}" can occur anywhere in a nickname */
114                         continue;
115
116                 if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
117                         /* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
118                         continue;
119
120                 /* 3.1. Check against a simple UTF-8 characters enumeration */
121                 int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
122                 /* do check only if current multibyte character is valid UTF-8 only */
123                 if (ncursize != -1)
124                 {
125                         bool found = false;
126                         for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
127                         {
128                                 cursize = utf8size(mb);
129                                 /* Size differs? Pick the next! */
130                                 if (cursize != ncursize)
131                                         continue;
132
133                                 if (!strncmp(i, (char *)mb, cursize))
134                                 {
135                                         i += cursize - 1;
136                                         p += cursize - 1;
137                                         found = true;
138                                         break;
139                                 }
140                         }
141                         if (found)
142                                 continue;
143
144                         /* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
145                         found = false;
146                         for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
147                         {
148                                 cursize = utf8size(mb);
149                                 /* Size differs (or lengthbyte is zero)? Pick the next! */
150                                 if ((cursize != ncursize) || (!mb[cursize]))
151                                         continue;
152
153                                 unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
154                                 strncpy((char* ) uright, (char *) mb, cursize);
155
156                                 for (int temp = cursize - 1; (temp >= 0) && range; --temp)
157                                 {
158                                         /* all but the first char are 64-based */
159                                         if (temp)
160                                         {
161                                                 char part64 = range & 63; /* i.e. % 64 */
162                                                 /* handle carrying over */
163                                                 if (uright[temp] + part64 - 1 > 191)
164                                                 {
165                                                         uright[temp] -= 64;
166                                                         range += 64;
167                                                 }
168                                                 uright[temp] += part64;
169                                                 range >>= 6; /* divide it on a 64 */
170                                         }
171                                         /* the first char of UTF-8 doesn't follow the rule */
172                                         else
173                                         {
174                                                 uright[temp] += range;
175                                         }
176                                 }
177
178                                 if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
179                                 {
180                                         i += cursize - 1;
181                                         p += cursize - 1;
182                                         found = true;
183                                         break;
184                                 }
185                         }
186                         if (found)
187                                 continue;
188
189                         /* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
190                         found = false;
191                         for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
192                                 && (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
193                         {
194                                 cursize = utf8size(mb);
195                                 cursize2= utf8size(mb+cursize);
196
197                                 int minlen  = cursize  > ncursize ? ncursize : cursize;
198                                 int minlen2 = cursize2 > ncursize ? ncursize : cursize2;
199
200                                 unsigned char* uright = mb + cursize;
201
202                                 if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
203                                 {
204                                         i += cursize - 1;
205                                         p += cursize - 1;
206                                         found = true;
207                                         break;
208                                 }
209                         }
210                         if (found)
211                                 continue;
212                 }
213
214                 /* invalid character! abort */
215                 return false;
216         }
217
218         /* too long? or not -- pointer arithmetic rocks */
219         return (p < ServerInstance->Config->Limits.NickMax);
220 }
221
222
223 class ModuleNationalChars : public Module
224 {
225         std::string charset;
226         unsigned char m_additional[256], m_additionalUp[256], m_lower[256], m_upper[256];
227         TR1NS::function<bool(const std::string&)> rememberer;
228         bool forcequit;
229         const unsigned char * lowermap_rememberer;
230         std::string casemapping_rememberer;
231         unsigned char prev_map[256];
232
233         template <typename T>
234         void RehashHashmap(T& hashmap)
235         {
236                 T newhash(hashmap.bucket_count());
237                 for (typename T::const_iterator i = hashmap.begin(); i != hashmap.end(); ++i)
238                         newhash.insert(std::make_pair(i->first, i->second));
239                 hashmap.swap(newhash);
240         }
241
242         void CheckRehash()
243         {
244                 // See if anything changed
245                 if (!memcmp(prev_map, national_case_insensitive_map, sizeof(prev_map)))
246                         return;
247
248                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
249
250                 RehashHashmap(ServerInstance->Users.clientlist);
251                 RehashHashmap(ServerInstance->Users.uuidlist);
252                 RehashHashmap(ServerInstance->chanlist);
253         }
254
255  public:
256         ModuleNationalChars()
257                 : rememberer(ServerInstance->IsNick)
258                 , lowermap_rememberer(national_case_insensitive_map)
259                 , casemapping_rememberer(ServerInstance->Config->CaseMapping)
260         {
261                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
262         }
263
264         void init() CXX11_OVERRIDE
265         {
266                 memcpy(m_lower, rfc_case_insensitive_map, 256);
267                 national_case_insensitive_map = m_lower;
268
269                 ServerInstance->IsNick = &lwbNickHandler::Call;
270         }
271
272         void ReadConfig(ConfigStatus& status) CXX11_OVERRIDE
273         {
274                 ConfigTag* tag = ServerInstance->Config->ConfValue("nationalchars");
275                 charset = tag->getString("file");
276                 std::string casemapping = tag->getString("casemapping", FileSystem::GetFileName(charset), 1);
277                 if (casemapping.find(' ') != std::string::npos)
278                         throw ModuleException("<nationalchars:casemapping> must not contain any spaces!");
279                 ServerInstance->Config->CaseMapping = casemapping;
280 #if defined _WIN32
281                 if (!FileSystem::StartsWithWindowsDriveLetter(charset))
282                         charset.insert(0, "./locales/");
283 #else
284                 if(charset[0] != '/')
285                         charset.insert(0, "../locales/");
286 #endif
287                 unsigned char * tables[8] = { m_additional, m_additionalMB, m_additionalUp, m_lower, m_upper, m_additionalUtf8, m_additionalUtf8range, m_additionalUtf8interval };
288                 if (!loadtables(charset, tables, 8, 5))
289                         throw ModuleException("The locale file failed to load. Check your log file for more information.");
290                 forcequit = tag->getBool("forcequit");
291                 CheckForceQuit("National character set changed");
292                 CheckRehash();
293         }
294
295         void CheckForceQuit(const char * message)
296         {
297                 if (!forcequit)
298                         return;
299
300                 const UserManager::LocalList& list = ServerInstance->Users.GetLocalUsers();
301                 for (UserManager::LocalList::const_iterator iter = list.begin(); iter != list.end(); )
302                 {
303                         /* Fix by Brain: Dont quit UID users */
304                         // Quitting the user removes it from the list
305                         User* n = *iter;
306                         ++iter;
307                         if (!isdigit(n->nick[0]) && !ServerInstance->IsNick(n->nick))
308                                 ServerInstance->Users->QuitUser(n, message);
309                 }
310         }
311
312         ~ModuleNationalChars()
313         {
314                 ServerInstance->IsNick = rememberer;
315                 national_case_insensitive_map = lowermap_rememberer;
316                 ServerInstance->Config->CaseMapping = casemapping_rememberer;
317                 // The core rebuilds ISupport on module unload, but before the dtor.
318                 ServerInstance->ISupport.Build();
319                 CheckForceQuit("National characters module unloaded");
320                 CheckRehash();
321         }
322
323         Version GetVersion() CXX11_OVERRIDE
324         {
325                 return Version("Allows the server administrator to define what characters are allowed in nicknames and channel names and how those characters should be compared in a case insensitive way.", VF_VENDOR | VF_COMMON);
326         }
327
328         /*make an array to check against it 8bit characters a bit faster. Whether allowed or uppercase (for your needs).*/
329         void makereverse(unsigned char * from, unsigned  char * to, unsigned int cnt)
330         {
331                 memset(to, 0, cnt);
332                 for(unsigned char * n=from; (*n) && ((*n)<cnt) && (n<from+cnt); n++)
333                         to[*n] = 1;
334         }
335
336         /*so Bynets Unreal distribution stuff*/
337         bool loadtables(std::string filename, unsigned char ** tables, unsigned char cnt, char faillimit)
338         {
339                 std::ifstream ifs(ServerInstance->Config->Paths.PrependConfig(filename).c_str());
340                 if (ifs.fail())
341                 {
342                         ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for missing file: %s", filename.c_str());
343                         return false;
344                 }
345
346                 for (unsigned char n=0; n< cnt; n++)
347                 {
348                         memset(tables[n], 0, 256);
349                 }
350
351                 memcpy(m_lower, rfc_case_insensitive_map, 256);
352
353                 for (unsigned char n = 0; n < cnt; n++)
354                 {
355                         if (loadtable(ifs, tables[n], 255) && (n < faillimit))
356                         {
357                                 ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for illegal file: %s (line %d)", filename.c_str(), n+1);
358                                 return false;
359                         }
360                 }
361
362                 makereverse(m_additional, m_reverse_additional, sizeof(m_additional));
363                 return true;
364         }
365
366         unsigned char symtoi(const char *t,unsigned char base)
367         /* base = 16 for hexadecimal, 10 for decimal, 8 for octal ;) */
368         {
369                 unsigned char tmp = 0, current;
370                 while ((*t) && (*t !=' ') && (*t != 13) && (*t != 10) && (*t != ','))
371                 {
372                         tmp *= base;
373                         current = ascii_case_insensitive_map[(unsigned char)*t];
374                         if (current >= 'a')
375                                 current = current - 'a' + 10;
376                         else
377                                 current = current - '0';
378                         tmp+=current;
379                         t++;
380                 }
381                 return tmp;
382         }
383
384         int loadtable(std::ifstream &ifs , unsigned char *chartable, unsigned int maxindex)
385         {
386                 std::string buf;
387                 getline(ifs, buf);
388
389                 unsigned int i = 0;
390                 int fail = 0;
391
392                 buf.erase(buf.find_last_not_of("\n") + 1);
393
394                 if (buf[0] == '.')      /* simple plain-text string after dot */
395                 {
396                         i = buf.size() - 1;
397
398                         if (i > (maxindex + 1))
399                                 i = maxindex + 1;
400
401                         memcpy(chartable, buf.c_str() + 1, i);
402                 }
403                 else
404                 {
405                         const char * p = buf.c_str();
406                         while (*p)
407                         {
408                                 if (i > maxindex)
409                                 {
410                                         fail = 1;
411                                         break;
412                                 }
413
414                                 if (*p != '\'')         /* decimal or hexadecimal char code */
415                                 {
416                                         if (*p == '0')
417                                         {
418                                                 if (p[1] == 'x')
419                                                         /* hex with the leading "0x" */
420                                                         chartable[i] = symtoi(p + 2, 16);
421                                                 else
422                                                         chartable[i] = symtoi(p + 1, 8);
423                                         }
424                                         /* hex form */
425                                         else if (*p == 'x')
426                                         {
427                                                 chartable[i] = symtoi(p + 1, 16);
428                                         }
429                                         else /* decimal form */
430                                         {
431                                                 chartable[i] = symtoi(p, 10);
432                                         }
433                                 }
434                                 else /* plain-text char between '' */
435                                 {
436                                         if (*(p + 1) == '\\')
437                                         {
438                                                 chartable[i] = *(p + 2);
439                                                 p += 3;
440                                         }else
441                                         {
442                                                 chartable[i] = *(p + 1);
443                                                 p += 2;
444                                         }
445                                 }
446                                 while (*p && (*p != ',') && (*p != ' ') && (*p != 13) && (*p != 10))
447                                         p++;
448                                 while (*p && ((*p == ',') || (*p == ' ') || (*p == 13) || (*p == 10)))
449                                         p++;
450                                 i++;
451                         }
452                 }
453                 return fail;
454         }
455 };
456
/* Module entry point. MODULE_INIT is defined by the core headers, not in this
 * file; presumably it exports a factory for ModuleNationalChars -- confirm
 * against inspircd.h. */
MODULE_INIT(ModuleNationalChars)