]> git.netwichtig.de Git - user/henk/code/inspircd.git/blob - src/modules/m_nationalchars.cpp
Automatically attach modules to events
[user/henk/code/inspircd.git] / src / modules / m_nationalchars.cpp
1 /*
2  * InspIRCd -- Internet Relay Chat Daemon
3  *
4  *   Copyright (C) 2009 Daniel De Graaf <danieldg@inspircd.org>
5  *   Copyright (C) 2009 Dennis Friis <peavey@inspircd.org>
6  *   Copyright (C) 2009 Craig Edwards <craigedwards@brainbox.cc>
7  *   Copyright (C) 2009 Robin Burchell <robin+git@viroteck.net>
8  *
9  * This file is part of InspIRCd.  InspIRCd is free software: you can
10  * redistribute it and/or modify it under the terms of the GNU General Public
11  * License as published by the Free Software Foundation, version 2.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  */
21
22
23 /* Contains a code of Unreal IRCd + Bynets patch ( http://www.unrealircd.com/ and http://www.bynets.org/ )
24    Original patch is made by Dmitry "Killer{R}" Kononko. ( http://killprog.com/ )
25    Changed at 2008-06-15 - 2009-02-11
26    by Chernov-Phoenix Alexey (Phoenix@RusNet) mailto:phoenix /email address separator/ pravmail.ru */
27
28 #include "inspircd.h"
29 #include "caller.h"
30 #include <fstream>
31
32 class lwbNickHandler : public HandlerBase1<bool, const std::string&>
33 {
34  public:
35         lwbNickHandler() { }
36         ~lwbNickHandler() { }
37         bool Call(const std::string&);
38 };
39
40                                                                  /*,m_reverse_additionalUp[256];*/
/* Lookup tables shared by lwbNickHandler::Call() and ModuleNationalChars.
 * All of them are file-local and zero-initialised until a locale file is loaded.
 */

/* Indexed by byte value; non-zero marks an extra 8bit character allowed in nicknames.
 * Built by ModuleNationalChars::makereverse() from the m_additional table. */
static unsigned char m_reverse_additional[256];

/* Two-byte character ranges stored as groups of four bytes:
 * <first byte low> <first byte high> <second byte low> <second byte high>. */
static unsigned char m_additionalMB[256];

/* Plain enumeration of allowed UTF-8 characters, stored back to back. */
static unsigned char m_additionalUtf8[256];

/* UTF-8 ranges: each entry is a <start character> followed by one <length of the range> byte. */
static unsigned char m_additionalUtf8range[256];

/* UTF-8 intervals: each entry is a <start character> followed by an <end character>. */
static unsigned char m_additionalUtf8interval[256];
42
/** Validates the continuation bytes of a UTF-8 sequence.
 *
 * The return type is explicitly signed: the signedness of plain char is
 * implementation-defined, and on platforms where it is unsigned a -1 returned
 * through plain char turns into 255, which defeats every "!= -1" check made by
 * the callers.
 *
 * @param mb Pointer to the first byte to check (the byte following the lead byte).
 * @param cnt Number of continuation bytes that must be present.
 * @return cnt + 1 (the sequence length including the lead byte) if each of the
 * cnt bytes has the form 10xxxxxx, otherwise -1. A zero byte is not a valid
 * continuation byte, so the scan never runs past the terminator of a
 * NUL-terminated string.
 */
signed char utf8checkrest(unsigned char * mb, unsigned char cnt)
{
        for (unsigned char * tmp = mb; tmp < mb + cnt; tmp++)
        {
                /* & is faster! -- Phoenix (char & b11000000 == b10000000) */
                if ((*tmp & 192) != 128)
                        return -1;
        }
        return static_cast<signed char>(cnt + 1);
}


/** Determines the length in bytes of the UTF-8 character starting at mb.
 *
 * Only the bit patterns of the lead byte and of the continuation bytes are
 * checked; sequences of one to four bytes are recognised.
 *
 * @param mb Pointer to the lead byte of the character.
 * @return 1 to 4 for a well-formed sequence, or -1 if mb points at a zero byte,
 * at a byte that cannot start a sequence, or at a sequence whose continuation
 * bytes are missing or malformed.
 */
signed char utf8size(unsigned char * mb)
{
        if (!*mb)
                return -1;
        /* 0xxxxxxx: single byte (ASCII) */
        if (!(*mb & 128))
                return 1;
        /* 110xxxxx: lead byte of a two byte sequence */
        if ((*mb & 224) == 192)
                return utf8checkrest(mb + 1, 1);
        /* 1110xxxx: lead byte of a three byte sequence */
        if ((*mb & 240) == 224)
                return utf8checkrest(mb + 1, 2);
        /* 11110xxx: lead byte of a four byte sequence */
        if ((*mb & 248) == 240)
                return utf8checkrest(mb + 1, 3);
        return -1;
}
69
70
71 /* Conditions added */
72 bool lwbNickHandler::Call(const std::string& nick)
73 {
74         if (nick.empty())
75                 return false;
76
77         const char* n = nick.c_str();
78         unsigned int p = 0;
79         for (const char* i = n; *i; i++, p++)
80         {
81                 /* 1. Multibyte encodings support:  */
82                 /* 1.1. 16bit char. areas, e.g. chinese:*/
83
84                 /* if current character is the last, we DO NOT check it against multibyte table */
85                 /* if there are mbtable ranges, use ONLY them. No 8bit at all */
86                 if (i[1] && m_additionalMB[0])
87                 {
88                         /* otherwise let's take a look at the current character and the following one */
89                         bool found = false;
90                         for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
91                         {
92                                 if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
93                                 {
94                                         /* multibyte range character found */
95                                         i++;
96                                         p++;
97                                         found = true;
98                                         break;
99                                 }
100                         }
101                         if (found)
102                                 /* next char! */
103                                 continue;
104                         else
105                                 /* there are ranges, but incorrect char (8bit?) given, sorry */
106                                 return false;
107                 }
108
109                 /* 2. 8bit character support */
110                 if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
111                         /* "A"-"}" can occur anywhere in a nickname */
112                         continue;
113
114                 if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
115                         /* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
116                         continue;
117
118                 /* 3.1. Check against a simple UTF-8 characters enumeration */
119                 int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
120                 /* do check only if current multibyte character is valid UTF-8 only */
121                 if (ncursize != -1)
122                 {
123                         bool found = false;
124                         for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
125                         {
126                                 cursize = utf8size(mb);
127                                 /* Size differs? Pick the next! */
128                                 if (cursize != ncursize)
129                                         continue;
130
131                                 if (!strncmp(i, (char *)mb, cursize))
132                                 {
133                                         i += cursize - 1;
134                                         p += cursize - 1;
135                                         found = true;
136                                         break;
137                                 }
138                         }
139                         if (found)
140                                 continue;
141
142                         /* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
143                         found = false;
144                         for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
145                         {
146                                 cursize = utf8size(mb);
147                                 /* Size differs (or lengthbyte is zero)? Pick the next! */
148                                 if ((cursize != ncursize) || (!mb[cursize]))
149                                         continue;
150
151                                 unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
152                                 strncpy((char* ) uright, (char *) mb, cursize);
153
154                                 for (int temp = cursize - 1; (temp >= 0) && range; --temp)
155                                 {
156                                         /* all but the first char are 64-based */
157                                         if (temp)
158                                         {
159                                                 char part64 = range & 63; /* i.e. % 64 */
160                                                 /* handle carrying over */
161                                                 if (uright[temp] + part64 - 1 > 191)
162                                                 {
163                                                         uright[temp] -= 64;
164                                                         range += 64;
165                                                 }
166                                                 uright[temp] += part64;
167                                                 range >>= 6; /* divide it on a 64 */
168                                         }
169                                         /* the first char of UTF-8 doesn't follow the rule */
170                                         else
171                                         {
172                                                 uright[temp] += range;
173                                         }
174                                 }
175
176                                 if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
177                                 {
178                                         i += cursize - 1;
179                                         p += cursize - 1;
180                                         found = true;
181                                         break;
182                                 }
183                         }
184                         if (found)
185                                 continue;
186
187                         /* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
188                         found = false;
189                         for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
190                                 && (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
191                         {
192                                 cursize = utf8size(mb);
193                                 cursize2= utf8size(mb+cursize);
194
195                                 int minlen  = cursize  > ncursize ? ncursize : cursize;
196                                 int minlen2 = cursize2 > ncursize ? ncursize : cursize2;
197
198                                 unsigned char* uright = mb + cursize;
199
200                                 if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
201                                 {
202                                         i += cursize - 1;
203                                         p += cursize - 1;
204                                         found = true;
205                                         break;
206                                 }
207                         }
208                         if (found)
209                                 continue;
210                 }
211
212                 /* invalid character! abort */
213                 return false;
214         }
215
216         /* too long? or not -- pointer arithmetic rocks */
217         return (p < ServerInstance->Config->Limits.NickMax);
218 }
219
220
221 class ModuleNationalChars : public Module
222 {
223         lwbNickHandler myhandler;
224         std::string charset, casemapping;
225         unsigned char m_additional[256], m_additionalUp[256], m_lower[256], m_upper[256];
226         caller1<bool, const std::string&> rememberer;
227         bool forcequit;
228         const unsigned char * lowermap_rememberer;
229
230  public:
231         ModuleNationalChars()
232                 : rememberer(ServerInstance->IsNick), lowermap_rememberer(national_case_insensitive_map)
233         {
234         }
235
236         void init() CXX11_OVERRIDE
237         {
238                 memcpy(m_lower, rfc_case_insensitive_map, 256);
239                 national_case_insensitive_map = m_lower;
240
241                 ServerInstance->IsNick = &myhandler;
242
243                 OnRehash(NULL);
244         }
245
246         void On005Numeric(std::map<std::string, std::string>& tokens) CXX11_OVERRIDE
247         {
248                 tokens["CASEMAPPING"] = casemapping;
249         }
250
251         void OnRehash(User* user) CXX11_OVERRIDE
252         {
253                 ConfigTag* tag = ServerInstance->Config->ConfValue("nationalchars");
254                 charset = tag->getString("file");
255                 casemapping = tag->getString("casemapping", charset);
256                 if(charset[0] != '/')
257                         charset.insert(0, "../locales/");
258                 unsigned char * tables[8] = { m_additional, m_additionalMB, m_additionalUp, m_lower, m_upper, m_additionalUtf8, m_additionalUtf8range, m_additionalUtf8interval };
259                 loadtables(charset, tables, 8, 5);
260                 forcequit = tag->getBool("forcequit");
261                 CheckForceQuit("National character set changed");
262         }
263
264         void CheckForceQuit(const char * message)
265         {
266                 if (!forcequit)
267                         return;
268
269                 for (LocalUserList::const_iterator iter = ServerInstance->Users->local_users.begin(); iter != ServerInstance->Users->local_users.end(); ++iter)
270                 {
271                         /* Fix by Brain: Dont quit UID users */
272                         User* n = *iter;
273                         if (!isdigit(n->nick[0]) && !ServerInstance->IsNick(n->nick))
274                                 ServerInstance->Users->QuitUser(n, message);
275                 }
276         }
277
278         ~ModuleNationalChars()
279         {
280                 ServerInstance->IsNick = rememberer;
281                 national_case_insensitive_map = lowermap_rememberer;
282                 CheckForceQuit("National characters module unloaded");
283         }
284
285         Version GetVersion() CXX11_OVERRIDE
286         {
287                 return Version("Provides an ability to have non-RFC1459 nicks & support for national CASEMAPPING", VF_VENDOR | VF_COMMON, charset);
288         }
289
290         /*make an array to check against it 8bit characters a bit faster. Whether allowed or uppercase (for your needs).*/
291         void makereverse(unsigned char * from, unsigned  char * to, unsigned int cnt)
292         {
293                 memset(to, 0, cnt);
294                 for(unsigned char * n=from; (*n) && ((*n)<cnt) && (n<from+cnt); n++)
295                         to[*n] = 1;
296         }
297
298         /*so Bynets Unreal distribution stuff*/
299         void loadtables(std::string filename, unsigned char ** tables, unsigned char cnt, char faillimit)
300         {
301                 std::ifstream ifs(filename.c_str());
302                 if (ifs.fail())
303                 {
304                         ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for missing file: %s", filename.c_str());
305                         return;
306                 }
307
308                 for (unsigned char n=0; n< cnt; n++)
309                 {
310                         memset(tables[n], 0, 256);
311                 }
312
313                 memcpy(m_lower, rfc_case_insensitive_map, 256);
314
315                 for (unsigned char n = 0; n < cnt; n++)
316                 {
317                         if (loadtable(ifs, tables[n], 255) && (n < faillimit))
318                         {
319                                 ServerInstance->Logs->Log(MODNAME, LOG_DEFAULT, "loadtables() called for illegal file: %s (line %d)", filename.c_str(), n+1);
320                                 return;
321                         }
322                 }
323
324                 makereverse(m_additional, m_reverse_additional, sizeof(m_additional));
325         }
326
327         unsigned char symtoi(const char *t,unsigned char base)
328         /* base = 16 for hexadecimal, 10 for decimal, 8 for octal ;) */
329         {
330                 unsigned char tmp = 0, current;
331                 while ((*t) && (*t !=' ') && (*t != 13) && (*t != 10) && (*t != ','))
332                 {
333                         tmp *= base;
334                         current = ascii_case_insensitive_map[(unsigned char)*t];
335                         if (current >= 'a')
336                                 current = current - 'a' + 10;
337                         else
338                                 current = current - '0';
339                         tmp+=current;
340                         t++;
341                 }
342                 return tmp;
343         }
344
345         int loadtable(std::ifstream &ifs , unsigned char *chartable, unsigned int maxindex)
346         {
347                 std::string buf;
348                 getline(ifs, buf);
349
350                 unsigned int i = 0;
351                 int fail = 0;
352
353                 buf.erase(buf.find_last_not_of("\n") + 1);
354
355                 if (buf[0] == '.')      /* simple plain-text string after dot */
356                 {
357                         i = buf.size() - 1;
358
359                         if (i > (maxindex + 1))
360                                 i = maxindex + 1;
361
362                         memcpy(chartable, buf.c_str() + 1, i);
363                 }
364                 else
365                 {
366                         const char * p = buf.c_str();
367                         while (*p)
368                         {
369                                 if (i > maxindex)
370                                 {
371                                         fail = 1;
372                                         break;
373                                 }
374
375                                 if (*p != '\'')         /* decimal or hexadecimal char code */
376                                 {
377                                         if (*p == '0')
378                                         {
379                                                 if (p[1] == 'x')
380                                                          /* hex with the leading "0x" */
381                                                         chartable[i] = symtoi(p + 2, 16);
382                                                 else
383                                                         chartable[i] = symtoi(p + 1, 8);
384                                         }
385                                         /* hex form */
386                                         else if (*p == 'x')
387                                         {
388                                                 chartable[i] = symtoi(p + 1, 16);
389                                         }else    /* decimal form */
390                                         {
391                                                 chartable[i] = symtoi(p, 10);
392                                         }
393                                 }
394                                 else             /* plain-text char between '' */
395                                 {
396                                         if (*(p + 1) == '\\')
397                                         {
398                                                 chartable[i] = *(p + 2);
399                                                 p += 3;
400                                         }else
401                                         {
402                                                 chartable[i] = *(p + 1);
403                                                 p += 2;
404                                         }
405                                 }
406                                 while (*p && (*p != ',') && (*p != ' ') && (*p != 13) && (*p != 10))
407                                         p++;
408                                 while (*p && ((*p == ',') || (*p == ' ') || (*p == 13) || (*p == 10)))
409                                         p++;
410                                 i++;
411                         }
412                 }
413                 return fail;
414         }
415 };
416
/* Passes ModuleNationalChars to the MODULE_INIT macro, which is defined outside this file
 * (presumably it generates the entry point the module loader looks for - confirm). */
MODULE_INIT(ModuleNationalChars)