]> git.netwichtig.de Git - user/henk/code/inspircd.git/blob - src/modules/m_nationalchars.cpp
Fix various issues with the nationalchars module.
[user/henk/code/inspircd.git] / src / modules / m_nationalchars.cpp
1 /*
2  * InspIRCd -- Internet Relay Chat Daemon
3  *
4  *   Copyright (C) 2009 Daniel De Graaf <danieldg@inspircd.org>
5  *   Copyright (C) 2009 Dennis Friis <peavey@inspircd.org>
6  *   Copyright (C) 2009 Craig Edwards <craigedwards@brainbox.cc>
7  *   Copyright (C) 2009 Robin Burchell <robin+git@viroteck.net>
8  *
9  * This file is part of InspIRCd.  InspIRCd is free software: you can
10  * redistribute it and/or modify it under the terms of the GNU General Public
11  * License as published by the Free Software Foundation, version 2.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  */
21
22
/* Contains code from the Unreal IRCd + Bynets patch ( http://www.unrealircd.com/ and http://www.bynets.org/ )
   The original patch was made by Dmitry "Killer{R}" Kononko. ( http://killprog.com/ )
   Changed between 2008-06-15 and 2009-02-11
   by Chernov-Phoenix Alexey (Phoenix@RusNet) mailto:phoenix /email address separator/ pravmail.ru */
27
28 #include "inspircd.h"
29 #include "caller.h"
30 #include <fstream>
31
32 /* $ModDesc: Provides an ability to have non-RFC1459 nicks & support for national CASEMAPPING */
33
34 class lwbNickHandler : public HandlerBase2<bool, const char*, size_t>
35 {
36  public:
37         lwbNickHandler() { }
38         virtual ~lwbNickHandler() { }
39         virtual bool Call(const char*, size_t);
40 };
41
42                                                                  /*,m_reverse_additionalUp[256];*/
/* File-scope lookup tables (256 bytes each, zero-initialised at start-up).
 * They are filled by ModuleNationalChars::loadtables() and read by lwbNickHandler::Call().
 */
static unsigned char m_reverse_additional[256];      /* per-byte flag table built by makereverse() */
static unsigned char m_additionalMB[256];            /* 4-byte records: two-byte character ranges */
static unsigned char m_additionalUtf8[256];          /* enumeration of UTF-8 characters */
static unsigned char m_additionalUtf8range[256];     /* <UTF-8 start character><range length> records */
static unsigned char m_additionalUtf8interval[256];  /* <UTF-8 start character><UTF-8 end character> records */
44
/** Checks that the cnt bytes starting at mb are all UTF-8 continuation bytes (10xxxxxx).
 *
 *  A NUL byte is not a continuation byte, so on a NUL-terminated buffer the scan stops
 *  at the terminator instead of running past it.
 *
 *  The result is an int, not a plain char: the signedness of char is implementation-defined,
 *  and where it is unsigned a "-1" returned through char reaches the caller as 255, which
 *  never compares equal to -1.
 *
 *  @param mb  First byte following the lead byte of the sequence.
 *  @param cnt Number of continuation bytes that must be present.
 *  @return cnt + 1 (the full sequence length, lead byte included) if all of them are
 *          continuation bytes, -1 otherwise.
 */
int utf8checkrest(unsigned char * mb, unsigned char cnt)
{
	for (unsigned char * tmp = mb; tmp < mb + cnt; tmp++)
	{
		/* & is faster! -- Phoenix (char & b11000000 == b10000000) */
		if ((*tmp & 192) != 128)
			return -1;
	}
	return cnt + 1;
}


/** Determines the byte length of the UTF-8 sequence that starts at mb.
 *
 *  Only the bit patterns of the lead and continuation bytes are checked.
 *
 *  @param mb Pointer to the lead byte; the buffer is expected to be NUL-terminated.
 *  @return 1 for a 7-bit byte, 2 to 4 for a lead byte followed by the right number of
 *          continuation bytes, -1 for a NUL byte, a stray continuation byte, an
 *          unsupported lead byte or a truncated sequence.
 */
int utf8size(unsigned char * mb)
{
	if (!*mb)
		return -1;
	if (!(*mb & 128))
		return 1;                       /* 0xxxxxxx */
	if ((*mb & 224) == 192)
		return utf8checkrest(mb + 1, 1); /* 110xxxxx + 1 continuation byte */
	if ((*mb & 240) == 224)
		return utf8checkrest(mb + 1, 2); /* 1110xxxx + 2 continuation bytes */
	if ((*mb & 248) == 240)
		return utf8checkrest(mb + 1, 3); /* 11110xxx + 3 continuation bytes */
	return -1;
}
71
72
73 /* Conditions added */
74 bool lwbNickHandler::Call(const char* n, size_t max)
75 {
76         if (!n || !*n)
77                 return false;
78
79         unsigned int p = 0;
80         for (const char* i = n; *i; i++, p++)
81         {
82                 /* 1. Multibyte encodings support:  */
83                 /* 1.1. 16bit char. areas, e.g. chinese:*/
84
85                 /* if current character is the last, we DO NOT check it against multibyte table */
86                 /* if there are mbtable ranges, use ONLY them. No 8bit at all */
87                 if (i[1] && m_additionalMB[0])
88                 {
89                         /* otherwise let's take a look at the current character and the following one */
90                         bool found = false;
91                         for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
92                         {
93                                 if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
94                                 {
95                                         /* multibyte range character found */
96                                         i++;
97                                         p++;
98                                         found = true;
99                                         break;
100                                 }
101                         }
102                         if (found)
103                                 /* next char! */
104                                 continue;
105                         else
106                                 /* there are ranges, but incorrect char (8bit?) given, sorry */
107                                 return false;
108                 }
109
110                 /* 2. 8bit character support */
111                 if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
112                         /* "A"-"}" can occur anywhere in a nickname */
113                         continue;
114
115                 if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
116                         /* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
117                         continue;
118
119                 /* 3.1. Check against a simple UTF-8 characters enumeration */
120                 int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
121                 /* do check only if current multibyte character is valid UTF-8 only */
122                 if (ncursize != -1)
123                 {
124                         bool found = false;
125                         for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
126                         {
127                                 cursize = utf8size(mb);
128                                 /* Size differs? Pick the next! */
129                                 if (cursize != ncursize)
130                                         continue;
131
132                                 if (!strncmp(i, (char *)mb, cursize))
133                                 {
134                                         i += cursize - 1;
135                                         p += cursize - 1;
136                                         found = true;
137                                         break;
138                                 }
139                         }
140                         if (found)
141                                 continue;
142
143                         /* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
144                         found = false;
145                         for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
146                         {
147                                 cursize = utf8size(mb);
148                                 /* Size differs (or lengthbyte is zero)? Pick the next! */
149                                 if ((cursize != ncursize) || (!mb[cursize]))
150                                         continue;
151
152                                 unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
153                                 strncpy((char* ) uright, (char *) mb, cursize);
154
155                                 for (int temp = cursize - 1; (temp >= 0) && range; --temp)
156                                 {
157                                         /* all but the first char are 64-based */
158                                         if (temp)
159                                         {
160                                                 char part64 = range & 63; /* i.e. % 64 */
161                                                 /* handle carrying over */
162                                                 if (uright[temp] + part64 - 1 > 191)
163                                                 {
164                                                         uright[temp] -= 64;
165                                                         range += 64;
166                                                 }
167                                                 uright[temp] += part64;
168                                                 range >>= 6; /* divide it on a 64 */
169                                         }
170                                         /* the first char of UTF-8 doesn't follow the rule */
171                                         else
172                                         {
173                                                 uright[temp] += range;
174                                         }
175                                 }
176
177                                 if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
178                                 {
179                                         i += cursize - 1;
180                                         p += cursize - 1;
181                                         found = true;
182                                         break;
183                                 }
184                         }
185                         if (found)
186                                 continue;
187
188                         /* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
189                         found = false;
190                         for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
191                                 && (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
192                         {
193                                 cursize = utf8size(mb);
194                                 cursize2= utf8size(mb+cursize);
195
196                                 int minlen  = cursize  > ncursize ? ncursize : cursize;
197                                 int minlen2 = cursize2 > ncursize ? ncursize : cursize2;
198
199                                 unsigned char* uright = mb + cursize;
200
201                                 if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
202                                 {
203                                         i += cursize - 1;
204                                         p += cursize - 1;
205                                         found = true;
206                                         break;
207                                 }
208                         }
209                         if (found)
210                                 continue;
211                 }
212
213                 /* invalid character! abort */
214                 return false;
215         }
216
217         /* too long? or not -- pointer arithmetic rocks */
218         return (p < max);
219 }
220
221
222 class ModuleNationalChars : public Module
223 {
224  private:
225         lwbNickHandler myhandler;
226         std::string charset, casemapping;
227         unsigned char m_additional[256], m_additionalUp[256], m_lower[256], m_upper[256];
228         caller2<bool, const char*, size_t> rememberer;
229         bool forcequit;
230         const unsigned char * lowermap_rememberer;
231         unsigned char prev_map[256];
232
233         void CheckRehash()
234         {
235                 // See if anything changed
236                 if (!memcmp(prev_map, national_case_insensitive_map, sizeof(prev_map)))
237                         return;
238
239                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
240
241                 ServerInstance->RehashUsersAndChans();
242
243                 // The OnGarbageCollect() method in m_watch rebuilds the hashmap used by it
244                 Module* mod = ServerInstance->Modules->Find("m_watch.so");
245                 if (mod)
246                         mod->OnGarbageCollect();
247
248                 // Send a Request to m_spanningtree asking it to rebuild its hashmaps
249                 mod = ServerInstance->Modules->Find("m_spanningtree.so");
250                 if (mod)
251                 {
252                         Request req(this, mod, "rehash");
253                         req.Send();
254                 }
255         }
256
257  public:
258         ModuleNationalChars()
259                 : rememberer(ServerInstance->IsNick), lowermap_rememberer(national_case_insensitive_map)
260         {
261                 memcpy(prev_map, national_case_insensitive_map, sizeof(prev_map));
262         }
263
264         void init()
265         {
266                 memcpy(m_lower, rfc_case_insensitive_map, 256);
267                 national_case_insensitive_map = m_lower;
268
269                 ServerInstance->IsNick = &myhandler;
270
271                 Implementation eventlist[] = { I_OnRehash, I_On005Numeric };
272                 ServerInstance->Modules->Attach(eventlist, this, sizeof(eventlist)/sizeof(Implementation));
273                 OnRehash(NULL);
274         }
275
276         virtual void On005Numeric(std::string &output)
277         {
278                 std::string tmp(casemapping);
279                 tmp.insert(0, "CASEMAPPING=");
280                 SearchAndReplace(output, std::string("CASEMAPPING=rfc1459"), tmp);
281         }
282
283         virtual void OnRehash(User* user)
284         {
285                 ConfigTag* tag = ServerInstance->Config->ConfValue("nationalchars");
286                 charset = tag->getString("file");
287                 casemapping = tag->getString("casemapping", ServerConfig::CleanFilename(charset.c_str()));
288                 if (casemapping.find(' ') != std::string::npos)
289                         throw ModuleException("<nationalchars:casemapping> must not contain any spaces!");
290 #if defined _WIN32
291                 if (!ServerInstance->Config->StartsWithWindowsDriveLetter(charset))
292                         charset.insert(0, "./locales/");
293 #else
294                 if(charset[0] != '/')
295                         charset.insert(0, "../locales/");
296 #endif
297                 unsigned char * tables[8] = { m_additional, m_additionalMB, m_additionalUp, m_lower, m_upper, m_additionalUtf8, m_additionalUtf8range, m_additionalUtf8interval };
298                 if (!loadtables(charset, tables, 8, 5))
299                         throw ModuleException("The locale file failed to load. Check your log file for more information.");
300                 forcequit = tag->getBool("forcequit");
301                 CheckForceQuit("National character set changed");
302                 CheckRehash();
303         }
304
305         void CheckForceQuit(const char * message)
306         {
307                 if (!forcequit)
308                         return;
309
310                 for (LocalUserList::const_iterator iter = ServerInstance->Users->local_users.begin(); iter != ServerInstance->Users->local_users.end(); ++iter)
311                 {
312                         /* Fix by Brain: Dont quit UID users */
313                         User* n = *iter;
314                         if (!isdigit(n->nick[0]) && !ServerInstance->IsNick(n->nick.c_str(), ServerInstance->Config->Limits.NickMax))
315                                 ServerInstance->Users->QuitUser(n, message);
316                 }
317         }
318
319         virtual ~ModuleNationalChars()
320         {
321                 ServerInstance->IsNick = rememberer;
322                 national_case_insensitive_map = lowermap_rememberer;
323                 CheckForceQuit("National characters module unloaded");
324                 CheckRehash();
325         }
326
327         virtual Version GetVersion()
328         {
329                 return Version("Provides an ability to have non-RFC1459 nicks & support for national CASEMAPPING", VF_VENDOR | VF_COMMON, charset);
330         }
331
332         /*make an array to check against it 8bit characters a bit faster. Whether allowed or uppercase (for your needs).*/
333         void makereverse(unsigned char * from, unsigned  char * to, unsigned int cnt)
334         {
335                 memset(to, 0, cnt);
336                 for(unsigned char * n=from; (*n) && ((*n)<cnt) && (n<from+cnt); n++)
337                         to[*n] = 1;
338         }
339
340         /*so Bynets Unreal distribution stuff*/
341         bool loadtables(std::string filename, unsigned char ** tables, unsigned char cnt, char faillimit)
342         {
343                 std::ifstream ifs(filename.c_str());
344                 if (ifs.fail())
345                 {
346                         ServerInstance->Logs->Log("m_nationalchars",DEFAULT,"loadtables() called for missing file: %s", filename.c_str());
347                         return false;
348                 }
349
350                 for (unsigned char n=0; n< cnt; n++)
351                 {
352                         memset(tables[n], 0, 256);
353                 }
354
355                 memcpy(m_lower, rfc_case_insensitive_map, 256);
356
357                 for (unsigned char n = 0; n < cnt; n++)
358                 {
359                         if (loadtable(ifs, tables[n], 255) && (n < faillimit))
360                         {
361                                 ServerInstance->Logs->Log("m_nationalchars",DEFAULT,"loadtables() called for illegal file: %s (line %d)", filename.c_str(), n+1);
362                                 return false;
363                         }
364                 }
365
366                 makereverse(m_additional, m_reverse_additional, sizeof(m_additional));
367                 return true;
368         }
369
370         unsigned char symtoi(const char *t,unsigned char base)
371         /* base = 16 for hexadecimal, 10 for decimal, 8 for octal ;) */
372         {
373                 unsigned char tmp = 0, current;
374                 while ((*t) && (*t !=' ') && (*t != 13) && (*t != 10) && (*t != ','))
375                 {
376                         tmp *= base;
377                         current = ascii_case_insensitive_map[(unsigned char)*t];
378                         if (current >= 'a')
379                                 current = current - 'a' + 10;
380                         else
381                                 current = current - '0';
382                         tmp+=current;
383                         t++;
384                 }
385                 return tmp;
386         }
387
388         int loadtable(std::ifstream &ifs , unsigned char *chartable, unsigned int maxindex)
389         {
390                 std::string buf;
391                 getline(ifs, buf);
392
393                 unsigned int i = 0;
394                 int fail = 0;
395
396                 buf.erase(buf.find_last_not_of("\n") + 1);
397
398                 if (buf[0] == '.')      /* simple plain-text string after dot */
399                 {
400                         i = buf.size() - 1;
401
402                         if (i > (maxindex + 1))
403                                 i = maxindex + 1;
404
405                         memcpy(chartable, buf.c_str() + 1, i);
406                 }
407                 else
408                 {
409                         const char * p = buf.c_str();
410                         while (*p)
411                         {
412                                 if (i > maxindex)
413                                 {
414                                         fail = 1;
415                                         break;
416                                 }
417
418                                 if (*p != '\'')         /* decimal or hexadecimal char code */
419                                 {
420                                         if (*p == '0')
421                                         {
422                                                 if (p[1] == 'x')
423                                                          /* hex with the leading "0x" */
424                                                         chartable[i] = symtoi(p + 2, 16);
425                                                 else
426                                                         chartable[i] = symtoi(p + 1, 8);
427                                         }
428                                         /* hex form */
429                                         else if (*p == 'x')
430                                         {
431                                                 chartable[i] = symtoi(p + 1, 16);
432                                         }else    /* decimal form */
433                                         {
434                                                 chartable[i] = symtoi(p, 10);
435                                         }
436                                 }
437                                 else             /* plain-text char between '' */
438                                 {
439                                         if (*(p + 1) == '\\')
440                                         {
441                                                 chartable[i] = *(p + 2);
442                                                 p += 3;
443                                         }else
444                                         {
445                                                 chartable[i] = *(p + 1);
446                                                 p += 2;
447                                         }
448                                 }
449                                 while (*p && (*p != ',') && (*p != ' ') && (*p != 13) && (*p != 10))
450                                         p++;
451                                 while (*p && ((*p == ',') || (*p == ' ') || (*p == 13) || (*p == 10)))
452                                         p++;
453                                 i++;
454                         }
455                 }
456                 return fail;
457         }
458 };
459
/* Hands ModuleNationalChars to the MODULE_INIT macro - presumably the framework's module entry point; confirm against the macro's definition */
MODULE_INIT(ModuleNationalChars)