]> git.netwichtig.de Git - user/henk/code/inspircd.git/blob - src/modules/m_nationalchars.cpp
Remove InspIRCd* parameters and fields
[user/henk/code/inspircd.git] / src / modules / m_nationalchars.cpp
1 /*       +------------------------------------+
2  *       | Inspire Internet Relay Chat Daemon |
3  *       +------------------------------------+
4  *
5  *  InspIRCd: (C) 2002-2009 InspIRCd Development Team
6  * See: http://wiki.inspircd.org/Credits
7  *
8  * This program is free but copyrighted software; see
9  *          the file COPYING for details.
10  *
11  * ---------------------------------------------------
12  */
13
14 /* Contains a code of Unreal IRCd + Bynets patch ( http://www.unrealircd.com/ and http://www.bynets.org/ )
15    Original patch is made by Dmitry "Killer{R}" Kononko. ( http://killprog.com/ )
16    Changed at 2008-06-15 - 2009-02-11
17    by Chernov-Phoenix Alexey (Phoenix@RusNet) mailto:phoenix /email address separator/ pravmail.ru */
18
19 #include "inspircd.h"
20 #include "caller.h"
21 #include <fstream>
22
23 /* $ModDesc: Provides an ability to have non-RFC1459 nicks & support for national CASEMAPPING */
24
25 class lwbNickHandler : public HandlerBase2<bool, const char*, size_t>
26 {
27  public:
28         lwbNickHandler() { }
29         virtual ~lwbNickHandler() { }
30         virtual bool Call(const char*, size_t);
31 };
32
33                                                                  /*,m_reverse_additionalUp[256];*/
/* File-local lookup tables consulted by lwbNickHandler::Call(). */

/* Indexed by byte value; non-zero marks an additionally allowed 8bit character. */
static unsigned char m_reverse_additional[256];
/* Double-byte ranges, four bytes per entry: first-byte min, first-byte max, second-byte min, second-byte max. */
static unsigned char m_additionalMB[256];
/* Plain enumeration of allowed UTF-8 characters, stored back to back. */
static unsigned char m_additionalUtf8[256];
/* UTF-8 ranges: a start character immediately followed by a one-byte range length. */
static unsigned char m_additionalUtf8range[256];
/* UTF-8 intervals: a start character immediately followed by an end character. */
static unsigned char m_additionalUtf8interval[256];
35
/** Verify the continuation bytes of a UTF-8 sequence.
 *
 * The return type is explicitly `signed char`: plain `char` is unsigned on
 * some platforms, where returning -1 would yield 255 and every caller's
 * comparison against -1 would silently stop working.
 *
 * @param mb Pointer to the first byte following the lead byte
 * @param cnt Number of continuation bytes that must follow
 * @return cnt + 1 (the full sequence length including the lead byte) when
 * all cnt bytes have the form 10xxxxxx, otherwise -1. A NUL byte is not a
 * continuation byte, so the scan stops at a string terminator.
 */
signed char utf8checkrest(unsigned char * mb, unsigned char cnt)
{
	for (unsigned char * tmp = mb; tmp < mb + cnt; tmp++)
	{
		/* & is faster! -- Phoenix (char & b11000000 == b10000000) */
		if ((*tmp & 0xC0) != 0x80)
			return -1;
	}
	return cnt + 1;
}


/** Determine the byte length of the UTF-8 character starting at mb.
 *
 * @param mb Pointer to the lead byte of a NUL-terminated byte sequence
 * @return 1 to 4 for a well-formed sequence; -1 if *mb is NUL, is not a
 * valid lead byte, or is not followed by the required continuation bytes.
 * Returned as `signed char` so that -1 is representable on every platform.
 */
signed char utf8size(unsigned char * mb)
{
	if (!*mb)
		return -1;
	/* 0xxxxxxx: single byte */
	if (!(*mb & 0x80))
		return 1;
	/* 110xxxxx: lead byte of a two byte sequence */
	if ((*mb & 0xE0) == 0xC0)
		return utf8checkrest(mb + 1, 1);
	/* 1110xxxx: lead byte of a three byte sequence */
	if ((*mb & 0xF0) == 0xE0)
		return utf8checkrest(mb + 1, 2);
	/* 11110xxx: lead byte of a four byte sequence */
	if ((*mb & 0xF8) == 0xF0)
		return utf8checkrest(mb + 1, 3);
	return -1;
}
62
63
64 /* Conditions added */
65 bool lwbNickHandler::Call(const char* n, size_t max)
66 {
67         if (!n || !*n)
68                 return false;
69
70         unsigned int p = 0;
71         for (const char* i = n; *i; i++, p++)
72         {
73                 /* 1. Multibyte encodings support:  */
74                 /* 1.1. 16bit char. areas, e.g. chinese:*/
75
76                 /* if current character is the last, we DO NOT check it against multibyte table */
77                 /* if there are mbtable ranges, use ONLY them. No 8bit at all */
78                 if (i[1] && m_additionalMB[0])
79                 {
80                         /* otherwise let's take a look at the current character and the following one */
81                         bool found = false;
82                         for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
83                         {
84                                 if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
85                                 {
86                                         /* multibyte range character found */
87                                         i++;
88                                         p++;
89                                         found = true;
90                                         break;
91                                 }
92                         }
93                         if (found)
94                                 /* next char! */
95                                 continue;
96                         else
97                                 /* there are ranges, but incorrect char (8bit?) given, sorry */
98                                 return false;
99                 }
100
101                 /* 2. 8bit character support */
102                 if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
103                         /* "A"-"}" can occur anywhere in a nickname */
104                         continue;
105
106                 if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
107                         /* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
108                         continue;
109
110                 /* 3.1. Check against a simple UTF-8 characters enumeration */
111                 int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
112                 /* do check only if current multibyte character is valid UTF-8 only */
113                 if (ncursize != -1)
114                 {
115                         bool found = false;
116                         for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
117                         {
118                                 cursize = utf8size(mb);
119                                 /* Size differs? Pick the next! */
120                                 if (cursize != ncursize)
121                                         continue;
122
123                                 if (!strncmp(i, (char *)mb, cursize))
124                                 {
125                                         i += cursize - 1;
126                                         p += cursize - 1;
127                                         found = true;
128                                         break;
129                                 }
130                         }
131                         if (found)
132                                 continue;
133
134                         /* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
135                         found = false;
136                         for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
137                         {
138                                 cursize = utf8size(mb);
139                                 /* Size differs (or lengthbyte is zero)? Pick the next! */
140                                 if ((cursize != ncursize) || (!mb[cursize]))
141                                         continue;
142
143                                 unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
144                                 strncpy((char* ) uright, (char *) mb, cursize);
145
146                                 for (int temp = cursize - 1; (temp >= 0) && range; --temp)
147                                 {
148                                         /* all but the first char are 64-based */
149                                         if (temp)
150                                         {
151                                                 char part64 = range & 63; /* i.e. % 64 */
152                                                 /* handle carrying over */
153                                                 if (uright[temp] + part64 - 1 > 191)
154                                                 {
155                                                         uright[temp] -= 64;
156                                                         range += 64;
157                                                 }
158                                                 uright[temp] += part64;
159                                                 range >>= 6; /* divide it on a 64 */
160                                         }
161                                         /* the first char of UTF-8 doesn't follow the rule */
162                                         else
163                                         {
164                                                 uright[temp] += range;
165                                         }
166                                 }
167
168                                 if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
169                                 {
170                                         i += cursize - 1;
171                                         p += cursize - 1;
172                                         found = true;
173                                         break;
174                                 }
175                         }
176                         if (found)
177                                 continue;
178
179                         /* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
180                         found = false;
181                         for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
182                                 && (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
183                         {
184                                 cursize = utf8size(mb);
185                                 cursize2= utf8size(mb+cursize);
186
187                                 int minlen  = cursize  > ncursize ? ncursize : cursize;
188                                 int minlen2 = cursize2 > ncursize ? ncursize : cursize2;
189
190                                 unsigned char* uright = mb + cursize;
191
192                                 if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
193                                 {
194                                         i += cursize - 1;
195                                         p += cursize - 1;
196                                         found = true;
197                                         break;
198                                 }
199                         }
200                         if (found)
201                                 continue;
202                 }
203
204                 /* invalid character! abort */
205                 return false;
206         }
207
208         /* too long? or not -- pointer arithmetic rocks */
209         return (p < max);
210 }
211
212
213 class ModuleNationalChars : public Module
214 {
215  private:
216         lwbNickHandler myhandler;
217         std::string charset, casemapping;
218         unsigned char m_additional[256], m_additionalUp[256], m_lower[256], m_upper[256];
219         caller2<bool, const char*, size_t> rememberer;
220         bool forcequit;
221         const unsigned char * lowermap_rememberer;
222
223  public:
224         ModuleNationalChars() : rememberer(ServerInstance->IsNick)
225         {
226                 lowermap_rememberer = national_case_insensitive_map;
227                 memcpy(m_lower, rfc_case_insensitive_map, 256);
228                 national_case_insensitive_map = m_lower;
229
230                 ServerInstance->IsNick = &myhandler;
231
232                 Implementation eventlist[] = { I_OnRehash, I_On005Numeric };
233                 ServerInstance->Modules->Attach(eventlist, this, 2);
234                 OnRehash(NULL);
235         }
236
237         virtual void On005Numeric(std::string &output)
238         {
239                 std::string tmp(casemapping);
240                 tmp.insert(0, "CASEMAPPING=");
241                 SearchAndReplace(output, std::string("CASEMAPPING=rfc1459"), tmp);
242         }
243
244         virtual void OnRehash(User* user)
245         {
246                 ConfigReader* conf = new ConfigReader;
247                 charset = conf->ReadValue("nationalchars", "file", 0);
248                 casemapping = conf->ReadValue("nationalchars", "casemapping", charset, 0, false);
249                 charset.insert(0, "../locales/");
250                 unsigned char * tables[8] = { m_additional, m_additionalMB, m_additionalUp, m_lower, m_upper, m_additionalUtf8, m_additionalUtf8range, m_additionalUtf8interval };
251                 loadtables(charset, tables, 8, 5);
252                 forcequit = conf->ReadFlag("nationalchars", "forcequit", 0);
253                 CheckForceQuit("National character set changed");
254                 delete conf;
255         }
256
257         void CheckForceQuit(const char * message)
258         {
259                 if (!forcequit)
260                         return;
261
262                 for (std::vector<User*>::iterator iter = ServerInstance->Users->local_users.begin(); iter != ServerInstance->Users->local_users.end(); ++iter)
263                 {
264                         /* Fix by Brain: Dont quit UID users */
265                         User* n = *iter;
266                         if (!isdigit(n->nick[0]) && !ServerInstance->IsNick(n->nick.c_str(), ServerInstance->Config->Limits.NickMax))
267                                 ServerInstance->Users->QuitUser(n, message);
268                 }
269         }
270
271         virtual ~ModuleNationalChars()
272         {
273                 ServerInstance->IsNick = rememberer;
274                 national_case_insensitive_map = lowermap_rememberer;
275                 CheckForceQuit("National characters module unloaded");
276         }
277
278         virtual Version GetVersion()
279         {
280                 return Version("Provides an ability to have non-RFC1459 nicks & support for national CASEMAPPING",VF_COMMON,API_VERSION);
281         }
282
283         /*make an array to check against it 8bit characters a bit faster. Whether allowed or uppercase (for your needs).*/
284         void makereverse(unsigned char * from, unsigned  char * to, unsigned int cnt)
285         {
286                 memset(to, 0, cnt);
287                 for(unsigned char * n=from; (*n) && ((*n)<cnt) && (n<from+cnt); n++)
288                         to[*n] = 1;
289         }
290
291         /*so Bynets Unreal distribution stuff*/
292         void loadtables(std::string filename, unsigned char ** tables, unsigned char cnt, char faillimit)
293         {
294                 std::ifstream ifs(filename.c_str());
295                 if (ifs.fail())
296                 {
297                         ServerInstance->Logs->Log("m_nationalchars",DEFAULT,"loadtables() called for missing file: %s", filename.c_str());
298                         return;
299                 }
300
301                 for (unsigned char n=0; n< cnt; n++)
302                 {
303                         memset(tables[n], 0, 256);
304                 }
305
306                 memcpy(m_lower, rfc_case_insensitive_map, 256);
307
308                 for (unsigned char n = 0; n < cnt; n++)
309                 {
310                         if (loadtable(ifs, tables[n], 255) && (n < faillimit))
311                         {
312                                 ServerInstance->Logs->Log("m_nationalchars",DEFAULT,"loadtables() called for illegal file: %s (line %d)", filename.c_str(), n+1);
313                                 return;
314                         }
315                 }
316
317                 makereverse(m_additional, m_reverse_additional, sizeof(m_additional));
318         }
319
320         unsigned char symtoi(const char *t,unsigned char base)
321         /* base = 16 for hexadecimal, 10 for decimal, 8 for octal ;) */
322         {
323                 unsigned char tmp = 0, current;
324                 while ((*t) && (*t !=' ') && (*t != 13) && (*t != 10) && (*t != ','))
325                 {
326                         tmp *= base;
327                         current = ascii_case_insensitive_map[(unsigned char)*t];
328                         if (current >= 'a')
329                                 current = current - 'a' + 10;
330                         else
331                                 current = current - '0';
332                         tmp+=current;
333                         t++;
334                 }
335                 return tmp;
336         }
337
338         int loadtable(std::ifstream &ifs , unsigned char *chartable, unsigned int maxindex)
339         {
340                 std::string buf;
341                 getline(ifs, buf);
342
343                 unsigned int i = 0;
344                 int fail = 0;
345
346                 buf.erase(buf.find_last_not_of("\n") + 1);
347
348                 if (buf[0] == '.')      /* simple plain-text string after dot */
349                 {
350                         i = buf.size() - 1;
351
352                         if (i > (maxindex + 1))
353                                 i = maxindex + 1;
354
355                         memcpy(chartable, buf.c_str() + 1, i);
356                 }
357                 else
358                 {
359                         const char * p = buf.c_str();
360                         while (*p)
361                         {
362                                 if (i > maxindex)
363                                 {
364                                         fail = 1;
365                                         break;
366                                 }
367
368                                 if (*p != '\'')         /* decimal or hexadecimal char code */
369                                 {
370                                         if (*p == '0')
371                                         {
372                                                 if (p[1] == 'x')
373                                                          /* hex with the leading "0x" */
374                                                         chartable[i] = symtoi(p + 2, 16);
375                                                 else
376                                                         chartable[i] = symtoi(p + 1, 8);
377                                         }
378                                         /* hex form */
379                                         else if (*p == 'x')
380                                         {
381                                                 chartable[i] = symtoi(p + 1, 16);
382                                         }else    /* decimal form */
383                                         {
384                                                 chartable[i] = symtoi(p, 10);
385                                         }
386                                 }
387                                 else             /* plain-text char between '' */
388                                 {
389                                         if (*(p + 1) == '\\')
390                                         {
391                                                 chartable[i] = *(p + 2);
392                                                 p += 3;
393                                         }else
394                                         {
395                                                 chartable[i] = *(p + 1);
396                                                 p += 2;
397                                         }
398                                 }
399                                 while (*p && (*p != ',') && (*p != ' ') && (*p != 13) && (*p != 10))
400                                         p++;
401                                 while (*p && ((*p == ',') || (*p == ' ') || (*p == 13) || (*p == 10)))
402                                         p++;
403                                 i++;
404                         }
405                 }
406                 return fail;
407         }
408 };
409
410 MODULE_INIT(ModuleNationalChars)