1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.3 2007/06/26 11:16:54 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
10 Written by Philip Hazel
11 Copyright (c) 1997-2007 University of Cambridge
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
54 /* Macro that decides whether a character should be output as a literal or in
55 hexadecimal. We don't use isprint() because that can vary from system to system
56 (even without the use of locales) and we want the output always to be the same,
57 for testing purposes. This macro is used in pcretest as well as in this file. */
/* Note: PRINTABLE expands its argument twice, so the argument must not have
side effects. */
59 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
61 /* The table of operator names. It is indexed by opcode value (the code below
looks entries up with OP_names[*code]); the entries themselves are supplied by
the OP_NAME_LIST macro, which is not defined in this file. */
63 static const char *OP_names[] = { OP_NAME_LIST };
67 /*************************************************
68 * Print single- or multi-byte character *
69 *************************************************/
/* Writes the character that starts at ptr to f. A byte accepted by PRINTABLE()
is written as itself and any other byte as \xhh. When utf8 is set and the first
byte has both of its top two bits set, the continuation bytes that follow are
folded into a single value, which is written as \x{...}.

The return statements are not visible in this listing. Callers use the result
as "code += 1 + print_char(...)", i.e. as the number of bytes consumed beyond
the first one.

Arguments:
  f      the stream to write to
  ptr    points to the first byte of the character
  utf8   TRUE if multi-byte UTF-8 sequences are to be decoded
*/
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
77 utf8 = utf8; /* Avoid compiler warning */
78 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
/* Not in UTF-8 mode, or the byte does not have both top bits set (0xc0), so it
is treated as a single-byte character. */
82 if (!utf8 || (c & 0xc0) != 0xc0)
84 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
90 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
/* Keep only the data bits of the lead byte and move them to the top of the
value being assembled. */
92 c = (c & _pcre_utf8_table3[a]) << s;
93 for (i = 1; i <= a; i++)
95 /* This is a check for malformed UTF-8; it should only occur if the sanity
96 check has been turned off. Rather than swallow random bytes, just stop if
97 we hit a bad one. Print it with \X instead of \x as an indication. */
99 if ((ptr[i] & 0xc0) != 0x80)
101 fprintf(f, "\\X{%x}", c);
/* Merge the six data bits of this continuation byte into the value. */
108 c |= (ptr[i] & 0x3f) << s;
/* A decoded value below 128 is shown in the two-digit \xhh form rather than
the braced form. */
110 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
118 /*************************************************
119 * Find Unicode property name *
120 *************************************************/
/* Returns the name of the Unicode property identified by ptype and pvalue. The
_pcre_utt table is searched from its last entry down to its first; if no entry
has both the given type and the given value, the string "??" is returned.

The listing shows two return paths. The second one (which always yields "??")
appears to belong to an alternative build branch whose preprocessor lines are
not visible here; it only exists to keep the arguments "used".

Arguments:
  ptype    the property type to look for
  pvalue   the property value to look for
*/
123 get_ucpname(int ptype, int pvalue)
/* Start at the last valid entry. _pcre_utt_size is assumed to be the number of
entries in _pcre_utt (its definition is not in this file - confirm), so index
_pcre_utt_size itself would be one element past the end of the table. */
127 for (i = _pcre_utt_size - 1; i >= 0; i--)
129 if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
/* i is -1 here if the loop ran off the front of the table without a match. */
131 return (i >= 0)? _pcre_utt[i].name : "??";
133 /* It gets harder and harder to shut off unwanted compiler warnings. */
134 ptype = ptype * pvalue;
135 return (ptype == pvalue)? "??" : "??";
141 /*************************************************
142 * Print compiled regex *
143 *************************************************/
145 /* Make this function work for a regex with integers in either byte order.
146 However, we assume that what we are passed is a compiled regex. The
147 print_lengths flag controls whether offsets and lengths of items are printed.
148 They can be turned off from pcretest so that automatic tests on bytecode can be
149 written that do not depend on the value of LINK_SIZE. */
/* Writes a listing of the compiled pattern external_re to f. Items are handled
one after another: each item's name is taken from OP_names, any operand data is
printed after it, and the code pointer is then advanced to the next item.

Arguments:
  external_re    the compiled pattern; it is accessed here as a real_pcre
  f              the stream that receives the listing
  print_lengths  when FALSE, link values such as GET(code, 1) are replaced by
                   padding instead of being printed
*/
152 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
154 real_pcre *re = (real_pcre *)external_re;
155 uschar *codestart, *code;
158 unsigned int options = re->options;
159 int offset = re->name_table_offset;
160 int count = re->name_count;
161 int size = re->name_entry_size;
/* A magic number that does not match is taken to mean that the pattern was
compiled with the opposite byte order, so the header fields read above are
byte-swapped (two bytes for offset, count and size; four bytes for options). */
163 if (re->magic_number != MAGIC_NUMBER)
165 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
166 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
167 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
168 options = ((options << 24) & 0xff000000) |
169 ((options << 8) & 0x00ff0000) |
170 ((options >> 8) & 0x0000ff00) |
171 ((options >> 24) & 0x000000ff);
/* The bytecode follows the name table, which lies offset bytes from the start
of the real_pcre block and holds count entries of size bytes each. */
174 code = codestart = (uschar *)re + offset + count * size;
175 utf8 = (options & PCRE_UTF8) != 0;
/* Position of the current item, relative to the start of the bytecode. */
184 fprintf(f, "%3d ", (int)(code - codestart));
191 fprintf(f, " %s\n", OP_names[*code]);
192 fprintf(f, "------------------------------------------------------------------\n");
196 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
/* The "1 +" steps over the first byte of the character; print_char's result
is used as the count of any further bytes it consumed. */
204 code += 1 + print_char(f, code, utf8);
206 while (*code == OP_CHAR);
215 code += 1 + print_char(f, code, utf8);
217 while (*code == OP_CHARNC);
223 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
224 else fprintf(f, " ");
225 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
237 case OP_ASSERTBACK_NOT:
242 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
243 else fprintf(f, " ");
244 fprintf(f, "%s", OP_names[*code]);
248 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
254 fprintf(f, " Cond recurse any");
256 fprintf(f, " Cond recurse %d", c);
260 fprintf(f, " Cond def");
279 case OP_TYPEMINQUERY:
280 case OP_TYPEPOSQUERY:
/* Opcodes from OP_TYPESTAR upwards repeat a character type, whose own opcode
is in code[1]; the others repeat a literal character that starts at code+1. */
282 if (*code >= OP_TYPESTAR)
284 fprintf(f, "%s", OP_names[code[1]]);
285 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
287 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
291 else extra = print_char(f, code+1, utf8);
292 fprintf(f, "%s", OP_names[*code]);
300 extra = print_char(f, code+3, utf8);
/* Every form except the exact count is printed with a lower bound of 0. */
302 if (*code != OP_EXACT) fprintf(f, "0,");
303 fprintf(f, "%d}", GET2(code,1));
304 if (*code == OP_MINUPTO) fprintf(f, "?");
305 else if (*code == OP_POSUPTO) fprintf(f, "+");
312 fprintf(f, " %s", OP_names[code[3]]);
313 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
315 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
319 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
320 fprintf(f, "%d}", GET2(code,1));
321 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
322 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
327 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
328 else fprintf(f, " [^\\x%02x]", c);
341 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
342 else fprintf(f, " [^\\x%02x]", c);
343 fprintf(f, "%s", OP_names[*code]);
351 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
352 else fprintf(f, " [^\\x%02x]{", c);
353 if (*code != OP_NOTEXACT) fprintf(f, "0,");
354 fprintf(f, "%d}", GET2(code,1));
355 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
356 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
360 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
361 else fprintf(f, " ");
362 fprintf(f, "%s", OP_names[*code]);
366 fprintf(f, " \\%d", GET2(code,1));
/* A repeat may follow the back reference, so point ccode at the next item and
share the repeat-printing code used for classes. */
367 ccode = code + _pcre_OP_lengths[*code];
368 goto CLASS_REF_REPEAT;
371 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
372 GET(code, 2 + LINK_SIZE));
377 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
380 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
381 having this code always here, and it makes it less messy without all those
393 if (*code == OP_XCLASS)
395 extra = GET(code, 1);
396 ccode = code + LINK_SIZE + 1;
397 printmap = (*ccode & XCL_MAP) != 0;
398 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
406 /* Print a bit map */
410 for (i = 0; i < 256; i++)
412 if ((ccode[i/8] & (1 << (i&7))) != 0)
/* Find the first character after i whose bit is clear, so that a run of
consecutive set bits can be printed as a range. */
415 for (j = i+1; j < 256; j++)
416 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
417 if (i == '-' || i == ']') fprintf(f, "\\");
418 if (PRINTABLE(i)) fprintf(f, "%c", i);
419 else fprintf(f, "\\x%02x", i);
422 if (j != i + 1) fprintf(f, "-");
423 if (j == '-' || j == ']') fprintf(f, "\\");
424 if (PRINTABLE(j)) fprintf(f, "%c", j);
425 else fprintf(f, "\\x%02x", j);
433 /* For an XCLASS there is always some additional data */
435 if (*code == OP_XCLASS)
438 while ((ch = *ccode++) != XCL_END)
442 int ptype = *ccode++;
443 int pvalue = *ccode++;
444 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
446 else if (ch == XCL_NOTPROP)
448 int ptype = *ccode++;
449 int pvalue = *ccode++;
450 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
/* Characters in the extra data are always printed in UTF-8 mode (TRUE). */
454 ccode += 1 + print_char(f, ccode, TRUE);
458 ccode += 1 + print_char(f, ccode, TRUE);
464 /* Indicate a non-UTF8 class which was created by negation */
466 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
468 /* Handle repeats after a class or a back reference */
479 fprintf(f, "%s", OP_names[*ccode]);
480 extra += _pcre_OP_lengths[*ccode];
/* A maximum of zero is printed as an open-ended range. */
487 if (max == 0) fprintf(f, "{%d,}", min);
488 else fprintf(f, "{%d,%d}", min, max);
489 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
490 extra += _pcre_OP_lengths[*ccode];
493 /* Do nothing if it's not a repeat; this code stops picky compilers
494 warning about the lack of a default code path. */
502 /* Anything else is just an item with no data */
505 fprintf(f, " %s", OP_names[*code]);
/* Step over the current item: its fixed length from _pcre_OP_lengths plus any
extra bytes recorded for it above. */
509 code += _pcre_OP_lengths[*code] + extra;
514 /* End of pcre_printint.src */