1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.8 2007/11/12 13:02:20 nm4 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
54 /* A number of things vary for Windows builds. Originally, pcretest opened its
55 input and output without "b"; then I was told that "b" was needed in some
56 environments, so it was added for release 5.0 to both the input and output. (It
57 makes no difference on Unix-like systems.) Later I was told that it is wrong
58 for the input on Windows. I've now abstracted the modes into two macros that
59 are set here, to make it easier to fiddle with them, and removed "b" from the
60 input mode under Windows. */
62 #if defined(_WIN32) || defined(WIN32)
63 #include <io.h> /* For _setmode() */
64 #include <fcntl.h> /* For _O_BINARY */
65 #define INPUT_MODE "r"
66 #define OUTPUT_MODE "wb"
69 #include <sys/time.h> /* These two includes are needed */
70 #include <sys/resource.h> /* for setrlimit(). */
71 #define INPUT_MODE "rb"
72 #define OUTPUT_MODE "wb"
76 /* We have to include pcre_internal.h because we need the internal info for
77 displaying the results of pcre_study() and we also need to know about the
78 internal macros, structures, and other internal data values; pcretest has
79 "inside information" compared to a program that strictly follows the PCRE API.
81 Although pcre_internal.h does itself include pcre.h, we explicitly include it
82 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
83 appropriately for an application, not for building PCRE. */
86 #include "pcre_internal.h"
88 /* We need access to the data tables that PCRE uses. So as not to have to keep
89 two copies, we include the source file here, changing the names of the external
90 symbols to prevent clashes. */
92 #define _pcre_utf8_table1 utf8_table1
93 #define _pcre_utf8_table1_size utf8_table1_size
94 #define _pcre_utf8_table2 utf8_table2
95 #define _pcre_utf8_table3 utf8_table3
96 #define _pcre_utf8_table4 utf8_table4
98 #define _pcre_utt_size utt_size
99 #define _pcre_utt_names utt_names
100 #define _pcre_OP_lengths OP_lengths
102 #include "pcre_tables.c"
104 /* We also need the pcre_printint() function for printing out compiled
105 patterns. This function is in a separate file so that it can be included in
106 pcre_compile.c when that module is compiled with debugging enabled.
108 The definition of the macro PRINTABLE, which determines whether to print an
109 output character as-is or as a hex value when showing compiled patterns, is
110 contained in this file. We use it here also, in cases when the locale has not
111 been explicitly changed, so as to get consistent output from systems that
112 differ in their output from isprint() even in the "C" locale. */
114 #include "pcre_printint.src"
116 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
119 /* It is possible to compile this test program without including support for
120 testing the POSIX interface, though this is not available via the standard
124 #include "pcreposix.h"
127 /* It is also possible, for the benefit of the version currently imported into
128 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
129 interface to the DFA matcher (NODFA), and without the doublecheck of the old
130 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
131 UTF8 support if PCRE is built without it. */
140 /* Other parameters */
142 #ifndef CLOCKS_PER_SEC
144 #define CLOCKS_PER_SEC CLK_TCK
146 #define CLOCKS_PER_SEC 100
150 /* This is the default loop count for timing. */
152 #define LOOPREPEAT 500000
154 /* Static variables */
156 static FILE *outfile;
157 static int log_store = 0;
158 static int callout_count;
159 static int callout_extra;
160 static int callout_fail_count;
161 static int callout_fail_id;
162 static int debug_lengths;
163 static int first_callout;
164 static int locale_set = 0;
165 static int show_malloc;
167 static size_t gotten_store;
169 /* The buffers grow automatically if very long input lines are encountered. */
171 static int buffer_size = 50000;
172 static uschar *buffer = NULL;
173 static uschar *dbuffer = NULL;
174 static uschar *pbuffer = NULL;
178 /*************************************************
179 * Read or extend an input line *
180 *************************************************/
182 /* Input lines are read into buffer, but both patterns and data lines can be
183 continued over multiple input lines. In addition, if the buffer fills up, we
184 want to automatically expand it so as to be able to handle extremely large
185 lines that are needed for certain stress tests. When the input buffer is
186 expanded, the other two buffers must also be expanded likewise, and the
187 contents of pbuffer, which are a copy of the input for callouts, must be
188 preserved (for when expansion happens for a data line). This is not the most
189 optimal way of handling this, but hey, this is just a test program!
193 start where in buffer to start (this *must* be within buffer)
195 Returns: pointer to the start of new data
196 could be a copy of start, or could be moved
197 NULL if no data read and EOF reached
201 extend_inputline(FILE *f, uschar *start)
203 uschar *here = start;
207 int rlen = buffer_size - (here - buffer);
212 if (fgets((char *)here, rlen, f) == NULL)
213 return (here == start)? NULL : start;
214 dlen = (int)strlen((char *)here);
215 if (dlen > 0 && here[dlen - 1] == '\n') return start;
221 int new_buffer_size = 2*buffer_size;
222 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
223 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
224 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
226 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
228 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
232 memcpy(new_buffer, buffer, buffer_size);
233 memcpy(new_pbuffer, pbuffer, buffer_size);
235 buffer_size = new_buffer_size;
237 start = new_buffer + (start - buffer);
238 here = new_buffer + (here - buffer);
245 dbuffer = new_dbuffer;
246 pbuffer = new_pbuffer;
250 return NULL; /* Control never gets here */
259 /*************************************************
260 * Read number from string *
261 *************************************************/
263 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
264 around with conditional compilation, just do the job by hand. It is only used
265 for unpicking arguments, so just keep it simple.
268 str string to be converted
269 endptr where to put the end pointer
271 Returns: the unsigned long
275 get_value(unsigned char *str, unsigned char **endptr)
278 while(*str != 0 && isspace(*str)) str++;
279 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
287 /*************************************************
288 * Convert UTF-8 string to value *
289 *************************************************/
291 /* This function takes one or more bytes that represent a UTF-8 character,
292 and returns the value of the character.
295 utf8bytes a pointer to the byte vector
296 vptr a pointer to an int to receive the value
298 Returns: > 0 => the number of bytes consumed
299 -6 to 0 => malformed UTF-8 character at offset = (-return)
305 utf82ord(unsigned char *utf8bytes, int *vptr)
307 int c = *utf8bytes++;
311 for (i = -1; i < 6; i++) /* i is number of additional bytes */
313 if ((d & 0x80) == 0) break;
317 if (i == -1) { *vptr = c; return 1; } /* ascii character */
318 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
320 /* i now has a value in the range 1-5 */
323 d = (c & utf8_table3[i]) << s;
325 for (j = 0; j < i; j++)
328 if ((c & 0xc0) != 0x80) return -(j+1);
330 d |= (c & 0x3f) << s;
333 /* Check that encoding was the correct unique one */
335 for (j = 0; j < utf8_table1_size; j++)
336 if (d <= utf8_table1[j]) break;
337 if (j != i) return -(i+1);
349 /*************************************************
350 * Convert character value to UTF-8 *
351 *************************************************/
353 /* This function takes an integer value in the range 0 - 0x7fffffff
354 and encodes it as a UTF-8 character in 1 to 6 bytes.
357 cvalue the character value
358 utf8bytes pointer to buffer for result - at least 6 bytes long
360 Returns: number of characters placed in the buffer
366 ord2utf8(int cvalue, uschar *utf8bytes)
369 for (i = 0; i < utf8_table1_size; i++)
370 if (cvalue <= utf8_table1[i]) break;
372 for (j = i; j > 0; j--)
374 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
377 *utf8bytes = utf8_table2[i] | cvalue;
385 /*************************************************
386 * Print character string *
387 *************************************************/
389 /* Character string printing function. Must handle UTF-8 strings in utf8
390 mode. Yields number of characters printed. If handed a NULL file, just counts
391 chars without printing. */
393 static int pchars(unsigned char *p, int length, FILE *f)
403 int rc = utf82ord(p, &c);
405 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
411 if (f != NULL) fprintf(f, "%c", c);
417 if (f != NULL) fprintf(f, "\\x{%02x}", c);
418 yield += (n <= 0x000000ff)? 2 :
419 (n <= 0x00000fff)? 3 :
420 (n <= 0x0000ffff)? 4 :
421 (n <= 0x000fffff)? 5 : 6;
428 /* Not UTF-8, or malformed UTF-8 */
433 if (f != NULL) fprintf(f, "%c", c);
438 if (f != NULL) fprintf(f, "\\x%02x", c);
448 /*************************************************
450 *************************************************/
452 /* Called from PCRE as a result of the (?C) item. We print out where we are in
453 the match. Yield zero unless more callouts than the fail count, or the callout
456 static int callout(pcre_callout_block *cb)
458 FILE *f = (first_callout | callout_extra)? outfile : NULL;
459 int i, pre_start, post_start, subject_length;
463 fprintf(f, "Callout %d: last capture = %d\n",
464 cb->callout_number, cb->capture_last);
466 for (i = 0; i < cb->capture_top * 2; i += 2)
468 if (cb->offset_vector[i] < 0)
469 fprintf(f, "%2d: <unset>\n", i/2);
472 fprintf(f, "%2d: ", i/2);
473 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
474 cb->offset_vector[i+1] - cb->offset_vector[i], f);
480 /* Re-print the subject in canonical form, the first time or if giving full
481 details. On subsequent calls in the same match, we use pchars just to find the
482 printed lengths of the substrings. */
484 if (f != NULL) fprintf(f, "--->");
486 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
487 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
488 cb->current_position - cb->start_match, f);
490 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
492 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
493 cb->subject_length - cb->current_position, f);
495 if (f != NULL) fprintf(f, "\n");
497 /* Always print appropriate indicators, with callout number if not already
498 shown. For automatic callouts, show the pattern offset. */
500 if (cb->callout_number == 255)
502 fprintf(outfile, "%+3d ", cb->pattern_position);
503 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
507 if (callout_extra) fprintf(outfile, " ");
508 else fprintf(outfile, "%3d ", cb->callout_number);
511 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
512 fprintf(outfile, "^");
516 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
517 fprintf(outfile, "^");
520 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
521 fprintf(outfile, " ");
523 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
524 pbuffer + cb->pattern_position);
526 fprintf(outfile, "\n");
529 if (cb->callout_data != NULL)
531 int callout_data = *((int *)(cb->callout_data));
532 if (callout_data != 0)
534 fprintf(outfile, "Callout data = %d\n", callout_data);
539 return (cb->callout_number != callout_fail_id)? 0 :
540 (++callout_count >= callout_fail_count)? 1 : 0;
544 /*************************************************
545 * Local malloc functions *
546 *************************************************/
548 /* Alternative malloc function, to test functionality and show the size of the
551 static void *new_malloc(size_t size)
553 void *block = malloc(size);
556 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
560 static void new_free(void *block)
563 fprintf(outfile, "free %p\n", block);
568 /* For recursion malloc/free, to test stacking calls */
570 static void *stack_malloc(size_t size)
572 void *block = malloc(size);
574 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
578 static void stack_free(void *block)
581 fprintf(outfile, "stack_free %p\n", block);
586 /*************************************************
587 * Call pcre_fullinfo() *
588 *************************************************/
590 /* Get one piece of information from the pcre_fullinfo() function */
592 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
595 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
596 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
601 /*************************************************
602 * Byte flipping function *
603 *************************************************/
605 static unsigned long int
606 byteflip(unsigned long int value, int n)
608 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
609 return ((value & 0x000000ff) << 24) |
610 ((value & 0x0000ff00) << 8) |
611 ((value & 0x00ff0000) >> 8) |
612 ((value & 0xff000000) >> 24);
618 /*************************************************
619 * Check match or recursion limit *
620 *************************************************/
623 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
624 int start_offset, int options, int *use_offsets, int use_size_offsets,
625 int flag, unsigned long int *limit, int errnumber, const char *msg)
632 extra->flags |= flag;
638 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
639 use_offsets, use_size_offsets);
641 if (count == errnumber)
643 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
645 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
648 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
649 count == PCRE_ERROR_PARTIAL)
653 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
656 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
660 else break; /* Some other error */
663 extra->flags &= ~flag;
669 /*************************************************
670 * Case-independent strncmp() function *
671 *************************************************/
677 n number of characters to compare
679 Returns: < 0, = 0, or > 0, according to the comparison
683 strncmpic(uschar *s, uschar *t, int n)
687 int c = tolower(*s++) - tolower(*t++);
695 /*************************************************
696 * Check newline indicator *
697 *************************************************/
699 /* This is used both at compile and run-time to check for <xxx> escapes, where
700 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
704 p points after the leading '<'
705 f file for error message
707 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
711 check_newline(uschar *p, FILE *f)
713 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
714 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
715 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
716 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
717 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
718 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
719 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
720 fprintf(f, "Unknown newline type at: <%s\n", p);
726 /*************************************************
728 *************************************************/
733 printf("Usage: pcretest [options] [<input> [<output>]]\n");
734 printf(" -b show compiled code (bytecode)\n");
735 printf(" -C show PCRE compile-time options and exit\n");
736 printf(" -d debug: show compiled code and information (-b and -i)\n");
738 printf(" -dfa force DFA matching for all subjects\n");
740 printf(" -help show usage information\n");
741 printf(" -i show information about compiled patterns\n"
742 " -m output memory used information\n"
743 " -o <n> set size of offsets vector to <n>\n");
745 printf(" -p use POSIX interface\n");
747 printf(" -q quiet: do not output PCRE version number at start\n");
748 printf(" -S <n> set stack size to <n> megabytes\n");
749 printf(" -s output store (memory) used information\n"
750 " -t time compilation and execution\n");
751 printf(" -t <n> time compilation and execution, repeating <n> times\n");
752 printf(" -tm time execution (matching) only\n");
753 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
758 /*************************************************
760 *************************************************/
762 /* Read lines from named file or stdin and write to named file or stdout; lines
763 consist of a regular expression, in delimiters and optionally followed by
764 options, followed by a set of test data, terminated by an empty line. */
766 int main(int argc, char **argv)
768 FILE *infile = stdin;
770 int study_options = 0;
777 int size_offsets = 45;
778 int size_offsets_max;
789 /* These vectors store, end-to-end, a list of captured substring names. Assume
790 that 1024 is plenty long enough for the few names we'll be testing. */
792 uschar copynames[1024];
793 uschar getnames[1024];
795 uschar *copynamesptr;
798 /* Get buffers from malloc() so that Electric Fence will check their misuse
799 when I am debugging. They grow automatically when very long lines are read. */
801 buffer = (unsigned char *)malloc(buffer_size);
802 dbuffer = (unsigned char *)malloc(buffer_size);
803 pbuffer = (unsigned char *)malloc(buffer_size);
805 /* The outfile variable is static so that new_malloc can use it. */
809 /* The following _setmode() stuff is some Windows magic that tells its runtime
810 library to translate CRLF into a single LF character. At least, that's what
811 I've been told: never having used Windows I take this all on trust. Originally
812 it set 0x8000, but then I was advised that _O_BINARY was better. */
814 #if defined(_WIN32) || defined(WIN32)
815 _setmode( _fileno( stdout ), _O_BINARY );
820 while (argc > 1 && argv[op][0] == '-')
822 unsigned char *endptr;
824 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
826 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
827 else if (strcmp(argv[op], "-b") == 0) debug = 1;
828 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
829 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
831 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
833 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
834 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
840 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
842 int both = argv[op][2] == 0;
844 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
851 else timeitm = LOOPREPEAT;
852 if (both) timeit = timeitm;
854 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
855 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
858 #if defined(_WIN32) || defined(WIN32)
859 printf("PCRE: -S not supported on this OS\n");
864 getrlimit(RLIMIT_STACK, &rlim);
865 rlim.rlim_cur = stack_size * 1024 * 1024;
866 rc = setrlimit(RLIMIT_STACK, &rlim);
869 printf("PCRE: setrlimit() failed with error %d\n", rc);
877 else if (strcmp(argv[op], "-p") == 0) posix = 1;
879 else if (strcmp(argv[op], "-C") == 0)
882 printf("PCRE version %s\n", pcre_version());
883 printf("Compiled with\n");
884 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
885 printf(" %sUTF-8 support\n", rc? "" : "No ");
886 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
887 printf(" %sUnicode properties support\n", rc? "" : "No ");
888 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
889 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
890 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
891 (rc == -2)? "ANYCRLF" :
892 (rc == -1)? "ANY" : "???");
893 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
894 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
895 "all Unicode newlines");
896 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
897 printf(" Internal link size = %d\n", rc);
898 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
899 printf(" POSIX malloc threshold = %d\n", rc);
900 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
901 printf(" Default match limit = %d\n", rc);
902 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
903 printf(" Default recursion depth limit = %d\n", rc);
904 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
905 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
908 else if (strcmp(argv[op], "-help") == 0 ||
909 strcmp(argv[op], "--help") == 0)
916 printf("** Unknown or malformed option %s\n", argv[op]);
925 /* Get the store for the offsets vector, and remember what it was */
927 size_offsets_max = size_offsets;
928 offsets = (int *)malloc(size_offsets_max * sizeof(int));
931 printf("** Failed to get %d bytes of memory for offsets vector\n",
932 (int)(size_offsets_max * sizeof(int)));
937 /* Sort out the input and output files */
941 infile = fopen(argv[op], INPUT_MODE);
944 printf("** Failed to open %s\n", argv[op]);
952 outfile = fopen(argv[op+1], OUTPUT_MODE);
955 printf("** Failed to open %s\n", argv[op+1]);
961 /* Set alternative malloc function */
963 pcre_malloc = new_malloc;
964 pcre_free = new_free;
965 pcre_stack_malloc = stack_malloc;
966 pcre_stack_free = stack_free;
968 /* Heading line unless quiet, then prompt for first regex if stdin */
970 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
977 pcre_extra *extra = NULL;
979 #if !defined NOPOSIX /* There are still compilers that require no indent */
985 unsigned char *p, *pp, *ppp;
986 unsigned char *to_file = NULL;
987 const unsigned char *tables = NULL;
988 unsigned long int true_size, true_study_size = 0;
989 size_t size, regex_gotten_store;
991 int do_debug = debug;
994 int do_showinfo = showinfo;
997 int erroroffset, len, delimiter, poffset;
1002 if (infile == stdin) printf(" re> ");
1003 if (extend_inputline(infile, buffer) == NULL) break;
1004 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1008 while (isspace(*p)) p++;
1009 if (*p == 0) continue;
1011 /* See if the pattern is to be loaded pre-compiled from a file. */
1013 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1015 unsigned long int magic, get_options;
1020 pp = p + (int)strlen((char *)p);
1021 while (isspace(pp[-1])) pp--;
1024 f = fopen((char *)p, "rb");
1027 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1031 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1034 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1036 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1038 re = (real_pcre *)new_malloc(true_size);
1039 regex_gotten_store = gotten_store;
1041 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1043 magic = ((real_pcre *)re)->magic_number;
1044 if (magic != MAGIC_NUMBER)
1046 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1052 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1058 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1059 do_flip? " (byte-inverted)" : "", p);
1061 /* Need to know if UTF-8 for printing data strings */
1063 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1064 use_utf8 = (get_options & PCRE_UTF8) != 0;
1066 /* Now see if there is any following study data */
1068 if (true_study_size != 0)
1070 pcre_study_data *psd;
1072 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1073 extra->flags = PCRE_EXTRA_STUDY_DATA;
1075 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1076 extra->study_data = psd;
1078 if (fread(psd, 1, true_study_size, f) != true_study_size)
1081 fprintf(outfile, "Failed to read data from %s\n", p);
1082 if (extra != NULL) new_free(extra);
1083 if (re != NULL) new_free(re);
1087 fprintf(outfile, "Study data loaded from %s\n", p);
1088 do_study = 1; /* To get the data output if requested */
1090 else fprintf(outfile, "No study data\n");
1096 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1097 the pattern; if it isn't complete, read more. */
1101 if (isalnum(delimiter) || delimiter == '\\')
1103 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1108 poffset = p - buffer;
1114 if (*pp == '\\' && pp[1] != 0) pp++;
1115 else if (*pp == delimiter) break;
1118 if (*pp != 0) break;
1119 if (infile == stdin) printf(" > ");
1120 if ((pp = extend_inputline(infile, pp)) == NULL)
1122 fprintf(outfile, "** Unexpected EOF\n");
1126 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1129 /* The buffer may have moved while being extended; reset the start of data
1130 pointer to the correct relative point in the buffer. */
1132 p = buffer + poffset;
1134 /* If the first character after the delimiter is backslash, make
1135 the pattern end with backslash. This is purely to provide a way
1136 of testing for the error message when a pattern ends with backslash. */
1138 if (pp[1] == '\\') *pp++ = '\\';
1140 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1144 strcpy((char *)pbuffer, (char *)p);
1146 /* Look for options after final delimiter */
1150 log_store = showstore; /* default from command line */
1156 case 'f': options |= PCRE_FIRSTLINE; break;
1157 case 'g': do_g = 1; break;
1158 case 'i': options |= PCRE_CASELESS; break;
1159 case 'm': options |= PCRE_MULTILINE; break;
1160 case 's': options |= PCRE_DOTALL; break;
1161 case 'x': options |= PCRE_EXTENDED; break;
1163 case '+': do_showrest = 1; break;
1164 case 'A': options |= PCRE_ANCHORED; break;
1165 case 'B': do_debug = 1; break;
1166 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1167 case 'D': do_debug = do_showinfo = 1; break;
1168 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1169 case 'F': do_flip = 1; break;
1170 case 'G': do_G = 1; break;
1171 case 'I': do_showinfo = 1; break;
1172 case 'J': options |= PCRE_DUPNAMES; break;
1173 case 'M': log_store = 1; break;
1174 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1176 #if !defined NOPOSIX
1177 case 'P': do_posix = 1; break;
1180 case 'S': do_study = 1; break;
1181 case 'U': options |= PCRE_UNGREEDY; break;
1182 case 'X': options |= PCRE_EXTRA; break;
1183 case 'Z': debug_lengths = 0; break;
1184 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1185 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1189 /* The '\r' test here is so that it works on Windows. */
1190 /* The test for 0 (NUL) is just in case this is an unterminated line. */
1191 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1193 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1195 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1199 tables = pcre_maketables();
1205 while (*pp != 0) pp++;
1206 while (isspace(pp[-1])) pp--;
1212 int x = check_newline(pp, outfile);
1213 if (x == 0) goto SKIP_DATA;
1215 while (*pp++ != '>');
1219 case '\r': /* So that it works in Windows */
1225 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1230 /* Handle compiling via the POSIX interface, which doesn't support the
1231 timing, showing, or debugging options, nor the ability to pass over
1232 local character tables. */
1234 #if !defined NOPOSIX
1235 if (posix || do_posix)
1240 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1241 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1242 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1243 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1244 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1246 rc = regcomp(&preg, (char *)p, cflags);
1248 /* Compilation failed; go back for another re, skipping to blank line
1249 if non-interactive. */
1253 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1254 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1259 /* Handle compiling via the native interface */
1262 #endif /* !defined NOPOSIX */
1269 clock_t start_time = clock();
1270 for (i = 0; i < timeit; i++)
1272 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1273 if (re != NULL) free(re);
1275 time_taken = clock() - start_time;
1276 fprintf(outfile, "Compile time %.4f milliseconds\n",
1277 (((double)time_taken * 1000.0) / (double)timeit) /
1278 (double)CLOCKS_PER_SEC);
1281 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1283 /* Compilation failed; go back for another re, skipping to blank line
1284 if non-interactive. */
1288 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1290 if (infile != stdin)
1294 if (extend_inputline(infile, buffer) == NULL)
1299 len = (int)strlen((char *)buffer);
1300 while (len > 0 && isspace(buffer[len-1])) len--;
1301 if (len == 0) break;
1303 fprintf(outfile, "\n");
1308 /* Compilation succeeded; print data if required. There are now two
1309 info-returning functions. The old one has a limited interface and
1310 returns only limited data. Check that it agrees with the newer one. */
1313 fprintf(outfile, "Memory allocation (code space): %d\n",
1314 (int)(gotten_store -
1316 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1318 /* Extract the size for possible writing before possibly flipping it,
1319 and remember the store that was got. */
1321 true_size = ((real_pcre *)re)->size;
1322 regex_gotten_store = gotten_store;
1324 /* If /S was present, study the regexp to generate additional info to
1325 help with the matching. */
1333 clock_t start_time = clock();
1334 for (i = 0; i < timeit; i++)
1335 extra = pcre_study(re, study_options, &error);
1336 time_taken = clock() - start_time;
1337 if (extra != NULL) free(extra);
1338 fprintf(outfile, " Study time %.4f milliseconds\n",
1339 (((double)time_taken * 1000.0) / (double)timeit) /
1340 (double)CLOCKS_PER_SEC);
1342 extra = pcre_study(re, study_options, &error);
1344 fprintf(outfile, "Failed to study: %s\n", error);
1345 else if (extra != NULL)
1346 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1349 /* If the 'F' option was present, we flip the bytes of all the integer
1350 fields in the regex data block and the study block. This is to make it
1351 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1352 compiled on a different architecture. */
1356 real_pcre *rre = (real_pcre *)re;
1358 byteflip(rre->magic_number, sizeof(rre->magic_number));
1359 rre->size = byteflip(rre->size, sizeof(rre->size));
1360 rre->options = byteflip(rre->options, sizeof(rre->options));
1361 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1363 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1365 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1367 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1369 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1370 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1371 sizeof(rre->name_table_offset));
1372 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1373 sizeof(rre->name_entry_size));
1374 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1375 sizeof(rre->name_count));
1379 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1380 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1381 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1385 /* Extract information from the compiled data if required */
1391 fprintf(outfile, "------------------------------------------------------------------\n");
1392 pcre_printint(re, outfile, debug_lengths);
1397 unsigned long int get_options, all_options;
1398 #if !defined NOINFOCHECK
1399 int old_first_char, old_options, old_count;
1401 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1403 int nameentrysize, namecount;
1404 const uschar *nametable;
1406 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1407 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1408 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1409 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1410 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1411 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1412 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1413 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1414 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1415 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1416 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1417 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1419 #if !defined NOINFOCHECK
1420 old_count = pcre_info(re, &old_options, &old_first_char);
1421 if (count < 0) fprintf(outfile,
1422 "Error %d from pcre_info()\n", count);
1425 if (old_count != count) fprintf(outfile,
1426 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1429 if (old_first_char != first_char) fprintf(outfile,
1430 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1431 first_char, old_first_char);
1433 if (old_options != (int)get_options) fprintf(outfile,
1434 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1435 get_options, old_options);
1439 if (size != regex_gotten_store) fprintf(outfile,
1440 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1441 (int)size, (int)regex_gotten_store);
1443 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1445 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1449 fprintf(outfile, "Named capturing subpatterns:\n");
1450 while (namecount-- > 0)
1452 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1453 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1454 GET2(nametable, 0));
1455 nametable += nameentrysize;
1459 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1460 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1462 all_options = ((real_pcre *)re)->options;
1463 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1465 if (get_options == 0) fprintf(outfile, "No options\n");
1466 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1467 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1468 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1469 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1470 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1471 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1472 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1473 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1474 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1475 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1476 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1477 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1478 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1479 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1480 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1481 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1483 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1485 switch (get_options & PCRE_NEWLINE_BITS)
1487 case PCRE_NEWLINE_CR:
1488 fprintf(outfile, "Forced newline sequence: CR\n");
1491 case PCRE_NEWLINE_LF:
1492 fprintf(outfile, "Forced newline sequence: LF\n");
1495 case PCRE_NEWLINE_CRLF:
1496 fprintf(outfile, "Forced newline sequence: CRLF\n");
1499 case PCRE_NEWLINE_ANYCRLF:
1500 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1503 case PCRE_NEWLINE_ANY:
1504 fprintf(outfile, "Forced newline sequence: ANY\n");
1511 if (first_char == -1)
1513 fprintf(outfile, "First char at start or follows newline\n");
1515 else if (first_char < 0)
1517 fprintf(outfile, "No first char\n");
1521 int ch = first_char & 255;
1522 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1525 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1527 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1532 fprintf(outfile, "No need char\n");
1536 int ch = need_char & 255;
1537 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1540 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1542 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1545 /* Don't output study size; at present it is in any case a fixed
1546 value, but it varies, depending on the computer architecture, and
1547 so messes up the test suite. (And with the /F option, it might be
1553 fprintf(outfile, "Study returned NULL\n");
1556 uschar *start_bits = NULL;
1557 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1559 if (start_bits == NULL)
1560 fprintf(outfile, "No starting byte set\n");
1565 fprintf(outfile, "Starting byte set: ");
1566 for (i = 0; i < 256; i++)
1568 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1572 fprintf(outfile, "\n ");
1575 if (PRINTHEX(i) && i != ' ')
1577 fprintf(outfile, "%c ", i);
1582 fprintf(outfile, "\\x%02x ", i);
1587 fprintf(outfile, "\n");
1593 /* If the '>' option was present, we write out the regex to a file, and
1594 that is all. The first 8 bytes of the file are the regex length and then
1595 the study length, in big-endian order. */
1597 if (to_file != NULL)
1599 FILE *f = fopen((char *)to_file, "wb");
1602 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1607 sbuf[0] = (uschar)((true_size >> 24) & 255);
1608 sbuf[1] = (uschar)((true_size >> 16) & 255);
1609 sbuf[2] = (uschar)((true_size >> 8) & 255);
1610 sbuf[3] = (uschar)((true_size) & 255);
1612 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1613 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1614 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1615 sbuf[7] = (uschar)((true_study_size) & 255);
1617 if (fwrite(sbuf, 1, 8, f) < 8 ||
1618 fwrite(re, 1, true_size, f) < true_size)
1620 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1624 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1627 if (fwrite(extra->study_data, 1, true_study_size, f) <
1630 fprintf(outfile, "Write error on %s: %s\n", to_file,
1633 else fprintf(outfile, "Study data written to %s\n", to_file);
1641 if (extra != NULL) new_free(extra);
1642 if (tables != NULL) new_free((void *)tables);
1643 continue; /* With next regex */
1645 } /* End of non-POSIX compile */
1647 /* Read data lines and test them */
1653 int *use_offsets = offsets;
1654 int use_size_offsets = size_offsets;
1655 int callout_data = 0;
1656 int callout_data_set = 0;
1658 int copystrings = 0;
1659 int find_match_limit = 0;
1663 int start_offset = 0;
1672 copynamesptr = copynames;
1673 getnamesptr = getnames;
1675 pcre_callout = callout;
1679 callout_fail_count = 999999;
1680 callout_fail_id = -1;
1683 if (extra != NULL) extra->flags &=
1684 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1689 if (infile == stdin) printf("data> ");
1690 if (extend_inputline(infile, buffer + len) == NULL)
1696 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1697 len = (int)strlen((char *)buffer);
1698 if (buffer[len-1] == '\n') break;
1701 while (len > 0 && isspace(buffer[len-1])) len--;
1703 if (len == 0) break;
1706 while (isspace(*p)) p++;
1709 while ((c = *p++) != 0)
1714 if (c == '\\') switch ((c = *p++))
1716 case 'a': c = 7; break;
1717 case 'b': c = '\b'; break;
1718 case 'e': c = 27; break;
1719 case 'f': c = '\f'; break;
1720 case 'n': c = '\n'; break;
1721 case 'r': c = '\r'; break;
1722 case 't': c = '\t'; break;
1723 case 'v': c = '\v'; break;
1725 case '0': case '1': case '2': case '3':
1726 case '4': case '5': case '6': case '7':
1728 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1729 c = c * 8 + *p++ - '0';
1732 if (use_utf8 && c > 255)
1734 unsigned char buff8[8];
1736 utn = ord2utf8(c, buff8);
1737 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1738 c = buff8[ii]; /* Last byte */
1745 /* Handle \x{..} specially - new Perl thing for utf8 */
1750 unsigned char *pt = p;
1752 while (isxdigit(*(++pt)))
1753 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1756 unsigned char buff8[8];
1758 utn = ord2utf8(c, buff8);
1759 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1760 c = buff8[ii]; /* Last byte */
1764 /* Not correct form; fall through */
1771 while (i++ < 2 && isxdigit(*p))
1773 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1778 case 0: /* \ followed by EOF allows for an empty line */
1783 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1786 case 'A': /* Option setting */
1787 options |= PCRE_ANCHORED;
1791 options |= PCRE_NOTBOL;
1795 if (isdigit(*p)) /* Set copy string */
1797 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1798 copystrings |= 1 << n;
1800 else if (isalnum(*p))
1802 uschar *npp = copynamesptr;
1803 while (isalnum(*p)) *npp++ = *p++;
1806 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1808 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1818 pcre_callout = NULL;
1823 callout_fail_id = 0;
1826 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1827 callout_fail_count = 0;
1832 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1839 if (*(++p) == '-') { sign = -1; p++; }
1841 callout_data = callout_data * 10 + *p++ - '0';
1842 callout_data *= sign;
1843 callout_data_set = 1;
1849 #if !defined NOPOSIX
1850 if (posix || do_posix)
1851 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1858 options |= PCRE_DFA_SHORTEST;
1865 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1866 getstrings |= 1 << n;
1868 else if (isalnum(*p))
1870 uschar *npp = getnamesptr;
1871 while (isalnum(*p)) *npp++ = *p++;
1874 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1876 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1886 find_match_limit = 1;
1890 options |= PCRE_NOTEMPTY;
1894 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1895 if (n > size_offsets_max)
1897 size_offsets_max = n;
1899 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1900 if (offsets == NULL)
1902 printf("** Failed to get %d bytes of memory for offsets vector\n",
1903 (int)(size_offsets_max * sizeof(int)));
1908 use_size_offsets = n;
1909 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1913 options |= PCRE_PARTIAL;
1917 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1920 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1923 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1924 extra->match_limit_recursion = n;
1928 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1931 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1934 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1935 extra->match_limit = n;
1940 options |= PCRE_DFA_RESTART;
1949 options |= PCRE_NOTEOL;
1953 options |= PCRE_NO_UTF8_CHECK;
1958 int x = check_newline(p, outfile);
1959 if (x == 0) goto NEXT_DATA;
1961 while (*p++ != '>');
1970 if ((all_use_dfa || use_dfa) && find_match_limit)
1972 printf("**Match limit not relevant for DFA matching: ignored\n");
1973 find_match_limit = 0;
1976 /* Handle matching via the POSIX interface, which does not
1977 support timing or playing with the match limit or callout data. */
1979 #if !defined NOPOSIX
1980 if (posix || do_posix)
1984 regmatch_t *pmatch = NULL;
1985 if (use_size_offsets > 0)
1986 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1987 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1988 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1990 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1994 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1995 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1997 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2000 fprintf(outfile, "Matched with REG_NOSUB\n");
2005 for (i = 0; i < (size_t)use_size_offsets; i++)
2007 if (pmatch[i].rm_so >= 0)
2009 fprintf(outfile, "%2d: ", (int)i);
2010 (void)pchars(dbuffer + pmatch[i].rm_so,
2011 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2012 fprintf(outfile, "\n");
2013 if (i == 0 && do_showrest)
2015 fprintf(outfile, " 0+ ");
2016 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2018 fprintf(outfile, "\n");
2026 /* Handle matching via the native interface - repeats for /g and /G */
2029 #endif /* !defined NOPOSIX */
2031 for (;; gmatched++) /* Loop for /g or /G */
2037 clock_t start_time = clock();
2040 if (all_use_dfa || use_dfa)
2042 int workspace[1000];
2043 for (i = 0; i < timeitm; i++)
2044 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2045 options | g_notempty, use_offsets, use_size_offsets, workspace,
2046 sizeof(workspace)/sizeof(int));
2051 for (i = 0; i < timeitm; i++)
2052 count = pcre_exec(re, extra, (char *)bptr, len,
2053 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055 time_taken = clock() - start_time;
2056 fprintf(outfile, "Execute time %.4f milliseconds\n",
2057 (((double)time_taken * 1000.0) / (double)timeitm) /
2058 (double)CLOCKS_PER_SEC);
2061 /* If find_match_limit is set, we want to do repeated matches with
2062 varying limits in order to find the minimum value for the match limit and
2063 for the recursion limit. */
2065 if (find_match_limit)
2069 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2073 (void)check_match_limit(re, extra, bptr, len, start_offset,
2074 options|g_notempty, use_offsets, use_size_offsets,
2075 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2076 PCRE_ERROR_MATCHLIMIT, "match()");
2078 count = check_match_limit(re, extra, bptr, len, start_offset,
2079 options|g_notempty, use_offsets, use_size_offsets,
2080 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2081 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2084 /* If callout_data is set, use the interface with additional data */
2086 else if (callout_data_set)
2090 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2093 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2094 extra->callout_data = &callout_data;
2095 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2096 options | g_notempty, use_offsets, use_size_offsets);
2097 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2100 /* The normal case is just to do the match once, with the default
2101 value of match_limit. */
2104 else if (all_use_dfa || use_dfa)
2106 int workspace[1000];
2107 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2108 options | g_notempty, use_offsets, use_size_offsets, workspace,
2109 sizeof(workspace)/sizeof(int));
2112 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2113 count = use_size_offsets/2;
2120 count = pcre_exec(re, extra, (char *)bptr, len,
2121 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2124 fprintf(outfile, "Matched, but too many substrings\n");
2125 count = use_size_offsets/3;
2136 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2138 maxcount = use_size_offsets/3;
2140 /* This is a check against a lunatic return value. */
2142 if (count > maxcount)
2145 "** PCRE error: returned count %d is too big for offset size %d\n",
2146 count, use_size_offsets);
2147 count = use_size_offsets/3;
2150 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2151 do_g = do_G = FALSE; /* Break g/G loop */
2155 for (i = 0; i < count * 2; i += 2)
2157 if (use_offsets[i] < 0)
2158 fprintf(outfile, "%2d: <unset>\n", i/2);
2161 fprintf(outfile, "%2d: ", i/2);
2162 (void)pchars(bptr + use_offsets[i],
2163 use_offsets[i+1] - use_offsets[i], outfile);
2164 fprintf(outfile, "\n");
2169 fprintf(outfile, " 0+ ");
2170 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2172 fprintf(outfile, "\n");
2178 for (i = 0; i < 32; i++)
2180 if ((copystrings & (1 << i)) != 0)
2182 char copybuffer[256];
2183 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2184 i, copybuffer, sizeof(copybuffer));
2186 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2188 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2192 for (copynamesptr = copynames;
2194 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2196 char copybuffer[256];
2197 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2198 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2200 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2202 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2205 for (i = 0; i < 32; i++)
2207 if ((getstrings & (1 << i)) != 0)
2209 const char *substring;
2210 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2213 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2216 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2217 pcre_free_substring(substring);
2222 for (getnamesptr = getnames;
2224 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2226 const char *substring;
2227 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2228 count, (char *)getnamesptr, &substring);
2230 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2233 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2234 pcre_free_substring(substring);
2240 const char **stringlist;
2241 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2244 fprintf(outfile, "get substring list failed %d\n", rc);
2247 for (i = 0; i < count; i++)
2248 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2249 if (stringlist[i] != NULL)
2250 fprintf(outfile, "string list not terminated by NULL\n");
2251 /* free((void *)stringlist); */
2252 pcre_free_substring_list(stringlist);
2257 /* There was a partial match */
2259 else if (count == PCRE_ERROR_PARTIAL)
2261 fprintf(outfile, "Partial match");
2263 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2264 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2265 bptr + use_offsets[0]);
2267 fprintf(outfile, "\n");
2268 break; /* Out of the /g loop */
2271 /* Failed to match. If this is a /g or /G loop and we previously set
2272 g_notempty after a null match, this is not necessarily the end. We want
2273 to advance the start offset, and continue. We won't be at the end of the
2274 string - that was checked before setting g_notempty.
2276 Complication arises in the case when the newline option is "any" or
2277 "anycrlf". If the previous match was at the end of a line terminated by
2278 CRLF, an advance of one character just passes the \r, whereas we should
2279 prefer the longer newline sequence, as does the code in pcre_exec().
2280 Fudge the offset value to achieve this.
2282 Otherwise, in the case of UTF-8 matching, the advance must be one
2283 character, not one byte. */
2287 if (g_notempty != 0)
2290 unsigned int obits = ((real_pcre *)re)->options;
2291 use_offsets[0] = start_offset;
2292 if ((obits & PCRE_NEWLINE_BITS) == 0)
2295 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2296 obits = (d == '\r')? PCRE_NEWLINE_CR :
2297 (d == '\n')? PCRE_NEWLINE_LF :
2298 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2299 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2300 (d == -1)? PCRE_NEWLINE_ANY : 0;
2302 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2303 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2305 start_offset < len - 1 &&
2306 bptr[start_offset] == '\r' &&
2307 bptr[start_offset+1] == '\n')
2311 while (start_offset + onechar < len)
2313 int tb = bptr[start_offset+onechar];
2314 if (tb <= 127) break;
2316 if (tb != 0 && tb != 0xc0) onechar++;
2319 use_offsets[1] = start_offset + onechar;
2323 if (count == PCRE_ERROR_NOMATCH)
2325 if (gmatched == 0) fprintf(outfile, "No match\n");
2327 else fprintf(outfile, "Error %d\n", count);
2328 break; /* Out of the /g loop */
2332 /* If not /g or /G we are done */
2334 if (!do_g && !do_G) break;
2336 /* If we have matched an empty string, first check to see if we are at
2337 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2338 what Perl's /g option does. This turns out to be rather cunning. First
2339 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2340 same point. If this fails (picked up above) we advance to the next
2345 if (use_offsets[0] == use_offsets[1])
2347 if (use_offsets[0] == len) break;
2348 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2351 /* For /g, update the start offset, leaving the rest alone */
2353 if (do_g) start_offset = use_offsets[1];
2355 /* For /G, update the pointer and length */
2359 bptr += use_offsets[1];
2360 len -= use_offsets[1];
2362 } /* End of loop for /g and /G */
2364 NEXT_DATA: continue;
2365 } /* End of loop for data lines */
2369 #if !defined NOPOSIX
2370 if (posix || do_posix) regfree(&preg);
2373 if (re != NULL) new_free(re);
2374 if (extra != NULL) new_free(extra);
2377 new_free((void *)tables);
2378 setlocale(LC_CTYPE, "C");
2383 if (infile == stdin) fprintf(outfile, "\n");
2387 if (infile != NULL && infile != stdin) fclose(infile);
2388 if (outfile != NULL && outfile != stdout) fclose(outfile);
2398 /* End of pcretest.c */