1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.7 2007/06/26 11:16:54 ph10 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
50 /* A number of things vary for Windows builds. Originally, pcretest opened its
51 input and output without "b"; then I was told that "b" was needed in some
52 environments, so it was added for release 5.0 to both the input and output. (It
53 makes no difference on Unix-like systems.) Later I was told that it is wrong
54 for the input on Windows. I've now abstracted the modes into two macros that
55 are set here, to make it easier to fiddle with them, and removed "b" from the
56 input mode under Windows. */
58 #if defined(_WIN32) || defined(WIN32)
59 #include <io.h> /* For _setmode() */
60 #include <fcntl.h> /* For _O_BINARY */
61 #define INPUT_MODE "r"
62 #define OUTPUT_MODE "wb"
65 #include <sys/time.h> /* These two includes are needed */
66 #include <sys/resource.h> /* for setrlimit(). */
67 #define INPUT_MODE "rb"
68 #define OUTPUT_MODE "wb"
72 /* We have to include pcre_internal.h because we need the internal info for
73 displaying the results of pcre_study() and we also need to know about the
74 internal macros, structures, and other internal data values; pcretest has
75 "inside information" compared to a program that strictly follows the PCRE API.
77 Although pcre_internal.h does itself include pcre.h, we explicitly include it
78 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
79 appropriately for an application, not for building PCRE. */
82 #include "pcre_internal.h"
84 /* We need access to the data tables that PCRE uses. So as not to have to keep
85 two copies, we include the source file here, changing the names of the external
86 symbols to prevent clashes. */
88 #define _pcre_utf8_table1 utf8_table1
89 #define _pcre_utf8_table1_size utf8_table1_size
90 #define _pcre_utf8_table2 utf8_table2
91 #define _pcre_utf8_table3 utf8_table3
92 #define _pcre_utf8_table4 utf8_table4
94 #define _pcre_utt_size utt_size
95 #define _pcre_OP_lengths OP_lengths
97 #include "pcre_tables.c"
99 /* We also need the pcre_printint() function for printing out compiled
100 patterns. This function is in a separate file so that it can be included in
101 pcre_compile.c when that module is compiled with debugging enabled.
103 The definition of the macro PRINTABLE, which determines whether to print an
104 output character as-is or as a hex value when showing compiled patterns, is
105 contained in this file. We use it here also, in cases when the locale has not
106 been explicitly changed, so as to get consistent output from systems that
107 differ in their output from isprint() even in the "C" locale. */
109 #include "pcre_printint.src"
/* Non-zero when c may be output as-is rather than as a hex escape: isprint()
is consulted when locale_set is non-zero, PRINTABLE() otherwise. */
111 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114 /* It is possible to compile this test program without including support for
115 testing the POSIX interface, though this is not available via the standard
119 #include "pcreposix.h"
122 /* It is also possible, for the benefit of the version currently imported into
123 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
124 interface to the DFA matcher (NODFA), and without the doublecheck of the old
125 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
126 UTF8 support if PCRE is built without it. */
135 /* Other parameters */
137 #ifndef CLOCKS_PER_SEC
139 #define CLOCKS_PER_SEC CLK_TCK
141 #define CLOCKS_PER_SEC 100
145 /* This is the default loop count for timing. */
147 #define LOOPREPEAT 500000
149 /* Static variables */
/* Stream that receives the program's normal output. */
151 static FILE *outfile;
/* Store-usage reporting flag; main() resets it from the command-line setting
for each pattern, and the 'M' pattern option sets it to 1. */
152 static int log_store = 0;
/* State shared with callout(): callout_count is incremented there and compared
with callout_fail_count when the callout number equals callout_fail_id;
first_callout and callout_extra select whether callout() writes the subject to
outfile or merely measures it. */
153 static int callout_count;
154 static int callout_extra;
155 static int callout_fail_count;
156 static int callout_fail_id;
157 static int first_callout;
/* Non-zero makes PRINTHEX() use isprint() instead of PRINTABLE(). */
158 static int locale_set = 0;
/* NOTE(review): presumably gates the trace output of the malloc/free wrappers;
the test itself is not visible in this listing - confirm. */
159 static int show_malloc;
/* Read by main() when reporting memory used. NOTE(review): the assignment is
not visible in this listing; presumably made by new_malloc() - confirm. */
161 static size_t gotten_store;
163 /* The buffers grow automatically if very long input lines are encountered. */
/* buffer receives the input text and pbuffer holds a copy of the pattern that
callout() prints from. All three buffers are allocated with buffer_size bytes
in main() and are replaced by ones twice the size in extend_inputline(). */
165 static int buffer_size = 50000;
166 static uschar *buffer = NULL;
167 static uschar *dbuffer = NULL;
168 static uschar *pbuffer = NULL;
172 /*************************************************
173 * Read or extend an input line *
174 *************************************************/
176 /* Input lines are read into buffer, but both patterns and data lines can be
177 continued over multiple input lines. In addition, if the buffer fills up, we
178 want to automatically expand it so as to be able to handle extremely large
179 lines that are needed for certain stress tests. When the input buffer is
180 expanded, the other two buffers must also be expanded likewise, and the
181 contents of pbuffer, which are a copy of the input for callouts, must be
182 preserved (for when expansion happens for a data line). This is not the most
183 optimal way of handling this, but hey, this is just a test program!
187 start where in buffer to start (this *must* be within buffer)
189 Returns: pointer to the start of new data
190 could be a copy of start, or could be moved
191 NULL if no data read and EOF reached
/* Reads input from f into the shared buffer, beginning at start. From the
visible statements: returns start once a chunk ending in '\n' has been read;
if fgets() fails, returns NULL when nothing at all was read and start
otherwise. When space runs short the three global buffers are replaced by
ones twice the size.
NOTE(review): the return-type line, braces, the loop header, the test that
chooses between reading and growing, the advance of "here", the release of the
old buffers and the assignment to "buffer" are not present in this listing;
the comments below cover only what can be seen. */
195 extend_inputline(FILE *f, uschar *start)
197 uschar *here = start;
/* Bytes still free between the current read position and the buffer's end. */
201 int rlen = buffer_size - (here - buffer);
/* EOF or read error: report "no data" only if this call read nothing. */
206 if (fgets((char *)here, rlen, f) == NULL)
207 return (here == start)? NULL : start;
208 dlen = (int)strlen((char *)here);
/* A trailing newline means a complete line is now in the buffer. */
209 if (dlen > 0 && here[dlen - 1] == '\n') return start;
/* Growth path: allocate replacements of double the current size. */
215 int new_buffer_size = 2*buffer_size;
216 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
217 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
218 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
220 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
222 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
/* Only the contents of buffer and pbuffer are carried over; dbuffer's old
contents are not copied. */
226 memcpy(new_buffer, buffer, buffer_size);
227 memcpy(new_pbuffer, pbuffer, buffer_size);
229 buffer_size = new_buffer_size;
/* Rebase both pointers onto the new block, keeping their offsets. These use
the old value of "buffer", so they must precede its replacement. */
231 start = new_buffer + (start - buffer);
232 here = new_buffer + (here - buffer);
239 dbuffer = new_dbuffer;
240 pbuffer = new_pbuffer;
244 return NULL; /* Control never gets here */
253 /*************************************************
254 * Read number from string *
255 *************************************************/
257 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
258 around with conditional compilation, just do the job by hand. It is only used
259 for unpicking arguments, so just keep it simple.
262 str string to be converted
263 endptr where to put the end pointer
265 Returns: the unsigned long
/* Hand-rolled conversion of a decimal number at the start of str.
NOTE(review): the declaration of "result", the store through endptr and the
return statement are not present in this listing. */
269 get_value(unsigned char *str, unsigned char **endptr)
/* Skip leading white space, stopping at the terminating zero. */
272 while(*str != 0 && isspace(*str)) str++;
/* Accumulate decimal digits; no overflow check is made here. */
273 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
281 /*************************************************
282 * Convert UTF-8 string to value *
283 *************************************************/
285 /* This function takes one or more bytes that represents a UTF-8 character,
286 and returns the value of the character.
289 utf8bytes a pointer to the byte vector
290 vptr a pointer to an int to receive the value
292 Returns: > 0 => the number of bytes consumed
293 -6 to 0 => malformed UTF-8 character at offset = (-return)
/* Decodes one UTF-8 character starting at utf8bytes. Visible results: 1 with
*vptr set to the byte itself for a single-byte (ASCII) character; 0 when the
leading byte is invalid; a negative value when a following byte is not a
continuation byte or when the value was not encoded in its shortest form.
NOTE(review): declarations, the statements that set up and shift d, c and s,
and the final store/return for a multi-byte character are not present in this
listing. */
299 utf82ord(unsigned char *utf8bytes, int *vptr)
301 int c = *utf8bytes++;
/* The loop ends at the first iteration in which the top bit of d is clear. */
305 for (i = -1; i < 6; i++) /* i is number of additional bytes */
307 if ((d & 0x80) == 0) break;
311 if (i == -1) { *vptr = c; return 1; } /* ascii character */
312 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
314 /* i now has a value in the range 1-5 */
/* Data bits of the leading byte, selected by the mask for i extra bytes. */
317 d = (c & utf8_table3[i]) << s;
319 for (j = 0; j < i; j++)
/* Every following byte must look like 10xxxxxx; the error value encodes the
position of the offending byte. */
322 if ((c & 0xc0) != 0x80) return -(j+1);
/* Merge the six payload bits of this continuation byte. */
324 d |= (c & 0x3f) << s;
327 /* Check that encoding was the correct unique one */
/* j becomes the first table index whose limit is not below d; it must equal
the number of extra bytes actually used. */
329 for (j = 0; j < utf8_table1_size; j++)
330 if (d <= utf8_table1[j]) break;
331 if (j != i) return -(i+1);
343 /*************************************************
344 * Convert character value to UTF-8 *
345 *************************************************/
347 /* This function takes an integer value in the range 0 - 0x7fffffff
348 and encodes it as a UTF-8 character in 0 to 6 bytes.
351 cvalue the character value
352 utf8bytes pointer to buffer for result - at least 6 bytes long
354 Returns: number of characters placed in the buffer
/* Encodes cvalue as UTF-8 into utf8bytes.
NOTE(review): the declarations, the initial positioning of utf8bytes, the
shifting of cvalue between bytes and the return statement are not present in
this listing. */
360 ord2utf8(int cvalue, uschar *utf8bytes)
/* i becomes the first index whose utf8_table1 limit is not below cvalue. */
363 for (i = 0; i < utf8_table1_size; i++)
364 if (cvalue <= utf8_table1[i]) break;
/* Continuation bytes are written moving backwards through the buffer, each
carrying the low six bits of cvalue under a 10xxxxxx marker. */
366 for (j = i; j > 0; j--)
368 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
/* Leading byte: the marker bits for i extra bytes OR-ed with cvalue. */
371 *utf8bytes = utf8_table2[i] | cvalue;
379 /*************************************************
380 * Print character string *
381 *************************************************/
383 /* Character string printing function. Must handle UTF-8 strings in utf8
384 mode. Yields number of characters printed. If handed a NULL file, just counts
385 chars without printing. */
/* Writes the string at p to f, escaping characters that are not output as-is,
and accumulates a count in "yield". Every fprintf() is guarded by f != NULL,
so a NULL file produces no output.
NOTE(review): braces, declarations, the loop over the string, the tests that
choose between the as-is and escaped forms, and the return statement are not
present in this listing. */
387 static int pchars(unsigned char *p, int length, FILE *f)
/* Try to decode one UTF-8 character starting at p. */
397 int rc = utf82ord(p, &c);
399 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
405 if (f != NULL) fprintf(f, "%c", c);
/* Escaped form for a decoded character: \x{hex}. */
411 if (f != NULL) fprintf(f, "\\x{%02x}", c);
/* Add a width picked from the magnitude of n: 2 up to 0xff, 3 up to 0xfff,
4 up to 0xffff, 5 up to 0xfffff, otherwise 6. */
412 yield += (n <= 0x000000ff)? 2 :
413 (n <= 0x00000fff)? 3 :
414 (n <= 0x0000ffff)? 4 :
415 (n <= 0x000fffff)? 5 : 6;
422 /* Not UTF-8, or malformed UTF-8 */
427 if (f != NULL) fprintf(f, "%c", c);
/* Escaped form for a single byte: \xhh. */
432 if (f != NULL) fprintf(f, "\\x%02x", c);
442 /*************************************************
444 *************************************************/
446 /* Called from PCRE as a result of the (?C) item. We print out where we are in
447 the match. Yield zero unless more callouts than the fail count, or the callout
/* Callout handler: shows where the match currently is and decides the value
handed back to the matcher. The result is 0 when the callout number differs
from callout_fail_id; otherwise callout_count is incremented and the result is
1 once it has reached callout_fail_count, else 0.
NOTE(review): braces, some "if"/"else" headers and a few statements are not
present in this listing, so the conditions guarding parts of the output below
cannot be seen here. */
450 static int callout(pcre_callout_block *cb)
/* f is outfile on the first callout or when extra detail is wanted, and NULL
otherwise; the pchars() calls below receive it as their output file. */
452 FILE *f = (first_callout | callout_extra)? outfile : NULL;
453 int i, pre_start, post_start, subject_length;
457 fprintf(f, "Callout %d: last capture = %d\n",
458 cb->callout_number, cb->capture_last);
/* Walk the offset vector in (start, end) pairs, one pair per capture. */
460 for (i = 0; i < cb->capture_top * 2; i += 2)
462 if (cb->offset_vector[i] < 0)
463 fprintf(f, "%2d: <unset>\n", i/2);
466 fprintf(f, "%2d: ", i/2);
467 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
468 cb->offset_vector[i+1] - cb->offset_vector[i], f);
474 /* Re-print the subject in canonical form, the first time or if giving full
475 details. On subsequent calls in the same match, we use pchars just to find the
476 printed lengths of the substrings. */
478 if (f != NULL) fprintf(f, "--->");
/* pre_start: printed width of the subject before the match start;
post_start: printed width from the match start to the current position. */
480 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
481 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
482 cb->current_position - cb->start_match, f);
/* Printed width of the whole subject; NULL file, so nothing is output. */
484 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
/* Remainder of the subject after the current position. */
486 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
487 cb->subject_length - cb->current_position, f);
489 if (f != NULL) fprintf(f, "\n");
491 /* Always print appropriate indicators, with callout number if not already
492 shown. For automatic callouts, show the pattern offset. */
/* Callout number 255 is the case for which the pattern offset is printed. */
494 if (cb->callout_number == 255)
496 fprintf(outfile, "%+3d ", cb->pattern_position);
497 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 if (callout_extra) fprintf(outfile, " ");
502 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad with spaces up to the match start and mark it with '^'. */
505 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
506 fprintf(outfile, "^");
/* Second marker, for the current position. */
510 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
511 fprintf(outfile, "^");
/* Pad past the rest of the subject before showing the pattern item. */
514 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
515 fprintf(outfile, " ");
/* Print the next pattern item from the saved copy of the pattern; one
character is printed when the item length is given as zero. */
517 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
518 pbuffer + cb->pattern_position);
520 fprintf(outfile, "\n");
/* When callout data was supplied it is read as an int and, if non-zero,
reported. */
523 if (cb->callout_data != NULL)
525 int callout_data = *((int *)(cb->callout_data));
526 if (callout_data != 0)
528 fprintf(outfile, "Callout data = %d\n", callout_data);
533 return (cb->callout_number != callout_fail_id)? 0 :
534 (++callout_count >= callout_fail_count)? 1 : 0;
538 /*************************************************
539 * Local malloc functions *
540 *************************************************/
542 /* Alternative malloc function, to test functionality and show the size of the
/* malloc() wrapper that can report each allocation: the visible code obtains
the block and writes its size and address to outfile.
NOTE(review): braces, any condition guarding the fprintf(), any bookkeeping
and the return statement are not present in this listing. */
545 static void *new_malloc(size_t size)
547 void *block = malloc(size);
/* size is narrowed to int for the %3d conversion. */
550 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Counterpart of new_malloc(): the visible code writes the address of the
block to outfile.
NOTE(review): braces, any condition guarding the fprintf() and the actual
release of the block are not present in this listing. */
554 static void new_free(void *block)
557 fprintf(outfile, "free %p\n", block);
562 /* For recursion malloc/free, to test stacking calls */
/* malloc() wrapper installed as pcre_stack_malloc by main(): obtains the
block and writes its size and address to outfile.
NOTE(review): braces, any condition guarding the fprintf() and the return
statement are not present in this listing. */
564 static void *stack_malloc(size_t size)
566 void *block = malloc(size);
568 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Counterpart of stack_malloc(), installed as pcre_stack_free by main(): the
visible code writes the address of the block to outfile.
NOTE(review): braces, any condition guarding the fprintf() and the actual
release of the block are not present in this listing. */
572 static void stack_free(void *block)
575 fprintf(outfile, "stack_free %p\n", block);
580 /*************************************************
581 * Call pcre_fullinfo() *
582 *************************************************/
584 /* Get one piece of information from the pcre_fullinfo() function */
/* Thin wrapper round pcre_fullinfo(): fetches the item selected by option
into *ptr and, when the call yields a negative value, reports the error code
and the option number on outfile. The error is only reported; nothing is
returned to the caller.
NOTE(review): the braces and the declaration of rc are not present in this
listing. */
586 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
589 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
590 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
595 /*************************************************
596 * Byte flipping function *
597 *************************************************/
/* Reverses the byte order of a 2-byte or 4-byte quantity held in value.
  value   the quantity to flip
  n       its size in bytes
Returns the flipped quantity. Only n == 2 is tested: every other n takes the
4-byte path, and bits above the low 32 are masked away.
NOTE(review): the enclosing braces are not present in this listing. */
599 static unsigned long int
600 byteflip(unsigned long int value, int n)
/* 16-bit case: exchange the two low bytes. */
602 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
/* 32-bit case: byte 0 <-> byte 3 and byte 1 <-> byte 2. */
603 return ((value & 0x000000ff) << 24) |
604 ((value & 0x0000ff00) << 8) |
605 ((value & 0x00ff0000) >> 8) |
606 ((value & 0xff000000) >> 24);
612 /*************************************************
613 * Check match or recursion limit *
614 *************************************************/
/* Runs pcre_exec() repeatedly with different values of a limit, looking for
the value that is reported on outfile as "Minimum <msg> limit". The flag bit
is set in extra->flags for the duration of the search and cleared afterwards.
NOTE(review): the return-type line, braces, declarations, the loop header, the
statement that stores the trial value through limit, the updates of min and
max, and the return statement are not present in this listing. */
617 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
618 int start_offset, int options, int *use_offsets, int use_size_offsets,
619 int flag, unsigned long int *limit, int errnumber, const char *msg)
626 extra->flags |= flag;
632 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
633 use_offsets, use_size_offsets);
/* The limit was hit: choose a larger trial value. */
635 if (count == errnumber)
637 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
/* Step to max when directly below it; otherwise bisect when max is positive,
and double the trial value when it is not. */
639 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
/* The run finished without hitting the limit (match, no match or partial). */
642 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
643 count == PCRE_ERROR_PARTIAL)
647 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 else break; /* Some other error */
657 extra->flags &= ~flag;
663 /*************************************************
664 * Check newline indicator *
665 *************************************************/
667 /* This is used both at compile and run-time to check for <xxx> escapes, where
668 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672 p points after the leading '<'
673 f file for error message
675 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
/* Maps the text following a '<' to a PCRE_NEWLINE_xxx flag. The names are
compared exactly as written here (lower case) and must be followed directly by
'>'. An unrecognised name causes a message to be written to f.
NOTE(review): the return-type line, the braces and the return statement that
follows the message are not present in this listing. */
679 check_newline(uschar *p, FILE *f)
/* "cr>" cannot match "crlf>" because the third character must be '>'. */
681 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
682 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
683 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
684 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
685 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
/* The '<' is re-added in the message because p points just after it. */
686 fprintf(f, "Unknown newline type at: <%s\n", p);
692 /*************************************************
694 *************************************************/
/* Writes the command-line synopsis and the list of options to stdout.
NOTE(review): the function header, the braces and any preprocessor
conditionals around individual lines (gaps in the numbering precede the -dfa
and -p lines) are not present in this listing. */
699 printf("Usage: pcretest [options] [<input> [<output>]]\n");
700 printf(" -b show compiled code (bytecode)\n");
701 printf(" -C show PCRE compile-time options and exit\n");
702 printf(" -d debug: show compiled code and information (-b and -i)\n");
704 printf(" -dfa force DFA matching for all subjects\n");
706 printf(" -help show usage information\n");
/* Adjacent string literals are joined, so this call prints three lines. */
707 printf(" -i show information about compiled patterns\n"
708 " -m output memory used information\n"
709 " -o <n> set size of offsets vector to <n>\n");
711 printf(" -p use POSIX interface\n");
713 printf(" -q quiet: do not output PCRE version number at start\n");
714 printf(" -S <n> set stack size to <n> megabytes\n");
715 printf(" -s output store (memory) used information\n"
716 " -t time compilation and execution\n");
717 printf(" -t <n> time compilation and execution, repeating <n> times\n");
718 printf(" -tm time execution (matching) only\n");
719 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
724 /*************************************************
726 *************************************************/
728 /* Read lines from named file or stdin and write to named file or stdout; lines
729 consist of a regular expression, in delimiters and optionally followed by
730 options, followed by a set of test data, terminated by an empty line. */
732 int main(int argc, char **argv)
734 FILE *infile = stdin;
736 int study_options = 0;
743 int size_offsets = 45;
744 int size_offsets_max;
755 /* These vectors store, end-to-end, a list of captured substring names. Assume
756 that 1024 is plenty long enough for the few names we'll be testing. */
758 uschar copynames[1024];
759 uschar getnames[1024];
761 uschar *copynamesptr;
764 /* Get buffers from malloc() so that Electric Fence will check their misuse
765 when I am debugging. They grow automatically when very long lines are read. */
767 buffer = (unsigned char *)malloc(buffer_size);
768 dbuffer = (unsigned char *)malloc(buffer_size);
769 pbuffer = (unsigned char *)malloc(buffer_size);
771 /* The outfile variable is static so that new_malloc can use it. */
775 /* The following _setmode() stuff is some Windows magic that tells its runtime
776 library to translate CRLF into a single LF character. At least, that's what
777 I've been told: never having used Windows I take this all on trust. Originally
778 it set 0x8000, but then I was advised that _O_BINARY was better. */
780 #if defined(_WIN32) || defined(WIN32)
781 _setmode( _fileno( stdout ), _O_BINARY );
786 while (argc > 1 && argv[op][0] == '-')
788 unsigned char *endptr;
790 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
792 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
793 else if (strcmp(argv[op], "-b") == 0) debug = 1;
794 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
795 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
797 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
799 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
800 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
806 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
808 int both = argv[op][2] == 0;
810 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
817 else timeitm = LOOPREPEAT;
818 if (both) timeit = timeitm;
820 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
821 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824 #if defined(_WIN32) || defined(WIN32)
825 printf("PCRE: -S not supported on this OS\n");
830 getrlimit(RLIMIT_STACK, &rlim);
831 rlim.rlim_cur = stack_size * 1024 * 1024;
832 rc = setrlimit(RLIMIT_STACK, &rlim);
835 printf("PCRE: setrlimit() failed with error %d\n", rc);
843 else if (strcmp(argv[op], "-p") == 0) posix = 1;
845 else if (strcmp(argv[op], "-C") == 0)
848 printf("PCRE version %s\n", pcre_version());
849 printf("Compiled with\n");
850 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
851 printf(" %sUTF-8 support\n", rc? "" : "No ");
852 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
853 printf(" %sUnicode properties support\n", rc? "" : "No ");
854 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
855 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
856 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
857 (rc == -2)? "ANYCRLF" :
858 (rc == -1)? "ANY" : "???");
859 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
860 printf(" Internal link size = %d\n", rc);
861 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
862 printf(" POSIX malloc threshold = %d\n", rc);
863 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
864 printf(" Default match limit = %d\n", rc);
865 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
866 printf(" Default recursion depth limit = %d\n", rc);
867 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
868 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
871 else if (strcmp(argv[op], "-help") == 0 ||
872 strcmp(argv[op], "--help") == 0)
879 printf("** Unknown or malformed option %s\n", argv[op]);
888 /* Get the store for the offsets vector, and remember what it was */
890 size_offsets_max = size_offsets;
891 offsets = (int *)malloc(size_offsets_max * sizeof(int));
894 printf("** Failed to get %d bytes of memory for offsets vector\n",
895 (int)(size_offsets_max * sizeof(int)));
900 /* Sort out the input and output files */
904 infile = fopen(argv[op], INPUT_MODE);
907 printf("** Failed to open %s\n", argv[op]);
915 outfile = fopen(argv[op+1], OUTPUT_MODE);
918 printf("** Failed to open %s\n", argv[op+1]);
924 /* Set alternative malloc function */
926 pcre_malloc = new_malloc;
927 pcre_free = new_free;
928 pcre_stack_malloc = stack_malloc;
929 pcre_stack_free = stack_free;
931 /* Heading line unless quiet, then prompt for first regex if stdin */
933 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
940 pcre_extra *extra = NULL;
942 #if !defined NOPOSIX /* There are still compilers that require no indent */
948 unsigned char *p, *pp, *ppp;
949 unsigned char *to_file = NULL;
950 const unsigned char *tables = NULL;
951 unsigned long int true_size, true_study_size = 0;
952 size_t size, regex_gotten_store;
954 int do_debug = debug;
955 int debug_lengths = 1;
958 int do_showinfo = showinfo;
961 int erroroffset, len, delimiter, poffset;
965 if (infile == stdin) printf(" re> ");
966 if (extend_inputline(infile, buffer) == NULL) break;
967 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
971 while (isspace(*p)) p++;
972 if (*p == 0) continue;
974 /* See if the pattern is to be loaded pre-compiled from a file. */
976 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
978 unsigned long int magic, get_options;
983 pp = p + (int)strlen((char *)p);
984 while (isspace(pp[-1])) pp--;
987 f = fopen((char *)p, "rb");
990 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
994 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
999 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1001 re = (real_pcre *)new_malloc(true_size);
1002 regex_gotten_store = gotten_store;
1004 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1006 magic = ((real_pcre *)re)->magic_number;
1007 if (magic != MAGIC_NUMBER)
1009 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1015 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1021 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1022 do_flip? " (byte-inverted)" : "", p);
1024 /* Need to know if UTF-8 for printing data strings */
1026 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1027 use_utf8 = (get_options & PCRE_UTF8) != 0;
1029 /* Now see if there is any following study data */
1031 if (true_study_size != 0)
1033 pcre_study_data *psd;
1035 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1036 extra->flags = PCRE_EXTRA_STUDY_DATA;
1038 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1039 extra->study_data = psd;
1041 if (fread(psd, 1, true_study_size, f) != true_study_size)
1044 fprintf(outfile, "Failed to read data from %s\n", p);
1045 if (extra != NULL) new_free(extra);
1046 if (re != NULL) new_free(re);
1050 fprintf(outfile, "Study data loaded from %s\n", p);
1051 do_study = 1; /* To get the data output if requested */
1053 else fprintf(outfile, "No study data\n");
1059 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1060 the pattern; if it isn't complete, read more. */
1064 if (isalnum(delimiter) || delimiter == '\\')
1066 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1071 poffset = p - buffer;
1077 if (*pp == '\\' && pp[1] != 0) pp++;
1078 else if (*pp == delimiter) break;
1081 if (*pp != 0) break;
1082 if (infile == stdin) printf(" > ");
1083 if ((pp = extend_inputline(infile, pp)) == NULL)
1085 fprintf(outfile, "** Unexpected EOF\n");
1089 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092 /* The buffer may have moved while being extended; reset the start of data
1093 pointer to the correct relative point in the buffer. */
1095 p = buffer + poffset;
1097 /* If the first character after the delimiter is backslash, make
1098 the pattern end with backslash. This is purely to provide a way
1099 of testing for the error message when a pattern ends with backslash. */
1101 if (pp[1] == '\\') *pp++ = '\\';
1103 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1107 strcpy((char *)pbuffer, (char *)p);
1109 /* Look for options after final delimiter */
1113 log_store = showstore; /* default from command line */
1119 case 'f': options |= PCRE_FIRSTLINE; break;
1120 case 'g': do_g = 1; break;
1121 case 'i': options |= PCRE_CASELESS; break;
1122 case 'm': options |= PCRE_MULTILINE; break;
1123 case 's': options |= PCRE_DOTALL; break;
1124 case 'x': options |= PCRE_EXTENDED; break;
1126 case '+': do_showrest = 1; break;
1127 case 'A': options |= PCRE_ANCHORED; break;
1128 case 'B': do_debug = 1; break;
1129 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1130 case 'D': do_debug = do_showinfo = 1; break;
1131 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1132 case 'F': do_flip = 1; break;
1133 case 'G': do_G = 1; break;
1134 case 'I': do_showinfo = 1; break;
1135 case 'J': options |= PCRE_DUPNAMES; break;
1136 case 'M': log_store = 1; break;
1137 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1139 #if !defined NOPOSIX
1140 case 'P': do_posix = 1; break;
1143 case 'S': do_study = 1; break;
1144 case 'U': options |= PCRE_UNGREEDY; break;
1145 case 'X': options |= PCRE_EXTRA; break;
1146 case 'Z': debug_lengths = 0; break;
1147 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1148 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152 /* The '\r' test here is so that it works on Windows. */
1153 /* The '0' test is just in case this is an unterminated line. */
1154 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1156 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1158 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1162 tables = pcre_maketables();
1168 while (*pp != 0) pp++;
1169 while (isspace(pp[-1])) pp--;
1175 int x = check_newline(pp, outfile);
1176 if (x == 0) goto SKIP_DATA;
1178 while (*pp++ != '>');
1182 case '\r': /* So that it works in Windows */
1188 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1193 /* Handle compiling via the POSIX interface, which doesn't support the
1194 timing, showing, or debugging options, nor the ability to pass over
1195 local character tables. */
1197 #if !defined NOPOSIX
1198 if (posix || do_posix)
1203 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1204 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1205 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1206 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1207 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1209 rc = regcomp(&preg, (char *)p, cflags);
1211 /* Compilation failed; go back for another re, skipping to blank line
1212 if non-interactive. */
1216 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1217 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1222 /* Handle compiling via the native interface */
1225 #endif /* !defined NOPOSIX */
1232 clock_t start_time = clock();
1233 for (i = 0; i < timeit; i++)
1235 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1236 if (re != NULL) free(re);
1238 time_taken = clock() - start_time;
1239 fprintf(outfile, "Compile time %.4f milliseconds\n",
1240 (((double)time_taken * 1000.0) / (double)timeit) /
1241 (double)CLOCKS_PER_SEC);
1244 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1246 /* Compilation failed; go back for another re, skipping to blank line
1247 if non-interactive. */
1251 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1253 if (infile != stdin)
1257 if (extend_inputline(infile, buffer) == NULL)
1262 len = (int)strlen((char *)buffer);
1263 while (len > 0 && isspace(buffer[len-1])) len--;
1264 if (len == 0) break;
1266 fprintf(outfile, "\n");
1271 /* Compilation succeeded; print data if required. There are now two
1272 info-returning functions. The old one has a limited interface and
1273 returns only limited data. Check that it agrees with the newer one. */
1276 fprintf(outfile, "Memory allocation (code space): %d\n",
1277 (int)(gotten_store -
1279 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1281 /* Extract the size for possible writing before possibly flipping it,
1282 and remember the store that was got. */
1284 true_size = ((real_pcre *)re)->size;
1285 regex_gotten_store = gotten_store;
1287 /* If /S was present, study the regexp to generate additional info to
1288 help with the matching. */
1296 clock_t start_time = clock();
1297 for (i = 0; i < timeit; i++)
1298 extra = pcre_study(re, study_options, &error);
1299 time_taken = clock() - start_time;
1300 if (extra != NULL) free(extra);
1301 fprintf(outfile, " Study time %.4f milliseconds\n",
1302 (((double)time_taken * 1000.0) / (double)timeit) /
1303 (double)CLOCKS_PER_SEC);
1305 extra = pcre_study(re, study_options, &error);
1307 fprintf(outfile, "Failed to study: %s\n", error);
1308 else if (extra != NULL)
1309 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312 /* If the 'F' option was present, we flip the bytes of all the integer
1313 fields in the regex data block and the study block. This is to make it
1314 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1315 compiled on a different architecture. */
1319 real_pcre *rre = (real_pcre *)re;
1320 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1321 rre->size = byteflip(rre->size, sizeof(rre->size));
1322 rre->options = byteflip(rre->options, sizeof(rre->options));
1323 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1324 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1325 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1326 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1327 rre->name_table_offset = byteflip(rre->name_table_offset,
1328 sizeof(rre->name_table_offset));
1329 rre->name_entry_size = byteflip(rre->name_entry_size,
1330 sizeof(rre->name_entry_size));
1331 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1335 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1336 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1337 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1341 /* Extract information from the compiled data if required */
1347 fprintf(outfile, "------------------------------------------------------------------\n");
1348 pcre_printint(re, outfile, debug_lengths);
1353 unsigned long int get_options, all_options;
1354 #if !defined NOINFOCHECK
1355 int old_first_char, old_options, old_count;
1357 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1358 int nameentrysize, namecount;
1359 const uschar *nametable;
1361 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1362 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1363 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1364 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1365 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1366 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1367 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1368 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1369 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1370 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1371 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1373 #if !defined NOINFOCHECK
1374 old_count = pcre_info(re, &old_options, &old_first_char);
1375 if (count < 0) fprintf(outfile,
1376 "Error %d from pcre_info()\n", count);
1379 if (old_count != count) fprintf(outfile,
1380 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1383 if (old_first_char != first_char) fprintf(outfile,
1384 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1385 first_char, old_first_char);
1387 if (old_options != (int)get_options) fprintf(outfile,
1388 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1389 get_options, old_options);
1393 if (size != regex_gotten_store) fprintf(outfile,
1394 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1395 (int)size, (int)regex_gotten_store);
1397 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1399 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1403 fprintf(outfile, "Named capturing subpatterns:\n");
1404 while (namecount-- > 0)
1406 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1407 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1408 GET2(nametable, 0));
1409 nametable += nameentrysize;
1413 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1415 all_options = ((real_pcre *)re)->options;
1416 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1418 if (get_options == 0) fprintf(outfile, "No options\n");
1419 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1423 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1424 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1425 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1426 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1427 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1428 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1434 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1436 switch (get_options & PCRE_NEWLINE_BITS)
1438 case PCRE_NEWLINE_CR:
1439 fprintf(outfile, "Forced newline sequence: CR\n");
1442 case PCRE_NEWLINE_LF:
1443 fprintf(outfile, "Forced newline sequence: LF\n");
1446 case PCRE_NEWLINE_CRLF:
1447 fprintf(outfile, "Forced newline sequence: CRLF\n");
1450 case PCRE_NEWLINE_ANYCRLF:
1451 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454 case PCRE_NEWLINE_ANY:
1455 fprintf(outfile, "Forced newline sequence: ANY\n");
1462 if (first_char == -1)
1464 fprintf(outfile, "First char at start or follows newline\n");
1466 else if (first_char < 0)
1468 fprintf(outfile, "No first char\n");
1472 int ch = first_char & 255;
1473 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1478 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1483 fprintf(outfile, "No need char\n");
1487 int ch = need_char & 255;
1488 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1493 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496 /* Don't output study size; at present it is in any case a fixed
1497 value, but it varies, depending on the computer architecture, and
1498 so messes up the test suite. (And with the /F option, it might be flipped.) */
1504 fprintf(outfile, "Study returned NULL\n");
1507 uschar *start_bits = NULL;
1508 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1510 if (start_bits == NULL)
1511 fprintf(outfile, "No starting byte set\n");
1516 fprintf(outfile, "Starting byte set: ");
1517 for (i = 0; i < 256; i++)
1519 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1523 fprintf(outfile, "\n ");
1526 if (PRINTHEX(i) && i != ' ')
1528 fprintf(outfile, "%c ", i);
1533 fprintf(outfile, "\\x%02x ", i);
1538 fprintf(outfile, "\n");
1544 /* If the '>' option was present, we write out the regex to a file, and
1545 that is all. The first 8 bytes of the file are the regex length and then
1546 the study length, in big-endian order. */
1548 if (to_file != NULL)
1550 FILE *f = fopen((char *)to_file, "wb");
1553 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1558 sbuf[0] = (true_size >> 24) & 255;
1559 sbuf[1] = (true_size >> 16) & 255;
1560 sbuf[2] = (true_size >> 8) & 255;
1561 sbuf[3] = (true_size) & 255;
1563 sbuf[4] = (true_study_size >> 24) & 255;
1564 sbuf[5] = (true_study_size >> 16) & 255;
1565 sbuf[6] = (true_study_size >> 8) & 255;
1566 sbuf[7] = (true_study_size) & 255;
1568 if (fwrite(sbuf, 1, 8, f) < 8 ||
1569 fwrite(re, 1, true_size, f) < true_size)
1571 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1575 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578 if (fwrite(extra->study_data, 1, true_study_size, f) <
1581 fprintf(outfile, "Write error on %s: %s\n", to_file,
1584 else fprintf(outfile, "Study data written to %s\n", to_file);
1592 if (extra != NULL) new_free(extra);
1593 if (tables != NULL) new_free((void *)tables);
1594 continue; /* With next regex */
1596 } /* End of non-POSIX compile */
1598 /* Read data lines and test them */
1604 int *use_offsets = offsets;
1605 int use_size_offsets = size_offsets;
1606 int callout_data = 0;
1607 int callout_data_set = 0;
1609 int copystrings = 0;
1610 int find_match_limit = 0;
1614 int start_offset = 0;
1623 copynamesptr = copynames;
1624 getnamesptr = getnames;
1626 pcre_callout = callout;
1630 callout_fail_count = 999999;
1631 callout_fail_id = -1;
1634 if (extra != NULL) extra->flags &=
1635 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1640 if (infile == stdin) printf("data> ");
1641 if (extend_inputline(infile, buffer + len) == NULL)
1647 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1648 len = (int)strlen((char *)buffer);
1649 if (buffer[len-1] == '\n') break;
1652 while (len > 0 && isspace(buffer[len-1])) len--;
1654 if (len == 0) break;
1657 while (isspace(*p)) p++;
1660 while ((c = *p++) != 0)
1665 if (c == '\\') switch ((c = *p++))
1667 case 'a': c = 7; break;
1668 case 'b': c = '\b'; break;
1669 case 'e': c = 27; break;
1670 case 'f': c = '\f'; break;
1671 case 'n': c = '\n'; break;
1672 case 'r': c = '\r'; break;
1673 case 't': c = '\t'; break;
1674 case 'v': c = '\v'; break;
1676 case '0': case '1': case '2': case '3':
1677 case '4': case '5': case '6': case '7':
1679 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1680 c = c * 8 + *p++ - '0';
1683 if (use_utf8 && c > 255)
1685 unsigned char buff8[8];
1687 utn = ord2utf8(c, buff8);
1688 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1689 c = buff8[ii]; /* Last byte */
1696 /* Handle \x{..} specially - new Perl thing for utf8 */
1701 unsigned char *pt = p;
1703 while (isxdigit(*(++pt)))
1704 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1707 unsigned char buff8[8];
1709 utn = ord2utf8(c, buff8);
1710 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1711 c = buff8[ii]; /* Last byte */
1715 /* Not correct form; fall through */
1722 while (i++ < 2 && isxdigit(*p))
1724 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1729 case 0: /* \ followed by EOF allows for an empty line */
1734 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737 case 'A': /* Option setting */
1738 options |= PCRE_ANCHORED;
1742 options |= PCRE_NOTBOL;
1746 if (isdigit(*p)) /* Set copy string */
1748 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1749 copystrings |= 1 << n;
1751 else if (isalnum(*p))
1753 uschar *npp = copynamesptr;
1754 while (isalnum(*p)) *npp++ = *p++;
1757 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1759 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1769 pcre_callout = NULL;
1774 callout_fail_id = 0;
1777 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1778 callout_fail_count = 0;
1783 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1790 if (*(++p) == '-') { sign = -1; p++; }
1792 callout_data = callout_data * 10 + *p++ - '0';
1793 callout_data *= sign;
1794 callout_data_set = 1;
1800 #if !defined NOPOSIX
1801 if (posix || do_posix)
1802 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1809 options |= PCRE_DFA_SHORTEST;
1816 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1817 getstrings |= 1 << n;
1819 else if (isalnum(*p))
1821 uschar *npp = getnamesptr;
1822 while (isalnum(*p)) *npp++ = *p++;
1825 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1827 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1837 find_match_limit = 1;
1841 options |= PCRE_NOTEMPTY;
1845 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1846 if (n > size_offsets_max)
1848 size_offsets_max = n;
1850 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1851 if (offsets == NULL)
1853 printf("** Failed to get %d bytes of memory for offsets vector\n",
1854 (int)(size_offsets_max * sizeof(int)));
1859 use_size_offsets = n;
1860 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1864 options |= PCRE_PARTIAL;
1868 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1875 extra->match_limit_recursion = n;
1879 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1886 extra->match_limit = n;
1891 options |= PCRE_DFA_RESTART;
1900 options |= PCRE_NOTEOL;
1904 options |= PCRE_NO_UTF8_CHECK;
1909 int x = check_newline(p, outfile);
1910 if (x == 0) goto NEXT_DATA;
1912 while (*p++ != '>');
1921 if ((all_use_dfa || use_dfa) && find_match_limit)
1923 printf("**Match limit not relevant for DFA matching: ignored\n");
1924 find_match_limit = 0;
1927 /* Handle matching via the POSIX interface, which does not
1928 support timing or playing with the match limit or callout data. */
1930 #if !defined NOPOSIX
1931 if (posix || do_posix)
1935 regmatch_t *pmatch = NULL;
1936 if (use_size_offsets > 0)
1937 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1938 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1939 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1941 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1945 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1946 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1948 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951 fprintf(outfile, "Matched with REG_NOSUB\n");
1956 for (i = 0; i < (size_t)use_size_offsets; i++)
1958 if (pmatch[i].rm_so >= 0)
1960 fprintf(outfile, "%2d: ", (int)i);
1961 (void)pchars(dbuffer + pmatch[i].rm_so,
1962 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1963 fprintf(outfile, "\n");
1964 if (i == 0 && do_showrest)
1966 fprintf(outfile, " 0+ ");
1967 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1969 fprintf(outfile, "\n");
1977 /* Handle matching via the native interface - repeats for /g and /G */
1980 #endif /* !defined NOPOSIX */
1982 for (;; gmatched++) /* Loop for /g or /G */
1988 clock_t start_time = clock();
1991 if (all_use_dfa || use_dfa)
1993 int workspace[1000];
1994 for (i = 0; i < timeitm; i++)
1995 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1996 options | g_notempty, use_offsets, use_size_offsets, workspace,
1997 sizeof(workspace)/sizeof(int));
2002 for (i = 0; i < timeitm; i++)
2003 count = pcre_exec(re, extra, (char *)bptr, len,
2004 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2006 time_taken = clock() - start_time;
2007 fprintf(outfile, "Execute time %.4f milliseconds\n",
2008 (((double)time_taken * 1000.0) / (double)timeitm) /
2009 (double)CLOCKS_PER_SEC);
2012 /* If find_match_limit is set, we want to do repeated matches with
2013 varying limits in order to find the minimum value for the match limit and
2014 for the recursion limit. */
2016 if (find_match_limit)
2020 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024 (void)check_match_limit(re, extra, bptr, len, start_offset,
2025 options|g_notempty, use_offsets, use_size_offsets,
2026 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2027 PCRE_ERROR_MATCHLIMIT, "match()");
2029 count = check_match_limit(re, extra, bptr, len, start_offset,
2030 options|g_notempty, use_offsets, use_size_offsets,
2031 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2032 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2035 /* If callout_data is set, use the interface with additional data */
2037 else if (callout_data_set)
2041 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2044 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2045 extra->callout_data = &callout_data;
2046 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2047 options | g_notempty, use_offsets, use_size_offsets);
2048 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2051 /* The normal case is just to do the match once, with the default
2052 value of match_limit. */
2055 else if (all_use_dfa || use_dfa)
2057 int workspace[1000];
2058 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2059 options | g_notempty, use_offsets, use_size_offsets, workspace,
2060 sizeof(workspace)/sizeof(int));
2063 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2064 count = use_size_offsets/2;
2071 count = pcre_exec(re, extra, (char *)bptr, len,
2072 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075 fprintf(outfile, "Matched, but too many substrings\n");
2076 count = use_size_offsets/3;
2087 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2089 maxcount = use_size_offsets/3;
2091 /* This is a check against a lunatic return value. */
2093 if (count > maxcount)
2096 "** PCRE error: returned count %d is too big for offset size %d\n",
2097 count, use_size_offsets);
2098 count = use_size_offsets/3;
2101 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2102 do_g = do_G = FALSE; /* Break g/G loop */
2106 for (i = 0; i < count * 2; i += 2)
2108 if (use_offsets[i] < 0)
2109 fprintf(outfile, "%2d: <unset>\n", i/2);
2112 fprintf(outfile, "%2d: ", i/2);
2113 (void)pchars(bptr + use_offsets[i],
2114 use_offsets[i+1] - use_offsets[i], outfile);
2115 fprintf(outfile, "\n");
2120 fprintf(outfile, " 0+ ");
2121 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2123 fprintf(outfile, "\n");
2129 for (i = 0; i < 32; i++)
2131 if ((copystrings & (1 << i)) != 0)
2133 char copybuffer[256];
2134 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2135 i, copybuffer, sizeof(copybuffer));
2137 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2139 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2143 for (copynamesptr = copynames;
2145 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2147 char copybuffer[256];
2148 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2149 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2151 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2153 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156 for (i = 0; i < 32; i++)
2158 if ((getstrings & (1 << i)) != 0)
2160 const char *substring;
2161 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2164 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2167 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2168 pcre_free_substring(substring);
2173 for (getnamesptr = getnames;
2175 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2177 const char *substring;
2178 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2179 count, (char *)getnamesptr, &substring);
2181 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2185 pcre_free_substring(substring);
2191 const char **stringlist;
2192 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2195 fprintf(outfile, "get substring list failed %d\n", rc);
2198 for (i = 0; i < count; i++)
2199 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2200 if (stringlist[i] != NULL)
2201 fprintf(outfile, "string list not terminated by NULL\n");
2202 /* free((void *)stringlist); */
2203 pcre_free_substring_list(stringlist);
2208 /* There was a partial match */
2210 else if (count == PCRE_ERROR_PARTIAL)
2212 fprintf(outfile, "Partial match");
2214 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2215 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2216 bptr + use_offsets[0]);
2218 fprintf(outfile, "\n");
2219 break; /* Out of the /g loop */
2222 /* Failed to match. If this is a /g or /G loop and we previously set
2223 g_notempty after a null match, this is not necessarily the end. We want
2224 to advance the start offset, and continue. We won't be at the end of the
2225 string - that was checked before setting g_notempty.
2227 Complication arises in the case when the newline option is "any" or
2228 "anycrlf". If the previous match was at the end of a line terminated by
2229 CRLF, an advance of one character just passes the \r, whereas we should
2230 prefer the longer newline sequence, as does the code in pcre_exec().
2231 Fudge the offset value to achieve this.
2233 Otherwise, in the case of UTF-8 matching, the advance must be one
2234 character, not one byte. */
2238 if (g_notempty != 0)
2241 unsigned int obits = ((real_pcre *)re)->options;
2242 use_offsets[0] = start_offset;
2243 if ((obits & PCRE_NEWLINE_BITS) == 0)
2246 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2247 obits = (d == '\r')? PCRE_NEWLINE_CR :
2248 (d == '\n')? PCRE_NEWLINE_LF :
2249 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2250 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2251 (d == -1)? PCRE_NEWLINE_ANY : 0;
2253 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2254 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2256 start_offset < len - 1 &&
2257 bptr[start_offset] == '\r' &&
2258 bptr[start_offset+1] == '\n')
2262 while (start_offset + onechar < len)
2264 int tb = bptr[start_offset+onechar];
2265 if (tb <= 127) break;
2267 if (tb != 0 && tb != 0xc0) onechar++;
2270 use_offsets[1] = start_offset + onechar;
2274 if (count == PCRE_ERROR_NOMATCH)
2276 if (gmatched == 0) fprintf(outfile, "No match\n");
2278 else fprintf(outfile, "Error %d\n", count);
2279 break; /* Out of the /g loop */
2283 /* If not /g or /G we are done */
2285 if (!do_g && !do_G) break;
2287 /* If we have matched an empty string, first check to see if we are at
2288 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2289 what Perl's /g option does. This turns out to be rather cunning. First
2290 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2291 same point. If this fails (picked up above) we advance to the next character. */
2296 if (use_offsets[0] == use_offsets[1])
2298 if (use_offsets[0] == len) break;
2299 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2302 /* For /g, update the start offset, leaving the rest alone */
2304 if (do_g) start_offset = use_offsets[1];
2306 /* For /G, update the pointer and length */
2310 bptr += use_offsets[1];
2311 len -= use_offsets[1];
2313 } /* End of loop for /g and /G */
2315 NEXT_DATA: continue;
2316 } /* End of loop for data lines */
2320 #if !defined NOPOSIX
2321 if (posix || do_posix) regfree(&preg);
2324 if (re != NULL) new_free(re);
2325 if (extra != NULL) new_free(extra);
2328 new_free((void *)tables);
2329 setlocale(LC_CTYPE, "C");
2334 if (infile == stdin) fprintf(outfile, "\n");
2338 if (infile != NULL && infile != stdin) fclose(infile);
2339 if (outfile != NULL && outfile != stdout) fclose(outfile);
2349 /* End of pcretest.c */