src/src/pcre/pcre_exec.c

   1 /* $Cambridge: exim/src/src/pcre/pcre_exec.c,v 1.5 2007/06/26 11:16:54 ph10 Exp $ */
   2
   3 /*************************************************
   4 *      Perl-Compatible Regular Expressions       *
   5 *************************************************/
   6
   7 /* PCRE is a library of functions to support regular expressions whose syntax
   8 and semantics are as close as possible to those of the Perl 5 language.
   9
  10                        Written by Philip Hazel
  11            Copyright (c) 1997-2007 University of Cambridge
  12
  13 -----------------------------------------------------------------------------
  14 Redistribution and use in source and binary forms, with or without
  15 modification, are permitted provided that the following conditions are met:
  16
  17     * Redistributions of source code must retain the above copyright notice,
  18       this list of conditions and the following disclaimer.
  19
  20     * Redistributions in binary form must reproduce the above copyright
  21       notice, this list of conditions and the following disclaimer in the
  22       documentation and/or other materials provided with the distribution.
  23
  24     * Neither the name of the University of Cambridge nor the names of its
  25       contributors may be used to endorse or promote products derived from
  26       this software without specific prior written permission.
  27
  28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 POSSIBILITY OF SUCH DAMAGE.
  39 -----------------------------------------------------------------------------
  40 */
  41
  42
  43 /* This module contains pcre_exec(), the externally visible function that does
  44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  45 possible. There are also some static supporting functions. */
  46
  47 #define NLBLOCK md             /* Block containing newline information */
  48 #define PSSTART start_subject  /* Field containing processed string start */
  49 #define PSEND   end_subject    /* Field containing processed string end */
  50
  51 #include "pcre_internal.h"
  52
  53 /* Undefine some potentially clashing cpp symbols */
  54
  55 #undef min
  56 #undef max
  57
  58 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
  59 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
  60
  61 #define EPTR_WORK_SIZE (1000)
  62
  63 /* Flag bits for the match() function */
  64
  65 #define match_condassert     0x01  /* Called to check a condition assertion */
  66 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
  67 #define match_tail_recursed  0x04  /* Tail recursive call */
  68
  69 /* Non-error returns from the match() function. Error returns are externally
  70 defined PCRE_ERROR_xxx codes, which are all negative. */
  71
  72 #define MATCH_MATCH        1
  73 #define MATCH_NOMATCH      0
  74
  75 /* Maximum number of ints of offset to save on the stack for recursive calls.
  76 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  77 because the offset vector is always a multiple of 3 long. */
  78
  79 #define REC_STACK_SAVE_MAX 30
  80
  81 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  82
  83 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
  84 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
  85
  86
  87
  88 #ifdef DEBUG
  89 /*************************************************
  90 *        Debugging function to print chars       *
  91 *************************************************/
  92
  93 /* Print a sequence of chars in printable format, stopping at the end of the
  94 subject if requested.
  95
  96 Arguments:
  97   p           points to characters
  98   length      number to print
  99   is_subject  TRUE if printing from within md->start_subject
 100   md          pointer to matching data block, if is_subject is TRUE
 101
 102 Returns:     nothing
 103 */
 104
 105 static void
 106 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 107 {
 108 unsigned int c;
 109 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 110 while (length-- > 0)
 111   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
 112 }
 113 #endif
 114
 115
 116
 117 /*************************************************
 118 *          Match a back-reference                *
 119 *************************************************/
 120
 121 /* If a back reference hasn't been set, the length that is passed is greater
 122 than the number of characters left in the string, so the match fails.
 123
 124 Arguments:
 125   offset      index into the offset vector
 126   eptr        points into the subject
 127   length      length to be matched
 128   md          points to match data block
 129   ims         the ims flags
 130
 131 Returns:      TRUE if matched
 132 */
 133
 134 static BOOL
 135 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 136   unsigned long int ims)
 137 {
 138 USPTR p = md->start_subject + md->offset_vector[offset];
 139
 140 #ifdef DEBUG
 141 if (eptr >= md->end_subject)
 142   printf("matching subject <null>");
 143 else
 144   {
 145   printf("matching subject ");
 146   pchars(eptr, length, TRUE, md);
 147   }
 148 printf(" against backref ");
 149 pchars(p, length, FALSE, md);
 150 printf("\n");
 151 #endif
 152
 153 /* Always fail if not enough characters left */
 154
 155 if (length > md->end_subject - eptr) return FALSE;
 156
 157 /* Separate the caselesss case for speed */
 158
 159 if ((ims & PCRE_CASELESS) != 0)
 160   {
 161   while (length-- > 0)
 162     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
 163   }
 164 else
 165   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
 166
 167 return TRUE;
 168 }
 169
 170
 171
 172 /***************************************************************************
 173 ****************************************************************************
 174                    RECURSION IN THE match() FUNCTION
 175
 176 The match() function is highly recursive, though not every recursive call
 177 increases the recursive depth. Nevertheless, some regular expressions can cause
 178 it to recurse to a great depth. I was writing for Unix, so I just let it call
 179 itself recursively. This uses the stack for saving everything that has to be
 180 saved for a recursive call. On Unix, the stack can be large, and this works
 181 fine.
 182
 183 It turns out that on some non-Unix-like systems there are problems with
 184 programs that use a lot of stack. (This despite the fact that every last chip
 185 has oodles of memory these days, and techniques for extending the stack have
 186 been known for decades.) So....
 187
 188 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 189 calls by keeping local variables that need to be preserved in blocks of memory
 190 obtained from malloc() instead of on the stack. Macros are used to
 191 achieve this so that the actual code doesn't look very different to what it
 192 always used to.
 193
 194 The original heap-recursive code used longjmp(). However, it seems that this
 195 can be very slow on some operating systems. Following a suggestion from Stan
 196 Switzer, the use of longjmp() has been abolished, at the cost of having to
 197 provide a unique number for each call to RMATCH. There is no way of generating
 198 a sequence of numbers at compile time in C. I have given them names, to make
 199 them stand out more clearly.
 200
 201 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 202 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 203 tests. Furthermore, not using longjmp() means that local dynamic variables
 204 don't have indeterminate values; this has meant that the frame size can be
 205 reduced because the result can be "passed back" by straight setting of the
 206 variable instead of being passed in the frame.
 207 ****************************************************************************
 208 ***************************************************************************/
 209
 210
 211 /* Numbers for RMATCH calls */
 212
 213 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 214        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 215        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 216        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 217        RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
 218
 219
 220 /* These versions of the macros use the stack, as normal. There are debugging
 221 versions and production versions. Note that the "rw" argument of RMATCH isn't
 222 actually used in this definition. */
 223
 224 #ifndef NO_RECURSE
 225 #define REGISTER register
 226
 227 #ifdef DEBUG
 228 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 229   { \
 230   printf("match() called in line %d\n", __LINE__); \
 231   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
 232   printf("to line %d\n", __LINE__); \
 233   }
 234 #define RRETURN(ra) \
 235   { \
 236   printf("match() returned %d from line %d ", ra, __LINE__); \
 237   return ra; \
 238   }
 239 #else
 240 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
 241   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
 242 #define RRETURN(ra) return ra
 243 #endif
 244
 245 #else
 246
 247
 248 /* These versions of the macros manage a private stack on the heap. Note that
 249 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 250 argument of match(), which never changes. */
 251
 252 #define REGISTER
 253
 254 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
 255   {\
 256   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 257   frame->Xwhere = rw; \
 258   newframe->Xeptr = ra;\
 259   newframe->Xecode = rb;\
 260   newframe->Xmstart = mstart;\
 261   newframe->Xoffset_top = rc;\
 262   newframe->Xims = re;\
 263   newframe->Xeptrb = rf;\
 264   newframe->Xflags = rg;\
 265   newframe->Xrdepth = frame->Xrdepth + 1;\
 266   newframe->Xprevframe = frame;\
 267   frame = newframe;\
 268   DPRINTF(("restarting from line %d\n", __LINE__));\
 269   goto HEAP_RECURSE;\
 270   L_##rw:\
 271   DPRINTF(("jumped back to line %d\n", __LINE__));\
 272   }
 273
 274 #define RRETURN(ra)\
 275   {\
 276   heapframe *newframe = frame;\
 277   frame = newframe->Xprevframe;\
 278   (pcre_stack_free)(newframe);\
 279   if (frame != NULL)\
 280     {\
 281     rrc = ra;\
 282     goto HEAP_RETURN;\
 283     }\
 284   return ra;\
 285   }
 286
 287
 288 /* Structure for remembering the local variables in a private frame */
 289
 290 typedef struct heapframe {
 291   struct heapframe *Xprevframe;
 292
 293   /* Function arguments that may change */
 294
 295   const uschar *Xeptr;
 296   const uschar *Xecode;
 297   const uschar *Xmstart;
 298   int Xoffset_top;
 299   long int Xims;
 300   eptrblock *Xeptrb;
 301   int Xflags;
 302   unsigned int Xrdepth;
 303
 304   /* Function local variables */
 305
 306   const uschar *Xcallpat;
 307   const uschar *Xcharptr;
 308   const uschar *Xdata;
 309   const uschar *Xnext;
 310   const uschar *Xpp;
 311   const uschar *Xprev;
 312   const uschar *Xsaved_eptr;
 313
 314   recursion_info Xnew_recursive;
 315
 316   BOOL Xcur_is_word;
 317   BOOL Xcondition;
 318   BOOL Xprev_is_word;
 319
 320   unsigned long int Xoriginal_ims;
 321
 322 #ifdef SUPPORT_UCP
 323   int Xprop_type;
 324   int Xprop_value;
 325   int Xprop_fail_result;
 326   int Xprop_category;
 327   int Xprop_chartype;
 328   int Xprop_script;
 329   int Xoclength;
 330   uschar Xocchars[8];
 331 #endif
 332
 333   int Xctype;
 334   unsigned int Xfc;
 335   int Xfi;
 336   int Xlength;
 337   int Xmax;
 338   int Xmin;
 339   int Xnumber;
 340   int Xoffset;
 341   int Xop;
 342   int Xsave_capture_last;
 343   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 344   int Xstacksave[REC_STACK_SAVE_MAX];
 345
 346   eptrblock Xnewptrb;
 347
 348   /* Where to jump back to */
 349
 350   int Xwhere;
 351
 352 } heapframe;
 353
 354 #endif
 355
 356
 357 /***************************************************************************
 358 ***************************************************************************/
 359
 360
 361
 362 /*************************************************
 363 *         Match from current position            *
 364 *************************************************/
 365
 366 /* This function is called recursively in many circumstances. Whenever it
 367 returns a negative (error) response, the outer incarnation must also return the
 368 same response.
 369
 370 Performance note: It might be tempting to extract commonly used fields from the
 371 md structure (e.g. utf8, end_subject) into individual variables to improve
 372 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 373 made performance worse.
 374
 375 Arguments:
 376    eptr        pointer to current character in subject
 377    ecode       pointer to current position in compiled code
 378    mstart      pointer to the current match start position (can be modified
 379                  by encountering \K)
 380    offset_top  current top pointer
 381    md          pointer to "static" info for the match
 382    ims         current /i, /m, and /s options
 383    eptrb       pointer to chain of blocks containing eptr at start of
 384                  brackets - for testing for empty matches
 385    flags       can contain
 386                  match_condassert - this is an assertion condition
 387                  match_cbegroup - this is the start of an unlimited repeat
 388                    group that can match an empty string
 389                  match_tail_recursed - this is a tail_recursed group
 390    rdepth      the recursion depth
 391
 392 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 393                MATCH_NOMATCH if failed to match  )
 394                a negative PCRE_ERROR_xxx value if aborted by an error condition
 395                  (e.g. stopped by repeated call or recursion limit)
 396 */
 397
 398 static int
 399 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
 400   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
 401   int flags, unsigned int rdepth)
 402 {
 403 /* These variables do not need to be preserved over recursion in this function,
 404 so they can be ordinary variables in all cases. Mark some of them with
 405 "register" because they are used a lot in loops. */
 406
 407 register int  rrc;         /* Returns from recursive calls */
 408 register int  i;           /* Used for loops not involving calls to RMATCH() */
 409 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 410 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 411
 412 BOOL minimize, possessive; /* Quantifier options */
 413
 414 /* When recursion is not being used, all "local" variables that have to be
 415 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 416 heap storage. Set up the top-level frame here; others are obtained from the
 417 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 418
 419 #ifdef NO_RECURSE
 420 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 421 frame->Xprevframe = NULL;            /* Marks the top level */
 422
 423 /* Copy in the original argument variables */
 424
 425 frame->Xeptr = eptr;
 426 frame->Xecode = ecode;
 427 frame->Xmstart = mstart;
 428 frame->Xoffset_top = offset_top;
 429 frame->Xims = ims;
 430 frame->Xeptrb = eptrb;
 431 frame->Xflags = flags;
 432 frame->Xrdepth = rdepth;
 433
 434 /* This is where control jumps back to to effect "recursion" */
 435
 436 HEAP_RECURSE:
 437
 438 /* Macros make the argument variables come from the current frame */
 439
 440 #define eptr               frame->Xeptr
 441 #define ecode              frame->Xecode
 442 #define mstart             frame->Xmstart
 443 #define offset_top         frame->Xoffset_top
 444 #define ims                frame->Xims
 445 #define eptrb              frame->Xeptrb
 446 #define flags              frame->Xflags
 447 #define rdepth             frame->Xrdepth
 448
 449 /* Ditto for the local variables */
 450
 451 #ifdef SUPPORT_UTF8
 452 #define charptr            frame->Xcharptr
 453 #endif
 454 #define callpat            frame->Xcallpat
 455 #define data               frame->Xdata
 456 #define next               frame->Xnext
 457 #define pp                 frame->Xpp
 458 #define prev               frame->Xprev
 459 #define saved_eptr         frame->Xsaved_eptr
 460
 461 #define new_recursive      frame->Xnew_recursive
 462
 463 #define cur_is_word        frame->Xcur_is_word
 464 #define condition          frame->Xcondition
 465 #define prev_is_word       frame->Xprev_is_word
 466
 467 #define original_ims       frame->Xoriginal_ims
 468
 469 #ifdef SUPPORT_UCP
 470 #define prop_type          frame->Xprop_type
 471 #define prop_value         frame->Xprop_value
 472 #define prop_fail_result   frame->Xprop_fail_result
 473 #define prop_category      frame->Xprop_category
 474 #define prop_chartype      frame->Xprop_chartype
 475 #define prop_script        frame->Xprop_script
 476 #define oclength           frame->Xoclength
 477 #define occhars            frame->Xocchars
 478 #endif
 479
 480 #define ctype              frame->Xctype
 481 #define fc                 frame->Xfc
 482 #define fi                 frame->Xfi
 483 #define length             frame->Xlength
 484 #define max                frame->Xmax
 485 #define min                frame->Xmin
 486 #define number             frame->Xnumber
 487 #define offset             frame->Xoffset
 488 #define op                 frame->Xop
 489 #define save_capture_last  frame->Xsave_capture_last
 490 #define save_offset1       frame->Xsave_offset1
 491 #define save_offset2       frame->Xsave_offset2
 492 #define save_offset3       frame->Xsave_offset3
 493 #define stacksave          frame->Xstacksave
 494
 495 #define newptrb            frame->Xnewptrb
 496
 497 /* When recursion is being used, local variables are allocated on the stack and
 498 get preserved during recursion in the normal way. In this environment, fi and
 499 i, and fc and c, can be the same variables. */
 500
 501 #else         /* NO_RECURSE not defined */
 502 #define fi i
 503 #define fc c
 504
 505
 506 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 507 const uschar *charptr;             /* in small blocks of the code. My normal */
 508 #endif                             /* style of coding would have declared    */
 509 const uschar *callpat;             /* them within each of those blocks.      */
 510 const uschar *data;                /* However, in order to accommodate the   */
 511 const uschar *next;                /* version of this code that uses an      */
 512 USPTR         pp;                  /* external "stack" implemented on the    */
 513 const uschar *prev;                /* heap, it is easier to declare them all */
 514 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 515                                    /* out in a block. The only declarations  */
 516 recursion_info new_recursive;      /* within blocks below are for variables  */
 517                                    /* that do not have to be preserved over  */
 518 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 519 BOOL condition;
 520 BOOL prev_is_word;
 521
 522 unsigned long int original_ims;
 523
 524 #ifdef SUPPORT_UCP
 525 int prop_type;
 526 int prop_value;
 527 int prop_fail_result;
 528 int prop_category;
 529 int prop_chartype;
 530 int prop_script;
 531 int oclength;
 532 uschar occhars[8];
 533 #endif
 534
 535 int ctype;
 536 int length;
 537 int max;
 538 int min;
 539 int number;
 540 int offset;
 541 int op;
 542 int save_capture_last;
 543 int save_offset1, save_offset2, save_offset3;
 544 int stacksave[REC_STACK_SAVE_MAX];
 545
 546 eptrblock newptrb;
 547 #endif     /* NO_RECURSE */
 548
 549 /* These statements are here to stop the compiler complaining about uninitialized
 550 variables. */
 551
 552 #ifdef SUPPORT_UCP
 553 prop_value = 0;
 554 prop_fail_result = 0;
 555 #endif
 556
 557
 558 /* This label is used for tail recursion, which is used in a few cases even
 559 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 560 used. Thanks to Ian Taylor for noticing this possibility and sending the
 561 original patch. */
 562
 563 TAIL_RECURSE:
 564
 565 /* OK, now we can get on with the real code of the function. Recursive calls
 566 are specified by the macro RMATCH and RRETURN is used to return. When
 567 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 568 and a "return", respectively (possibly with some debugging if DEBUG is
 569 defined). However, RMATCH isn't like a function call because it's quite a
 570 complicated macro. It has to be used in one particular way. This shouldn't,
 571 however, impact performance when true recursion is being used. */
 572
 573 #ifdef SUPPORT_UTF8
 574 utf8 = md->utf8;       /* Local copy of the flag */
 575 #else
 576 utf8 = FALSE;
 577 #endif
 578
 579 /* First check that we haven't called match() too many times, or that we
 580 haven't exceeded the recursive call limit. */
 581
 582 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 583 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 584
 585 original_ims = ims;    /* Save for resetting on ')' */
 586
 587 /* At the start of a group with an unlimited repeat that may match an empty
 588 string, the match_cbegroup flag is set. When this is the case, add the current
 589 subject pointer to the chain of such remembered pointers, to be checked when we
 590 hit the closing ket, in order to break infinite loops that match no characters.
 591 When match() is called in other circumstances, don't add to the chain. If this
 592 is a tail recursion, use a block from the workspace, as the one on the stack is
 593 already used. */
 594
 595 if ((flags & match_cbegroup) != 0)
 596   {
 597   eptrblock *p;
 598   if ((flags & match_tail_recursed) != 0)
 599     {
 600     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
 601     p = md->eptrchain + md->eptrn++;
 602     }
 603   else p = &newptrb;
 604   p->epb_saved_eptr = eptr;
 605   p->epb_prev = eptrb;
 606   eptrb = p;
 607   }
 608
 609 /* Now start processing the opcodes. */
 610
 611 for (;;)
 612   {
 613   minimize = possessive = FALSE;
 614   op = *ecode;
 615
 616   /* For partial matching, remember if we ever hit the end of the subject after
 617   matching at least one subject character. */
 618
 619   if (md->partial &&
 620       eptr >= md->end_subject &&
 621       eptr > mstart)
 622     md->hitend = TRUE;
 623
 624   switch(op)
 625     {
 626     /* Handle a capturing bracket. If there is space in the offset vector, save
 627     the current subject position in the working slot at the top of the vector.
 628     We mustn't change the current values of the data slot, because they may be
 629     set from a previous iteration of this group, and be referred to by a
 630     reference inside the group.
 631
 632     If the bracket fails to match, we need to restore this value and also the
 633     values of the final offsets, in case they were set by a previous iteration
 634     of the same bracket.
 635
 636     If there isn't enough space in the offset vector, treat this as if it were
 637     a non-capturing bracket. Don't worry about setting the flag for the error
 638     case here; that is handled in the code for KET. */
 639
 640     case OP_CBRA:
 641     case OP_SCBRA:
 642     number = GET2(ecode, 1+LINK_SIZE);
 643     offset = number << 1;
 644
 645 #ifdef DEBUG
 646     printf("start bracket %d\n", number);
 647     printf("subject=");
 648     pchars(eptr, 16, TRUE, md);
 649     printf("\n");
 650 #endif
 651
 652     if (offset < md->offset_max)
 653       {
 654       save_offset1 = md->offset_vector[offset];
 655       save_offset2 = md->offset_vector[offset+1];
 656       save_offset3 = md->offset_vector[md->offset_end - number];
 657       save_capture_last = md->capture_last;
 658
 659       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 660       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 661
 662       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 663       do
 664         {
 665         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 666           ims, eptrb, flags, RM1);
 667         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 668         md->capture_last = save_capture_last;
 669         ecode += GET(ecode, 1);
 670         }
 671       while (*ecode == OP_ALT);
 672
 673       DPRINTF(("bracket %d failed\n", number));
 674
 675       md->offset_vector[offset] = save_offset1;
 676       md->offset_vector[offset+1] = save_offset2;
 677       md->offset_vector[md->offset_end - number] = save_offset3;
 678
 679       RRETURN(MATCH_NOMATCH);
 680       }
 681
 682     /* Insufficient room for saving captured contents. Treat as a non-capturing
 683     bracket. */
 684
 685     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 686
 687     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 688     final alternative within the brackets, we would return the result of a
 689     recursive call to match() whatever happened. We can reduce stack usage by
 690     turning this into a tail recursion. */
 691
 692     case OP_BRA:
 693     case OP_SBRA:
 694     DPRINTF(("start non-capturing bracket\n"));
 695     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 696     for (;;)
 697       {
 698       if (ecode[GET(ecode, 1)] != OP_ALT)
 699         {
 700         ecode += _pcre_OP_lengths[*ecode];
 701         flags |= match_tail_recursed;
 702         DPRINTF(("bracket 0 tail recursion\n"));
 703         goto TAIL_RECURSE;
 704         }
 705
 706       /* For non-final alternatives, continue the loop for a NOMATCH result;
 707       otherwise return. */
 708
 709       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 710         eptrb, flags, RM2);
 711       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 712       ecode += GET(ecode, 1);
 713       }
 714     /* Control never reaches here. */
 715
 716     /* Conditional group: compilation checked that there are no more than
 717     two branches. If the condition is false, skipping the first branch takes us
 718     past the end if there is only one branch, but that's OK because that is
 719     exactly what going to the ket would do. As there is only one branch to be
 720     obeyed, we can use tail recursion to avoid using another stack frame. */
 721
 722     case OP_COND:
 723     case OP_SCOND:
 724     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
 725       {
 726       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
 727       condition = md->recursive != NULL &&
 728         (offset == RREF_ANY || offset == md->recursive->group_num);
 729       ecode += condition? 3 : GET(ecode, 1);
 730       }
 731
 732     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
 733       {
 734       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 735       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 736       ecode += condition? 3 : GET(ecode, 1);
 737       }
 738
 739     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
 740       {
 741       condition = FALSE;
 742       ecode += GET(ecode, 1);
 743       }
 744
 745     /* The condition is an assertion. Call match() to evaluate it - setting
 746     the final argument match_condassert causes it to stop at the end of an
 747     assertion. */
 748
 749     else
 750       {
 751       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 752           match_condassert, RM3);
 753       if (rrc == MATCH_MATCH)
 754         {
 755         condition = TRUE;
 756         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
 757         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 758         }
 759       else if (rrc != MATCH_NOMATCH)
 760         {
 761         RRETURN(rrc);         /* Need braces because of following else */
 762         }
 763       else
 764         {
 765         condition = FALSE;
 766         ecode += GET(ecode, 1);
 767         }
 768       }
 769
 770     /* We are now at the branch that is to be obeyed. As there is only one,
 771     we can use tail recursion to avoid using another stack frame. If the second
 772     alternative doesn't exist, we can just plough on. */
 773
 774     if (condition || *ecode == OP_ALT)
 775       {
 776       ecode += 1 + LINK_SIZE;
 777       flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
 778       goto TAIL_RECURSE;
 779       }
 780     else
 781       {
 782       ecode += 1 + LINK_SIZE;
 783       }
 784     break;
 785
 786
 787     /* End of the pattern. If we are in a top-level recursion, we should
 788     restore the offsets appropriately and continue from after the call. */
 789
 790     case OP_END:
 791     if (md->recursive != NULL && md->recursive->group_num == 0)
 792       {
 793       recursion_info *rec = md->recursive;
 794       DPRINTF(("End of pattern in a (?0) recursion\n"));
 795       md->recursive = rec->prevrec;
 796       memmove(md->offset_vector, rec->offset_save,
 797         rec->saved_max * sizeof(int));
 798       mstart = rec->save_start;
 799       ims = original_ims;
 800       ecode = rec->after_call;
 801       break;
 802       }
 803
 804     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
 805     string - backtracking will then try other alternatives, if any. */
 806
 807     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
 808     md->end_match_ptr = eptr;           /* Record where we ended */
 809     md->end_offset_top = offset_top;    /* and how many extracts were taken */
 810     md->start_match_ptr = mstart;  /* and the start (\K can modify) */
 811     RRETURN(MATCH_MATCH);
 812
 813     /* Change option settings */
 814
 815     case OP_OPT:
 816     ims = ecode[1];
 817     ecode += 2;
 818     DPRINTF(("ims set to %02lx\n", ims));
 819     break;
 820
 821     /* Assertion brackets. Check the alternative branches in turn - the
 822     matching won't pass the KET for an assertion. If any one branch matches,
 823     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 824     start of each branch to move the current point backwards, so the code at
 825     this level is identical to the lookahead case. */
 826
 827     case OP_ASSERT:
 828     case OP_ASSERTBACK:
 829     do
 830       {
 831       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 832         RM4);
 833       if (rrc == MATCH_MATCH) break;
 834       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 835       ecode += GET(ecode, 1);
 836       }
 837     while (*ecode == OP_ALT);
 838     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 839
 840     /* If checking an assertion for a condition, return MATCH_MATCH. */
 841
 842     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 843
 844     /* Continue from after the assertion, updating the offsets high water
 845     mark, since extracts may have been taken during the assertion. */
 846
 847     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 848     ecode += 1 + LINK_SIZE;
 849     offset_top = md->end_offset_top;
 850     continue;
 851
 852     /* Negative assertion: all branches must fail to match */
 853
 854     case OP_ASSERT_NOT:
 855     case OP_ASSERTBACK_NOT:
 856     do
 857       {
 858       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 859         RM5);
 860       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
 861       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 862       ecode += GET(ecode,1);
 863       }
 864     while (*ecode == OP_ALT);
 865
 866     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 867
 868     ecode += 1 + LINK_SIZE;
 869     continue;
 870
 871     /* Move the subject pointer back. This occurs only at the start of
 872     each branch of a lookbehind assertion. If we are too close to the start to
 873     move back, this match function fails. When working with UTF-8 we move
 874     back a number of characters, not bytes. */
 875
 876     case OP_REVERSE:
 877 #ifdef SUPPORT_UTF8
 878     if (utf8)
 879       {
 880       i = GET(ecode, 1);
 881       while (i-- > 0)
 882         {
 883         eptr--;
 884         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 885         BACKCHAR(eptr)
 886         }
 887       }
 888     else
 889 #endif
 890
 891     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 892
 893       {
 894       eptr -= GET(ecode, 1);
 895       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 896       }
 897
 898     /* Skip to next op code */
 899
 900     ecode += 1 + LINK_SIZE;
 901     break;
 902
 903     /* The callout item calls an external function, if one is provided, passing
 904     details of the match so far. This is mainly for debugging, though the
 905     function is able to force a failure. */
 906
 907     case OP_CALLOUT:
 908     if (pcre_callout != NULL)
 909       {
 910       pcre_callout_block cb;
 911       cb.version          = 1;   /* Version 1 of the callout block */
 912       cb.callout_number   = ecode[1];
 913       cb.offset_vector    = md->offset_vector;
 914       cb.subject          = (PCRE_SPTR)md->start_subject;
 915       cb.subject_length   = md->end_subject - md->start_subject;
 916       cb.start_match      = mstart - md->start_subject;
 917       cb.current_position = eptr - md->start_subject;
 918       cb.pattern_position = GET(ecode, 2);
 919       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 920       cb.capture_top      = offset_top/2;
 921       cb.capture_last     = md->capture_last;
 922       cb.callout_data     = md->callout_data;
 923       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 924       if (rrc < 0) RRETURN(rrc);
 925       }
 926     ecode += 2 + 2*LINK_SIZE;
 927     break;
 928
 929     /* Recursion either matches the current regex, or some subexpression. The
 930     offset data is the offset to the starting bracket from the start of the
 931     whole pattern. (This is so that it works from duplicated subpatterns.)
 932
 933     If there are any capturing brackets started but not finished, we have to
 934     save their starting points and reinstate them after the recursion. However,
 935     we don't know how many such there are (offset_top records the completed
 936     total) so we just have to save all the potential data. There may be up to
 937     65535 such values, which is too large to put on the stack, but using malloc
 938     for small numbers seems expensive. As a compromise, the stack is used when
 939     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
 940     is used. If the malloc fails, the match is abandoned: the error code
 941     PCRE_ERROR_NOMEMORY is returned immediately, before any offset data has
 942     been copied.
 943
 944     There are also other values that have to be saved. We use a chained
 945     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 946     for the original version of this logic. */
 947
 948     case OP_RECURSE:
 949       {
 950       callpat = md->start_code + GET(ecode, 1);
 951       new_recursive.group_num = (callpat == md->start_code)? 0 :
 952         GET2(callpat, 1 + LINK_SIZE);
 953
 954       /* Add to "recursing stack" */
 955
 956       new_recursive.prevrec = md->recursive;
 957       md->recursive = &new_recursive;
 958
 959       /* Find where to continue from afterwards */
 960
 961       ecode += 1 + LINK_SIZE;
 962       new_recursive.after_call = ecode;
 963
 964       /* Now save the offset data. */
 965
 966       new_recursive.saved_max = md->offset_end;
 967       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
 968         new_recursive.offset_save = stacksave;
 969       else
 970         {
 971         new_recursive.offset_save =
 972           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
 973         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
 974         }
 975
 976       memcpy(new_recursive.offset_save, md->offset_vector,
 977             new_recursive.saved_max * sizeof(int));
 978       new_recursive.save_start = mstart;
 979       mstart = eptr;
 980
 981       /* OK, now we can do the recursion. For each top-level alternative we
 982       restore the offset and recursion data. */
 983
 984       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
 985       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
 986       do
 987         {
 988         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
 989           md, ims, eptrb, flags, RM6);
 990         if (rrc == MATCH_MATCH)
 991           {
 992           DPRINTF(("Recursion matched\n"));
 993           md->recursive = new_recursive.prevrec;
 994           if (new_recursive.offset_save != stacksave)
 995             (pcre_free)(new_recursive.offset_save);
 996           RRETURN(MATCH_MATCH);
 997           }
 998         else if (rrc != MATCH_NOMATCH)
 999           {
1000           DPRINTF(("Recursion gave error %d\n", rrc));
1001           RRETURN(rrc);
1002           }
1003
1004         md->recursive = &new_recursive;
1005         memcpy(md->offset_vector, new_recursive.offset_save,
1006             new_recursive.saved_max * sizeof(int));
1007         callpat += GET(callpat, 1);
1008         }
1009       while (*callpat == OP_ALT);
1010
1011       DPRINTF(("Recursion didn't match\n"));
1012       md->recursive = new_recursive.prevrec;
1013       if (new_recursive.offset_save != stacksave)
1014         (pcre_free)(new_recursive.offset_save);
1015       RRETURN(MATCH_NOMATCH);
1016       }
1017     /* Control never reaches here */
1018
1019     /* "Once" brackets are like assertion brackets except that after a match,
1020     the point in the subject string is not moved back. Thus there can never be
1021     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1022     Check the alternative branches in turn - the matching won't pass the KET
1023     for this kind of subpattern. If any one branch matches, we carry on as at
1024     the end of a normal bracket, leaving the subject pointer. */
1025
1026     case OP_ONCE:
1027     prev = ecode;
1028     saved_eptr = eptr;
1029
1030     do
1031       {
1032       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1033         eptrb, 0, RM7);
1034       if (rrc == MATCH_MATCH) break;
1035       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1036       ecode += GET(ecode,1);
1037       }
1038     while (*ecode == OP_ALT);
1039
1040     /* If we hit the end of the group (which could be repeated), fail */
1041
1042     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1043
1044     /* Continue as from after the assertion, updating the offsets high water
1045     mark, since extracts may have been taken. */
1046
1047     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1048
1049     offset_top = md->end_offset_top;
1050     eptr = md->end_match_ptr;
1051
1052     /* For a non-repeating ket, just continue at this level. This also
1053     happens for a repeating ket if no characters were matched in the group.
1054     This is the forcible breaking of infinite loops as implemented in Perl
1055     5.005. If there is an options reset, it will get obeyed in the normal
1056     course of events. */
1057
1058     if (*ecode == OP_KET || eptr == saved_eptr)
1059       {
1060       ecode += 1+LINK_SIZE;
1061       break;
1062       }
1063
1064     /* The repeating kets try the rest of the pattern or restart from the
1065     preceding bracket, in the appropriate order. The second "call" of match()
1066     uses tail recursion, to avoid using another stack frame. We need to reset
1067     any options that changed within the bracket before re-running it, so
1068     check the next opcode. */
1069
1070     if (ecode[1+LINK_SIZE] == OP_OPT)
1071       {
1072       ims = (ims & ~PCRE_IMS) | ecode[4];
1073       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1074       }
1075
1076     if (*ecode == OP_KETRMIN)
1077       {
1078       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1079         RM8);
1080       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1081       ecode = prev;
1082       flags = match_tail_recursed;
1083       goto TAIL_RECURSE;
1084       }
1085     else  /* OP_KETRMAX */
1086       {
1087       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1088       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1089       ecode += 1 + LINK_SIZE;
1090       flags = match_tail_recursed;
1091       goto TAIL_RECURSE;
1092       }
1093     /* Control never gets here */
1094
1095     /* An alternation is the end of a branch; scan along to find the end of the
1096     bracketed group and go to there. */
1097
1098     case OP_ALT:
1099     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1100     break;
1101
1102     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1103     that it may occur zero times. It may repeat infinitely, or not at all -
1104     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1105     repeat limits are compiled as a number of copies, with the optional ones
1106     preceded by BRAZERO or BRAMINZERO. */
1107
1108     case OP_BRAZERO:
1109       {
1110       next = ecode+1;
1111       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1112       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1113       do next += GET(next,1); while (*next == OP_ALT);
1114       ecode = next + 1 + LINK_SIZE;
1115       }
1116     break;
1117
1118     case OP_BRAMINZERO:
1119       {
1120       next = ecode+1;
1121       do next += GET(next, 1); while (*next == OP_ALT);
1122       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1123       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1124       ecode++;
1125       }
1126     break;
1127
1128     /* End of a group, repeated or non-repeating. */
1129
1130     case OP_KET:
1131     case OP_KETRMIN:
1132     case OP_KETRMAX:
1133     prev = ecode - GET(ecode, 1);
1134
1135     /* If this was a group that remembered the subject start, in order to break
1136     infinite repeats of empty string matches, retrieve the subject start from
1137     the chain. Otherwise, set it NULL. */
1138
1139     if (*prev >= OP_SBRA)
1140       {
1141       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1142       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1143       }
1144     else saved_eptr = NULL;
1145
1146     /* If we are at the end of an assertion group, stop matching and return
1147     MATCH_MATCH, but record the current high water mark for use by positive
1148     assertions. Do this also for the "once" (atomic) groups. */
1149
1150     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1151         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1152         *prev == OP_ONCE)
1153       {
1154       md->end_match_ptr = eptr;      /* For ONCE */
1155       md->end_offset_top = offset_top;
1156       RRETURN(MATCH_MATCH);
1157       }
1158
1159     /* For capturing groups we have to check the group number back at the start
1160     and if necessary complete handling an extraction by setting the offsets and
1161     bumping the high water mark. Note that whole-pattern recursion is coded as
1162     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1163     when the OP_END is reached. Other recursion is handled here. */
1164
1165     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1166       {
1167       number = GET2(prev, 1+LINK_SIZE);
1168       offset = number << 1;
1169
1170 #ifdef DEBUG
1171       printf("end bracket %d", number);
1172       printf("\n");
1173 #endif
1174
1175       md->capture_last = number;
1176       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1177         {
1178         md->offset_vector[offset] =
1179           md->offset_vector[md->offset_end - number];
1180         md->offset_vector[offset+1] = eptr - md->start_subject;
1181         if (offset_top <= offset) offset_top = offset + 2;
1182         }
1183
1184       /* Handle a recursively called group. Restore the offsets
1185       appropriately and continue from after the call. */
1186
1187       if (md->recursive != NULL && md->recursive->group_num == number)
1188         {
1189         recursion_info *rec = md->recursive;
1190         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1191         md->recursive = rec->prevrec;
1192         mstart = rec->save_start;
1193         memcpy(md->offset_vector, rec->offset_save,
1194           rec->saved_max * sizeof(int));
1195         ecode = rec->after_call;
1196         ims = original_ims;
1197         break;
1198         }
1199       }
1200
1201     /* For both capturing and non-capturing groups, reset the value of the ims
1202     flags, in case they got changed during the group. */
1203
1204     ims = original_ims;
1205     DPRINTF(("ims reset to %02lx\n", ims));
1206
1207     /* For a non-repeating ket, just continue at this level. This also
1208     happens for a repeating ket if no characters were matched in the group.
1209     This is the forcible breaking of infinite loops as implemented in Perl
1210     5.005. If there is an options reset, it will get obeyed in the normal
1211     course of events. */
1212
1213     if (*ecode == OP_KET || eptr == saved_eptr)
1214       {
1215       ecode += 1 + LINK_SIZE;
1216       break;
1217       }
1218
1219     /* The repeating kets try the rest of the pattern or restart from the
1220     preceding bracket, in the appropriate order. In the second case, we can use
1221     tail recursion to avoid using another stack frame. */
1222
1223     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1224
1225     if (*ecode == OP_KETRMIN)
1226       {
1227       RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1228         RM12);
1229       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1230       ecode = prev;
1231       flags |= match_tail_recursed;
1232       goto TAIL_RECURSE;
1233       }
1234     else  /* OP_KETRMAX */
1235       {
1236       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1237       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1238       ecode += 1 + LINK_SIZE;
1239       flags = match_tail_recursed;
1240       goto TAIL_RECURSE;
1241       }
1242     /* Control never gets here */
1243
1244     /* Start of subject unless notbol, or after internal newline if multiline */
1245
1246     case OP_CIRC:
1247     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1248     if ((ims & PCRE_MULTILINE) != 0)
1249       {
1250       if (eptr != md->start_subject &&
1251           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1252         RRETURN(MATCH_NOMATCH);
1253       ecode++;
1254       break;
1255       }
1256     /* ... else fall through */
1257
1258     /* Start of subject assertion */
1259
1260     case OP_SOD:
1261     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1262     ecode++;
1263     break;
1264
1265     /* Start of match assertion */
1266
1267     case OP_SOM:
1268     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1269     ecode++;
1270     break;
1271
1272     /* Reset the start of match point */
1273
1274     case OP_SET_SOM:
1275     mstart = eptr;
1276     ecode++;
1277     break;
1278
1279     /* Assert before internal newline if multiline, or before a terminating
1280     newline unless endonly is set, else end of subject unless noteol is set. */
1281
1282     case OP_DOLL:
1283     if ((ims & PCRE_MULTILINE) != 0)
1284       {
1285       if (eptr < md->end_subject)
1286         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1287       else
1288         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1289       ecode++;
1290       break;
1291       }
1292     else
1293       {
1294       if (md->noteol) RRETURN(MATCH_NOMATCH);
1295       if (!md->endonly)
1296         {
1297         if (eptr != md->end_subject &&
1298             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1299           RRETURN(MATCH_NOMATCH);
1300         ecode++;
1301         break;
1302         }
1303       }
1304     /* ... else fall through for endonly */
1305
1306     /* End of subject assertion (\z) */
1307
1308     case OP_EOD:
1309     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1310     ecode++;
1311     break;
1312
1313     /* End of subject or ending \n assertion (\Z) */
1314
1315     case OP_EODN:
1316     if (eptr != md->end_subject &&
1317         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1318       RRETURN(MATCH_NOMATCH);
1319     ecode++;
1320     break;
1321
1322     /* Word boundary assertions */
1323
1324     case OP_NOT_WORD_BOUNDARY:
1325     case OP_WORD_BOUNDARY:
1326       {
1327
1328       /* Find out if the previous and current characters are "word" characters.
1329       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1330       be "non-word" characters. */
1331
1332 #ifdef SUPPORT_UTF8
1333       if (utf8)
1334         {
1335         if (eptr == md->start_subject) prev_is_word = FALSE; else
1336           {
1337           const uschar *lastptr = eptr - 1;
1338           while((*lastptr & 0xc0) == 0x80) lastptr--;
1339           GETCHAR(c, lastptr);
1340           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1341           }
1342         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1343           {
1344           GETCHAR(c, eptr);
1345           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1346           }
1347         }
1348       else
1349 #endif
1350
1351       /* More streamlined when not in UTF-8 mode */
1352
1353         {
1354         prev_is_word = (eptr != md->start_subject) &&
1355           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1356         cur_is_word = (eptr < md->end_subject) &&
1357           ((md->ctypes[*eptr] & ctype_word) != 0);
1358         }
1359
1360       /* Now see if the situation is what we want */
1361
1362       if ((*ecode++ == OP_WORD_BOUNDARY)?
1363            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1364         RRETURN(MATCH_NOMATCH);
1365       }
1366     break;
1367
1368     /* Match a single character type; inline for speed */
1369
1370     case OP_ANY:
1371     if ((ims & PCRE_DOTALL) == 0)
1372       {
1373       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1374       }
1375     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1376     if (utf8)
1377       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1378     ecode++;
1379     break;
1380
1381     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1382     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1383
1384     case OP_ANYBYTE:
1385     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1386     ecode++;
1387     break;
1388
1389     case OP_NOT_DIGIT:
1390     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1391     GETCHARINCTEST(c, eptr);
1392     if (
1393 #ifdef SUPPORT_UTF8
1394        c < 256 &&
1395 #endif
1396        (md->ctypes[c] & ctype_digit) != 0
1397        )
1398       RRETURN(MATCH_NOMATCH);
1399     ecode++;
1400     break;
1401
1402     case OP_DIGIT:
1403     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1404     GETCHARINCTEST(c, eptr);
1405     if (
1406 #ifdef SUPPORT_UTF8
1407        c >= 256 ||
1408 #endif
1409        (md->ctypes[c] & ctype_digit) == 0
1410        )
1411       RRETURN(MATCH_NOMATCH);
1412     ecode++;
1413     break;
1414
1415     case OP_NOT_WHITESPACE:
1416     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1417     GETCHARINCTEST(c, eptr);
1418     if (
1419 #ifdef SUPPORT_UTF8
1420        c < 256 &&
1421 #endif
1422        (md->ctypes[c] & ctype_space) != 0
1423        )
1424       RRETURN(MATCH_NOMATCH);
1425     ecode++;
1426     break;
1427
1428     case OP_WHITESPACE:
1429     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1430     GETCHARINCTEST(c, eptr);
1431     if (
1432 #ifdef SUPPORT_UTF8
1433        c >= 256 ||
1434 #endif
1435        (md->ctypes[c] & ctype_space) == 0
1436        )
1437       RRETURN(MATCH_NOMATCH);
1438     ecode++;
1439     break;
1440
1441     case OP_NOT_WORDCHAR:
1442     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1443     GETCHARINCTEST(c, eptr);
1444     if (
1445 #ifdef SUPPORT_UTF8
1446        c < 256 &&
1447 #endif
1448        (md->ctypes[c] & ctype_word) != 0
1449        )
1450       RRETURN(MATCH_NOMATCH);
1451     ecode++;
1452     break;
1453
1454     case OP_WORDCHAR:
1455     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1456     GETCHARINCTEST(c, eptr);
1457     if (
1458 #ifdef SUPPORT_UTF8
1459        c >= 256 ||
1460 #endif
1461        (md->ctypes[c] & ctype_word) == 0
1462        )
1463       RRETURN(MATCH_NOMATCH);
1464     ecode++;
1465     break;
1466
1467     case OP_ANYNL:
1468     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1469     GETCHARINCTEST(c, eptr);
1470     switch(c)
1471       {
1472       default: RRETURN(MATCH_NOMATCH);
1473       case 0x000d:
1474       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1475       break;
1476       case 0x000a:
1477       case 0x000b:
1478       case 0x000c:
1479       case 0x0085:
1480       case 0x2028:
1481       case 0x2029:
1482       break;
1483       }
1484     ecode++;
1485     break;
1486
1487     case OP_NOT_HSPACE:
1488     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1489     GETCHARINCTEST(c, eptr);
1490     switch(c)
1491       {
1492       default: break;
1493       case 0x09:      /* HT */
1494       case 0x20:      /* SPACE */
1495       case 0xa0:      /* NBSP */
1496       case 0x1680:    /* OGHAM SPACE MARK */
1497       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1498       case 0x2000:    /* EN QUAD */
1499       case 0x2001:    /* EM QUAD */
1500       case 0x2002:    /* EN SPACE */
1501       case 0x2003:    /* EM SPACE */
1502       case 0x2004:    /* THREE-PER-EM SPACE */
1503       case 0x2005:    /* FOUR-PER-EM SPACE */
1504       case 0x2006:    /* SIX-PER-EM SPACE */
1505       case 0x2007:    /* FIGURE SPACE */
1506       case 0x2008:    /* PUNCTUATION SPACE */
1507       case 0x2009:    /* THIN SPACE */
1508       case 0x200A:    /* HAIR SPACE */
1509       case 0x202f:    /* NARROW NO-BREAK SPACE */
1510       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1511       case 0x3000:    /* IDEOGRAPHIC SPACE */
1512       RRETURN(MATCH_NOMATCH);
1513       }
1514     ecode++;
1515     break;
1516
1517     case OP_HSPACE:
1518     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1519     GETCHARINCTEST(c, eptr);
1520     switch(c)
1521       {
1522       default: RRETURN(MATCH_NOMATCH);
1523       case 0x09:      /* HT */
1524       case 0x20:      /* SPACE */
1525       case 0xa0:      /* NBSP */
1526       case 0x1680:    /* OGHAM SPACE MARK */
1527       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1528       case 0x2000:    /* EN QUAD */
1529       case 0x2001:    /* EM QUAD */
1530       case 0x2002:    /* EN SPACE */
1531       case 0x2003:    /* EM SPACE */
1532       case 0x2004:    /* THREE-PER-EM SPACE */
1533       case 0x2005:    /* FOUR-PER-EM SPACE */
1534       case 0x2006:    /* SIX-PER-EM SPACE */
1535       case 0x2007:    /* FIGURE SPACE */
1536       case 0x2008:    /* PUNCTUATION SPACE */
1537       case 0x2009:    /* THIN SPACE */
1538       case 0x200A:    /* HAIR SPACE */
1539       case 0x202f:    /* NARROW NO-BREAK SPACE */
1540       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1541       case 0x3000:    /* IDEOGRAPHIC SPACE */
1542       break;
1543       }
1544     ecode++;
1545     break;
1546
1547     case OP_NOT_VSPACE:
1548     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1549     GETCHARINCTEST(c, eptr);
1550     switch(c)
1551       {
1552       default: break;
1553       case 0x0a:      /* LF */
1554       case 0x0b:      /* VT */
1555       case 0x0c:      /* FF */
1556       case 0x0d:      /* CR */
1557       case 0x85:      /* NEL */
1558       case 0x2028:    /* LINE SEPARATOR */
1559       case 0x2029:    /* PARAGRAPH SEPARATOR */
1560       RRETURN(MATCH_NOMATCH);
1561       }
1562     ecode++;
1563     break;
1564
1565     case OP_VSPACE:
1566     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1567     GETCHARINCTEST(c, eptr);
1568     switch(c)
1569       {
1570       default: RRETURN(MATCH_NOMATCH);
1571       case 0x0a:      /* LF */
1572       case 0x0b:      /* VT */
1573       case 0x0c:      /* FF */
1574       case 0x0d:      /* CR */
1575       case 0x85:      /* NEL */
1576       case 0x2028:    /* LINE SEPARATOR */
1577       case 0x2029:    /* PARAGRAPH SEPARATOR */
1578       break;
1579       }
1580     ecode++;
1581     break;
1582
1583 #ifdef SUPPORT_UCP
1584     /* Check the next character by Unicode property. We will get here only
1585     if the support is in the binary; otherwise a compile-time error occurs. */
1586
1587     case OP_PROP:
1588     case OP_NOTPROP:
1589     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1590     GETCHARINCTEST(c, eptr);
1591       {
1592       int chartype, script;
1593       int category = _pcre_ucp_findprop(c, &chartype, &script);
1594
1595       switch(ecode[1])
1596         {
1597         case PT_ANY:
1598         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1599         break;
1600
1601         case PT_LAMP:
1602         if ((chartype == ucp_Lu ||
1603              chartype == ucp_Ll ||
1604              chartype == ucp_Lt) == (op == OP_NOTPROP))
1605           RRETURN(MATCH_NOMATCH);
1606          break;
1607
1608         case PT_GC:
1609         if ((ecode[2] != category) == (op == OP_PROP))
1610           RRETURN(MATCH_NOMATCH);
1611         break;
1612
1613         case PT_PC:
1614         if ((ecode[2] != chartype) == (op == OP_PROP))
1615           RRETURN(MATCH_NOMATCH);
1616         break;
1617
1618         case PT_SC:
1619         if ((ecode[2] != script) == (op == OP_PROP))
1620           RRETURN(MATCH_NOMATCH);
1621         break;
1622
1623         default:
1624         RRETURN(PCRE_ERROR_INTERNAL);
1625         }
1626
1627       ecode += 3;
1628       }
1629     break;
1630
1631     /* Match an extended Unicode sequence. We will get here only if the support
1632     is in the binary; otherwise a compile-time error occurs. */
1633
1634     case OP_EXTUNI:
1635     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1636     GETCHARINCTEST(c, eptr);
1637       {
1638       int chartype, script;
1639       int category = _pcre_ucp_findprop(c, &chartype, &script);
1640       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1641       while (eptr < md->end_subject)
1642         {
1643         int len = 1;
1644         if (!utf8) c = *eptr; else
1645           {
1646           GETCHARLEN(c, eptr, len);
1647           }
1648         category = _pcre_ucp_findprop(c, &chartype, &script);
1649         if (category != ucp_M) break;
1650         eptr += len;
1651         }
1652       }
1653     ecode++;
1654     break;
1655 #endif
1656
1657
1658     /* Match a back reference, possibly repeatedly. Look past the end of the
1659     item to see if there is repeat information following. The code is similar
1660     to that for character classes, but repeated for efficiency. Then obey
1661     similar code to character type repeats - written out again for speed.
1662     However, if the referenced string is the empty string, always treat
1663     it as matched, any number of times (otherwise there could be infinite
1664     loops). */
1665
1666     case OP_REF:
1667       {
1668       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1669       ecode += 3;                                 /* Advance past item */
1670
1671       /* If the reference is unset, set the length to be longer than the amount
1672       of subject left; this ensures that every attempt at a match fails. We
1673       can't just fail here, because of the possibility of quantifiers with zero
1674       minima. */
1675
1676       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1677         md->end_subject - eptr + 1 :
1678         md->offset_vector[offset+1] - md->offset_vector[offset];
1679
1680       /* Set up for repetition, or handle the non-repeated case */
1681
1682       switch (*ecode)
1683         {
1684         case OP_CRSTAR:
1685         case OP_CRMINSTAR:
1686         case OP_CRPLUS:
1687         case OP_CRMINPLUS:
1688         case OP_CRQUERY:
1689         case OP_CRMINQUERY:
1690         c = *ecode++ - OP_CRSTAR;
1691         minimize = (c & 1) != 0;
1692         min = rep_min[c];                 /* Pick up values from tables; */
1693         max = rep_max[c];                 /* zero for max => infinity */
1694         if (max == 0) max = INT_MAX;
1695         break;
1696
1697         case OP_CRRANGE:
1698         case OP_CRMINRANGE:
1699         minimize = (*ecode == OP_CRMINRANGE);
1700         min = GET2(ecode, 1);
1701         max = GET2(ecode, 3);
1702         if (max == 0) max = INT_MAX;
1703         ecode += 5;
1704         break;
1705
1706         default:               /* No repeat follows */
1707         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1708         eptr += length;
1709         continue;              /* With the main loop */
1710         }
1711
1712       /* If the length of the reference is zero, just continue with the
1713       main loop. */
1714
1715       if (length == 0) continue;
1716
1717       /* First, ensure the minimum number of matches are present. We get back
1718       the length of the reference string explicitly rather than passing the
1719       address of eptr, so that eptr can be a register variable. */
1720
1721       for (i = 1; i <= min; i++)
1722         {
1723         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1724         eptr += length;
1725         }
1726
1727       /* If min = max, continue at the same level without recursion.
1728       They are not both allowed to be zero. */
1729
1730       if (min == max) continue;
1731
1732       /* If minimizing, keep trying and advancing the pointer */
1733
1734       if (minimize)
1735         {
1736         for (fi = min;; fi++)
1737           {
1738           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1739           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1740           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1741             RRETURN(MATCH_NOMATCH);
1742           eptr += length;
1743           }
1744         /* Control never gets here */
1745         }
1746
1747       /* If maximizing, find the longest string and work backwards */
1748
1749       else
1750         {
1751         pp = eptr;
1752         for (i = min; i < max; i++)
1753           {
1754           if (!match_ref(offset, eptr, length, md, ims)) break;
1755           eptr += length;
1756           }
1757         while (eptr >= pp)
1758           {
1759           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1760           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1761           eptr -= length;
1762           }
1763         RRETURN(MATCH_NOMATCH);
1764         }
1765       }
1766     /* Control never gets here */
1767
1768
1769
1770     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1771     used when all the characters in the class have values in the range 0-255,
1772     and either the matching is caseful, or the characters are in the range
1773     0-127 when UTF-8 processing is enabled. The only difference between
1774     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1775     encountered.
1776
1777     First, look past the end of the item to see if there is repeat information
1778     following. Then obey similar code to character type repeats - written out
1779     again for speed. */
1780
1781     case OP_NCLASS:
1782     case OP_CLASS:
1783       {
1784       data = ecode + 1;                /* Save for matching */
1785       ecode += 33;                     /* Advance past the item */
1786
1787       switch (*ecode)
1788         {
1789         case OP_CRSTAR:
1790         case OP_CRMINSTAR:
1791         case OP_CRPLUS:
1792         case OP_CRMINPLUS:
1793         case OP_CRQUERY:
1794         case OP_CRMINQUERY:
1795         c = *ecode++ - OP_CRSTAR;
1796         minimize = (c & 1) != 0;
1797         min = rep_min[c];                 /* Pick up values from tables; */
1798         max = rep_max[c];                 /* zero for max => infinity */
1799         if (max == 0) max = INT_MAX;
1800         break;
1801
1802         case OP_CRRANGE:
1803         case OP_CRMINRANGE:
1804         minimize = (*ecode == OP_CRMINRANGE);
1805         min = GET2(ecode, 1);
1806         max = GET2(ecode, 3);
1807         if (max == 0) max = INT_MAX;
1808         ecode += 5;
1809         break;
1810
1811         default:               /* No repeat follows */
1812         min = max = 1;
1813         break;
1814         }
1815
1816       /* First, ensure the minimum number of matches are present. */
1817
1818 #ifdef SUPPORT_UTF8
1819       /* UTF-8 mode */
1820       if (utf8)
1821         {
1822         for (i = 1; i <= min; i++)
1823           {
1824           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1825           GETCHARINC(c, eptr);
1826           if (c > 255)
1827             {
1828             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1829             }
1830           else
1831             {
1832             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1833             }
1834           }
1835         }
1836       else
1837 #endif
1838       /* Not UTF-8 mode */
1839         {
1840         for (i = 1; i <= min; i++)
1841           {
1842           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1843           c = *eptr++;
1844           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1845           }
1846         }
1847
1848       /* If max == min we can continue with the main loop without the
1849       need to recurse. */
1850
1851       if (min == max) continue;
1852
1853       /* If minimizing, keep testing the rest of the expression and advancing
1854       the pointer while it matches the class. */
1855
1856       if (minimize)
1857         {
1858 #ifdef SUPPORT_UTF8
1859         /* UTF-8 mode */
1860         if (utf8)
1861           {
1862           for (fi = min;; fi++)
1863             {
1864             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1865             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1866             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1867             GETCHARINC(c, eptr);
1868             if (c > 255)
1869               {
1870               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1871               }
1872             else
1873               {
1874               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1875               }
1876             }
1877           }
1878         else
1879 #endif
1880         /* Not UTF-8 mode */
1881           {
1882           for (fi = min;; fi++)
1883             {
1884             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1885             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1886             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1887             c = *eptr++;
1888             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1889             }
1890           }
1891         /* Control never gets here */
1892         }
1893
1894       /* If maximizing, find the longest possible run, then work backwards. */
1895
1896       else
1897         {
1898         pp = eptr;
1899
1900 #ifdef SUPPORT_UTF8
1901         /* UTF-8 mode */
1902         if (utf8)
1903           {
1904           for (i = min; i < max; i++)
1905             {
1906             int len = 1;
1907             if (eptr >= md->end_subject) break;
1908             GETCHARLEN(c, eptr, len);
1909             if (c > 255)
1910               {
1911               if (op == OP_CLASS) break;
1912               }
1913             else
1914               {
1915               if ((data[c/8] & (1 << (c&7))) == 0) break;
1916               }
1917             eptr += len;
1918             }
1919           for (;;)
1920             {
1921             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1922             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1923             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1924             BACKCHAR(eptr);
1925             }
1926           }
1927         else
1928 #endif
1929           /* Not UTF-8 mode */
1930           {
1931           for (i = min; i < max; i++)
1932             {
1933             if (eptr >= md->end_subject) break;
1934             c = *eptr;
1935             if ((data[c/8] & (1 << (c&7))) == 0) break;
1936             eptr++;
1937             }
1938           while (eptr >= pp)
1939             {
1940             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1941             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1942             eptr--;
1943             }
1944           }
1945
1946         RRETURN(MATCH_NOMATCH);
1947         }
1948       }
1949     /* Control never gets here */
1950
1951
1952     /* Match an extended character class. This opcode is encountered only
1953     in UTF-8 mode, because that's the only time it is compiled. */
1954
1955 #ifdef SUPPORT_UTF8
1956     case OP_XCLASS:
1957       {
1958       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
1959       ecode += GET(ecode, 1);                      /* Advance past the item */
1960
1961       switch (*ecode)
1962         {
1963         case OP_CRSTAR:
1964         case OP_CRMINSTAR:
1965         case OP_CRPLUS:
1966         case OP_CRMINPLUS:
1967         case OP_CRQUERY:
1968         case OP_CRMINQUERY:
1969         c = *ecode++ - OP_CRSTAR;
1970         minimize = (c & 1) != 0;
1971         min = rep_min[c];                 /* Pick up values from tables; */
1972         max = rep_max[c];                 /* zero for max => infinity */
1973         if (max == 0) max = INT_MAX;
1974         break;
1975
1976         case OP_CRRANGE:
1977         case OP_CRMINRANGE:
1978         minimize = (*ecode == OP_CRMINRANGE);
1979         min = GET2(ecode, 1);
1980         max = GET2(ecode, 3);
1981         if (max == 0) max = INT_MAX;
1982         ecode += 5;
1983         break;
1984
1985         default:               /* No repeat follows */
1986         min = max = 1;
1987         break;
1988         }
1989
1990       /* First, ensure the minimum number of matches are present. */
1991
1992       for (i = 1; i <= min; i++)
1993         {
1994         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1995         GETCHARINC(c, eptr);
1996         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1997         }
1998
1999       /* If max == min we can continue with the main loop without the
2000       need to recurse. */
2001
2002       if (min == max) continue;
2003
2004       /* If minimizing, keep testing the rest of the expression and advancing
2005       the pointer while it matches the class. */
2006
2007       if (minimize)
2008         {
2009         for (fi = min;; fi++)
2010           {
2011           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2012           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2013           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2014           GETCHARINC(c, eptr);
2015           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2016           }
2017         /* Control never gets here */
2018         }
2019
2020       /* If maximizing, find the longest possible run, then work backwards. */
2021
2022       else
2023         {
2024         pp = eptr;
2025         for (i = min; i < max; i++)
2026           {
2027           int len = 1;
2028           if (eptr >= md->end_subject) break;
2029           GETCHARLEN(c, eptr, len);
2030           if (!_pcre_xclass(c, data)) break;
2031           eptr += len;
2032           }
2033         for(;;)
2034           {
               /* Try the rest of the pattern at the current position. If that
               fails, step back (eptr-- followed by BACKCHAR) and try again, until
               the position saved in pp has itself been tried. */
2035           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2036           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2037           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2038           BACKCHAR(eptr);                 /* Terminated explicitly, as at the other call sites */
2039           }
2040         RRETURN(MATCH_NOMATCH);
2041         }
2042
2043       /* Control never gets here */
2044       }
2045 #endif    /* End of XCLASS */
2046
2047     /* Match a single character, casefully */
2048
2049     case OP_CHAR:
2050 #ifdef SUPPORT_UTF8
2051     if (utf8)
2052       {
           /* UTF-8 mode: the pattern character starts at ecode[1] and may occupy
           more than one byte. GETCHARLEN is defined elsewhere; presumably it
           decodes the character into fc and adds the number of extra bytes to
           length (TODO confirm). Fail if fewer than length bytes of subject
           remain; otherwise compare byte for byte, advancing both the pattern
           pointer and the subject pointer. */
2053       length = 1;
2054       ecode++;
2055       GETCHARLEN(fc, ecode, length);
2056       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2057       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2058       }
2059     else
2060 #endif
2061
2062     /* Non-UTF-8 mode */
2063       {
           /* ecode[1] is the single literal byte that follows the opcode. Fail
           at the end of the subject or on a mismatch; on success move past the
           opcode and its data byte. */
2064       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2065       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2066       ecode += 2;
2067       }
2068     break;
2069
2070     /* Match a single character, caselessly */
2071
2072     case OP_CHARNC:
2073 #ifdef SUPPORT_UTF8
2074     if (utf8)
2075       {
           /* Step over the opcode and read the pattern character into fc.
           NOTE(review): GETCHARLEN is defined elsewhere; presumably it leaves
           the character's byte count in length - confirm. */
2076       length = 1;
2077       ecode++;
2078       GETCHARLEN(fc, ecode, length);
2079
           /* The subject must have at least as many bytes left as the pattern
           character occupies. */
2080       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2081
2082       /* If the pattern character's value is < 128, we have only one byte, and
2083       can use the fast lookup table (md->lcc) on both the pattern byte and the
           subject byte. */
2084
2085       if (fc < 128)
2086         {
2087         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2088         }
2089
2090       /* Otherwise we must pick up the subject character */
2091
2092       else
2093         {
2094         unsigned int dc;
2095         GETCHARINC(dc, eptr);
2096         ecode += length;            /* Skip the whole pattern character */
2097
2098         /* If we have Unicode property support, we can use it to test the other
2099         case of the character, if there is one. Without SUPPORT_UCP the inner
             test is compiled out, so any difference between fc and dc is a
             failure. */
2100
2101         if (fc != dc)
2102           {
2103 #ifdef SUPPORT_UCP
2104           if (dc != _pcre_ucp_othercase(fc))
2105 #endif
2106             RRETURN(MATCH_NOMATCH);
2107           }
2108         }
2109       }
2110     else
2111 #endif   /* SUPPORT_UTF8 */
2112
2113     /* Non-UTF-8 mode */
2114       {
           /* Compare the pattern byte ecode[1] and the subject byte after
           mapping both through md->lcc; on success move past the opcode and its
           data byte. */
2115       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2116       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2117       ecode += 2;
2118       }
2119     break;
2120
2121     /* Match a single character repeatedly. */
2122
2123     case OP_EXACT:
2124     min = max = GET2(ecode, 1);
2125     ecode += 3;
2126     goto REPEATCHAR;
2127
2128     case OP_POSUPTO:
2129     possessive = TRUE;
2130     /* Fall through */
2131
2132     case OP_UPTO:
2133     case OP_MINUPTO:
2134     min = 0;
2135     max = GET2(ecode, 1);
2136     minimize = *ecode == OP_MINUPTO;
2137     ecode += 3;
2138     goto REPEATCHAR;
2139
2140     case OP_POSSTAR:
2141     possessive = TRUE;
2142     min = 0;
2143     max = INT_MAX;
2144     ecode++;
2145     goto REPEATCHAR;
2146
2147     case OP_POSPLUS:
2148     possessive = TRUE;
2149     min = 1;
2150     max = INT_MAX;
2151     ecode++;
2152     goto REPEATCHAR;
2153
2154     case OP_POSQUERY:
2155     possessive = TRUE;
2156     min = 0;
2157     max = 1;
2158     ecode++;
2159     goto REPEATCHAR;
2160
2161     case OP_STAR:
2162     case OP_MINSTAR:
2163     case OP_PLUS:
2164     case OP_MINPLUS:
2165     case OP_QUERY:
2166     case OP_MINQUERY:
2167     c = *ecode++ - OP_STAR;
2168     minimize = (c & 1) != 0;
2169     min = rep_min[c];                 /* Pick up values from tables; */
2170     max = rep_max[c];                 /* zero for max => infinity */
2171     if (max == 0) max = INT_MAX;
2172
2173     /* Common code for all repeated single-character matches. We can give
2174     up quickly if there are fewer than the minimum number of characters left in
2175     the subject. */
2176
2177     REPEATCHAR:
2178 #ifdef SUPPORT_UTF8
2179     if (utf8)
2180       {
2181       length = 1;
2182       charptr = ecode;
2183       GETCHARLEN(fc, ecode, length);
2184       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2185       ecode += length;
2186
2187       /* Handle multibyte character matching specially here. There is
2188       support for caseless matching if UCP support is present. */
2189
2190       if (length > 1)
2191         {
2192 #ifdef SUPPORT_UCP
2193         unsigned int othercase;
2194         if ((ims & PCRE_CASELESS) != 0 &&
2195             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2196           oclength = _pcre_ord2utf8(othercase, occhars);
2197         else oclength = 0;
2198 #endif  /* SUPPORT_UCP */
2199
2200         for (i = 1; i <= min; i++)
2201           {
               /* Match the mandatory repetitions of a multibyte character. Each
               memcmp() is guarded by a check that enough subject bytes remain:
               the earlier min * length test assumes every repetition is length
               bytes long, which does not hold when the other-case form (occhars,
               oclength bytes) has a different encoded length. */
2202           if (md->end_subject - eptr >= length &&
                   memcmp(eptr, charptr, length) == 0) eptr += length;
2203 #ifdef SUPPORT_UCP
2204           /* Need braces because of following else */
2205           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2206           else
2207             {
2208             if (md->end_subject - eptr < oclength ||
                     memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2209             eptr += oclength;
2210             }
2211 #else   /* without SUPPORT_UCP */
2212           else { RRETURN(MATCH_NOMATCH); }
2213 #endif  /* SUPPORT_UCP */
2214           }
2215
2216         if (min == max) continue;
2217
2218         if (minimize)
2219           {
2220           for (fi = min;; fi++)
2221             {
2222             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2223             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2224             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2225             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2226 #ifdef SUPPORT_UCP
2227             /* Need braces because of following else */
2228             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2229             else
2230               {
2231               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2232               eptr += oclength;
2233               }
2234 #else   /* without SUPPORT_UCP */
2235             else { RRETURN (MATCH_NOMATCH); }
2236 #endif  /* SUPPORT_UCP */
2237             }
2238           /* Control never gets here */
2239           }
2240
2241         else  /* Maximize */
2242           {
2243           pp = eptr;
2244           for (i = min; i < max; i++)
2245             {
2246             if (eptr > md->end_subject - length) break;
2247             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2248 #ifdef SUPPORT_UCP
2249             else if (oclength == 0) break;
2250             else
2251               {
2252               if (memcmp(eptr, occhars, oclength) != 0) break;
2253               eptr += oclength;
2254               }
2255 #else   /* without SUPPORT_UCP */
2256             else break;
2257 #endif  /* SUPPORT_UCP */
2258             }
2259
2260           if (possessive) continue;
2261           for(;;)
2262            {
2263            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2264            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2265            if (eptr == pp) RRETURN(MATCH_NOMATCH);
2266 #ifdef SUPPORT_UCP
2267            eptr--;
2268            BACKCHAR(eptr);
2269 #else   /* without SUPPORT_UCP */
2270            eptr -= length;
2271 #endif  /* SUPPORT_UCP */
2272            }
2273           }
2274         /* Control never gets here */
2275         }
2276
2277       /* If the length of a UTF-8 character is 1, we fall through here, and
2278       obey the code as for non-UTF-8 characters below, though in this case the
2279       value of fc will always be < 128. */
2280       }
2281     else
2282 #endif  /* SUPPORT_UTF8 */
2283
2284     /* When not in UTF-8 mode, load a single-byte character. */
2285       {
2286       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2287       fc = *ecode++;
2288       }
2289
2290     /* The value of fc at this point is always less than 256, though we may or
2291     may not be in UTF-8 mode. The code is duplicated for the caseless and
2292     caseful cases, for speed, since matching characters is likely to be quite
2293     common. First, ensure the minimum number of matches are present. If min =
2294     max, continue at the same level without recursing. Otherwise, if
2295     minimizing, keep trying the rest of the expression and advancing one
2296     matching character if failing, up to the maximum. Alternatively, if
2297     maximizing, find the maximum number of characters and work backwards. */
2298
2299     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2300       max, eptr));
2301
2302     if ((ims & PCRE_CASELESS) != 0)
2303       {
2304       fc = md->lcc[fc];
2305       for (i = 1; i <= min; i++)
2306         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2307       if (min == max) continue;
2308       if (minimize)
2309         {
2310         for (fi = min;; fi++)
2311           {
2312           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2313           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2314           if (fi >= max || eptr >= md->end_subject ||
2315               fc != md->lcc[*eptr++])
2316             RRETURN(MATCH_NOMATCH);
2317           }
2318         /* Control never gets here */
2319         }
2320       else  /* Maximize */
2321         {
2322         pp = eptr;
2323         for (i = min; i < max; i++)
2324           {
2325           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2326           eptr++;
2327           }
2328         if (possessive) continue;
2329         while (eptr >= pp)
2330           {
2331           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2332           eptr--;
2333           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2334           }
2335         RRETURN(MATCH_NOMATCH);
2336         }
2337       /* Control never gets here */
2338       }
2339
2340     /* Caseful comparisons (includes all multi-byte characters) */
2341
2342     else
2343       {
2344       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2345       if (min == max) continue;
2346       if (minimize)
2347         {
2348         for (fi = min;; fi++)
2349           {
2350           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2351           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2352           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2353             RRETURN(MATCH_NOMATCH);
2354           }
2355         /* Control never gets here */
2356         }
2357       else  /* Maximize */
2358         {
2359         pp = eptr;
2360         for (i = min; i < max; i++)
2361           {
2362           if (eptr >= md->end_subject || fc != *eptr) break;
2363           eptr++;
2364           }
2365         if (possessive) continue;
2366         while (eptr >= pp)
2367           {
2368           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2369           eptr--;
2370           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2371           }
2372         RRETURN(MATCH_NOMATCH);
2373         }
2374       }
2375     /* Control never gets here */
2376
2377     /* Match a negated single character. The pattern character is always one
2378     byte; the subject character being checked against it can be multibyte. */
2379
2380     case OP_NOT:
         /* A negated character still has to consume one subject character, so
         it cannot match at the end of the subject. */
2381     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2382     ecode++;                     /* Now points at the excluded byte */
         /* NOTE(review): GETCHARINCTEST is defined elsewhere; presumably it
         fetches the next subject character into c and advances eptr, decoding
         UTF-8 only when that mode is active - confirm. */
2383     GETCHARINCTEST(c, eptr);
2384     if ((ims & PCRE_CASELESS) != 0)
2385       {
           /* Caseless: map both characters through md->lcc before comparing.
           When UTF-8 support is compiled in, c is mapped only if it is below
           256; larger values are compared unchanged. */
2386 #ifdef SUPPORT_UTF8
2387       if (c < 256)
2388 #endif
2389       c = md->lcc[c];
2390       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2391       }
2392     else
2393       {
           /* Caseful: fail only if the subject character equals the excluded
           byte. Either way ecode is left pointing past the data byte. */
2394       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2395       }
2396     break;
2397
2398     /* Match a negated single one-byte character repeatedly. This is almost a
2399     repeat of the code for a repeated single character, but I haven't found a
2400     nice way of commoning these up that doesn't require a test of the
2401     positive/negative option for each character match. Maybe that wouldn't add
2402     very much to the time taken, but character matching *is* what this is all
2403     about... */
2404
2405     case OP_NOTEXACT:
2406     min = max = GET2(ecode, 1);
2407     ecode += 3;
2408     goto REPEATNOTCHAR;
2409
2410     case OP_NOTUPTO:
2411     case OP_NOTMINUPTO:
2412     min = 0;
2413     max = GET2(ecode, 1);
2414     minimize = *ecode == OP_NOTMINUPTO;
2415     ecode += 3;
2416     goto REPEATNOTCHAR;
2417
2418     case OP_NOTPOSSTAR:
2419     possessive = TRUE;
2420     min = 0;
2421     max = INT_MAX;
2422     ecode++;
2423     goto REPEATNOTCHAR;
2424
2425     case OP_NOTPOSPLUS:
2426     possessive = TRUE;
2427     min = 1;
2428     max = INT_MAX;
2429     ecode++;
2430     goto REPEATNOTCHAR;
2431
2432     case OP_NOTPOSQUERY:
2433     possessive = TRUE;
2434     min = 0;
2435     max = 1;
2436     ecode++;
2437     goto REPEATNOTCHAR;
2438
2439     case OP_NOTPOSUPTO:
2440     possessive = TRUE;
2441     min = 0;
2442     max = GET2(ecode, 1);
2443     ecode += 3;
2444     goto REPEATNOTCHAR;
2445
2446     case OP_NOTSTAR:
2447     case OP_NOTMINSTAR:
2448     case OP_NOTPLUS:
2449     case OP_NOTMINPLUS:
2450     case OP_NOTQUERY:
2451     case OP_NOTMINQUERY:
2452     c = *ecode++ - OP_NOTSTAR;
2453     minimize = (c & 1) != 0;
2454     min = rep_min[c];                 /* Pick up values from tables; */
2455     max = rep_max[c];                 /* zero for max => infinity */
2456     if (max == 0) max = INT_MAX;
2457
2458     /* Common code for all repeated single-byte matches. We can give up quickly
2459     if there are fewer than the minimum number of bytes left in the
2460     subject. */
2461
2462     REPEATNOTCHAR:
2463     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2464     fc = *ecode++;
2465
2466     /* The code is duplicated for the caseless and caseful cases, for speed,
2467     since matching characters is likely to be quite common. First, ensure the
2468     minimum number of matches are present. If min = max, continue at the same
2469     level without recursing. Otherwise, if minimizing, keep trying the rest of
2470     the expression and advancing one matching character if failing, up to the
2471     maximum. Alternatively, if maximizing, find the maximum number of
2472     characters and work backwards. */
2473
2474     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2475       max, eptr));
2476
2477     if ((ims & PCRE_CASELESS) != 0)
2478       {
2479       fc = md->lcc[fc];
2480
2481 #ifdef SUPPORT_UTF8
2482       /* UTF-8 mode */
2483       if (utf8)
2484         {
2485         register unsigned int d;
2486         for (i = 1; i <= min; i++)
2487           {
               /* The byte-count test at REPEATNOTCHAR cannot guarantee that min
               whole characters remain when some of them are multibyte, so check
               for the end of the subject before fetching each character. */
               if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2488           GETCHARINC(d, eptr);
2489           if (d < 256) d = md->lcc[d];     /* Only values below 256 are case-mapped */
2490           if (fc == d) RRETURN(MATCH_NOMATCH);
2491           }
2492         }
2493       else
2494 #endif
2495
2496       /* Not UTF-8 mode */
2497         {
2498         for (i = 1; i <= min; i++)
2499           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2500         }
2501
2502       if (min == max) continue;
2503
2504       if (minimize)
2505         {
2506 #ifdef SUPPORT_UTF8
2507         /* UTF-8 mode */
2508         if (utf8)
2509           {
2510           register unsigned int d;
2511           for (fi = min;; fi++)
2512             {
                 /* Minimizing repeat: try the rest of the pattern first, and
                 consume one more non-matching character only when that fails. */
2513             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2514             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                 /* Test the repeat limit and the end of the subject BEFORE
                 fetching. Fetching first would read beyond the subject when eptr
                 is already at the end, and testing eptr after it has advanced
                 would wrongly reject the last character of the subject. */
2515             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2516             GETCHARINC(d, eptr);
2517             if (d < 256) d = md->lcc[d];
2518             if (fc == d) RRETURN(MATCH_NOMATCH);
2519             }
2520           }
2521         else
2522 #endif
2523         /* Not UTF-8 mode */
2524           {
2525           for (fi = min;; fi++)
2526             {
2527             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2528             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2529             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2530               RRETURN(MATCH_NOMATCH);
2531             }
2532           }
2533         /* Control never gets here */
2534         }
2535
2536       /* Maximize case */
2537
2538       else
2539         {
2540         pp = eptr;
2541
2542 #ifdef SUPPORT_UTF8
2543         /* UTF-8 mode */
2544         if (utf8)
2545           {
2546           register unsigned int d;
2547           for (i = min; i < max; i++)
2548             {
2549             int len = 1;
2550             if (eptr >= md->end_subject) break;
2551             GETCHARLEN(d, eptr, len);
2552             if (d < 256) d = md->lcc[d];
2553             if (fc == d) break;
2554             eptr += len;
2555             }
2556         if (possessive) continue;
2557         for(;;)
2558             {
2559             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2560             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2561             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2562             BACKCHAR(eptr);
2563             }
2564           }
2565         else
2566 #endif
2567         /* Not UTF-8 mode */
2568           {
2569           for (i = min; i < max; i++)
2570             {
2571             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2572             eptr++;
2573             }
2574           if (possessive) continue;
2575           while (eptr >= pp)
2576             {
2577             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2578             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2579             eptr--;
2580             }
2581           }
2582
2583         RRETURN(MATCH_NOMATCH);
2584         }
2585       /* Control never gets here */
2586       }
2587
2588     /* Caseful comparisons */
2589
2590     else
2591       {
2592 #ifdef SUPPORT_UTF8
2593       /* UTF-8 mode */
2594       if (utf8)
2595         {
2596         register unsigned int d;
2597         for (i = 1; i <= min; i++)
2598           {
               /* The byte-count test at REPEATNOTCHAR cannot guarantee that min
               whole characters remain when some of them are multibyte, so check
               for the end of the subject before fetching each character. */
               if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2599           GETCHARINC(d, eptr);
2600           if (fc == d) RRETURN(MATCH_NOMATCH);
2601           }
2602         }
2603       else
2604 #endif
2605       /* Not UTF-8 mode */
2606         {
2607         for (i = 1; i <= min; i++)
2608           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2609         }
2610
2611       if (min == max) continue;
2612
2613       if (minimize)
2614         {
2615 #ifdef SUPPORT_UTF8
2616         /* UTF-8 mode */
2617         if (utf8)
2618           {
2619           register unsigned int d;
2620           for (fi = min;; fi++)
2621             {
                 /* Minimizing repeat: try the rest of the pattern first, and
                 consume one more non-matching character only when that fails. */
2622             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2623             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                 /* Test the repeat limit and the end of the subject BEFORE
                 fetching. Fetching first would read beyond the subject when eptr
                 is already at the end, and testing eptr after it has advanced
                 would wrongly reject the last character of the subject. */
2624             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2625             GETCHARINC(d, eptr);
2626             if (fc == d) RRETURN(MATCH_NOMATCH);
2627             }
2628           }
2629         else
2630 #endif
2631         /* Not UTF-8 mode */
2632           {
2633           for (fi = min;; fi++)
2634             {
2635             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2636             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2637             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2638               RRETURN(MATCH_NOMATCH);
2639             }
2640           }
2641         /* Control never gets here */
2642         }
2643
2644       /* Maximize case */
2645
2646       else
2647         {
2648         pp = eptr;
2649
2650 #ifdef SUPPORT_UTF8
2651         /* UTF-8 mode */
2652         if (utf8)
2653           {
2654           register unsigned int d;
2655           for (i = min; i < max; i++)
2656             {
2657             int len = 1;
2658             if (eptr >= md->end_subject) break;
2659             GETCHARLEN(d, eptr, len);
2660             if (fc == d) break;
2661             eptr += len;
2662             }
2663           if (possessive) continue;
2664           for(;;)
2665             {
2666             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2667             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2668             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2669             BACKCHAR(eptr);
2670             }
2671           }
2672         else
2673 #endif
2674         /* Not UTF-8 mode */
2675           {
2676           for (i = min; i < max; i++)
2677             {
2678             if (eptr >= md->end_subject || fc == *eptr) break;
2679             eptr++;
2680             }
2681           if (possessive) continue;
2682           while (eptr >= pp)
2683             {
2684             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2685             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2686             eptr--;
2687             }
2688           }
2689
2690         RRETURN(MATCH_NOMATCH);
2691         }
2692       }
2693     /* Control never gets here */
2694
2695     /* Match a single character type repeatedly; several different opcodes
2696     share code. This is very similar to the code for single characters, but we
2697     repeat it in the interests of efficiency. */
2698
2699     case OP_TYPEEXACT:
2700     min = max = GET2(ecode, 1);
2701     minimize = TRUE;
2702     ecode += 3;
2703     goto REPEATTYPE;
2704
2705     case OP_TYPEUPTO:
2706     case OP_TYPEMINUPTO:
2707     min = 0;
2708     max = GET2(ecode, 1);
2709     minimize = *ecode == OP_TYPEMINUPTO;
2710     ecode += 3;
2711     goto REPEATTYPE;
2712
2713     case OP_TYPEPOSSTAR:
2714     possessive = TRUE;
2715     min = 0;
2716     max = INT_MAX;
2717     ecode++;
2718     goto REPEATTYPE;
2719
2720     case OP_TYPEPOSPLUS:
2721     possessive = TRUE;
2722     min = 1;
2723     max = INT_MAX;
2724     ecode++;
2725     goto REPEATTYPE;
2726
2727     case OP_TYPEPOSQUERY:
2728     possessive = TRUE;
2729     min = 0;
2730     max = 1;
2731     ecode++;
2732     goto REPEATTYPE;
2733
2734     case OP_TYPEPOSUPTO:
2735     possessive = TRUE;
2736     min = 0;
2737     max = GET2(ecode, 1);
2738     ecode += 3;
2739     goto REPEATTYPE;
2740
2741     case OP_TYPESTAR:
2742     case OP_TYPEMINSTAR:
2743     case OP_TYPEPLUS:
2744     case OP_TYPEMINPLUS:
2745     case OP_TYPEQUERY:
2746     case OP_TYPEMINQUERY:
2747     c = *ecode++ - OP_TYPESTAR;
2748     minimize = (c & 1) != 0;
2749     min = rep_min[c];                 /* Pick up values from tables; */
2750     max = rep_max[c];                 /* zero for max => infinity */
2751     if (max == 0) max = INT_MAX;
2752
2753     /* Common code for all repeated single character type matches. Note that
2754     in UTF-8 mode only OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR are restricted to
2755     one-byte characters; '.' and most other types can match longer ones. */
2756
2757     REPEATTYPE:
2758     ctype = *ecode++;      /* Code for the character type */
2759
2760 #ifdef SUPPORT_UCP
2761     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2762       {
2763       prop_fail_result = ctype == OP_NOTPROP;
2764       prop_type = *ecode++;
2765       prop_value = *ecode++;
2766       }
2767     else prop_type = -1;
2768 #endif
2769
2770     /* First, ensure the minimum number of matches are present. Use inline
2771     code for maximizing the speed, and do the type test once at the start
2772     (i.e. keep it out of the loop). Also we can test that there are at least
2773     the minimum number of bytes before we start. This isn't as effective in
2774     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2775     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2776     and single-bytes. */
2777
2778     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2779     if (min > 0)
2780       {
2781 #ifdef SUPPORT_UCP
2782       if (prop_type >= 0)
2783         {
2784         switch(prop_type)
2785           {
2786           case PT_ANY:
2787           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2788           for (i = 1; i <= min; i++)
2789             {
2790             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2791             GETCHARINCTEST(c, eptr);
2792             }
2793           break;
2794
2795           case PT_LAMP:
2796           for (i = 1; i <= min; i++)
2797             {
2798             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2799             GETCHARINCTEST(c, eptr);
2800             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2801             if ((prop_chartype == ucp_Lu ||
2802                  prop_chartype == ucp_Ll ||
2803                  prop_chartype == ucp_Lt) == prop_fail_result)
2804               RRETURN(MATCH_NOMATCH);
2805             }
2806           break;
2807
2808           case PT_GC:
2809           for (i = 1; i <= min; i++)
2810             {
2811             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2812             GETCHARINCTEST(c, eptr);
2813             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2814             if ((prop_category == prop_value) == prop_fail_result)
2815               RRETURN(MATCH_NOMATCH);
2816             }
2817           break;
2818
2819           case PT_PC:
2820           for (i = 1; i <= min; i++)
2821             {
2822             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823             GETCHARINCTEST(c, eptr);
2824             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825             if ((prop_chartype == prop_value) == prop_fail_result)
2826               RRETURN(MATCH_NOMATCH);
2827             }
2828           break;
2829
2830           case PT_SC:
2831           for (i = 1; i <= min; i++)
2832             {
2833             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834             GETCHARINCTEST(c, eptr);
2835             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836             if ((prop_script == prop_value) == prop_fail_result)
2837               RRETURN(MATCH_NOMATCH);
2838             }
2839           break;
2840
2841           default:
2842           RRETURN(PCRE_ERROR_INTERNAL);
2843           }
2844         }
2845
2846       /* Match extended Unicode sequences. We will get here only if the
2847       support is in the binary; otherwise a compile-time error occurs. */
2848
2849       else if (ctype == OP_EXTUNI)
2850         {
2851         for (i = 1; i <= min; i++)
2852           {
               /* One extended sequence can consume several bytes (a leading
               character plus any following ucp_M characters), so the byte-count
               test made before this loop cannot guarantee that anything is left
               for the later iterations. Check before every read so that we never
               run off the end of the subject. */
               if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2853           GETCHARINCTEST(c, eptr);
2854           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2855           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
               /* Absorb the following characters whose category is ucp_M */
2856           while (eptr < md->end_subject)
2857             {
2858             int len = 1;
2859             if (!utf8) c = *eptr; else
2860               {
2861               GETCHARLEN(c, eptr, len);
2862               }
2863             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2864             if (prop_category != ucp_M) break;
2865             eptr += len;
2866             }
2867           }
2868         }
2869
2870       else
2871 #endif     /* SUPPORT_UCP */
2872
2873 /* Handle all other cases when the coding is UTF-8 */
2874
2875 #ifdef SUPPORT_UTF8
2876       if (utf8) switch(ctype)
2877         {
2878         case OP_ANY:
2879         for (i = 1; i <= min; i++)
2880           {
2881           if (eptr >= md->end_subject ||
2882                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2883             RRETURN(MATCH_NOMATCH);
2884           eptr++;
2885           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2886           }
2887         break;
2888
2889         case OP_ANYBYTE:
2890         eptr += min;
2891         break;
2892
2893         case OP_ANYNL:
2894         for (i = 1; i <= min; i++)
2895           {
2896           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2897           GETCHARINC(c, eptr);
2898           switch(c)
2899             {
2900             default: RRETURN(MATCH_NOMATCH);
2901             case 0x000d:
2902             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2903             break;
2904             case 0x000a:
2905             case 0x000b:
2906             case 0x000c:
2907             case 0x0085:
2908             case 0x2028:
2909             case 0x2029:
2910             break;
2911             }
2912           }
2913         break;
2914
2915         case OP_NOT_HSPACE:
2916         for (i = 1; i <= min; i++)
2917           {
2918           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2919           GETCHARINC(c, eptr);
2920           switch(c)
2921             {
2922             default: break;
2923             case 0x09:      /* HT */
2924             case 0x20:      /* SPACE */
2925             case 0xa0:      /* NBSP */
2926             case 0x1680:    /* OGHAM SPACE MARK */
2927             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2928             case 0x2000:    /* EN QUAD */
2929             case 0x2001:    /* EM QUAD */
2930             case 0x2002:    /* EN SPACE */
2931             case 0x2003:    /* EM SPACE */
2932             case 0x2004:    /* THREE-PER-EM SPACE */
2933             case 0x2005:    /* FOUR-PER-EM SPACE */
2934             case 0x2006:    /* SIX-PER-EM SPACE */
2935             case 0x2007:    /* FIGURE SPACE */
2936             case 0x2008:    /* PUNCTUATION SPACE */
2937             case 0x2009:    /* THIN SPACE */
2938             case 0x200A:    /* HAIR SPACE */
2939             case 0x202f:    /* NARROW NO-BREAK SPACE */
2940             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2941             case 0x3000:    /* IDEOGRAPHIC SPACE */
2942             RRETURN(MATCH_NOMATCH);
2943             }
2944           }
2945         break;
2946
2947         case OP_HSPACE:
2948         for (i = 1; i <= min; i++)
2949           {
2950           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951           GETCHARINC(c, eptr);
2952           switch(c)
2953             {
2954             default: RRETURN(MATCH_NOMATCH);
2955             case 0x09:      /* HT */
2956             case 0x20:      /* SPACE */
2957             case 0xa0:      /* NBSP */
2958             case 0x1680:    /* OGHAM SPACE MARK */
2959             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2960             case 0x2000:    /* EN QUAD */
2961             case 0x2001:    /* EM QUAD */
2962             case 0x2002:    /* EN SPACE */
2963             case 0x2003:    /* EM SPACE */
2964             case 0x2004:    /* THREE-PER-EM SPACE */
2965             case 0x2005:    /* FOUR-PER-EM SPACE */
2966             case 0x2006:    /* SIX-PER-EM SPACE */
2967             case 0x2007:    /* FIGURE SPACE */
2968             case 0x2008:    /* PUNCTUATION SPACE */
2969             case 0x2009:    /* THIN SPACE */
2970             case 0x200A:    /* HAIR SPACE */
2971             case 0x202f:    /* NARROW NO-BREAK SPACE */
2972             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2973             case 0x3000:    /* IDEOGRAPHIC SPACE */
2974             break;
2975             }
2976           }
2977         break;
2978
2979         case OP_NOT_VSPACE:
2980         for (i = 1; i <= min; i++)
2981           {
2982           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2983           GETCHARINC(c, eptr);
2984           switch(c)
2985             {
2986             default: break;
2987             case 0x0a:      /* LF */
2988             case 0x0b:      /* VT */
2989             case 0x0c:      /* FF */
2990             case 0x0d:      /* CR */
2991             case 0x85:      /* NEL */
2992             case 0x2028:    /* LINE SEPARATOR */
2993             case 0x2029:    /* PARAGRAPH SEPARATOR */
2994             RRETURN(MATCH_NOMATCH);
2995             }
2996           }
2997         break;
2998
2999         case OP_VSPACE:
3000         for (i = 1; i <= min; i++)
3001           {
3002           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3003           GETCHARINC(c, eptr);
3004           switch(c)
3005             {
3006             default: RRETURN(MATCH_NOMATCH);
3007             case 0x0a:      /* LF */
3008             case 0x0b:      /* VT */
3009             case 0x0c:      /* FF */
3010             case 0x0d:      /* CR */
3011             case 0x85:      /* NEL */
3012             case 0x2028:    /* LINE SEPARATOR */
3013             case 0x2029:    /* PARAGRAPH SEPARATOR */
3014             break;
3015             }
3016           }
3017         break;
3018
3019         case OP_NOT_DIGIT:
3020         for (i = 1; i <= min; i++)
3021           {
3022           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3023           GETCHARINC(c, eptr);
3024           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3025             RRETURN(MATCH_NOMATCH);
3026           }
3027         break;
3028
3029         case OP_DIGIT:
3030         for (i = 1; i <= min; i++)
3031           {
3032           if (eptr >= md->end_subject ||
3033              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3034             RRETURN(MATCH_NOMATCH);
3035           /* No need to skip more bytes - we know it's a 1-byte character */
3036           }
3037         break;
3038
3039         case OP_NOT_WHITESPACE:
3040         for (i = 1; i <= min; i++)
3041           {
               /* Fail at end of subject or on a one-byte space character. The
               pointer must not be advanced inside this test: the table lookup is
               skipped for bytes >= 128, and an increment placed there would leave
               eptr stuck on the same multi-byte character for every iteration. */
3042           if (eptr >= md->end_subject ||
3043              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3044             RRETURN(MATCH_NOMATCH);
               /* Always step over the first byte, then over any continuation bytes */
3045           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3046           }
3047         break;
3048
3049         case OP_WHITESPACE:
3050         for (i = 1; i <= min; i++)
3051           {
3052           if (eptr >= md->end_subject ||
3053              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3054             RRETURN(MATCH_NOMATCH);
3055           /* No need to skip more bytes - we know it's a 1-byte character */
3056           }
3057         break;
3058
3059         case OP_NOT_WORDCHAR:
3060         for (i = 1; i <= min; i++)
3061           {
               /* Fail at end of subject or on a one-byte word character. The
               pointer must not be advanced inside this test: the table lookup is
               skipped for bytes >= 128, and an increment placed there would leave
               eptr stuck on the same multi-byte character for every iteration. */
3062           if (eptr >= md->end_subject ||
3063              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3064             RRETURN(MATCH_NOMATCH);
               /* Always step over the first byte, then over any continuation bytes */
3065           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3066           }
3067         break;
3068
3069         case OP_WORDCHAR:
3070         for (i = 1; i <= min; i++)
3071           {
3072           if (eptr >= md->end_subject ||
3073              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3074             RRETURN(MATCH_NOMATCH);
3075           /* No need to skip more bytes - we know it's a 1-byte character */
3076           }
3077         break;
3078
3079         default:
3080         RRETURN(PCRE_ERROR_INTERNAL);
3081         }  /* End switch(ctype) */
3082
3083       else
3084 #endif     /* SUPPORT_UTF8 */
3085
3086       /* Code for the non-UTF-8 case for minimum matching of operators other
3087       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3088       number of bytes present, as this was tested above. */
3089
3090       switch(ctype)
3091         {
3092         case OP_ANY:
3093         if ((ims & PCRE_DOTALL) == 0)
3094           {
3095           for (i = 1; i <= min; i++)
3096             {
3097             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3098             eptr++;
3099             }
3100           }
3101         else eptr += min;
3102         break;
3103
3104         case OP_ANYBYTE:
3105         eptr += min;
3106         break;
3107
3108         /* Because of the CRLF case, we can't assume the minimum number of
3109         bytes are present in this case. */
3110
3111         case OP_ANYNL:
3112         for (i = 1; i <= min; i++)
3113           {
3114           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3115           switch(*eptr++)
3116             {
3117             default: RRETURN(MATCH_NOMATCH);
3118             case 0x000d:
3119             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120             break;
3121             case 0x000a:
3122             case 0x000b:
3123             case 0x000c:
3124             case 0x0085:
3125             break;
3126             }
3127           }
3128         break;
3129
3130         case OP_NOT_HSPACE:
3131         for (i = 1; i <= min; i++)
3132           {
3133           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3134           switch(*eptr++)
3135             {
3136             default: break;
3137             case 0x09:      /* HT */
3138             case 0x20:      /* SPACE */
3139             case 0xa0:      /* NBSP */
3140             RRETURN(MATCH_NOMATCH);
3141             }
3142           }
3143         break;
3144
3145         case OP_HSPACE:
3146         for (i = 1; i <= min; i++)
3147           {
3148           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3149           switch(*eptr++)
3150             {
3151             default: RRETURN(MATCH_NOMATCH);
3152             case 0x09:      /* HT */
3153             case 0x20:      /* SPACE */
3154             case 0xa0:      /* NBSP */
3155             break;
3156             }
3157           }
3158         break;
3159
3160         case OP_NOT_VSPACE:
3161         for (i = 1; i <= min; i++)
3162           {
3163           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3164           switch(*eptr++)
3165             {
3166             default: break;
3167             case 0x0a:      /* LF */
3168             case 0x0b:      /* VT */
3169             case 0x0c:      /* FF */
3170             case 0x0d:      /* CR */
3171             case 0x85:      /* NEL */
3172             RRETURN(MATCH_NOMATCH);
3173             }
3174           }
3175         break;
3176
3177         case OP_VSPACE:
3178         for (i = 1; i <= min; i++)
3179           {
3180           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3181           switch(*eptr++)
3182             {
3183             default: RRETURN(MATCH_NOMATCH);
3184             case 0x0a:      /* LF */
3185             case 0x0b:      /* VT */
3186             case 0x0c:      /* FF */
3187             case 0x0d:      /* CR */
3188             case 0x85:      /* NEL */
3189             break;
3190             }
3191           }
3192         break;
3193
3194         case OP_NOT_DIGIT:
3195         for (i = 1; i <= min; i++)
3196           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3197         break;
3198
3199         case OP_DIGIT:
3200         for (i = 1; i <= min; i++)
3201           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3202         break;
3203
3204         case OP_NOT_WHITESPACE:
3205         for (i = 1; i <= min; i++)
3206           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3207         break;
3208
3209         case OP_WHITESPACE:
3210         for (i = 1; i <= min; i++)
3211           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3212         break;
3213
3214         case OP_NOT_WORDCHAR:
3215         for (i = 1; i <= min; i++)
3216           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3217             RRETURN(MATCH_NOMATCH);
3218         break;
3219
3220         case OP_WORDCHAR:
3221         for (i = 1; i <= min; i++)
3222           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3223             RRETURN(MATCH_NOMATCH);
3224         break;
3225
3226         default:
3227         RRETURN(PCRE_ERROR_INTERNAL);
3228         }
3229       }
3230
3231     /* If min = max, continue at the same level without recursing */
3232
3233     if (min == max) continue;
3234
3235     /* If minimizing, we have to test the rest of the pattern before each
3236     subsequent match. Again, separate the UTF-8 case for speed, and also
3237     separate the UCP cases. */
3238
3239     if (minimize)
3240       {
3241 #ifdef SUPPORT_UCP
3242       if (prop_type >= 0)
3243         {
             /* Minimizing repeat of a Unicode property test: before each extra
             character, try the rest of the pattern; give up once max characters
             have been taken, the subject is exhausted, or a character fails the
             property test. Characters are fetched with GETCHARINCTEST, as in the
             minimum-count code for these same property types, so that bytes are
             not decoded as UTF-8 when UTF-8 mode is off. */
3244         switch(prop_type)
3245           {
3246           case PT_ANY:
3247           for (fi = min;; fi++)
3248             {
3249             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3250             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3251             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3252             GETCHARINCTEST(c, eptr);
3253             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3254             }
3255           /* Control never gets here */
3256
3257           case PT_LAMP:
3258           for (fi = min;; fi++)
3259             {
3260             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3261             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263             GETCHARINCTEST(c, eptr);
3264             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3265             if ((prop_chartype == ucp_Lu ||
3266                  prop_chartype == ucp_Ll ||
3267                  prop_chartype == ucp_Lt) == prop_fail_result)
3268               RRETURN(MATCH_NOMATCH);
3269             }
3270           /* Control never gets here */
3271
3272           case PT_GC:
3273           for (fi = min;; fi++)
3274             {
3275             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3276             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3278             GETCHARINCTEST(c, eptr);
3279             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3280             if ((prop_category == prop_value) == prop_fail_result)
3281               RRETURN(MATCH_NOMATCH);
3282             }
3283           /* Control never gets here */
3284
3285           case PT_PC:
3286           for (fi = min;; fi++)
3287             {
3288             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3289             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3290             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3291             GETCHARINCTEST(c, eptr);
3292             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293             if ((prop_chartype == prop_value) == prop_fail_result)
3294               RRETURN(MATCH_NOMATCH);
3295             }
3296           /* Control never gets here */
3297
3298           case PT_SC:
3299           for (fi = min;; fi++)
3300             {
3301             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3302             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3303             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3304             GETCHARINCTEST(c, eptr);
3305             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3306             if ((prop_script == prop_value) == prop_fail_result)
3307               RRETURN(MATCH_NOMATCH);
3308             }
3309           /* Control never gets here */
3310
3311           default:
3312           RRETURN(PCRE_ERROR_INTERNAL);
3313           }
3314         }
3315
3316       /* Match extended Unicode sequences. We will get here only if the
3317       support is in the binary; otherwise a compile-time error occurs. */
3318
3319       else if (ctype == OP_EXTUNI)
3320         {
3321         for (fi = min;; fi++)
3322           {
3323           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3324           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3325           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3326           GETCHARINCTEST(c, eptr);
3327           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3328           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3329           while (eptr < md->end_subject)
3330             {
3331             int len = 1;
3332             if (!utf8) c = *eptr; else
3333               {
3334               GETCHARLEN(c, eptr, len);
3335               }
3336             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3337             if (prop_category != ucp_M) break;
3338             eptr += len;
3339             }
3340           }
3341         }
3342
3343       else
3344 #endif     /* SUPPORT_UCP */
3345
3346 #ifdef SUPPORT_UTF8
3347       /* UTF-8 mode */
3348       if (utf8)
3349         {
3350         for (fi = min;; fi++)
3351           {
3352           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3353           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3354           if (fi >= max || eptr >= md->end_subject ||
3355                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3356                 IS_NEWLINE(eptr)))
3357             RRETURN(MATCH_NOMATCH);
3358
3359           GETCHARINC(c, eptr);
3360           switch(ctype)
3361             {
3362             case OP_ANY:        /* This is the DOTALL case */
3363             break;
3364
3365             case OP_ANYBYTE:
3366             break;
3367
3368             case OP_ANYNL:
3369             switch(c)
3370               {
3371               default: RRETURN(MATCH_NOMATCH);
3372               case 0x000d:
3373               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3374               break;
3375               case 0x000a:
3376               case 0x000b:
3377               case 0x000c:
3378               case 0x0085:
3379               case 0x2028:
3380               case 0x2029:
3381               break;
3382               }
3383             break;
3384
3385             case OP_NOT_HSPACE:
3386             switch(c)
3387               {
3388               default: break;
3389               case 0x09:      /* HT */
3390               case 0x20:      /* SPACE */
3391               case 0xa0:      /* NBSP */
3392               case 0x1680:    /* OGHAM SPACE MARK */
3393               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3394               case 0x2000:    /* EN QUAD */
3395               case 0x2001:    /* EM QUAD */
3396               case 0x2002:    /* EN SPACE */
3397               case 0x2003:    /* EM SPACE */
3398               case 0x2004:    /* THREE-PER-EM SPACE */
3399               case 0x2005:    /* FOUR-PER-EM SPACE */
3400               case 0x2006:    /* SIX-PER-EM SPACE */
3401               case 0x2007:    /* FIGURE SPACE */
3402               case 0x2008:    /* PUNCTUATION SPACE */
3403               case 0x2009:    /* THIN SPACE */
3404               case 0x200A:    /* HAIR SPACE */
3405               case 0x202f:    /* NARROW NO-BREAK SPACE */
3406               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3407               case 0x3000:    /* IDEOGRAPHIC SPACE */
3408               RRETURN(MATCH_NOMATCH);
3409               }
3410             break;
3411
3412             case OP_HSPACE:
3413             switch(c)
3414               {
3415               default: RRETURN(MATCH_NOMATCH);
3416               case 0x09:      /* HT */
3417               case 0x20:      /* SPACE */
3418               case 0xa0:      /* NBSP */
3419               case 0x1680:    /* OGHAM SPACE MARK */
3420               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3421               case 0x2000:    /* EN QUAD */
3422               case 0x2001:    /* EM QUAD */
3423               case 0x2002:    /* EN SPACE */
3424               case 0x2003:    /* EM SPACE */
3425               case 0x2004:    /* THREE-PER-EM SPACE */
3426               case 0x2005:    /* FOUR-PER-EM SPACE */
3427               case 0x2006:    /* SIX-PER-EM SPACE */
3428               case 0x2007:    /* FIGURE SPACE */
3429               case 0x2008:    /* PUNCTUATION SPACE */
3430               case 0x2009:    /* THIN SPACE */
3431               case 0x200A:    /* HAIR SPACE */
3432               case 0x202f:    /* NARROW NO-BREAK SPACE */
3433               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3434               case 0x3000:    /* IDEOGRAPHIC SPACE */
3435               break;
3436               }
3437             break;
3438
3439             case OP_NOT_VSPACE:
3440             switch(c)
3441               {
3442               default: break;
3443               case 0x0a:      /* LF */
3444               case 0x0b:      /* VT */
3445               case 0x0c:      /* FF */
3446               case 0x0d:      /* CR */
3447               case 0x85:      /* NEL */
3448               case 0x2028:    /* LINE SEPARATOR */
3449               case 0x2029:    /* PARAGRAPH SEPARATOR */
3450               RRETURN(MATCH_NOMATCH);
3451               }
3452             break;
3453
3454             case OP_VSPACE:
3455             switch(c)
3456               {
3457               default: RRETURN(MATCH_NOMATCH);
3458               case 0x0a:      /* LF */
3459               case 0x0b:      /* VT */
3460               case 0x0c:      /* FF */
3461               case 0x0d:      /* CR */
3462               case 0x85:      /* NEL */
3463               case 0x2028:    /* LINE SEPARATOR */
3464               case 0x2029:    /* PARAGRAPH SEPARATOR */
3465               break;
3466               }
3467             break;
3468
3469             case OP_NOT_DIGIT:
3470             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3471               RRETURN(MATCH_NOMATCH);
3472             break;
3473
3474             case OP_DIGIT:
3475             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3476               RRETURN(MATCH_NOMATCH);
3477             break;
3478
3479             case OP_NOT_WHITESPACE:
3480             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3481               RRETURN(MATCH_NOMATCH);
3482             break;
3483
3484             case OP_WHITESPACE:
3485             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3486               RRETURN(MATCH_NOMATCH);
3487             break;
3488
3489             case OP_NOT_WORDCHAR:
3490             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3491               RRETURN(MATCH_NOMATCH);
3492             break;
3493
3494             case OP_WORDCHAR:
3495             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3496               RRETURN(MATCH_NOMATCH);
3497             break;
3498
3499             default:
3500             RRETURN(PCRE_ERROR_INTERNAL);
3501             }
3502           }
3503         }
3504       else
3505 #endif
3506       /* Not UTF-8 mode */
3507         {
3508         for (fi = min;; fi++)
3509           {
3510           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3511           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
               /* Stop when max characters have been taken or the subject is used
               up. The newline rejection applies to OP_ANY without DOTALL only, as
               in the UTF-8 loop; the other types handled by the switch below
               (OP_ANYNL, OP_VSPACE, OP_WHITESPACE, the negated types) must remain
               free to match a newline character. */
3512           if (fi >= max || eptr >= md->end_subject ||
3513                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3514             RRETURN(MATCH_NOMATCH);
3515
3516           c = *eptr++;
3517           switch(ctype)
3518             {
3519             case OP_ANY:   /* This is the DOTALL case */
3520             break;
3521
3522             case OP_ANYBYTE:
3523             break;
3524
3525             case OP_ANYNL:
3526             switch(c)
3527               {
3528               default: RRETURN(MATCH_NOMATCH);
3529               case 0x000d:
3530               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3531               break;
3532               case 0x000a:
3533               case 0x000b:
3534               case 0x000c:
3535               case 0x0085:
3536               break;
3537               }
3538             break;
3539
3540             case OP_NOT_HSPACE:
3541             switch(c)
3542               {
3543               default: break;
3544               case 0x09:      /* HT */
3545               case 0x20:      /* SPACE */
3546               case 0xa0:      /* NBSP */
3547               RRETURN(MATCH_NOMATCH);
3548               }
3549             break;
3550
3551             case OP_HSPACE:
3552             switch(c)
3553               {
3554               default: RRETURN(MATCH_NOMATCH);
3555               case 0x09:      /* HT */
3556               case 0x20:      /* SPACE */
3557               case 0xa0:      /* NBSP */
3558               break;
3559               }
3560             break;
3561
3562             case OP_NOT_VSPACE:
3563             switch(c)
3564               {
3565               default: break;
3566               case 0x0a:      /* LF */
3567               case 0x0b:      /* VT */
3568               case 0x0c:      /* FF */
3569               case 0x0d:      /* CR */
3570               case 0x85:      /* NEL */
3571               RRETURN(MATCH_NOMATCH);
3572               }
3573             break;
3574
3575             case OP_VSPACE:
3576             switch(c)
3577               {
3578               default: RRETURN(MATCH_NOMATCH);
3579               case 0x0a:      /* LF */
3580               case 0x0b:      /* VT */
3581               case 0x0c:      /* FF */
3582               case 0x0d:      /* CR */
3583               case 0x85:      /* NEL */
3584               break;
3585               }
3586             break;
3587
3588             case OP_NOT_DIGIT:
3589             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3590             break;
3591
3592             case OP_DIGIT:
3593             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3594             break;
3595
3596             case OP_NOT_WHITESPACE:
3597             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3598             break;
3599
3600             case OP_WHITESPACE:
3601             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3602             break;
3603
3604             case OP_NOT_WORDCHAR:
3605             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3606             break;
3607
3608             case OP_WORDCHAR:
3609             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3610             break;
3611
3612             default:
3613             RRETURN(PCRE_ERROR_INTERNAL);
3614             }
3615           }
3616         }
3617       /* Control never gets here */
3618       }
3619
3620     /* If maximizing, it is worth using inline code for speed, doing the type
3621     test once at the start (i.e. keep it out of the loop). Again, keep the
3622     UTF-8 and UCP stuff separate. */
3623
3624     else
3625       {
3626       pp = eptr;  /* Remember where we started */
3627
3628 #ifdef SUPPORT_UCP
3629       if (prop_type >= 0)
3630         {
3631         switch(prop_type)
3632           {
3633           case PT_ANY:
3634           for (i = min; i < max; i++)
3635             {
3636             int len = 1;
3637             if (eptr >= md->end_subject) break;
3638             GETCHARLEN(c, eptr, len);
3639             if (prop_fail_result) break;
3640             eptr+= len;
3641             }
3642           break;
3643
3644           case PT_LAMP:
3645           for (i = min; i < max; i++)
3646             {
3647             int len = 1;
3648             if (eptr >= md->end_subject) break;
3649             GETCHARLEN(c, eptr, len);
3650             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3651             if ((prop_chartype == ucp_Lu ||
3652                  prop_chartype == ucp_Ll ||
3653                  prop_chartype == ucp_Lt) == prop_fail_result)
3654               break;
3655             eptr+= len;
3656             }
3657           break;
3658
3659           case PT_GC:
3660           for (i = min; i < max; i++)
3661             {
3662             int len = 1;
3663             if (eptr >= md->end_subject) break;
3664             GETCHARLEN(c, eptr, len);
3665             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3666             if ((prop_category == prop_value) == prop_fail_result)
3667               break;
3668             eptr+= len;
3669             }
3670           break;
3671
3672           case PT_PC:
3673           for (i = min; i < max; i++)
3674             {
3675             int len = 1;
3676             if (eptr >= md->end_subject) break;
3677             GETCHARLEN(c, eptr, len);
3678             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3679             if ((prop_chartype == prop_value) == prop_fail_result)
3680               break;
3681             eptr+= len;
3682             }
3683           break;
3684
3685           case PT_SC:
3686           for (i = min; i < max; i++)
3687             {
3688             int len = 1;
3689             if (eptr >= md->end_subject) break;
3690             GETCHARLEN(c, eptr, len);
3691             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3692             if ((prop_script == prop_value) == prop_fail_result)
3693               break;
3694             eptr+= len;
3695             }
3696           break;
3697           }
3698
3699         /* eptr is now past the end of the maximum run */
3700
3701         if (possessive) continue;
3702         for(;;)
3703           {
3704           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3705           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3706           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3707           BACKCHAR(eptr);
3708           }
3709         }
3710
3711       /* Match extended Unicode sequences. We will get here only if the
3712       support is in the binary; otherwise a compile-time error occurs. */
3713
3714       else if (ctype == OP_EXTUNI)
3715         {
3716         for (i = min; i < max; i++)
3717           {
3718           if (eptr >= md->end_subject) break;
3719           GETCHARINCTEST(c, eptr);
3720           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3721           if (prop_category == ucp_M) break;
3722           while (eptr < md->end_subject)
3723             {
3724             int len = 1;
3725             if (!utf8) c = *eptr; else
3726               {
3727               GETCHARLEN(c, eptr, len);
3728               }
3729             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3730             if (prop_category != ucp_M) break;
3731             eptr += len;
3732             }
3733           }
3734
3735         /* eptr is now past the end of the maximum run */
3736
3737         if (possessive) continue;
3738         for(;;)
3739           {
3740           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3741           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3742           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3743           for (;;)                        /* Move back over one extended */
3744             {
3745             int len = 1;
3746             BACKCHAR(eptr);
3747             if (!utf8) c = *eptr; else
3748               {
3749               GETCHARLEN(c, eptr, len);
3750               }
3751             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3752             if (prop_category != ucp_M) break;
3753             eptr--;
3754             }
3755           }
3756         }
3757
3758       else
3759 #endif   /* SUPPORT_UCP */
3760
3761 #ifdef SUPPORT_UTF8
3762       /* UTF-8 mode */
3763
3764       if (utf8)
3765         {
3766         switch(ctype)
3767           {
3768           case OP_ANY:
3769
3770           /* Special code is required for UTF8, but when the maximum is
3771           unlimited we don't need it, so we repeat the non-UTF8 code. This is
3772           probably worth it, because .* is quite a common idiom. */
3773
3774           if (max < INT_MAX)
3775             {
3776             if ((ims & PCRE_DOTALL) == 0)
3777               {
3778               for (i = min; i < max; i++)
3779                 {
3780                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3781                 eptr++;
3782                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3783                 }
3784               }
3785             else
3786               {
3787               for (i = min; i < max; i++)
3788                 {
3789                 if (eptr >= md->end_subject) break;
3790                 eptr++;
3791                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3792                 }
3793               }
3794             }
3795
3796           /* Handle unlimited UTF-8 repeat */
3797
3798           else
3799             {
3800             if ((ims & PCRE_DOTALL) == 0)
3801               {
3802               for (i = min; i < max; i++)
3803                 {
3804                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3805                 eptr++;
3806                 }
3807               break;
3808               }
3809             else
3810               {
3811               c = max - min;
3812               if (c > (unsigned int)(md->end_subject - eptr))
3813                 c = md->end_subject - eptr;
3814               eptr += c;
3815               }
3816             }
3817           break;
3818
3819           /* The byte case is the same as non-UTF8 */
3820
3821           case OP_ANYBYTE:
3822           c = max - min;
3823           if (c > (unsigned int)(md->end_subject - eptr))
3824             c = md->end_subject - eptr;
3825           eptr += c;
3826           break;
3827
3828           case OP_ANYNL:
3829           for (i = min; i < max; i++)
3830             {
3831             int len = 1;
3832             if (eptr >= md->end_subject) break;
3833             GETCHARLEN(c, eptr, len);
3834             if (c == 0x000d)
3835               {
3836               if (++eptr >= md->end_subject) break;
3837               if (*eptr == 0x000a) eptr++;
3838               }
3839             else
3840               {
3841               if (c != 0x000a && c != 0x000b && c != 0x000c &&
3842                   c != 0x0085 && c != 0x2028 && c != 0x2029)
3843                 break;
3844               eptr += len;
3845               }
3846             }
3847           break;
3848
3849           case OP_NOT_HSPACE:
3850           case OP_HSPACE:
3851           for (i = min; i < max; i++)
3852             {
3853             BOOL gotspace;
3854             int len = 1;
3855             if (eptr >= md->end_subject) break;
3856             GETCHARLEN(c, eptr, len);
3857             switch(c)
3858               {
3859               default: gotspace = FALSE; break;
3860               case 0x09:      /* HT */
3861               case 0x20:      /* SPACE */
3862               case 0xa0:      /* NBSP */
3863               case 0x1680:    /* OGHAM SPACE MARK */
3864               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3865               case 0x2000:    /* EN QUAD */
3866               case 0x2001:    /* EM QUAD */
3867               case 0x2002:    /* EN SPACE */
3868               case 0x2003:    /* EM SPACE */
3869               case 0x2004:    /* THREE-PER-EM SPACE */
3870               case 0x2005:    /* FOUR-PER-EM SPACE */
3871               case 0x2006:    /* SIX-PER-EM SPACE */
3872               case 0x2007:    /* FIGURE SPACE */
3873               case 0x2008:    /* PUNCTUATION SPACE */
3874               case 0x2009:    /* THIN SPACE */
3875               case 0x200A:    /* HAIR SPACE */
3876               case 0x202f:    /* NARROW NO-BREAK SPACE */
3877               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3878               case 0x3000:    /* IDEOGRAPHIC SPACE */
3879               gotspace = TRUE;
3880               break;
3881               }
3882             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3883             eptr += len;
3884             }
3885           break;
3886
3887           case OP_NOT_VSPACE:
3888           case OP_VSPACE:
3889           for (i = min; i < max; i++)
3890             {
3891             BOOL gotspace;
3892             int len = 1;
3893             if (eptr >= md->end_subject) break;
3894             GETCHARLEN(c, eptr, len);
3895             switch(c)
3896               {
3897               default: gotspace = FALSE; break;
3898               case 0x0a:      /* LF */
3899               case 0x0b:      /* VT */
3900               case 0x0c:      /* FF */
3901               case 0x0d:      /* CR */
3902               case 0x85:      /* NEL */
3903               case 0x2028:    /* LINE SEPARATOR */
3904               case 0x2029:    /* PARAGRAPH SEPARATOR */
3905               gotspace = TRUE;
3906               break;
3907               }
3908             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3909             eptr += len;
3910             }
3911           break;
3912
3913           case OP_NOT_DIGIT:
3914           for (i = min; i < max; i++)
3915             {
3916             int len = 1;
3917             if (eptr >= md->end_subject) break;
3918             GETCHARLEN(c, eptr, len);
3919             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3920             eptr+= len;
3921             }
3922           break;
3923
3924           case OP_DIGIT:
3925           for (i = min; i < max; i++)
3926             {
3927             int len = 1;
3928             if (eptr >= md->end_subject) break;
3929             GETCHARLEN(c, eptr, len);
3930             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3931             eptr+= len;
3932             }
3933           break;
3934
3935           case OP_NOT_WHITESPACE:
3936           for (i = min; i < max; i++)
3937             {
3938             int len = 1;
3939             if (eptr >= md->end_subject) break;
3940             GETCHARLEN(c, eptr, len);
3941             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3942             eptr+= len;
3943             }
3944           break;
3945
3946           case OP_WHITESPACE:
3947           for (i = min; i < max; i++)
3948             {
3949             int len = 1;
3950             if (eptr >= md->end_subject) break;
3951             GETCHARLEN(c, eptr, len);
3952             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3953             eptr+= len;
3954             }
3955           break;
3956
3957           case OP_NOT_WORDCHAR:
3958           for (i = min; i < max; i++)
3959             {
3960             int len = 1;
3961             if (eptr >= md->end_subject) break;
3962             GETCHARLEN(c, eptr, len);
3963             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3964             eptr+= len;
3965             }
3966           break;
3967
3968           case OP_WORDCHAR:
3969           for (i = min; i < max; i++)
3970             {
3971             int len = 1;
3972             if (eptr >= md->end_subject) break;
3973             GETCHARLEN(c, eptr, len);
3974             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3975             eptr+= len;
3976             }
3977           break;
3978
3979           default:
3980           RRETURN(PCRE_ERROR_INTERNAL);
3981           }
3982
3983         /* eptr is now past the end of the maximum run */
3984
3985         if (possessive) continue;
3986         for(;;)
3987           {
3988           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3989           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3990           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3991           BACKCHAR(eptr);
3992           }
3993         }
3994       else
3995 #endif
3996
3997       /* Not UTF-8 mode */
3998         {
3999         switch(ctype)
4000           {
4001           case OP_ANY:
4002           if ((ims & PCRE_DOTALL) == 0)
4003             {
4004             for (i = min; i < max; i++)
4005               {
4006               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4007               eptr++;
4008               }
4009             break;
4010             }
4011           /* For DOTALL case, fall through and treat as \C */
4012
4013           case OP_ANYBYTE:
4014           c = max - min;
4015           if (c > (unsigned int)(md->end_subject - eptr))
4016             c = md->end_subject - eptr;
4017           eptr += c;
4018           break;
4019
4020           case OP_ANYNL:
4021           for (i = min; i < max; i++)
4022             {
4023             if (eptr >= md->end_subject) break;
4024             c = *eptr;
4025             if (c == 0x000d)
4026               {
4027               if (++eptr >= md->end_subject) break;
4028               if (*eptr == 0x000a) eptr++;
4029               }
4030             else
4031               {
4032               if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4033                 break;
4034               eptr++;
4035               }
4036             }
4037           break;
4038
4039           case OP_NOT_HSPACE:
4040           for (i = min; i < max; i++)
4041             {
4042             if (eptr >= md->end_subject) break;
4043             c = *eptr;
4044             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4045             eptr++;
4046             }
4047           break;
4048
4049           case OP_HSPACE:
4050           for (i = min; i < max; i++)
4051             {
4052             if (eptr >= md->end_subject) break;
4053             c = *eptr;
4054             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4055             eptr++;
4056             }
4057           break;
4058
4059           case OP_NOT_VSPACE:
4060           for (i = min; i < max; i++)
4061             {
4062             if (eptr >= md->end_subject) break;
4063             c = *eptr;
4064             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4065               break;
4066             eptr++;
4067             }
4068           break;
4069
4070           case OP_VSPACE:
4071           for (i = min; i < max; i++)
4072             {
4073             if (eptr >= md->end_subject) break;
4074             c = *eptr;
4075             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4076               break;
4077             eptr++;
4078             }
4079           break;
4080
4081           case OP_NOT_DIGIT:
4082           for (i = min; i < max; i++)
4083             {
4084             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4085               break;
4086             eptr++;
4087             }
4088           break;
4089
4090           case OP_DIGIT:
4091           for (i = min; i < max; i++)
4092             {
4093             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4094               break;
4095             eptr++;
4096             }
4097           break;
4098
4099           case OP_NOT_WHITESPACE:
4100           for (i = min; i < max; i++)
4101             {
4102             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4103               break;
4104             eptr++;
4105             }
4106           break;
4107
4108           case OP_WHITESPACE:
4109           for (i = min; i < max; i++)
4110             {
4111             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4112               break;
4113             eptr++;
4114             }
4115           break;
4116
4117           case OP_NOT_WORDCHAR:
4118           for (i = min; i < max; i++)
4119             {
4120             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4121               break;
4122             eptr++;
4123             }
4124           break;
4125
4126           case OP_WORDCHAR:
4127           for (i = min; i < max; i++)
4128             {
4129             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4130               break;
4131             eptr++;
4132             }
4133           break;
4134
4135           default:
4136           RRETURN(PCRE_ERROR_INTERNAL);
4137           }
4138
4139         /* eptr is now past the end of the maximum run */
4140
4141         if (possessive) continue;
4142         while (eptr >= pp)
4143           {
4144           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4145           eptr--;
4146           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4147           }
4148         }
4149
4150       /* Get here if we can't make it match with any permitted repetitions */
4151
4152       RRETURN(MATCH_NOMATCH);
4153       }
4154     /* Control never gets here */
4155
4156     /* There's been some horrible disaster. Arrival here can only mean there is
4157     something seriously wrong in the code above or the OP_xxx definitions. */
4158
4159     default:
4160     DPRINTF(("Unknown opcode %d\n", *ecode));
4161     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4162     }
4163
4164   /* Do not stick any code in here without much thought; it is assumed
4165   that "continue" in the code above comes out to here to repeat the main
4166   loop. */
4167
4168   }             /* End of main loop */
4169 /* Control never reaches here */
4170
4171
4172 /* When compiling to use the heap rather than the stack for recursive calls to
4173 match(), the RRETURN() macro jumps here. The number that is saved in
4174 frame->Xwhere indicates which label we actually want to return to. */
4175
4176 #ifdef NO_RECURSE
4177 #define LBL(val) case val: goto L_RM##val;
4178 HEAP_RETURN:
4179 switch (frame->Xwhere)
4180   {
4181   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4182   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4183   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4184   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4185   LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4186   LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4187   default:
4188   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4189   return PCRE_ERROR_INTERNAL;
4190   }
4191 #undef LBL
4192 #endif  /* NO_RECURSE */
4193 }
4194
4195
4196 /***************************************************************************
4197 ****************************************************************************
4198                    RECURSION IN THE match() FUNCTION
4199
4200 Undefine all the macros that were defined above to handle this. */
4201
4202 #ifdef NO_RECURSE   /* the names below are macros only when match() uses the heap for recursion */
4203 #undef eptr
4204 #undef ecode
4205 #undef mstart
4206 #undef offset_top
4207 #undef ims          /* pcre_exec() below declares its own local "ims" */
4208 #undef eptrb
4209 #undef flags        /* pcre_exec() below declares its own local "flags" */
4210
4211 #undef callpat
4212 #undef charptr
4213 #undef data
4214 #undef next
4215 #undef pp
4216 #undef prev
4217 #undef saved_eptr
4218
4219 #undef new_recursive
4220
4221 #undef cur_is_word
4222 #undef condition
4223 #undef prev_is_word
4224
4225 #undef original_ims
4226
4227 #undef ctype
4228 #undef length       /* "length" is also a parameter name of pcre_exec() below */
4229 #undef max
4230 #undef min
4231 #undef number
4232 #undef offset
4233 #undef op
4234 #undef save_capture_last
4235 #undef save_offset1
4236 #undef save_offset2
4237 #undef save_offset3
4238 #undef stacksave
4239
4240 #undef newptrb
4241
4242 #endif  /* NO_RECURSE */
4243
4244 /* These two are defined as macros in both cases (with and without
     NO_RECURSE), so they are undefined unconditionally. */
4245
4246 #undef fc
4247 #undef fi
4248
4249 /***************************************************************************
4250 ***************************************************************************/
4251
4252
4253
4254 /*************************************************
4255 *         Execute a Regular Expression           *
4256 *************************************************/
4257
4258 /* This function applies a compiled re to a subject string and picks out
4259 portions of the string if it matches. Two elements in the vector are set for
4260 each substring: the offsets to the start and end of the substring.
4261
4262 Arguments:
4263   argument_re     points to the compiled expression
4264   extra_data      points to extra data or is NULL
4265   subject         points to the subject string
4266   length          length of subject string (may contain binary zeros)
4267   start_offset    where to start in the subject string
4268   options         option bits
4269   offsets         points to a vector of ints to be filled in with offsets
4270   offsetcount     the number of elements in the vector
4271
4272 Returns:          > 0 => success; value is the number of elements filled in
4273                   = 0 => success, but offsets is not big enough
4274                    -1 => failed to match
4275                  < -1 => some kind of unexpected problem
4276 */
4277
4278 PCRE_EXP_DEFN int
4279 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4280   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4281   int offsetcount)
4282 {
4283 int rc, resetcount, ocount;
4284 int first_byte = -1;
4285 int req_byte = -1;
4286 int req_byte2 = -1;
4287 int newline;
4288 unsigned long int ims;
4289 BOOL using_temporary_offsets = FALSE;
4290 BOOL anchored;
4291 BOOL startline;
4292 BOOL firstline;
4293 BOOL first_byte_caseless = FALSE;
4294 BOOL req_byte_caseless = FALSE;
4295 BOOL utf8;
4296 match_data match_block;
4297 match_data *md = &match_block;
4298 const uschar *tables;
4299 const uschar *start_bits = NULL;
4300 USPTR start_match = (USPTR)subject + start_offset;
4301 USPTR end_subject;
4302 USPTR req_byte_ptr = start_match - 1;
4303 eptrblock eptrchain[EPTR_WORK_SIZE];
4304
4305 pcre_study_data internal_study;
4306 const pcre_study_data *study;
4307
4308 real_pcre internal_re;
4309 const real_pcre *external_re = (const real_pcre *)argument_re;
4310 const real_pcre *re = external_re;
4311
4312 /* Plausibility checks */
4313
4314 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4315 if (re == NULL || subject == NULL ||
4316    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4317 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4318
4319 /* Fish out the optional data from the extra_data structure, first setting
4320 the default values. */
4321
4322 study = NULL;
4323 md->match_limit = MATCH_LIMIT;
4324 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4325 md->callout_data = NULL;
4326
4327 /* The table pointer is always in native byte order. */
4328
4329 tables = external_re->tables;
4330
4331 if (extra_data != NULL)
4332   {
4333   register unsigned int flags = extra_data->flags;
4334   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4335     study = (const pcre_study_data *)extra_data->study_data;
4336   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4337     md->match_limit = extra_data->match_limit;
4338   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4339     md->match_limit_recursion = extra_data->match_limit_recursion;
4340   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4341     md->callout_data = extra_data->callout_data;
4342   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4343   }
4344
4345 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4346 is a feature that makes it possible to save compiled regex and re-use them
4347 in other programs later. */
4348
4349 if (tables == NULL) tables = _pcre_default_tables;
4350
4351 /* Check that the first field in the block is the magic number. If it is not,
4352 test for a regex that was compiled on a host of opposite endianness. If this is
4353 the case, flipped values are put in internal_re and internal_study if there was
4354 study data too. */
4355
4356 if (re->magic_number != MAGIC_NUMBER)
4357   {
4358   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4359   if (re == NULL) return PCRE_ERROR_BADMAGIC;
4360   if (study != NULL) study = &internal_study;
4361   }
4362
4363 /* Set up other data */
4364
4365 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4366 startline = (re->options & PCRE_STARTLINE) != 0;
4367 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4368
4369 /* The code starts after the real_pcre block and the capture name table. */
4370
4371 md->start_code = (const uschar *)external_re + re->name_table_offset +
4372   re->name_count * re->name_entry_size;
4373
4374 md->start_subject = (USPTR)subject;
4375 md->start_offset = start_offset;
4376 md->end_subject = md->start_subject + length;
4377 end_subject = md->end_subject;
4378
4379 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4380 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4381
4382 md->notbol = (options & PCRE_NOTBOL) != 0;
4383 md->noteol = (options & PCRE_NOTEOL) != 0;
4384 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4385 md->partial = (options & PCRE_PARTIAL) != 0;
4386 md->hitend = FALSE;
4387
4388 md->recursive = NULL;                   /* No recursion at top level */
4389 md->eptrchain = eptrchain;              /* Make workspace generally available */
4390
4391 md->lcc = tables + lcc_offset;
4392 md->ctypes = tables + ctypes_offset;
4393
4394 /* Handle different types of newline. The three bits give eight cases. If
4395 nothing is set at run time, whatever was used at compile time applies. */
4396
4397 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4398        PCRE_NEWLINE_BITS)
4399   {
4400   case 0: newline = NEWLINE; break;   /* Compile-time default */
4401   case PCRE_NEWLINE_CR: newline = '\r'; break;
4402   case PCRE_NEWLINE_LF: newline = '\n'; break;
4403   case PCRE_NEWLINE_CR+
4404        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4405   case PCRE_NEWLINE_ANY: newline = -1; break;
4406   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4407   default: return PCRE_ERROR_BADNEWLINE;
4408   }
4409
4410 if (newline == -2)
4411   {
4412   md->nltype = NLTYPE_ANYCRLF;
4413   }
4414 else if (newline < 0)
4415   {
4416   md->nltype = NLTYPE_ANY;
4417   }
4418 else
4419   {
4420   md->nltype = NLTYPE_FIXED;
4421   if (newline > 255)
4422     {
4423     md->nllen = 2;
4424     md->nl[0] = (newline >> 8) & 255;
4425     md->nl[1] = newline & 255;
4426     }
4427   else
4428     {
4429     md->nllen = 1;
4430     md->nl[0] = newline;
4431     }
4432   }
4433
4434 /* Partial matching is supported only for a restricted set of regexes at the
4435 moment. */
4436
4437 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4438   return PCRE_ERROR_BADPARTIAL;
4439
4440 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4441 back the character offset. */
4442
4443 #ifdef SUPPORT_UTF8
4444 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4445   {
4446   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4447     return PCRE_ERROR_BADUTF8;
4448   if (start_offset > 0 && start_offset < length)
4449     {
4450     int tb = ((uschar *)subject)[start_offset];
4451     if (tb > 127)
4452       {
4453       tb &= 0xc0;
4454       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4455       }
4456     }
4457   }
4458 #endif
4459
4460 /* The ims options can vary during the matching as a result of the presence
4461 of (?ims) items in the pattern. They are kept in a local variable so that
4462 restoring at the exit of a group is easy. */
4463
4464 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4465
4466 /* If the expression has got more back references than the offsets supplied can
4467 hold, we get a temporary chunk of working store to use during the matching.
4468 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4469 of 3. */
4470
4471 ocount = offsetcount - (offsetcount % 3);
4472
4473 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4474   {
4475   ocount = re->top_backref * 3 + 3;
4476   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4477   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4478   using_temporary_offsets = TRUE;
4479   DPRINTF(("Got memory to hold back references\n"));
4480   }
4481 else md->offset_vector = offsets;
4482
4483 md->offset_end = ocount;
4484 md->offset_max = (2*ocount)/3;
4485 md->offset_overflow = FALSE;
4486 md->capture_last = -1;
4487
4488 /* Compute the minimum number of offsets that we need to reset each time. Doing
4489 this makes a huge difference to execution time when there aren't many brackets
4490 in the pattern. */
4491
4492 resetcount = 2 + re->top_bracket * 2;
4493 if (resetcount > offsetcount) resetcount = ocount;
4494
4495 /* Reset the working variable associated with each extraction. These should
4496 never be used unless previously set, but they get saved and restored, and so we
4497 initialize them to avoid reading uninitialized locations. */
4498
4499 if (md->offset_vector != NULL)
4500   {
4501   register int *iptr = md->offset_vector + ocount;
4502   register int *iend = iptr - resetcount/2 + 1;
4503   while (--iptr >= iend) *iptr = -1;
4504   }
4505
4506 /* Set up the first character to match, if available. The first_byte value is
4507 never set for an anchored regular expression, but the anchoring may be forced
4508 at run time, so we have to test for anchoring. The first char may be unset for
4509 an unanchored pattern, of course. If there's no first char and the pattern was
4510 studied, there may be a bitmap of possible first characters. */
4511
4512 if (!anchored)
4513   {
4514   if ((re->options & PCRE_FIRSTSET) != 0)
4515     {
4516     first_byte = re->first_byte & 255;
4517     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4518       first_byte = md->lcc[first_byte];
4519     }
4520   else
4521     if (!startline && study != NULL &&
4522       (study->options & PCRE_STUDY_MAPPED) != 0)
4523         start_bits = study->start_bits;
4524   }
4525
4526 /* For anchored or unanchored matches, there may be a "last known required
4527 character" set. */
4528
4529 if ((re->options & PCRE_REQCHSET) != 0)
4530   {
4531   req_byte = re->req_byte & 255;
4532   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4533   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4534   }
4535
4536
4537 /* ==========================================================================*/
4538
4539 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4540 the loop runs just once. */
4541
4542 for(;;)
4543   {
4544   USPTR save_end_subject = end_subject;
4545
4546   /* Reset the maximum number of extractions we might see. */
4547
4548   if (md->offset_vector != NULL)
4549     {
4550     register int *iptr = md->offset_vector;
4551     register int *iend = iptr + resetcount;
4552     while (iptr < iend) *iptr++ = -1;
4553     }
4554
4555   /* Advance to a unique first char if possible. If firstline is TRUE, the
4556   start of the match is constrained to the first line of a multiline string.
4557   That is, the match must be before or at the first newline. Implement this by
4558   temporarily adjusting end_subject so that we stop scanning at a newline. If
4559   the match fails at the newline, later code breaks this loop. */
4560
4561   if (firstline)
4562     {
4563     USPTR t = start_match;
4564     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4565     end_subject = t;
4566     }
4567
4568   /* Now test for a unique first byte */
4569
4570   if (first_byte >= 0)
4571     {
4572     if (first_byte_caseless)
4573       while (start_match < end_subject &&
4574              md->lcc[*start_match] != first_byte)
4575         start_match++;
4576     else
4577       while (start_match < end_subject && *start_match != first_byte)
4578         start_match++;
4579     }
4580
4581   /* Or to just after a linebreak for a multiline match if possible */
4582
4583   else if (startline)
4584     {
4585     if (start_match > md->start_subject + start_offset)
4586       {
4587       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4588         start_match++;
4589
4590       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4591       and we are now at a LF, advance the match position by one more character.
4592       */
4593
4594       if (start_match[-1] == '\r' &&
4595            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4596            start_match < end_subject &&
4597            *start_match == '\n')
4598         start_match++;
4599       }
4600     }
4601
4602   /* Or to a non-unique first char after study */
4603
4604   else if (start_bits != NULL)
4605     {
4606     while (start_match < end_subject)
4607       {
4608       register unsigned int c = *start_match;
4609       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4610       }
4611     }
4612
4613   /* Restore fudged end_subject */
4614
4615   end_subject = save_end_subject;
4616
4617 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4618   printf(">>>> Match against: ");
4619   pchars(start_match, end_subject - start_match, TRUE, md);
4620   printf("\n");
4621 #endif
4622
4623   /* If req_byte is set, we know that that character must appear in the subject
4624   for the match to succeed. If the first character is set, req_byte must be
4625   later in the subject; otherwise the test starts at the match point. This
4626   optimization can save a huge amount of backtracking in patterns with nested
4627   unlimited repeats that aren't going to match. Writing separate code for
4628   cased/caseless versions makes it go faster, as does using an autoincrement
4629   and backing off on a match.
4630
4631   HOWEVER: when the subject string is very, very long, searching to its end can
4632   take a long time, and give bad performance on quite ordinary patterns. This
4633   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4634   string... so we don't do this when the string is sufficiently long.
4635
4636   ALSO: this processing is disabled when partial matching is requested.
4637   */
4638
4639   if (req_byte >= 0 &&
4640       end_subject - start_match < REQ_BYTE_MAX &&
4641       !md->partial)
4642     {
4643     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4644
4645     /* We don't need to repeat the search if we haven't yet reached the
4646     place we found it at last time. */
4647
4648     if (p > req_byte_ptr)
4649       {
4650       if (req_byte_caseless)
4651         {
4652         while (p < end_subject)
4653           {
4654           register int pp = *p++;
4655           if (pp == req_byte || pp == req_byte2) { p--; break; }
4656           }
4657         }
4658       else
4659         {
4660         while (p < end_subject)
4661           {
4662           if (*p++ == req_byte) { p--; break; }
4663           }
4664         }
4665
4666       /* If we can't find the required character, break the matching loop,
4667       forcing a match failure. */
4668
4669       if (p >= end_subject)
4670         {
4671         rc = MATCH_NOMATCH;
4672         break;
4673         }
4674
4675       /* If we have found the required character, save the point where we
4676       found it, so that we don't search again next time round the loop if
4677       the start hasn't passed this character yet. */
4678
4679       req_byte_ptr = p;
4680       }
4681     }
4682
4683   /* OK, we can now run the match. */
4684
4685   md->start_match_ptr = start_match;      /* Insurance */
4686   md->match_call_count = 0;
4687   md->eptrn = 0;                          /* Next free eptrchain slot */
4688   rc = match(start_match, md->start_code, start_match, 2, md,
4689     ims, NULL, 0, 0);
4690
4691   /* Any return other than MATCH_NOMATCH breaks the loop. */
4692
4693   if (rc != MATCH_NOMATCH) break;
4694
4695   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4696   newline in the subject (though it may continue over the newline). Therefore,
4697   if we have just failed to match, starting at a newline, do not continue. */
4698
4699   if (firstline && IS_NEWLINE(start_match)) break;
4700
4701   /* Advance the match position by one character. */
4702
4703   start_match++;
4704 #ifdef SUPPORT_UTF8
4705   if (utf8)
4706     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4707       start_match++;
4708 #endif
4709
4710   /* Break the loop if the pattern is anchored or if we have passed the end of
4711   the subject. */
4712
4713   if (anchored || start_match > end_subject) break;
4714
4715   /* If we have just passed a CR and the newline option is CRLF or ANY or
4716   ANYCRLF, and we are now at a LF, advance the match position by one more
4717   character. */
4718
4719   if (start_match[-1] == '\r' &&
4720        (md->nltype == NLTYPE_ANY ||
4721         md->nltype == NLTYPE_ANYCRLF ||
4722         md->nllen == 2) &&
4723        start_match < end_subject &&
4724        *start_match == '\n')
4725     start_match++;
4726
4727   }   /* End of for(;;) "bumpalong" loop */
4728
4729 /* ==========================================================================*/
4730
4731 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4732 conditions is true:
4733
4734 (1) The pattern is anchored;
4735
4736 (2) We are past the end of the subject, or the required byte was not found;
4737
4738 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4739     this option requests that a match occur at or before the first newline in
4740     the subject.
4741
4742 When we have a match and the offset vector is big enough to deal with any
4743 backreferences, captured substring offsets will already be set up. In the case
4744 where we had to get some local store to hold offsets for backreference
4745 processing, copy those that we can. In this case there need not be overflow if
4746 certain parts of the pattern were not used, even though there are more
4747 capturing parentheses than vector slots. */
4748
4749 if (rc == MATCH_MATCH)
4750   {
4751   if (using_temporary_offsets)
4752     {
4753     if (offsetcount >= 4)
4754       {
4755       memcpy(offsets + 2, md->offset_vector + 2,
4756         (offsetcount - 2) * sizeof(int));
4757       DPRINTF(("Copied offsets from temporary memory\n"));
4758       }
4759     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4760     DPRINTF(("Freeing temporary memory\n"));
4761     (pcre_free)(md->offset_vector);
4762     }
4763
4764   /* Set the return code to the number of captured strings, or 0 if there are
4765   too many to fit into the vector. */
4766
4767   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4768
4769   /* If there is space, set up the whole thing as substring 0. The value of
4770   md->start_match_ptr might be modified if \K was encountered on the
4771   successful matching path. */
4772
4773   if (offsetcount < 2) rc = 0; else
4774     {
4775     offsets[0] = md->start_match_ptr - md->start_subject;
4776     offsets[1] = md->end_match_ptr - md->start_subject;
4777     }
4778
4779   DPRINTF((">>>> returning %d\n", rc));
4780   return rc;
4781   }
4782
4783 /* Control gets here if there has been an error, or if the overall match
4784 attempt has failed at all permitted starting positions. */
4785
4786 if (using_temporary_offsets)
4787   {
4788   DPRINTF(("Freeing temporary memory\n"));
4789   (pcre_free)(md->offset_vector);
4790   }
4791
4792 if (rc != MATCH_NOMATCH)
4793   {
4794   DPRINTF((">>>> error: returning %d\n", rc));
4795   return rc;
4796   }
4797 else if (md->partial && md->hitend)
4798   {
4799   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4800   return PCRE_ERROR_PARTIAL;
4801   }
4802 else
4803   {
4804   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4805   return PCRE_ERROR_NOMATCH;
4806   }
4807 }
4808
4809 /* End of pcre_exec.c */