1 /* $Cambridge: exim/src/src/pcre/get.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

           Copyright (c) 1997-2003 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
45 /* This module contains some convenience functions for extracting substrings
46 from the subject string after a regex match has succeeded. The original idea
47 for these functions came from Scott Wimer. */
50 /* Include the internals header, which itself includes Standard C headers plus
51 the external pcre header. */
56 /*************************************************
57 * Find number for named string *
58 *************************************************/
60 /* This function is used by the two extraction functions below, as well
61 as being generally available.
64 code the compiled regex
65 stringname the name whose number is required
67 Returns: the number of the named parentheses, or a negative number
68 (PCRE_ERROR_NOSUBSTRING) if not found
72 pcre_get_stringnumber(const pcre *code, const char *stringname)
79 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
81 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
83 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
85 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
91 int mid = (top + bot) / 2;
92 uschar *entry = nametable + entrysize*mid;
93 int c = strcmp(stringname, (char *)(entry + 2));
94 if (c == 0) return (entry[0] << 8) + entry[1];
95 if (c > 0) bot = mid + 1; else top = mid;
98 return PCRE_ERROR_NOSUBSTRING;
103 /*************************************************
104 * Copy captured string to given buffer *
105 *************************************************/
107 /* This function copies a single captured substring into a given buffer.
108 Note that we use memcpy() rather than strncpy() in case there are binary zeros
112 subject the subject string that was matched
113 ovector pointer to the offsets table
114 stringcount the number of substrings that were captured
115 (i.e. the yield of the pcre_exec call, unless
116 that was zero, in which case it should be 1/3
117 of the offset table size)
118 stringnumber the number of the required substring
119 buffer where to put the substring
120 size the size of the buffer
122 Returns: if successful:
123 the length of the copied string, not including the zero
124 that is put on the end; can be zero
126 PCRE_ERROR_NOMEMORY (-6) buffer too small
127 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
131 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
132 int stringnumber, char *buffer, int size)
135 if (stringnumber < 0 || stringnumber >= stringcount)
136 return PCRE_ERROR_NOSUBSTRING;
138 yield = ovector[stringnumber+1] - ovector[stringnumber];
139 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
140 memcpy(buffer, subject + ovector[stringnumber], yield);
147 /*************************************************
148 * Copy named captured string to given buffer *
149 *************************************************/
151 /* This function copies a single captured substring into a given buffer,
152 identifying it by name.
155 code the compiled regex
156 subject the subject string that was matched
157 ovector pointer to the offsets table
158 stringcount the number of substrings that were captured
159 (i.e. the yield of the pcre_exec call, unless
160 that was zero, in which case it should be 1/3
161 of the offset table size)
162 stringname the name of the required substring
163 buffer where to put the substring
164 size the size of the buffer
166 Returns: if successful:
167 the length of the copied string, not including the zero
168 that is put on the end; can be zero
170 PCRE_ERROR_NOMEMORY (-6) buffer too small
171 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
175 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
176 int stringcount, const char *stringname, char *buffer, int size)
178 int n = pcre_get_stringnumber(code, stringname);
179 if (n <= 0) return n;
180 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
185 /*************************************************
186 * Copy all captured strings to new store *
187 *************************************************/
189 /* This function gets one chunk of store and builds a list of pointers and all
190 of the captured substrings in it. A NULL pointer is put on the end of the list.
193 subject the subject string that was matched
194 ovector pointer to the offsets table
195 stringcount the number of substrings that were captured
196 (i.e. the yield of the pcre_exec call, unless
197 that was zero, in which case it should be 1/3
198 of the offset table size)
199 listptr set to point to the list of pointers
201 Returns: if successful: 0
203 PCRE_ERROR_NOMEMORY (-6) failed to get store
207 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
208 const char ***listptr)
211 int size = sizeof(char *);
212 int double_count = stringcount * 2;
216 for (i = 0; i < double_count; i += 2)
217 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
219 stringlist = (char **)(pcre_malloc)(size);
220 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
222 *listptr = (const char **)stringlist;
223 p = (char *)(stringlist + stringcount + 1);
225 for (i = 0; i < double_count; i += 2)
227 int len = ovector[i+1] - ovector[i];
228 memcpy(p, subject + ovector[i], len);
240 /*************************************************
241 * Free store obtained by get_substring_list *
242 *************************************************/
244 /* This function exists for the benefit of people calling PCRE from non-C
245 programs that can call its functions, but not free() or (pcre_free)() directly.
247 Argument: the result of a previous pcre_get_substring_list()
252 pcre_free_substring_list(const char **pointer)
254 (pcre_free)((void *)pointer);
259 /*************************************************
260 * Copy captured string to new store *
261 *************************************************/
263 /* This function copies a single captured substring into a piece of new
267 subject the subject string that was matched
268 ovector pointer to the offsets table
269 stringcount the number of substrings that were captured
270 (i.e. the yield of the pcre_exec call, unless
271 that was zero, in which case it should be 1/3
272 of the offset table size)
273 stringnumber the number of the required substring
274 stringptr where to put a pointer to the substring
276 Returns: if successful:
277 the length of the string, not including the zero that
278 is put on the end; can be zero
280 PCRE_ERROR_NOMEMORY (-6) failed to get store
281 PCRE_ERROR_NOSUBSTRING (-7) substring not present
285 pcre_get_substring(const char *subject, int *ovector, int stringcount,
286 int stringnumber, const char **stringptr)
290 if (stringnumber < 0 || stringnumber >= stringcount)
291 return PCRE_ERROR_NOSUBSTRING;
293 yield = ovector[stringnumber+1] - ovector[stringnumber];
294 substring = (char *)(pcre_malloc)(yield + 1);
295 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
296 memcpy(substring, subject + ovector[stringnumber], yield);
297 substring[yield] = 0;
298 *stringptr = substring;
304 /*************************************************
305 * Copy named captured string to new store *
306 *************************************************/
308 /* This function copies a single captured substring, identified by name, into
312 code the compiled regex
313 subject the subject string that was matched
314 ovector pointer to the offsets table
315 stringcount the number of substrings that were captured
316 (i.e. the yield of the pcre_exec call, unless
317 that was zero, in which case it should be 1/3
318 of the offset table size)
319 stringname the name of the required substring
320 stringptr where to put the pointer
322 Returns: if successful:
323 the length of the copied string, not including the zero
324 that is put on the end; can be zero
326 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
327 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
331 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
332 int stringcount, const char *stringname, const char **stringptr)
334 int n = pcre_get_stringnumber(code, stringname);
335 if (n <= 0) return n;
336 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
342 /*************************************************
343 * Free store obtained by get_substring *
344 *************************************************/
346 /* This function exists for the benefit of people calling PCRE from non-C
347 programs that can call its functions, but not free() or (pcre_free)() directly.
349 Argument: the result of a previous pcre_get_substring()
354 pcre_free_substring(const char *pointer)
356 (pcre_free)((void *)pointer);