/*************************************************
*     Exim - an Internet mail transport agent    *
*************************************************/

/*
 * Copyright (c) The Exim Maintainers 2022
 */
/* Caching layers for compiled REs.  There is a local layer in the process,
implemented as a tree for inserts and lookup.  This cache is inherited from
the daemon, for the process tree deriving from there - but not by re-exec'd
processes or commandline submission processes.

If the process has to compile, and is not the daemon or a re-exec'd exim,
it notifies the use of the RE to the daemon via a unix-domain socket.
This is a fire-and-forget send with no response, hence cheap from the point
of view of the sender.  I have not measured the overall comms costs.  The
daemon also compiles the RE, and caches the result.

A second layer would be possible by asking the daemon via the notifier socket
(for a result from its cache, or a compile if it must).  The comms overhead
is significant, not only for the channel but also for de/serialisation of
the compiled object.  This makes it untenable for the primary use-case, the
transport process which has been re-exec'd to gain privs - and therefore does not
have the daemon-maintained cache.  Using shared-memory might reduce that cost
(the attach time for the memory segment will matter); the implementation
would require suitable R/W locks.
*/
33 typedef struct re_req {
34 uschar notifier_reqtype;
36 uschar re[1]; /* extensible */
39 static tree_node * regex_cache = NULL;
40 static tree_node * regex_caseless_cache = NULL;
42 /******************************************************************************/
45 regex_to_daemon(const uschar * key, BOOL caseless)
47 int klen = Ustrlen(key) + 1;
48 int rlen = sizeof(re_req) + klen;
50 int fd, old_pool = store_pool;
52 DEBUG(D_expand|D_lists)
53 debug_printf_indent("sending RE '%s' to daemon\n", key);
55 store_pool = POOL_MAIN;
56 req = store_get(rlen, key); /* maybe need a size limit */
57 store_pool = old_pool;;
58 req->notifier_reqtype = NOTIFY_REGEX;
59 req->caseless = caseless;
60 memcpy(req->re, key, klen);
62 if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) >= 0)
64 struct sockaddr_un sa_un = {.sun_family = AF_UNIX};
65 ssize_t len = daemon_notifier_sockname(&sa_un);
67 if (sendto(fd, req, rlen, 0, (struct sockaddr *)&sa_un, (socklen_t)len) < 0)
69 debug_printf("%s: sendto %s\n", __FUNCTION__, strerror(errno));
72 else DEBUG(D_queue_run) debug_printf(" socket: %s\n", strerror(errno));
76 static const pcre2_code *
77 regex_from_cache(const uschar * key, BOOL caseless)
80 tree_search(caseless ? regex_caseless_cache : regex_cache, key);
81 DEBUG(D_expand|D_lists)
82 debug_printf_indent("compiled %sRE '%s' %sfound in local cache\n",
83 caseless ? "caseless " : "", key, node ? "" : "not ");
85 return node ? node->data.ptr : NULL;
90 regex_to_cache(const uschar * key, BOOL caseless, const pcre2_code * cre)
96 node = store_get(sizeof(tree_node) + Ustrlen(key) + 1, key); /* we are called with STORE_PERM */
97 Ustrcpy(node->name, key);
98 node->data.ptr = (void *)cre;
100 if (!tree_insertnode(caseless ? ®ex_caseless_cache : ®ex_cache, node))
101 { DEBUG(D_expand|D_lists) debug_printf_indent("duplicate key!\n"); }
102 else DEBUG(D_expand|D_lists)
103 debug_printf_indent("compiled RE '%s' saved in local cache\n", key);
105 /* Additionally, if not re-execed and not the daemon, tell the daemon of the RE
106 so it can add to the cache */
108 if (f.daemon_scion && !f.daemon_listen)
109 regex_to_daemon(key, caseless);
114 /******************************************************************************/
116 /*************************************************
117 * Compile regular expression and panic on fail *
118 *************************************************/
120 /* This function is called when failure to compile a regular expression leads
121 to a panic exit. In other cases, pcre_compile() is called directly. In many
122 cases where this function is used, the results of the compilation are to be
123 placed in long-lived store, so we temporarily reset the store management
124 functions that PCRE uses if the use_malloc flag is set.
127 pattern the pattern to compile
129 caseless caseless matching is required
130 cacheable use (writeback) cache
131 use_malloc TRUE if compile into malloc store
133 Returns: pointer to the compiled pattern
137 regex_must_compile(const uschar * pattern, mcs_flags flags, BOOL use_malloc)
139 BOOL caseless = !!(flags & MCS_CASELESS);
141 const pcre2_code * yield;
142 int old_pool = store_pool, err;
144 /* Optionall, check the cache and return if found */
146 if ( flags & MCS_CACHEABLE
147 && (yield = regex_from_cache(pattern, caseless)))
150 store_pool = POOL_PERM;
152 if (!(yield = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
153 caseless ? PCRE_COPT|PCRE2_CASELESS : PCRE_COPT,
154 &err, &offset, use_malloc ? pcre_mlc_cmp_ctx : pcre_gen_cmp_ctx)))
157 pcre2_get_error_message(err, errbuf, sizeof(errbuf));
158 log_write(0, LOG_MAIN|LOG_PANIC_DIE, "regular expression error: "
159 "%s at offset %ld while compiling %s", errbuf, (long)offset, pattern);
164 /*pcre2_general_context_free(gctx);*/
167 if (flags & MCS_CACHEABLE)
168 regex_to_cache(pattern, caseless, yield);
170 store_pool = old_pool;
177 /* Wrapper for pcre2_compile() and error-message handling.
179 Arguments: pattern regex to compile
181 caseless flag for match variant
182 cacheable use (writeback) cache
183 errstr on error, filled in with error message
184 cctx compile-context for pcre2
186 Return: NULL on error, with errstr set. Otherwise, the compiled RE object
190 regex_compile(const uschar * pattern, mcs_flags flags, uschar ** errstr,
191 pcre2_compile_context * cctx)
193 const uschar * key = pattern;
194 BOOL caseless = !!(flags & MCS_CASELESS);
197 const pcre2_code * yield;
198 int old_pool = store_pool;
200 /* Optionally, check the cache and return if found */
202 if ( flags & MCS_CACHEABLE
203 && (yield = regex_from_cache(key, caseless)))
206 DEBUG(D_expand|D_lists) debug_printf_indent("compiling %sRE '%s'\n",
207 caseless ? "caseless " : "", pattern);
209 store_pool = POOL_PERM;
210 if (!(yield = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
211 caseless ? PCRE_COPT|PCRE2_CASELESS : PCRE_COPT,
212 &err, &offset, cctx)))
215 pcre2_get_error_message(err, errbuf, sizeof(errbuf));
216 store_pool = old_pool;
217 *errstr = string_sprintf("regular expression error in "
218 "\"%s\": %s at offset %ld", pattern, errbuf, (long)offset);
220 else if (flags & MCS_CACHEABLE)
221 regex_to_cache(key, caseless, yield);
222 store_pool = old_pool;
229 /* Handle a regex notify arriving at the daemon. We get sent the original RE;
230 compile it (again) and write to the cache. Later forked procs will be able to
231 read from the cache, unless they re-execed. Therefore, those latter never bother
232 sending us a notification. */
235 regex_at_daemon(const uschar * reqbuf)
237 const re_req * req = (const re_req *)reqbuf;
239 const pcre2_code * cre = regex_compile(req->re,
240 req->caseless ? MCS_CASELESS | MCS_CACHEABLE : MCS_CACHEABLE,
241 &errstr, pcre_gen_cmp_ctx);
243 DEBUG(D_any) if (!cre) debug_printf("%s\n", errstr);