Commit | Line | Data |
---|---|---|
7a6dbc2f | 1 | /* Copyright (C) 1991-1993, 1996-2006, 2009-2018 Free Software Foundation, Inc. |
8690e634 JK |
2 | This file is part of the GNU C Library. |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 3, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
7a6dbc2f | 15 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
8690e634 JK |
16 | |
17 | /* Match STRING against the file name pattern PATTERN, returning zero if | |
18 | it matches, nonzero if not. */ | |
19 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, | |
20 | const CHAR *string_end, bool no_leading_period, int flags) | |
21 | internal_function; | |
22 | static const CHAR *END (const CHAR *patternp) internal_function; | |
23 | ||
24 | static int | |
25 | internal_function | |
26 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
27 | bool no_leading_period, int flags) | |
28 | { | |
29 | register const CHAR *p = pattern, *n = string; | |
30 | register UCHAR c; | |
31 | #ifdef _LIBC | |
32 | # if WIDE_CHAR_VERSION | |
33 | const char *collseq = (const char *) | |
34 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); | |
35 | # else | |
36 | const UCHAR *collseq = (const UCHAR *) | |
37 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); | |
38 | # endif | |
39 | #endif | |
40 | ||
41 | while ((c = *p++) != L_('\0')) | |
42 | { | |
43 | bool new_no_leading_period = false; | |
44 | c = FOLD (c); | |
45 | ||
46 | switch (c) | |
47 | { | |
48 | case L_('?'): | |
49 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
50 | { | |
51 | int res; | |
52 | ||
53 | res = EXT (c, p, n, string_end, no_leading_period, | |
54 | flags); | |
55 | if (res != -1) | |
56 | return res; | |
57 | } | |
58 | ||
59 | if (n == string_end) | |
60 | return FNM_NOMATCH; | |
61 | else if (*n == L_('/') && (flags & FNM_FILE_NAME)) | |
62 | return FNM_NOMATCH; | |
63 | else if (*n == L_('.') && no_leading_period) | |
64 | return FNM_NOMATCH; | |
65 | break; | |
66 | ||
67 | case L_('\\'): | |
68 | if (!(flags & FNM_NOESCAPE)) | |
69 | { | |
70 | c = *p++; | |
71 | if (c == L_('\0')) | |
72 | /* Trailing \ loses. */ | |
73 | return FNM_NOMATCH; | |
74 | c = FOLD (c); | |
75 | } | |
76 | if (n == string_end || FOLD ((UCHAR) *n) != c) | |
77 | return FNM_NOMATCH; | |
78 | break; | |
79 | ||
80 | case L_('*'): | |
81 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
82 | { | |
83 | int res; | |
84 | ||
85 | res = EXT (c, p, n, string_end, no_leading_period, | |
86 | flags); | |
87 | if (res != -1) | |
88 | return res; | |
89 | } | |
90 | ||
91 | if (n != string_end && *n == L_('.') && no_leading_period) | |
92 | return FNM_NOMATCH; | |
93 | ||
94 | for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) | |
95 | { | |
96 | if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) | |
97 | { | |
98 | const CHAR *endp = END (p); | |
99 | if (endp != p) | |
100 | { | |
101 | /* This is a pattern. Skip over it. */ | |
102 | p = endp; | |
103 | continue; | |
104 | } | |
105 | } | |
106 | ||
107 | if (c == L_('?')) | |
108 | { | |
109 | /* A ? needs to match one character. */ | |
110 | if (n == string_end) | |
111 | /* There isn't another character; no match. */ | |
112 | return FNM_NOMATCH; | |
113 | else if (*n == L_('/') | |
114 | && __builtin_expect (flags & FNM_FILE_NAME, 0)) | |
115 | /* A slash does not match a wildcard under | |
116 | FNM_FILE_NAME. */ | |
117 | return FNM_NOMATCH; | |
118 | else | |
119 | /* One character of the string is consumed in matching | |
120 | this ? wildcard, so *??? won't match if there are | |
121 | less than three characters. */ | |
122 | ++n; | |
123 | } | |
124 | } | |
125 | ||
126 | if (c == L_('\0')) | |
127 | /* The wildcard(s) is/are the last element of the pattern. | |
128 | If the name is a file name and contains another slash | |
129 | this means it cannot match, unless the FNM_LEADING_DIR | |
130 | flag is set. */ | |
131 | { | |
132 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; | |
133 | ||
134 | if (flags & FNM_FILE_NAME) | |
135 | { | |
136 | if (flags & FNM_LEADING_DIR) | |
137 | result = 0; | |
138 | else | |
139 | { | |
140 | if (MEMCHR (n, L_('/'), string_end - n) == NULL) | |
141 | result = 0; | |
142 | } | |
143 | } | |
144 | ||
145 | return result; | |
146 | } | |
147 | else | |
148 | { | |
149 | const CHAR *endp; | |
150 | ||
151 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), | |
152 | string_end - n); | |
153 | if (endp == NULL) | |
154 | endp = string_end; | |
155 | ||
156 | if (c == L_('[') | |
157 | || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 | |
158 | && (c == L_('@') || c == L_('+') || c == L_('!')) | |
159 | && *p == L_('('))) | |
160 | { | |
161 | int flags2 = ((flags & FNM_FILE_NAME) | |
162 | ? flags : (flags & ~FNM_PERIOD)); | |
163 | bool no_leading_period2 = no_leading_period; | |
164 | ||
165 | for (--p; n < endp; ++n, no_leading_period2 = false) | |
166 | if (FCT (p, n, string_end, no_leading_period2, flags2) | |
167 | == 0) | |
168 | return 0; | |
169 | } | |
170 | else if (c == L_('/') && (flags & FNM_FILE_NAME)) | |
171 | { | |
172 | while (n < string_end && *n != L_('/')) | |
173 | ++n; | |
174 | if (n < string_end && *n == L_('/') | |
175 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) | |
176 | == 0)) | |
177 | return 0; | |
178 | } | |
179 | else | |
180 | { | |
181 | int flags2 = ((flags & FNM_FILE_NAME) | |
182 | ? flags : (flags & ~FNM_PERIOD)); | |
183 | int no_leading_period2 = no_leading_period; | |
184 | ||
185 | if (c == L_('\\') && !(flags & FNM_NOESCAPE)) | |
186 | c = *p; | |
187 | c = FOLD (c); | |
188 | for (--p; n < endp; ++n, no_leading_period2 = false) | |
189 | if (FOLD ((UCHAR) *n) == c | |
190 | && (FCT (p, n, string_end, no_leading_period2, flags2) | |
191 | == 0)) | |
192 | return 0; | |
193 | } | |
194 | } | |
195 | ||
196 | /* If we come here no match is possible with the wildcard. */ | |
197 | return FNM_NOMATCH; | |
198 | ||
199 | case L_('['): | |
200 | { | |
201 | /* Nonzero if the sense of the character class is inverted. */ | |
202 | const CHAR *p_init = p; | |
203 | const CHAR *n_init = n; | |
204 | register bool not; | |
205 | CHAR cold; | |
206 | UCHAR fn; | |
207 | ||
208 | if (posixly_correct == 0) | |
209 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
210 | ||
211 | if (n == string_end) | |
212 | return FNM_NOMATCH; | |
213 | ||
214 | if (*n == L_('.') && no_leading_period) | |
215 | return FNM_NOMATCH; | |
216 | ||
217 | if (*n == L_('/') && (flags & FNM_FILE_NAME)) | |
218 | /* '/' cannot be matched. */ | |
219 | return FNM_NOMATCH; | |
220 | ||
221 | not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); | |
222 | if (not) | |
223 | ++p; | |
224 | ||
225 | fn = FOLD ((UCHAR) *n); | |
226 | ||
227 | c = *p++; | |
228 | for (;;) | |
229 | { | |
4a626d0a PA |
230 | bool is_range = false; |
231 | ||
8690e634 JK |
232 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
233 | { | |
234 | if (*p == L_('\0')) | |
235 | return FNM_NOMATCH; | |
236 | c = FOLD ((UCHAR) *p); | |
237 | ++p; | |
238 | ||
239 | goto normal_bracket; | |
240 | } | |
241 | else if (c == L_('[') && *p == L_(':')) | |
242 | { | |
243 | /* Leave room for the null. */ | |
244 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; | |
245 | size_t c1 = 0; | |
246 | #if defined _LIBC || WIDE_CHAR_SUPPORT | |
247 | wctype_t wt; | |
248 | #endif | |
249 | const CHAR *startp = p; | |
250 | ||
251 | for (;;) | |
252 | { | |
253 | if (c1 == CHAR_CLASS_MAX_LENGTH) | |
254 | /* The name is too long and therefore the pattern | |
255 | is ill-formed. */ | |
256 | return FNM_NOMATCH; | |
257 | ||
258 | c = *++p; | |
259 | if (c == L_(':') && p[1] == L_(']')) | |
260 | { | |
261 | p += 2; | |
262 | break; | |
263 | } | |
264 | if (c < L_('a') || c >= L_('z')) | |
265 | { | |
266 | /* This cannot possibly be a character class name. | |
267 | Match it as a normal range. */ | |
268 | p = startp; | |
269 | c = L_('['); | |
270 | goto normal_bracket; | |
271 | } | |
272 | str[c1++] = c; | |
273 | } | |
274 | str[c1] = L_('\0'); | |
275 | ||
276 | #if defined _LIBC || WIDE_CHAR_SUPPORT | |
277 | wt = IS_CHAR_CLASS (str); | |
278 | if (wt == 0) | |
279 | /* Invalid character class name. */ | |
280 | return FNM_NOMATCH; | |
281 | ||
282 | # if defined _LIBC && ! WIDE_CHAR_VERSION | |
283 | /* The following code is glibc specific but does | |
284 | there a good job in speeding up the code since | |
285 | we can avoid the btowc() call. */ | |
286 | if (_ISCTYPE ((UCHAR) *n, wt)) | |
287 | goto matched; | |
288 | # else | |
289 | if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) | |
290 | goto matched; | |
291 | # endif | |
292 | #else | |
293 | if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n)) | |
294 | || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n)) | |
295 | || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n)) | |
296 | || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n)) | |
297 | || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n)) | |
298 | || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n)) | |
299 | || (STREQ (str, L_("lower")) && islower ((UCHAR) *n)) | |
300 | || (STREQ (str, L_("print")) && isprint ((UCHAR) *n)) | |
301 | || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n)) | |
302 | || (STREQ (str, L_("space")) && isspace ((UCHAR) *n)) | |
303 | || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n)) | |
304 | || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n))) | |
305 | goto matched; | |
306 | #endif | |
307 | c = *p++; | |
308 | } | |
309 | #ifdef _LIBC | |
310 | else if (c == L_('[') && *p == L_('=')) | |
311 | { | |
312 | UCHAR str[1]; | |
313 | uint32_t nrules = | |
314 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
315 | const CHAR *startp = p; | |
316 | ||
317 | c = *++p; | |
318 | if (c == L_('\0')) | |
319 | { | |
320 | p = startp; | |
321 | c = L_('['); | |
322 | goto normal_bracket; | |
323 | } | |
324 | str[0] = c; | |
325 | ||
326 | c = *++p; | |
327 | if (c != L_('=') || p[1] != L_(']')) | |
328 | { | |
329 | p = startp; | |
330 | c = L_('['); | |
331 | goto normal_bracket; | |
332 | } | |
333 | p += 2; | |
334 | ||
335 | if (nrules == 0) | |
336 | { | |
337 | if ((UCHAR) *n == str[0]) | |
338 | goto matched; | |
339 | } | |
340 | else | |
341 | { | |
342 | const int32_t *table; | |
343 | # if WIDE_CHAR_VERSION | |
344 | const int32_t *weights; | |
345 | const int32_t *extra; | |
346 | # else | |
347 | const unsigned char *weights; | |
348 | const unsigned char *extra; | |
349 | # endif | |
350 | const int32_t *indirect; | |
351 | int32_t idx; | |
352 | const UCHAR *cp = (const UCHAR *) str; | |
353 | ||
354 | /* This #include defines a local function! */ | |
355 | # if WIDE_CHAR_VERSION | |
356 | # include <locale/weightwc.h> | |
357 | # else | |
358 | # include <locale/weight.h> | |
359 | # endif | |
360 | ||
361 | # if WIDE_CHAR_VERSION | |
362 | table = (const int32_t *) | |
363 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); | |
364 | weights = (const int32_t *) | |
365 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); | |
366 | extra = (const int32_t *) | |
367 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); | |
368 | indirect = (const int32_t *) | |
369 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); | |
370 | # else | |
371 | table = (const int32_t *) | |
372 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); | |
373 | weights = (const unsigned char *) | |
374 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); | |
375 | extra = (const unsigned char *) | |
376 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | |
377 | indirect = (const int32_t *) | |
378 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | |
379 | # endif | |
380 | ||
381 | idx = findidx (&cp); | |
382 | if (idx != 0) | |
383 | { | |
384 | /* We found a table entry. Now see whether the | |
385 | character we are currently at has the same | |
386 | equivalence class value. */ | |
387 | int len = weights[idx & 0xffffff]; | |
388 | int32_t idx2; | |
389 | const UCHAR *np = (const UCHAR *) n; | |
390 | ||
391 | idx2 = findidx (&np); | |
392 | if (idx2 != 0 | |
393 | && (idx >> 24) == (idx2 >> 24) | |
394 | && len == weights[idx2 & 0xffffff]) | |
395 | { | |
396 | int cnt = 0; | |
397 | ||
398 | idx &= 0xffffff; | |
399 | idx2 &= 0xffffff; | |
400 | ||
401 | while (cnt < len | |
402 | && (weights[idx + 1 + cnt] | |
403 | == weights[idx2 + 1 + cnt])) | |
404 | ++cnt; | |
405 | ||
406 | if (cnt == len) | |
407 | goto matched; | |
408 | } | |
409 | } | |
410 | } | |
411 | ||
412 | c = *p++; | |
413 | } | |
414 | #endif | |
415 | else if (c == L_('\0')) | |
416 | { | |
417 | /* [ unterminated, treat as normal character. */ | |
418 | p = p_init; | |
419 | n = n_init; | |
420 | c = L_('['); | |
421 | goto normal_match; | |
422 | } | |
423 | else | |
424 | { | |
8690e634 JK |
425 | #ifdef _LIBC |
426 | bool is_seqval = false; | |
427 | ||
428 | if (c == L_('[') && *p == L_('.')) | |
429 | { | |
430 | uint32_t nrules = | |
431 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
432 | const CHAR *startp = p; | |
433 | size_t c1 = 0; | |
434 | ||
435 | while (1) | |
436 | { | |
437 | c = *++p; | |
438 | if (c == L_('.') && p[1] == L_(']')) | |
439 | { | |
440 | p += 2; | |
441 | break; | |
442 | } | |
443 | if (c == '\0') | |
444 | return FNM_NOMATCH; | |
445 | ++c1; | |
446 | } | |
447 | ||
448 | /* We have to handling the symbols differently in | |
449 | ranges since then the collation sequence is | |
450 | important. */ | |
451 | is_range = *p == L_('-') && p[1] != L_('\0'); | |
452 | ||
453 | if (nrules == 0) | |
454 | { | |
455 | /* There are no names defined in the collation | |
456 | data. Therefore we only accept the trivial | |
457 | names consisting of the character itself. */ | |
458 | if (c1 != 1) | |
459 | return FNM_NOMATCH; | |
460 | ||
461 | if (!is_range && *n == startp[1]) | |
462 | goto matched; | |
463 | ||
464 | cold = startp[1]; | |
465 | c = *p++; | |
466 | } | |
467 | else | |
468 | { | |
469 | int32_t table_size; | |
470 | const int32_t *symb_table; | |
471 | # ifdef WIDE_CHAR_VERSION | |
472 | char str[c1]; | |
473 | size_t strcnt; | |
474 | # else | |
475 | # define str (startp + 1) | |
476 | # endif | |
477 | const unsigned char *extra; | |
478 | int32_t idx; | |
479 | int32_t elem; | |
480 | int32_t second; | |
481 | int32_t hash; | |
482 | ||
483 | # ifdef WIDE_CHAR_VERSION | |
484 | /* We have to convert the name to a single-byte | |
485 | string. This is possible since the names | |
486 | consist of ASCII characters and the internal | |
487 | representation is UCS4. */ | |
488 | for (strcnt = 0; strcnt < c1; ++strcnt) | |
489 | str[strcnt] = startp[1 + strcnt]; | |
490 | # endif | |
491 | ||
492 | table_size = | |
493 | _NL_CURRENT_WORD (LC_COLLATE, | |
494 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
495 | symb_table = (const int32_t *) | |
496 | _NL_CURRENT (LC_COLLATE, | |
497 | _NL_COLLATE_SYMB_TABLEMB); | |
498 | extra = (const unsigned char *) | |
499 | _NL_CURRENT (LC_COLLATE, | |
500 | _NL_COLLATE_SYMB_EXTRAMB); | |
501 | ||
502 | /* Locate the character in the hashing table. */ | |
503 | hash = elem_hash (str, c1); | |
504 | ||
505 | idx = 0; | |
506 | elem = hash % table_size; | |
507 | if (symb_table[2 * elem] != 0) | |
508 | { | |
509 | second = hash % (table_size - 2) + 1; | |
510 | ||
511 | do | |
512 | { | |
513 | /* First compare the hashing value. */ | |
514 | if (symb_table[2 * elem] == hash | |
515 | && (c1 | |
516 | == extra[symb_table[2 * elem + 1]]) | |
517 | && memcmp (str, | |
518 | &extra[symb_table[2 * elem | |
519 | + 1] | |
520 | + 1], c1) == 0) | |
521 | { | |
522 | /* Yep, this is the entry. */ | |
523 | idx = symb_table[2 * elem + 1]; | |
524 | idx += 1 + extra[idx]; | |
525 | break; | |
526 | } | |
527 | ||
528 | /* Next entry. */ | |
529 | elem += second; | |
530 | } | |
531 | while (symb_table[2 * elem] != 0); | |
532 | } | |
533 | ||
534 | if (symb_table[2 * elem] != 0) | |
535 | { | |
536 | /* Compare the byte sequence but only if | |
537 | this is not part of a range. */ | |
538 | # ifdef WIDE_CHAR_VERSION | |
539 | int32_t *wextra; | |
540 | ||
541 | idx += 1 + extra[idx]; | |
542 | /* Adjust for the alignment. */ | |
543 | idx = (idx + 3) & ~3; | |
544 | ||
545 | wextra = (int32_t *) &extra[idx + 4]; | |
546 | # endif | |
547 | ||
548 | if (! is_range) | |
549 | { | |
550 | # ifdef WIDE_CHAR_VERSION | |
551 | for (c1 = 0; | |
552 | (int32_t) c1 < wextra[idx]; | |
553 | ++c1) | |
554 | if (n[c1] != wextra[1 + c1]) | |
555 | break; | |
556 | ||
557 | if ((int32_t) c1 == wextra[idx]) | |
558 | goto matched; | |
559 | # else | |
560 | for (c1 = 0; c1 < extra[idx]; ++c1) | |
561 | if (n[c1] != extra[1 + c1]) | |
562 | break; | |
563 | ||
564 | if (c1 == extra[idx]) | |
565 | goto matched; | |
566 | # endif | |
567 | } | |
568 | ||
569 | /* Get the collation sequence value. */ | |
570 | is_seqval = true; | |
571 | # ifdef WIDE_CHAR_VERSION | |
572 | cold = wextra[1 + wextra[idx]]; | |
573 | # else | |
574 | /* Adjust for the alignment. */ | |
575 | idx += 1 + extra[idx]; | |
576 | idx = (idx + 3) & ~4; | |
577 | cold = *((int32_t *) &extra[idx]); | |
578 | # endif | |
579 | ||
580 | c = *p++; | |
581 | } | |
582 | else if (c1 == 1) | |
583 | { | |
584 | /* No valid character. Match it as a | |
585 | single byte. */ | |
586 | if (!is_range && *n == str[0]) | |
587 | goto matched; | |
588 | ||
589 | cold = str[0]; | |
590 | c = *p++; | |
591 | } | |
592 | else | |
593 | return FNM_NOMATCH; | |
594 | } | |
595 | } | |
596 | else | |
597 | # undef str | |
598 | #endif | |
599 | { | |
600 | c = FOLD (c); | |
601 | normal_bracket: | |
602 | ||
603 | /* We have to handling the symbols differently in | |
604 | ranges since then the collation sequence is | |
605 | important. */ | |
606 | is_range = (*p == L_('-') && p[1] != L_('\0') | |
607 | && p[1] != L_(']')); | |
608 | ||
609 | if (!is_range && c == fn) | |
610 | goto matched; | |
611 | ||
612 | #if _LIBC | |
613 | /* This is needed if we goto normal_bracket; from | |
614 | outside of is_seqval's scope. */ | |
615 | is_seqval = false; | |
616 | #endif | |
617 | ||
618 | cold = c; | |
619 | c = *p++; | |
620 | } | |
621 | ||
622 | if (c == L_('-') && *p != L_(']')) | |
623 | { | |
624 | #if _LIBC | |
625 | /* We have to find the collation sequence | |
626 | value for C. Collation sequence is nothing | |
627 | we can regularly access. The sequence | |
628 | value is defined by the order in which the | |
629 | definitions of the collation values for the | |
630 | various characters appear in the source | |
631 | file. A strange concept, nowhere | |
632 | documented. */ | |
633 | uint32_t fcollseq; | |
634 | uint32_t lcollseq; | |
635 | UCHAR cend = *p++; | |
636 | ||
637 | # ifdef WIDE_CHAR_VERSION | |
638 | /* Search in the 'names' array for the characters. */ | |
639 | fcollseq = __collseq_table_lookup (collseq, fn); | |
640 | if (fcollseq == ~((uint32_t) 0)) | |
641 | /* XXX We don't know anything about the character | |
642 | we are supposed to match. This means we are | |
643 | failing. */ | |
644 | goto range_not_matched; | |
645 | ||
646 | if (is_seqval) | |
647 | lcollseq = cold; | |
648 | else | |
649 | lcollseq = __collseq_table_lookup (collseq, cold); | |
650 | # else | |
651 | fcollseq = collseq[fn]; | |
652 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; | |
653 | # endif | |
654 | ||
655 | is_seqval = false; | |
656 | if (cend == L_('[') && *p == L_('.')) | |
657 | { | |
658 | uint32_t nrules = | |
659 | _NL_CURRENT_WORD (LC_COLLATE, | |
660 | _NL_COLLATE_NRULES); | |
661 | const CHAR *startp = p; | |
662 | size_t c1 = 0; | |
663 | ||
664 | while (1) | |
665 | { | |
666 | c = *++p; | |
667 | if (c == L_('.') && p[1] == L_(']')) | |
668 | { | |
669 | p += 2; | |
670 | break; | |
671 | } | |
672 | if (c == '\0') | |
673 | return FNM_NOMATCH; | |
674 | ++c1; | |
675 | } | |
676 | ||
677 | if (nrules == 0) | |
678 | { | |
679 | /* There are no names defined in the | |
680 | collation data. Therefore we only | |
681 | accept the trivial names consisting | |
682 | of the character itself. */ | |
683 | if (c1 != 1) | |
684 | return FNM_NOMATCH; | |
685 | ||
686 | cend = startp[1]; | |
687 | } | |
688 | else | |
689 | { | |
690 | int32_t table_size; | |
691 | const int32_t *symb_table; | |
692 | # ifdef WIDE_CHAR_VERSION | |
693 | char str[c1]; | |
694 | size_t strcnt; | |
695 | # else | |
696 | # define str (startp + 1) | |
697 | # endif | |
698 | const unsigned char *extra; | |
699 | int32_t idx; | |
700 | int32_t elem; | |
701 | int32_t second; | |
702 | int32_t hash; | |
703 | ||
704 | # ifdef WIDE_CHAR_VERSION | |
705 | /* We have to convert the name to a single-byte | |
706 | string. This is possible since the names | |
707 | consist of ASCII characters and the internal | |
708 | representation is UCS4. */ | |
709 | for (strcnt = 0; strcnt < c1; ++strcnt) | |
710 | str[strcnt] = startp[1 + strcnt]; | |
711 | # endif | |
712 | ||
713 | table_size = | |
714 | _NL_CURRENT_WORD (LC_COLLATE, | |
715 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
716 | symb_table = (const int32_t *) | |
717 | _NL_CURRENT (LC_COLLATE, | |
718 | _NL_COLLATE_SYMB_TABLEMB); | |
719 | extra = (const unsigned char *) | |
720 | _NL_CURRENT (LC_COLLATE, | |
721 | _NL_COLLATE_SYMB_EXTRAMB); | |
722 | ||
723 | /* Locate the character in the hashing | |
724 | table. */ | |
725 | hash = elem_hash (str, c1); | |
726 | ||
727 | idx = 0; | |
728 | elem = hash % table_size; | |
729 | if (symb_table[2 * elem] != 0) | |
730 | { | |
731 | second = hash % (table_size - 2) + 1; | |
732 | ||
733 | do | |
734 | { | |
735 | /* First compare the hashing value. */ | |
736 | if (symb_table[2 * elem] == hash | |
737 | && (c1 | |
738 | == extra[symb_table[2 * elem + 1]]) | |
739 | && memcmp (str, | |
740 | &extra[symb_table[2 * elem + 1] | |
741 | + 1], c1) == 0) | |
742 | { | |
743 | /* Yep, this is the entry. */ | |
744 | idx = symb_table[2 * elem + 1]; | |
745 | idx += 1 + extra[idx]; | |
746 | break; | |
747 | } | |
748 | ||
749 | /* Next entry. */ | |
750 | elem += second; | |
751 | } | |
752 | while (symb_table[2 * elem] != 0); | |
753 | } | |
754 | ||
755 | if (symb_table[2 * elem] != 0) | |
756 | { | |
757 | /* Compare the byte sequence but only if | |
758 | this is not part of a range. */ | |
759 | # ifdef WIDE_CHAR_VERSION | |
760 | int32_t *wextra; | |
761 | ||
762 | idx += 1 + extra[idx]; | |
763 | /* Adjust for the alignment. */ | |
764 | idx = (idx + 3) & ~4; | |
765 | ||
766 | wextra = (int32_t *) &extra[idx + 4]; | |
767 | # endif | |
768 | /* Get the collation sequence value. */ | |
769 | is_seqval = true; | |
770 | # ifdef WIDE_CHAR_VERSION | |
771 | cend = wextra[1 + wextra[idx]]; | |
772 | # else | |
773 | /* Adjust for the alignment. */ | |
774 | idx += 1 + extra[idx]; | |
775 | idx = (idx + 3) & ~4; | |
776 | cend = *((int32_t *) &extra[idx]); | |
777 | # endif | |
778 | } | |
779 | else if (symb_table[2 * elem] != 0 && c1 == 1) | |
780 | { | |
781 | cend = str[0]; | |
782 | c = *p++; | |
783 | } | |
784 | else | |
785 | return FNM_NOMATCH; | |
786 | } | |
787 | # undef str | |
788 | } | |
789 | else | |
790 | { | |
791 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) | |
792 | cend = *p++; | |
793 | if (cend == L_('\0')) | |
794 | return FNM_NOMATCH; | |
795 | cend = FOLD (cend); | |
796 | } | |
797 | ||
798 | /* XXX It is not entirely clear to me how to handle | |
799 | characters which are not mentioned in the | |
800 | collation specification. */ | |
801 | if ( | |
802 | # ifdef WIDE_CHAR_VERSION | |
803 | lcollseq == 0xffffffff || | |
804 | # endif | |
805 | lcollseq <= fcollseq) | |
806 | { | |
807 | /* We have to look at the upper bound. */ | |
808 | uint32_t hcollseq; | |
809 | ||
810 | if (is_seqval) | |
811 | hcollseq = cend; | |
812 | else | |
813 | { | |
814 | # ifdef WIDE_CHAR_VERSION | |
815 | hcollseq = | |
816 | __collseq_table_lookup (collseq, cend); | |
817 | if (hcollseq == ~((uint32_t) 0)) | |
818 | { | |
819 | /* Hum, no information about the upper | |
820 | bound. The matching succeeds if the | |
821 | lower bound is matched exactly. */ | |
822 | if (lcollseq != fcollseq) | |
823 | goto range_not_matched; | |
824 | ||
825 | goto matched; | |
826 | } | |
827 | # else | |
828 | hcollseq = collseq[cend]; | |
829 | # endif | |
830 | } | |
831 | ||
832 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) | |
833 | goto matched; | |
834 | } | |
835 | # ifdef WIDE_CHAR_VERSION | |
836 | range_not_matched: | |
837 | # endif | |
838 | #else | |
839 | /* We use a boring value comparison of the character | |
840 | values. This is better than comparing using | |
841 | 'strcoll' since the latter would have surprising | |
842 | and sometimes fatal consequences. */ | |
843 | UCHAR cend = *p++; | |
844 | ||
845 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) | |
846 | cend = *p++; | |
847 | if (cend == L_('\0')) | |
848 | return FNM_NOMATCH; | |
849 | ||
850 | /* It is a range. */ | |
851 | if (cold <= fn && fn <= cend) | |
852 | goto matched; | |
853 | #endif | |
854 | ||
855 | c = *p++; | |
856 | } | |
857 | } | |
858 | ||
859 | if (c == L_(']')) | |
860 | break; | |
861 | } | |
862 | ||
863 | if (!not) | |
864 | return FNM_NOMATCH; | |
865 | break; | |
866 | ||
867 | matched: | |
868 | /* Skip the rest of the [...] that already matched. */ | |
869 | do | |
870 | { | |
871 | ignore_next: | |
872 | c = *p++; | |
873 | ||
874 | if (c == L_('\0')) | |
875 | /* [... (unterminated) loses. */ | |
876 | return FNM_NOMATCH; | |
877 | ||
878 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) | |
879 | { | |
880 | if (*p == L_('\0')) | |
881 | return FNM_NOMATCH; | |
882 | /* XXX 1003.2d11 is unclear if this is right. */ | |
883 | ++p; | |
884 | } | |
885 | else if (c == L_('[') && *p == L_(':')) | |
886 | { | |
887 | int c1 = 0; | |
888 | const CHAR *startp = p; | |
889 | ||
890 | while (1) | |
891 | { | |
892 | c = *++p; | |
893 | if (++c1 == CHAR_CLASS_MAX_LENGTH) | |
894 | return FNM_NOMATCH; | |
895 | ||
896 | if (*p == L_(':') && p[1] == L_(']')) | |
897 | break; | |
898 | ||
899 | if (c < L_('a') || c >= L_('z')) | |
900 | { | |
901 | p = startp; | |
902 | goto ignore_next; | |
903 | } | |
904 | } | |
905 | p += 2; | |
906 | c = *p++; | |
907 | } | |
908 | else if (c == L_('[') && *p == L_('=')) | |
909 | { | |
910 | c = *++p; | |
911 | if (c == L_('\0')) | |
912 | return FNM_NOMATCH; | |
913 | c = *++p; | |
914 | if (c != L_('=') || p[1] != L_(']')) | |
915 | return FNM_NOMATCH; | |
916 | p += 2; | |
917 | c = *p++; | |
918 | } | |
919 | else if (c == L_('[') && *p == L_('.')) | |
920 | { | |
921 | ++p; | |
922 | while (1) | |
923 | { | |
924 | c = *++p; | |
925 | if (c == '\0') | |
926 | return FNM_NOMATCH; | |
927 | ||
928 | if (*p == L_('.') && p[1] == L_(']')) | |
929 | break; | |
930 | } | |
931 | p += 2; | |
932 | c = *p++; | |
933 | } | |
934 | } | |
935 | while (c != L_(']')); | |
936 | if (not) | |
937 | return FNM_NOMATCH; | |
938 | } | |
939 | break; | |
940 | ||
941 | case L_('+'): | |
942 | case L_('@'): | |
943 | case L_('!'): | |
944 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
945 | { | |
946 | int res; | |
947 | ||
948 | res = EXT (c, p, n, string_end, no_leading_period, flags); | |
949 | if (res != -1) | |
950 | return res; | |
951 | } | |
952 | goto normal_match; | |
953 | ||
954 | case L_('/'): | |
955 | if (NO_LEADING_PERIOD (flags)) | |
956 | { | |
957 | if (n == string_end || c != (UCHAR) *n) | |
958 | return FNM_NOMATCH; | |
959 | ||
960 | new_no_leading_period = true; | |
961 | break; | |
962 | } | |
7a6dbc2f | 963 | FALLTHROUGH; |
8690e634 JK |
964 | default: |
965 | normal_match: | |
966 | if (n == string_end || c != FOLD ((UCHAR) *n)) | |
967 | return FNM_NOMATCH; | |
968 | } | |
969 | ||
970 | no_leading_period = new_no_leading_period; | |
971 | ++n; | |
972 | } | |
973 | ||
974 | if (n == string_end) | |
975 | return 0; | |
976 | ||
977 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) | |
978 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
979 | return 0; | |
980 | ||
981 | return FNM_NOMATCH; | |
982 | } | |
983 | ||
984 | ||
985 | static const CHAR * | |
986 | internal_function | |
987 | END (const CHAR *pattern) | |
988 | { | |
989 | const CHAR *p = pattern; | |
990 | ||
991 | while (1) | |
992 | if (*++p == L_('\0')) | |
993 | /* This is an invalid pattern. */ | |
994 | return pattern; | |
995 | else if (*p == L_('[')) | |
996 | { | |
997 | /* Handle brackets special. */ | |
998 | if (posixly_correct == 0) | |
999 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
1000 | ||
1001 | /* Skip the not sign. We have to recognize it because of a possibly | |
1002 | following ']'. */ | |
1003 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) | |
1004 | ++p; | |
1005 | /* A leading ']' is recognized as such. */ | |
1006 | if (*p == L_(']')) | |
1007 | ++p; | |
1008 | /* Skip over all characters of the list. */ | |
1009 | while (*p != L_(']')) | |
1010 | if (*p++ == L_('\0')) | |
1011 | /* This is no valid pattern. */ | |
1012 | return pattern; | |
1013 | } | |
1014 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') | |
1015 | || *p == L_('!')) && p[1] == L_('(')) | |
1016 | p = END (p + 1); | |
1017 | else if (*p == L_(')')) | |
1018 | break; | |
1019 | ||
1020 | return p + 1; | |
1021 | } | |
1022 | ||
1023 | ||
1024 | static int | |
1025 | internal_function | |
1026 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
1027 | bool no_leading_period, int flags) | |
1028 | { | |
1029 | const CHAR *startp; | |
1030 | size_t level; | |
1031 | struct patternlist | |
1032 | { | |
1033 | struct patternlist *next; | |
49e4877c | 1034 | CHAR str[FLEXIBLE_ARRAY_MEMBER]; |
8690e634 JK |
1035 | } *list = NULL; |
1036 | struct patternlist **lastp = &list; | |
1037 | size_t pattern_len = STRLEN (pattern); | |
1038 | const CHAR *p; | |
1039 | const CHAR *rs; | |
1040 | enum { ALLOCA_LIMIT = 8000 }; | |
1041 | ||
1042 | /* Parse the pattern. Store the individual parts in the list. */ | |
1043 | level = 0; | |
1044 | for (startp = p = pattern + 1; ; ++p) | |
1045 | if (*p == L_('\0')) | |
1046 | /* This is an invalid pattern. */ | |
1047 | return -1; | |
1048 | else if (*p == L_('[')) | |
1049 | { | |
1050 | /* Handle brackets special. */ | |
1051 | if (posixly_correct == 0) | |
1052 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
1053 | ||
1054 | /* Skip the not sign. We have to recognize it because of a possibly | |
1055 | following ']'. */ | |
1056 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) | |
1057 | ++p; | |
1058 | /* A leading ']' is recognized as such. */ | |
1059 | if (*p == L_(']')) | |
1060 | ++p; | |
1061 | /* Skip over all characters of the list. */ | |
1062 | while (*p != L_(']')) | |
1063 | if (*p++ == L_('\0')) | |
1064 | /* This is no valid pattern. */ | |
1065 | return -1; | |
1066 | } | |
1067 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') | |
1068 | || *p == L_('!')) && p[1] == L_('(')) | |
1069 | /* Remember the nesting level. */ | |
1070 | ++level; | |
1071 | else if (*p == L_(')')) | |
1072 | { | |
1073 | if (level-- == 0) | |
1074 | { | |
1075 | /* This means we found the end of the pattern. */ | |
1076 | #define NEW_PATTERN \ | |
1077 | struct patternlist *newp; \ | |
1078 | size_t plen; \ | |
1079 | size_t plensize; \ | |
1080 | size_t newpsize; \ | |
1081 | \ | |
1082 | plen = (opt == L_('?') || opt == L_('@') \ | |
1083 | ? pattern_len \ | |
1084 | : p - startp + 1UL); \ | |
1085 | plensize = plen * sizeof (CHAR); \ | |
49e4877c | 1086 | newpsize = FLEXSIZEOF (struct patternlist, str, plensize); \ |
8690e634 JK |
1087 | if ((size_t) -1 / sizeof (CHAR) < plen \ |
1088 | || newpsize < offsetof (struct patternlist, str) \ | |
1089 | || ALLOCA_LIMIT <= newpsize) \ | |
1090 | return -1; \ | |
1091 | newp = (struct patternlist *) alloca (newpsize); \ | |
1092 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ | |
1093 | newp->next = NULL; \ | |
1094 | *lastp = newp; \ | |
1095 | lastp = &newp->next | |
1096 | NEW_PATTERN; | |
1097 | break; | |
1098 | } | |
1099 | } | |
1100 | else if (*p == L_('|')) | |
1101 | { | |
1102 | if (level == 0) | |
1103 | { | |
1104 | NEW_PATTERN; | |
1105 | startp = p + 1; | |
1106 | } | |
1107 | } | |
1108 | assert (list != NULL); | |
1109 | assert (p[-1] == L_(')')); | |
1110 | #undef NEW_PATTERN | |
1111 | ||
1112 | switch (opt) | |
1113 | { | |
1114 | case L_('*'): | |
1115 | if (FCT (p, string, string_end, no_leading_period, flags) == 0) | |
1116 | return 0; | |
7a6dbc2f | 1117 | FALLTHROUGH; |
8690e634 JK |
1118 | case L_('+'): |
1119 | do | |
1120 | { | |
1121 | for (rs = string; rs <= string_end; ++rs) | |
1122 | /* First match the prefix with the current pattern with the | |
1123 | current pattern. */ | |
1124 | if (FCT (list->str, string, rs, no_leading_period, | |
1125 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 | |
1126 | /* This was successful. Now match the rest with the rest | |
1127 | of the pattern. */ | |
1128 | && (FCT (p, rs, string_end, | |
1129 | rs == string | |
1130 | ? no_leading_period | |
1131 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1132 | flags & FNM_FILE_NAME | |
1133 | ? flags : flags & ~FNM_PERIOD) == 0 | |
1134 | /* This didn't work. Try the whole pattern. */ | |
1135 | || (rs != string | |
1136 | && FCT (pattern - 1, rs, string_end, | |
1137 | rs == string | |
1138 | ? no_leading_period | |
1139 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1140 | flags & FNM_FILE_NAME | |
1141 | ? flags : flags & ~FNM_PERIOD) == 0))) | |
1142 | /* It worked. Signal success. */ | |
1143 | return 0; | |
1144 | } | |
1145 | while ((list = list->next) != NULL); | |
1146 | ||
1147 | /* None of the patterns lead to a match. */ | |
1148 | return FNM_NOMATCH; | |
1149 | ||
1150 | case L_('?'): | |
1151 | if (FCT (p, string, string_end, no_leading_period, flags) == 0) | |
1152 | return 0; | |
7a6dbc2f | 1153 | FALLTHROUGH; |
8690e634 JK |
1154 | case L_('@'): |
1155 | do | |
1156 | /* I cannot believe it but 'strcat' is actually acceptable | |
1157 | here. Match the entire string with the prefix from the | |
1158 | pattern list and the rest of the pattern following the | |
1159 | pattern list. */ | |
1160 | if (FCT (STRCAT (list->str, p), string, string_end, | |
1161 | no_leading_period, | |
1162 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) | |
1163 | /* It worked. Signal success. */ | |
1164 | return 0; | |
1165 | while ((list = list->next) != NULL); | |
1166 | ||
1167 | /* None of the patterns lead to a match. */ | |
1168 | return FNM_NOMATCH; | |
1169 | ||
1170 | case L_('!'): | |
1171 | for (rs = string; rs <= string_end; ++rs) | |
1172 | { | |
1173 | struct patternlist *runp; | |
1174 | ||
1175 | for (runp = list; runp != NULL; runp = runp->next) | |
1176 | if (FCT (runp->str, string, rs, no_leading_period, | |
1177 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) | |
1178 | break; | |
1179 | ||
1180 | /* If none of the patterns matched see whether the rest does. */ | |
1181 | if (runp == NULL | |
1182 | && (FCT (p, rs, string_end, | |
1183 | rs == string | |
1184 | ? no_leading_period | |
1185 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1186 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) | |
1187 | == 0)) | |
1188 | /* This is successful. */ | |
1189 | return 0; | |
1190 | } | |
1191 | ||
1192 | /* None of the patterns together with the rest of the pattern | |
1193 | lead to a match. */ | |
1194 | return FNM_NOMATCH; | |
1195 | ||
1196 | default: | |
1197 | assert (! "Invalid extended matching operator"); | |
1198 | break; | |
1199 | } | |
1200 | ||
1201 | return -1; | |
1202 | } | |
1203 | ||
1204 | ||
/* Drop the macros used by the code above.  (Presumably this lets the
   including file define them afresh before including this file again;
   the includer is not visible here.)  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC