* TODO: Remove item about line numbers being off. It is useless
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
ba47c66a
PS
35#include "defs.h"
36#include <string.h>
dd3b648e
RP
37
38/*
39 * Define the syntax stuff, so we can do the \<...\> things.
40 */
41
42#ifndef Sword /* must be non-zero in some of the tests below... */
43#define Sword 1
44#endif
45
46#define SYNTAX(c) re_syntax_table[c]
47
48#ifdef SYNTAX_TABLE
49
50char *re_syntax_table;
51
52#else
53
54static char re_syntax_table[256];
55
56static void
57init_syntax_once ()
58{
59 register int c;
60 static int done = 0;
61
62 if (done)
63 return;
64
4ed97c9a 65 memset (re_syntax_table, '\0', sizeof re_syntax_table);
dd3b648e
RP
66
67 for (c = 'a'; c <= 'z'; c++)
68 re_syntax_table[c] = Sword;
69
70 for (c = 'A'; c <= 'Z'; c++)
71 re_syntax_table[c] = Sword;
72
73 for (c = '0'; c <= '9'; c++)
74 re_syntax_table[c] = Sword;
75
76 done = 1;
77}
78
79#endif /* SYNTAX_TABLE */
80#endif /* not emacs */
81
82#include "regex.h"
83
84/* Number of failure points to allocate space for initially,
85 when matching. If this number is exceeded, more space is allocated,
86 so it is not a hard limit. */
87
88#ifndef NFAILURES
89#define NFAILURES 80
90#endif /* NFAILURES */
91
92/* width of a byte in bits */
93
94#define BYTEWIDTH 8
95
96#ifndef SIGN_EXTEND_CHAR
97#define SIGN_EXTEND_CHAR(x) (x)
98#endif
99\f
100static int obscure_syntax = 0;
101
102/* Specify the precise syntax of regexp for compilation.
103 This provides for compatibility for various utilities
104 which historically have different, incompatible syntaxes.
105
106 The argument SYNTAX is a bit-mask containing the two bits
107 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
108
109int
110re_set_syntax (syntax)
51b57ded 111 int syntax;
dd3b648e
RP
112{
113 int ret;
114
115 ret = obscure_syntax;
116 obscure_syntax = syntax;
117 return ret;
118}
119\f
120/* re_compile_pattern takes a regular-expression string
121 and converts it into a buffer full of byte commands for matching.
122
123 PATTERN is the address of the pattern string
124 SIZE is the length of it.
125 BUFP is a struct re_pattern_buffer * which points to the info
126 on where to store the byte commands.
127 This structure contains a char * which points to the
128 actual space, which should have been obtained with malloc.
129 re_compile_pattern may use realloc to grow the buffer space.
130
131 The number of bytes of commands can be found out by looking in
132 the struct re_pattern_buffer that bufp pointed to,
133 after re_compile_pattern returns.
134*/
135
136#define PATPUSH(ch) (*b++ = (char) (ch))
137
138#define PATFETCH(c) \
139 {if (p == pend) goto end_of_pattern; \
140 c = * (unsigned char *) p++; \
141 if (translate) c = translate[c]; }
142
143#define PATFETCH_RAW(c) \
144 {if (p == pend) goto end_of_pattern; \
145 c = * (unsigned char *) p++; }
146
147#define PATUNFETCH p--
148
149#define EXTEND_BUFFER \
150 { char *old_buffer = bufp->buffer; \
151 if (bufp->allocated == (1<<16)) goto too_big; \
152 bufp->allocated *= 2; \
153 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
154 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
155 goto memory_exhausted; \
156 c = bufp->buffer - old_buffer; \
157 b += c; \
158 if (fixup_jump) \
159 fixup_jump += c; \
160 if (laststart) \
161 laststart += c; \
162 begalt += c; \
163 if (pending_exact) \
164 pending_exact += c; \
165 }
166
50e0dc41 167static void store_jump (), insert_jump ();
dd3b648e
RP
168
169char *
170re_compile_pattern (pattern, size, bufp)
171 char *pattern;
172 int size;
173 struct re_pattern_buffer *bufp;
174{
175 register char *b = bufp->buffer;
176 register char *p = pattern;
177 char *pend = pattern + size;
178 register unsigned c, c1;
179 char *p1;
180 unsigned char *translate = (unsigned char *) bufp->translate;
181
182 /* address of the count-byte of the most recently inserted "exactn" command.
183 This makes it possible to tell whether a new exact-match character
184 can be added to that command or requires a new "exactn" command. */
185
186 char *pending_exact = 0;
187
188 /* address of the place where a forward-jump should go
189 to the end of the containing expression.
190 Each alternative of an "or", except the last, ends with a forward-jump
191 of this sort. */
192
193 char *fixup_jump = 0;
194
195 /* address of start of the most recently finished expression.
196 This tells postfix * where to find the start of its operand. */
197
198 char *laststart = 0;
199
200 /* In processing a repeat, 1 means zero matches is allowed */
201
202 char zero_times_ok;
203
204 /* In processing a repeat, 1 means many matches is allowed */
205
206 char many_times_ok;
207
208 /* address of beginning of regexp, or inside of last \( */
209
210 char *begalt = b;
211
212 /* Stack of information saved by \( and restored by \).
213 Four stack elements are pushed by each \(:
214 First, the value of b.
215 Second, the value of fixup_jump.
216 Third, the value of regnum.
217 Fourth, the value of begalt. */
218
219 int stackb[40];
220 int *stackp = stackb;
221 int *stacke = stackb + 40;
222 int *stackt;
223
224 /* Counts \('s as they are encountered. Remembered for the matching \),
225 where it becomes the "register number" to put in the stop_memory command */
226
227 int regnum = 1;
228
229 bufp->fastmap_accurate = 0;
230
231#ifndef emacs
232#ifndef SYNTAX_TABLE
233 /*
234 * Initialize the syntax table.
235 */
236 init_syntax_once();
237#endif
238#endif
239
240 if (bufp->allocated == 0)
241 {
242 bufp->allocated = 28;
243 if (bufp->buffer)
244 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
245 bufp->buffer = (char *) realloc (bufp->buffer, 28);
246 else
247 /* Caller did not allocate a buffer. Do it for him */
248 bufp->buffer = (char *) malloc (28);
249 if (!bufp->buffer) goto memory_exhausted;
250 begalt = b = bufp->buffer;
251 }
252
253 while (p != pend)
254 {
255 if (b - bufp->buffer > bufp->allocated - 10)
256 /* Note that EXTEND_BUFFER clobbers c */
257 EXTEND_BUFFER;
258
259 PATFETCH (c);
260
261 switch (c)
262 {
263 case '$':
264 if (obscure_syntax & RE_TIGHT_VBAR)
265 {
266 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
267 goto normal_char;
268 /* Make operand of last vbar end before this `$'. */
269 if (fixup_jump)
270 store_jump (fixup_jump, jump, b);
271 fixup_jump = 0;
272 PATPUSH (endline);
273 break;
274 }
275
276 /* $ means succeed if at end of line, but only in special contexts.
277 If randomly in the middle of a pattern, it is a normal character. */
278 if (p == pend || *p == '\n'
279 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
280 || (obscure_syntax & RE_NO_BK_PARENS
281 ? *p == ')'
282 : *p == '\\' && p[1] == ')')
283 || (obscure_syntax & RE_NO_BK_VBAR
284 ? *p == '|'
285 : *p == '\\' && p[1] == '|'))
286 {
287 PATPUSH (endline);
288 break;
289 }
290 goto normal_char;
291
292 case '^':
293 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
294
295 if (laststart && p[-2] != '\n'
296 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
297 goto normal_char;
298 if (obscure_syntax & RE_TIGHT_VBAR)
299 {
300 if (p != pattern + 1
301 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
302 goto normal_char;
303 PATPUSH (begline);
304 begalt = b;
305 }
306 else
307 PATPUSH (begline);
308 break;
309
310 case '+':
311 case '?':
312 if (obscure_syntax & RE_BK_PLUS_QM)
313 goto normal_char;
314 handle_plus:
315 case '*':
316 /* If there is no previous pattern, char not special. */
317 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
318 goto normal_char;
319 /* If there is a sequence of repetition chars,
320 collapse it down to equivalent to just one. */
321 zero_times_ok = 0;
322 many_times_ok = 0;
323 while (1)
324 {
325 zero_times_ok |= c != '+';
326 many_times_ok |= c != '?';
327 if (p == pend)
328 break;
329 PATFETCH (c);
330 if (c == '*')
331 ;
332 else if (!(obscure_syntax & RE_BK_PLUS_QM)
333 && (c == '+' || c == '?'))
334 ;
335 else if ((obscure_syntax & RE_BK_PLUS_QM)
336 && c == '\\')
337 {
338 int c1;
339 PATFETCH (c1);
340 if (!(c1 == '+' || c1 == '?'))
341 {
342 PATUNFETCH;
343 PATUNFETCH;
344 break;
345 }
346 c = c1;
347 }
348 else
349 {
350 PATUNFETCH;
351 break;
352 }
353 }
354
355 /* Star, etc. applied to an empty pattern is equivalent
356 to an empty pattern. */
357 if (!laststart)
358 break;
359
360 /* Now we know whether 0 matches is allowed,
361 and whether 2 or more matches is allowed. */
362 if (many_times_ok)
363 {
364 /* If more than one repetition is allowed,
365 put in a backward jump at the end. */
366 store_jump (b, maybe_finalize_jump, laststart - 3);
367 b += 3;
368 }
369 insert_jump (on_failure_jump, laststart, b + 3, b);
370 pending_exact = 0;
371 b += 3;
372 if (!zero_times_ok)
373 {
374 /* At least one repetition required: insert before the loop
375 a skip over the initial on-failure-jump instruction */
376 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
377 b += 3;
378 }
379 break;
380
381 case '.':
382 laststart = b;
383 PATPUSH (anychar);
384 break;
385
386 case '[':
387 while (b - bufp->buffer
388 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
389 /* Note that EXTEND_BUFFER clobbers c */
390 EXTEND_BUFFER;
391
392 laststart = b;
393 if (*p == '^')
394 PATPUSH (charset_not), p++;
395 else
396 PATPUSH (charset);
397 p1 = p;
398
399 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
400 /* Clear the whole map */
4ed97c9a 401 memset (b, '\0', (1 << BYTEWIDTH) / BYTEWIDTH);
dd3b648e
RP
402 /* Read in characters and ranges, setting map bits */
403 while (1)
404 {
405 PATFETCH (c);
406 if (c == ']' && p != p1 + 1) break;
407 if (*p == '-' && p[1] != ']')
408 {
409 PATFETCH (c1);
410 PATFETCH (c1);
411 while (c <= c1)
412 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
413 }
414 else
415 {
416 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
417 }
418 }
419 /* Discard any bitmap bytes that are all 0 at the end of the map.
420 Decrement the map-length byte too. */
421 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
422 b[-1]--;
423 b += b[-1];
424 break;
425
426 case '(':
427 if (! (obscure_syntax & RE_NO_BK_PARENS))
428 goto normal_char;
429 else
430 goto handle_open;
431
432 case ')':
433 if (! (obscure_syntax & RE_NO_BK_PARENS))
434 goto normal_char;
435 else
436 goto handle_close;
437
438 case '\n':
439 if (! (obscure_syntax & RE_NEWLINE_OR))
440 goto normal_char;
441 else
442 goto handle_bar;
443
444 case '|':
445 if (! (obscure_syntax & RE_NO_BK_VBAR))
446 goto normal_char;
447 else
448 goto handle_bar;
449
450 case '\\':
451 if (p == pend) goto invalid_pattern;
452 PATFETCH_RAW (c);
453 switch (c)
454 {
455 case '(':
456 if (obscure_syntax & RE_NO_BK_PARENS)
457 goto normal_backsl;
458 handle_open:
459 if (stackp == stacke) goto nesting_too_deep;
460 if (regnum < RE_NREGS)
461 {
462 PATPUSH (start_memory);
463 PATPUSH (regnum);
464 }
465 *stackp++ = b - bufp->buffer;
466 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
467 *stackp++ = regnum++;
468 *stackp++ = begalt - bufp->buffer;
469 fixup_jump = 0;
470 laststart = 0;
471 begalt = b;
472 break;
473
474 case ')':
475 if (obscure_syntax & RE_NO_BK_PARENS)
476 goto normal_backsl;
477 handle_close:
478 if (stackp == stackb) goto unmatched_close;
479 begalt = *--stackp + bufp->buffer;
480 if (fixup_jump)
481 store_jump (fixup_jump, jump, b);
482 if (stackp[-1] < RE_NREGS)
483 {
484 PATPUSH (stop_memory);
485 PATPUSH (stackp[-1]);
486 }
487 stackp -= 2;
488 fixup_jump = 0;
489 if (*stackp)
490 fixup_jump = *stackp + bufp->buffer - 1;
491 laststart = *--stackp + bufp->buffer;
492 break;
493
494 case '|':
495 if (obscure_syntax & RE_NO_BK_VBAR)
496 goto normal_backsl;
497 handle_bar:
498 insert_jump (on_failure_jump, begalt, b + 6, b);
499 pending_exact = 0;
500 b += 3;
501 if (fixup_jump)
502 store_jump (fixup_jump, jump, b);
503 fixup_jump = b;
504 b += 3;
505 laststart = 0;
506 begalt = b;
507 break;
508
509#ifdef emacs
510 case '=':
511 PATPUSH (at_dot);
512 break;
513
514 case 's':
515 laststart = b;
516 PATPUSH (syntaxspec);
517 PATFETCH (c);
518 PATPUSH (syntax_spec_code[c]);
519 break;
520
521 case 'S':
522 laststart = b;
523 PATPUSH (notsyntaxspec);
524 PATFETCH (c);
525 PATPUSH (syntax_spec_code[c]);
526 break;
527#endif /* emacs */
528
529 case 'w':
530 laststart = b;
531 PATPUSH (wordchar);
532 break;
533
534 case 'W':
535 laststart = b;
536 PATPUSH (notwordchar);
537 break;
538
539 case '<':
540 PATPUSH (wordbeg);
541 break;
542
543 case '>':
544 PATPUSH (wordend);
545 break;
546
547 case 'b':
548 PATPUSH (wordbound);
549 break;
550
551 case 'B':
552 PATPUSH (notwordbound);
553 break;
554
555 case '`':
556 PATPUSH (begbuf);
557 break;
558
559 case '\'':
560 PATPUSH (endbuf);
561 break;
562
563 case '1':
564 case '2':
565 case '3':
566 case '4':
567 case '5':
568 case '6':
569 case '7':
570 case '8':
571 case '9':
572 c1 = c - '0';
573 if (c1 >= regnum)
574 goto normal_char;
575 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
576 if (*stackt == c1)
577 goto normal_char;
578 laststart = b;
579 PATPUSH (duplicate);
580 PATPUSH (c1);
581 break;
582
583 case '+':
584 case '?':
585 if (obscure_syntax & RE_BK_PLUS_QM)
586 goto handle_plus;
587
588 default:
589 normal_backsl:
590 /* You might think it would be useful for \ to mean
591 not to translate; but if we don't translate it
592 it will never match anything. */
593 if (translate) c = translate[c];
594 goto normal_char;
595 }
596 break;
597
598 default:
599 normal_char:
600 if (!pending_exact || pending_exact + *pending_exact + 1 != b
601 || *pending_exact == 0177 || *p == '*' || *p == '^'
602 || ((obscure_syntax & RE_BK_PLUS_QM)
603 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
604 : (*p == '+' || *p == '?')))
605 {
606 laststart = b;
607 PATPUSH (exactn);
608 pending_exact = b;
609 PATPUSH (0);
610 }
611 PATPUSH (c);
612 (*pending_exact)++;
613 }
614 }
615
616 if (fixup_jump)
617 store_jump (fixup_jump, jump, b);
618
619 if (stackp != stackb) goto unmatched_open;
620
621 bufp->used = b - bufp->buffer;
622 return 0;
623
624 invalid_pattern:
625 return "Invalid regular expression";
626
627 unmatched_open:
628 return "Unmatched \\(";
629
630 unmatched_close:
631 return "Unmatched \\)";
632
633 end_of_pattern:
634 return "Premature end of regular expression";
635
636 nesting_too_deep:
637 return "Nesting too deep";
638
639 too_big:
640 return "Regular expression too big";
641
642 memory_exhausted:
643 return "Memory exhausted";
644}
645
646/* Store where `from' points a jump operation to jump to where `to' points.
647 `opcode' is the opcode to store. */
648
50e0dc41 649static void
dd3b648e
RP
650store_jump (from, opcode, to)
651 char *from, *to;
652 char opcode;
653{
654 from[0] = opcode;
655 from[1] = (to - (from + 3)) & 0377;
656 from[2] = (to - (from + 3)) >> 8;
657}
658
659/* Open up space at char FROM, and insert there a jump to TO.
660 CURRENT_END gives te end of the storage no in use,
661 so we know how much data to copy up.
662 OP is the opcode of the jump to insert.
663
664 If you call this function, you must zero out pending_exact. */
665
50e0dc41 666static void
dd3b648e
RP
667insert_jump (op, from, to, current_end)
668 char op;
669 char *from, *to, *current_end;
670{
671 register char *pto = current_end + 3;
672 register char *pfrom = current_end;
673 while (pfrom != from)
674 *--pto = *--pfrom;
675 store_jump (from, op, to);
676}
677\f
678/* Given a pattern, compute a fastmap from it.
679 The fastmap records which of the (1 << BYTEWIDTH) possible characters
680 can start a string that matches the pattern.
681 This fastmap is used by re_search to skip quickly over totally implausible text.
682
683 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
684 as bufp->fastmap.
685 The other components of bufp describe the pattern to be used. */
686
687void
688re_compile_fastmap (bufp)
689 struct re_pattern_buffer *bufp;
690{
691 unsigned char *pattern = (unsigned char *) bufp->buffer;
692 int size = bufp->used;
693 register char *fastmap = bufp->fastmap;
694 register unsigned char *p = pattern;
695 register unsigned char *pend = pattern + size;
51b57ded 696 register int j;
dd3b648e
RP
697 unsigned char *translate = (unsigned char *) bufp->translate;
698
699 unsigned char *stackb[NFAILURES];
700 unsigned char **stackp = stackb;
701
4ed97c9a 702 memset (fastmap, '\0', (1 << BYTEWIDTH));
dd3b648e
RP
703 bufp->fastmap_accurate = 1;
704 bufp->can_be_null = 0;
705
706 while (p)
707 {
708 if (p == pend)
709 {
710 bufp->can_be_null = 1;
711 break;
712 }
713#ifdef SWITCH_ENUM_BUG
714 switch ((int) ((enum regexpcode) *p++))
715#else
716 switch ((enum regexpcode) *p++)
717#endif
718 {
719 case exactn:
720 if (translate)
721 fastmap[translate[p[1]]] = 1;
722 else
723 fastmap[p[1]] = 1;
724 break;
725
726 case begline:
727 case before_dot:
728 case at_dot:
729 case after_dot:
730 case begbuf:
731 case endbuf:
732 case wordbound:
733 case notwordbound:
734 case wordbeg:
735 case wordend:
736 continue;
737
738 case endline:
739 if (translate)
740 fastmap[translate['\n']] = 1;
741 else
742 fastmap['\n'] = 1;
743 if (bufp->can_be_null != 1)
744 bufp->can_be_null = 2;
745 break;
746
747 case finalize_jump:
748 case maybe_finalize_jump:
749 case jump:
750 case dummy_failure_jump:
751 bufp->can_be_null = 1;
752 j = *p++ & 0377;
753 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
754 p += j + 1; /* The 1 compensates for missing ++ above */
755 if (j > 0)
756 continue;
757 /* Jump backward reached implies we just went through
758 the body of a loop and matched nothing.
759 Opcode jumped to should be an on_failure_jump.
760 Just treat it like an ordinary jump.
761 For a * loop, it has pushed its failure point already;
762 if so, discard that as redundant. */
763 if ((enum regexpcode) *p != on_failure_jump)
764 continue;
765 p++;
766 j = *p++ & 0377;
767 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
768 p += j + 1; /* The 1 compensates for missing ++ above */
769 if (stackp != stackb && *stackp == p)
770 stackp--;
771 continue;
772
773 case on_failure_jump:
774 j = *p++ & 0377;
775 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
776 p++;
777 *++stackp = p + j;
778 continue;
779
780 case start_memory:
781 case stop_memory:
782 p++;
783 continue;
784
785 case duplicate:
786 bufp->can_be_null = 1;
787 fastmap['\n'] = 1;
788 case anychar:
789 for (j = 0; j < (1 << BYTEWIDTH); j++)
790 if (j != '\n')
791 fastmap[j] = 1;
792 if (bufp->can_be_null)
793 return;
794 /* Don't return; check the alternative paths
795 so we can set can_be_null if appropriate. */
796 break;
797
798 case wordchar:
799 for (j = 0; j < (1 << BYTEWIDTH); j++)
800 if (SYNTAX (j) == Sword)
801 fastmap[j] = 1;
802 break;
803
804 case notwordchar:
805 for (j = 0; j < (1 << BYTEWIDTH); j++)
806 if (SYNTAX (j) != Sword)
807 fastmap[j] = 1;
808 break;
809
810#ifdef emacs
811 case syntaxspec:
812 k = *p++;
813 for (j = 0; j < (1 << BYTEWIDTH); j++)
814 if (SYNTAX (j) == (enum syntaxcode) k)
815 fastmap[j] = 1;
816 break;
817
818 case notsyntaxspec:
819 k = *p++;
820 for (j = 0; j < (1 << BYTEWIDTH); j++)
821 if (SYNTAX (j) != (enum syntaxcode) k)
822 fastmap[j] = 1;
823 break;
824#endif /* emacs */
825
826 case charset:
827 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
828 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
829 {
830 if (translate)
831 fastmap[translate[j]] = 1;
832 else
833 fastmap[j] = 1;
834 }
835 break;
836
837 case charset_not:
838 /* Chars beyond end of map must be allowed */
839 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
840 if (translate)
841 fastmap[translate[j]] = 1;
842 else
843 fastmap[j] = 1;
844
845 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
846 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
847 {
848 if (translate)
849 fastmap[translate[j]] = 1;
850 else
851 fastmap[j] = 1;
852 }
853 break;
854 }
855
856 /* Get here means we have successfully found the possible starting characters
857 of one path of the pattern. We need not follow this path any farther.
858 Instead, look at the next alternative remembered in the stack. */
859 if (stackp != stackb)
860 p = *stackp--;
861 else
862 break;
863 }
864}
865\f
866/* Like re_search_2, below, but only one string is specified. */
867
868int
869re_search (pbufp, string, size, startpos, range, regs)
870 struct re_pattern_buffer *pbufp;
871 char *string;
872 int size, startpos, range;
873 struct re_registers *regs;
874{
875 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
876}
877
878/* Like re_match_2 but tries first a match starting at index STARTPOS,
879 then at STARTPOS + 1, and so on.
880 RANGE is the number of places to try before giving up.
881 If RANGE is negative, the starting positions tried are
882 STARTPOS, STARTPOS - 1, etc.
883 It is up to the caller to make sure that range is not so large
884 as to take the starting position outside of the input strings.
885
886The value returned is the position at which the match was found,
887 or -1 if no match was found,
888 or -2 if error (such as failure stack overflow). */
889
890int
891re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
892 struct re_pattern_buffer *pbufp;
893 char *string1, *string2;
894 int size1, size2;
895 int startpos;
896 register int range;
897 struct re_registers *regs;
898 int mstop;
899{
900 register char *fastmap = pbufp->fastmap;
901 register unsigned char *translate = (unsigned char *) pbufp->translate;
902 int total = size1 + size2;
903 int val;
904
905 /* Update the fastmap now if not correct already */
906 if (fastmap && !pbufp->fastmap_accurate)
907 re_compile_fastmap (pbufp);
908
909 /* Don't waste time in a long search for a pattern
910 that says it is anchored. */
911 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
912 && range > 0)
913 {
914 if (startpos > 0)
915 return -1;
916 else
917 range = 1;
918 }
919
920 while (1)
921 {
922 /* If a fastmap is supplied, skip quickly over characters
923 that cannot possibly be the start of a match.
924 Note, however, that if the pattern can possibly match
925 the null string, we must test it at each starting point
926 so that we take the first null string we get. */
927
928 if (fastmap && startpos < total && pbufp->can_be_null != 1)
929 {
930 if (range > 0)
931 {
932 register int lim = 0;
933 register unsigned char *p;
934 int irange = range;
935 if (startpos < size1 && startpos + range >= size1)
936 lim = range - (size1 - startpos);
937
938 p = ((unsigned char *)
939 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
940
941 if (translate)
942 {
943 while (range > lim && !fastmap[translate[*p++]])
944 range--;
945 }
946 else
947 {
948 while (range > lim && !fastmap[*p++])
949 range--;
950 }
951 startpos += irange - range;
952 }
953 else
954 {
955 register unsigned char c;
956 if (startpos >= size1)
957 c = string2[startpos - size1];
958 else
959 c = string1[startpos];
960 c &= 0xff;
961 if (translate ? !fastmap[translate[c]] : !fastmap[c])
962 goto advance;
963 }
964 }
965
966 if (range >= 0 && startpos == total
967 && fastmap && pbufp->can_be_null == 0)
968 return -1;
969
970 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
971 if (0 <= val)
972 {
973 if (val == -2)
974 return -2;
975 return startpos;
976 }
977
978#ifdef C_ALLOCA
979 alloca (0);
980#endif /* C_ALLOCA */
981
982 advance:
983 if (!range) break;
984 if (range > 0) range--, startpos++; else range++, startpos--;
985 }
986 return -1;
987}
988\f
989#ifndef emacs /* emacs never uses this */
990int
991re_match (pbufp, string, size, pos, regs)
992 struct re_pattern_buffer *pbufp;
993 char *string;
994 int size, pos;
995 struct re_registers *regs;
996{
997 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
998}
999#endif /* emacs */
1000
1001/* Maximum size of failure stack. Beyond this, overflow is an error. */
1002
1003int re_max_failures = 2000;
1004
9823e3f4 1005static int memcmp_translate();
dd3b648e
RP
1006/* Match the pattern described by PBUFP
1007 against data which is the virtual concatenation of STRING1 and STRING2.
1008 SIZE1 and SIZE2 are the sizes of the two data strings.
1009 Start the match at position POS.
1010 Do not consider matching past the position MSTOP.
1011
1012 If pbufp->fastmap is nonzero, then it had better be up to date.
1013
1014 The reason that the data to match are specified as two components
1015 which are to be regarded as concatenated
1016 is so this function can be used directly on the contents of an Emacs buffer.
1017
1018 -1 is returned if there is no match. -2 is returned if there is
1019 an error (such as match stack overflow). Otherwise the value is the length
1020 of the substring which was matched. */
1021
1022int
1023re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1024 struct re_pattern_buffer *pbufp;
1025 unsigned char *string1, *string2;
1026 int size1, size2;
1027 int pos;
1028 struct re_registers *regs;
1029 int mstop;
1030{
1031 register unsigned char *p = (unsigned char *) pbufp->buffer;
1032 register unsigned char *pend = p + pbufp->used;
1033 /* End of first string */
1034 unsigned char *end1;
1035 /* End of second string */
1036 unsigned char *end2;
1037 /* Pointer just past last char to consider matching */
1038 unsigned char *end_match_1, *end_match_2;
1039 register unsigned char *d, *dend;
1040 register int mcnt;
1041 unsigned char *translate = (unsigned char *) pbufp->translate;
1042
1043 /* Failure point stack. Each place that can handle a failure further down the line
1044 pushes a failure point on this stack. It consists of two char *'s.
1045 The first one pushed is where to resume scanning the pattern;
1046 the second pushed is where to resume scanning the strings.
1047 If the latter is zero, the failure point is a "dummy".
1048 If a failure happens and the innermost failure point is dormant,
1049 it discards that failure point and tries the next one. */
1050
1051 unsigned char *initial_stack[2 * NFAILURES];
1052 unsigned char **stackb = initial_stack;
1053 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1054
1055 /* Information on the "contents" of registers.
1056 These are pointers into the input strings; they record
1057 just what was matched (on this attempt) by some part of the pattern.
1058 The start_memory command stores the start of a register's contents
1059 and the stop_memory command stores the end.
1060
1061 At that point, regstart[regnum] points to the first character in the register,
1062 regend[regnum] points to the first character beyond the end of the register,
1063 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1064 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1065
1066 unsigned char *regstart[RE_NREGS];
1067 unsigned char *regend[RE_NREGS];
1068 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1069
1070 /* Set up pointers to ends of strings.
1071 Don't allow the second string to be empty unless both are empty. */
1072 if (!size2)
1073 {
1074 string2 = string1;
1075 size2 = size1;
1076 string1 = 0;
1077 size1 = 0;
1078 }
1079 end1 = string1 + size1;
1080 end2 = string2 + size2;
1081
1082 /* Compute where to stop matching, within the two strings */
1083 if (mstop <= size1)
1084 {
1085 end_match_1 = string1 + mstop;
1086 end_match_2 = string2;
1087 }
1088 else
1089 {
1090 end_match_1 = end1;
1091 end_match_2 = string2 + mstop - size1;
1092 }
1093
1094 /* Initialize \) text positions to -1
1095 to mark ones that no \( or \) has been seen for. */
1096
1097 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1098 regend[mcnt] = (unsigned char *) -1;
1099
1100 /* `p' scans through the pattern as `d' scans through the data.
1101 `dend' is the end of the input string that `d' points within.
1102 `d' is advanced into the following input string whenever necessary,
1103 but this happens before fetching;
1104 therefore, at the beginning of the loop,
1105 `d' can be pointing at the end of a string,
1106 but it cannot equal string2. */
1107
1108 if (pos <= size1)
1109 d = string1 + pos, dend = end_match_1;
1110 else
1111 d = string2 + pos - size1, dend = end_match_2;
1112
1113/* Write PREFETCH; just before fetching a character with *d. */
1114#define PREFETCH \
1115 while (d == dend) \
1116 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1117 d = string2; /* end of string1 => advance to string2. */ \
1118 dend = end_match_2; }
1119
1120 /* This loop loops over pattern commands.
1121 It exits by returning from the function if match is complete,
1122 or it drops through if match fails at this starting point in the input data. */
1123
1124 while (1)
1125 {
1126 if (p == pend)
1127 /* End of pattern means we have succeeded! */
1128 {
1129 /* If caller wants register contents data back, convert it to indices */
1130 if (regs)
1131 {
1132 regs->start[0] = pos;
1133 if (dend == end_match_1)
1134 regs->end[0] = d - string1;
1135 else
1136 regs->end[0] = d - string2 + size1;
1137 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1138 {
1139 if (regend[mcnt] == (unsigned char *) -1)
1140 {
1141 regs->start[mcnt] = -1;
1142 regs->end[mcnt] = -1;
1143 continue;
1144 }
1145 if (regstart_seg1[mcnt])
1146 regs->start[mcnt] = regstart[mcnt] - string1;
1147 else
1148 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1149 if (regend_seg1[mcnt])
1150 regs->end[mcnt] = regend[mcnt] - string1;
1151 else
1152 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1153 }
1154 }
1155 if (dend == end_match_1)
1156 return (d - string1 - pos);
1157 else
1158 return d - string2 + size1 - pos;
1159 }
1160
1161 /* Otherwise match next pattern command */
1162#ifdef SWITCH_ENUM_BUG
1163 switch ((int) ((enum regexpcode) *p++))
1164#else
1165 switch ((enum regexpcode) *p++)
1166#endif
1167 {
1168
1169 /* \( is represented by a start_memory, \) by a stop_memory.
1170 Both of those commands contain a "register number" argument.
1171 The text matched within the \( and \) is recorded under that number.
1172 Then, \<digit> turns into a `duplicate' command which
1173 is followed by the numeric value of <digit> as the register number. */
1174
1175 case start_memory:
1176 regstart[*p] = d;
1177 regstart_seg1[*p++] = (dend == end_match_1);
1178 break;
1179
1180 case stop_memory:
1181 regend[*p] = d;
1182 regend_seg1[*p++] = (dend == end_match_1);
1183 break;
1184
1185 case duplicate:
1186 {
1187 int regno = *p++; /* Get which register to match against */
1188 register unsigned char *d2, *dend2;
1189
1190 d2 = regstart[regno];
1191 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1192 ? regend[regno] : end_match_1);
1193 while (1)
1194 {
1195 /* Advance to next segment in register contents, if necessary */
1196 while (d2 == dend2)
1197 {
1198 if (dend2 == end_match_2) break;
1199 if (dend2 == regend[regno]) break;
1200 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1201 }
1202 /* At end of register contents => success */
1203 if (d2 == dend2) break;
1204
1205 /* Advance to next segment in data being matched, if necessary */
1206 PREFETCH;
1207
1208 /* mcnt gets # consecutive chars to compare */
1209 mcnt = dend - d;
1210 if (mcnt > dend2 - d2)
1211 mcnt = dend2 - d2;
1212 /* Compare that many; failure if mismatch, else skip them. */
9823e3f4 1213 if (translate ? memcmp_translate (d, d2, mcnt, translate) : memcmp (d, d2, mcnt))
dd3b648e
RP
1214 goto fail;
1215 d += mcnt, d2 += mcnt;
1216 }
1217 }
1218 break;
1219
1220 case anychar:
1221 /* fetch a data character */
1222 PREFETCH;
1223 /* Match anything but a newline. */
1224 if ((translate ? translate[*d++] : *d++) == '\n')
1225 goto fail;
1226 break;
1227
1228 case charset:
1229 case charset_not:
1230 {
1231 /* Nonzero for charset_not */
1232 int not = 0;
1233 register int c;
1234 if (*(p - 1) == (unsigned char) charset_not)
1235 not = 1;
1236
1237 /* fetch a data character */
1238 PREFETCH;
1239
1240 if (translate)
1241 c = translate [*d];
1242 else
1243 c = *d;
1244
1245 if (c < *p * BYTEWIDTH
1246 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1247 not = !not;
1248
1249 p += 1 + *p;
1250
1251 if (!not) goto fail;
1252 d++;
1253 break;
1254 }
1255
1256 case begline:
1257 if (d == string1 || d[-1] == '\n')
1258 break;
1259 goto fail;
1260
1261 case endline:
1262 if (d == end2
1263 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1264 break;
1265 goto fail;
1266
1267 /* "or" constructs ("|") are handled by starting each alternative
1268 with an on_failure_jump that points to the start of the next alternative.
1269 Each alternative except the last ends with a jump to the joining point.
1270 (Actually, each jump except for the last one really jumps
1271 to the following jump, because tensioning the jumps is a hassle.) */
1272
1273 /* The start of a stupid repeat has an on_failure_jump that points
1274 past the end of the repeat text.
1275 This makes a failure point so that, on failure to match a repetition,
1276 matching restarts past as many repetitions have been found
1277 with no way to fail and look for another one. */
1278
1279 /* A smart repeat is similar but loops back to the on_failure_jump
1280 so that each repetition makes another failure point. */
1281
1282 case on_failure_jump:
1283 if (stackp == stacke)
1284 {
1285 unsigned char **stackx;
1286 if (stacke - stackb > re_max_failures * 2)
1287 return -2;
1288 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1289 * sizeof (char *));
ade40d31 1290 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1291 stackp = stackx + (stackp - stackb);
1292 stacke = stackx + 2 * (stacke - stackb);
1293 stackb = stackx;
1294 }
1295 mcnt = *p++ & 0377;
1296 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1297 p++;
1298 *stackp++ = mcnt + p;
1299 *stackp++ = d;
1300 break;
1301
1302 /* The end of a smart repeat has an maybe_finalize_jump back.
1303 Change it either to a finalize_jump or an ordinary jump. */
1304
1305 case maybe_finalize_jump:
1306 mcnt = *p++ & 0377;
1307 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1308 p++;
1309 {
1310 register unsigned char *p2 = p;
1311 /* Compare what follows with the begining of the repeat.
1312 If we can establish that there is nothing that they would
1313 both match, we can change to finalize_jump */
1314 while (p2 != pend
1315 && (*p2 == (unsigned char) stop_memory
1316 || *p2 == (unsigned char) start_memory))
1317 p2++;
1318 if (p2 == pend)
1319 p[-3] = (unsigned char) finalize_jump;
1320 else if (*p2 == (unsigned char) exactn
1321 || *p2 == (unsigned char) endline)
1322 {
1323 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1324 register unsigned char *p1 = p + mcnt;
1325 /* p1[0] ... p1[2] are an on_failure_jump.
1326 Examine what follows that */
1327 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1328 p[-3] = (unsigned char) finalize_jump;
1329 else if (p1[3] == (unsigned char) charset
1330 || p1[3] == (unsigned char) charset_not)
1331 {
1332 int not = p1[3] == (unsigned char) charset_not;
1333 if (c < p1[4] * BYTEWIDTH
1334 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1335 not = !not;
1336 /* not is 1 if c would match */
1337 /* That means it is not safe to finalize */
1338 if (!not)
1339 p[-3] = (unsigned char) finalize_jump;
1340 }
1341 }
1342 }
1343 p -= 2;
1344 if (p[-1] != (unsigned char) finalize_jump)
1345 {
1346 p[-1] = (unsigned char) jump;
1347 goto nofinalize;
1348 }
1349
1350 /* The end of a stupid repeat has a finalize-jump
1351 back to the start, where another failure point will be made
1352 which will point after all the repetitions found so far. */
1353
1354 case finalize_jump:
1355 stackp -= 2;
1356
1357 case jump:
1358 nofinalize:
1359 mcnt = *p++ & 0377;
1360 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1361 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1362 break;
1363
1364 case dummy_failure_jump:
1365 if (stackp == stacke)
1366 {
1367 unsigned char **stackx
1368 = (unsigned char **) alloca (2 * (stacke - stackb)
1369 * sizeof (char *));
ade40d31 1370 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1371 stackp = stackx + (stackp - stackb);
1372 stacke = stackx + 2 * (stacke - stackb);
1373 stackb = stackx;
1374 }
1375 *stackp++ = 0;
1376 *stackp++ = 0;
1377 goto nofinalize;
1378
1379 case wordbound:
1380 if (d == string1 /* Points to first char */
1381 || d == end2 /* Points to end */
1382 || (d == end1 && size2 == 0)) /* Points to end */
1383 break;
1384 if ((SYNTAX (d[-1]) == Sword)
1385 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1386 break;
1387 goto fail;
1388
1389 case notwordbound:
1390 if (d == string1 /* Points to first char */
1391 || d == end2 /* Points to end */
1392 || (d == end1 && size2 == 0)) /* Points to end */
1393 goto fail;
1394 if ((SYNTAX (d[-1]) == Sword)
1395 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1396 goto fail;
1397 break;
1398
1399 case wordbeg:
1400 if (d == end2 /* Points to end */
1401 || (d == end1 && size2 == 0) /* Points to end */
1402 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1403 goto fail;
1404 if (d == string1 /* Points to first char */
1405 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1406 break;
1407 goto fail;
1408
1409 case wordend:
1410 if (d == string1 /* Points to first char */
1411 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1412 goto fail;
1413 if (d == end2 /* Points to end */
1414 || (d == end1 && size2 == 0) /* Points to end */
1415 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1416 break;
1417 goto fail;
1418
1419#ifdef emacs
1420 case before_dot:
1421 if (((d - string2 <= (unsigned) size2)
1422 ? d - bf_p2 : d - bf_p1)
1423 <= point)
1424 goto fail;
1425 break;
1426
1427 case at_dot:
1428 if (((d - string2 <= (unsigned) size2)
1429 ? d - bf_p2 : d - bf_p1)
1430 == point)
1431 goto fail;
1432 break;
1433
1434 case after_dot:
1435 if (((d - string2 <= (unsigned) size2)
1436 ? d - bf_p2 : d - bf_p1)
1437 >= point)
1438 goto fail;
1439 break;
1440
1441 case wordchar:
1442 mcnt = (int) Sword;
1443 goto matchsyntax;
1444
1445 case syntaxspec:
1446 mcnt = *p++;
1447 matchsyntax:
1448 PREFETCH;
1449 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1450 break;
1451
1452 case notwordchar:
1453 mcnt = (int) Sword;
1454 goto matchnotsyntax;
1455
1456 case notsyntaxspec:
1457 mcnt = *p++;
1458 matchnotsyntax:
1459 PREFETCH;
1460 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1461 break;
1462#else
1463 case wordchar:
1464 PREFETCH;
1465 if (SYNTAX (*d++) == 0) goto fail;
1466 break;
1467
1468 case notwordchar:
1469 PREFETCH;
1470 if (SYNTAX (*d++) != 0) goto fail;
1471 break;
1472#endif /* not emacs */
1473
1474 case begbuf:
1475 if (d == string1) /* Note, d cannot equal string2 */
1476 break; /* unless string1 == string2. */
1477 goto fail;
1478
1479 case endbuf:
1480 if (d == end2 || (d == end1 && size2 == 0))
1481 break;
1482 goto fail;
1483
1484 case exactn:
1485 /* Match the next few pattern characters exactly.
1486 mcnt is how many characters to match. */
1487 mcnt = *p++;
1488 if (translate)
1489 {
1490 do
1491 {
1492 PREFETCH;
1493 if (translate[*d++] != *p++) goto fail;
1494 }
1495 while (--mcnt);
1496 }
1497 else
1498 {
1499 do
1500 {
1501 PREFETCH;
1502 if (*d++ != *p++) goto fail;
1503 }
1504 while (--mcnt);
1505 }
1506 break;
1507 }
1508 continue; /* Successfully matched one pattern command; keep matching */
1509
1510 /* Jump here if any matching operation fails. */
1511 fail:
1512 if (stackp != stackb)
1513 /* A restart point is known. Restart there and pop it. */
1514 {
1515 if (!stackp[-2])
1516 { /* If innermost failure point is dormant, flush it and keep looking */
1517 stackp -= 2;
1518 goto fail;
1519 }
1520 d = *--stackp;
1521 p = *--stackp;
1522 if (d >= string1 && d <= end1)
1523 dend = end_match_1;
1524 }
1525 else break; /* Matching at this starting point really fails! */
1526 }
1527 return -1; /* Failure to match */
1528}
1529
1530static int
9823e3f4 1531memcmp_translate (s1, s2, len, translate)
dd3b648e
RP
1532 unsigned char *s1, *s2;
1533 register int len;
1534 unsigned char *translate;
1535{
1536 register unsigned char *p1 = s1, *p2 = s2;
1537 while (len)
1538 {
1539 if (translate [*p1++] != translate [*p2++]) return 1;
1540 len--;
1541 }
1542 return 0;
1543}
1544\f
1545/* Entry points compatible with bsd4.2 regex library */
1546
1547#ifndef emacs
1548
1549static struct re_pattern_buffer re_comp_buf;
1550
1551char *
1552re_comp (s)
ba47c66a 1553 const char *s;
dd3b648e
RP
1554{
1555 if (!s)
1556 {
1557 if (!re_comp_buf.buffer)
1558 return "No previous regular expression";
1559 return 0;
1560 }
1561
1562 if (!re_comp_buf.buffer)
1563 {
1564 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1565 return "Memory exhausted";
1566 re_comp_buf.allocated = 200;
1567 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1568 return "Memory exhausted";
1569 }
1570 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1571}
1572
1573int
1574re_exec (s)
1575 char *s;
1576{
1577 int len = strlen (s);
1578 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1579}
1580
1581#endif /* emacs */
1582\f
1583#ifdef test
1584
1585#include <stdio.h>
1586
1587/* Indexed by a character, gives the upper case equivalent of the character */
1588
1589static char upcase[0400] =
1590 { 000, 001, 002, 003, 004, 005, 006, 007,
1591 010, 011, 012, 013, 014, 015, 016, 017,
1592 020, 021, 022, 023, 024, 025, 026, 027,
1593 030, 031, 032, 033, 034, 035, 036, 037,
1594 040, 041, 042, 043, 044, 045, 046, 047,
1595 050, 051, 052, 053, 054, 055, 056, 057,
1596 060, 061, 062, 063, 064, 065, 066, 067,
1597 070, 071, 072, 073, 074, 075, 076, 077,
1598 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1599 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1600 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1601 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1602 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1603 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1604 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1605 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1606 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1607 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1608 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1609 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1610 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1611 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1612 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1613 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1614 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1615 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1616 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1617 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1618 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1619 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1620 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1621 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1622 };
1623
1624main (argc, argv)
1625 int argc;
1626 char **argv;
1627{
1628 char pat[80];
1629 struct re_pattern_buffer buf;
1630 int i;
1631 char c;
1632 char fastmap[(1 << BYTEWIDTH)];
1633
1634 /* Allow a command argument to specify the style of syntax. */
1635 if (argc > 1)
1636 obscure_syntax = atoi (argv[1]);
1637
1638 buf.allocated = 40;
1639 buf.buffer = (char *) malloc (buf.allocated);
1640 buf.fastmap = fastmap;
1641 buf.translate = upcase;
1642
1643 while (1)
1644 {
1645 gets (pat);
1646
1647 if (*pat)
1648 {
1649 re_compile_pattern (pat, strlen(pat), &buf);
1650
1651 for (i = 0; i < buf.used; i++)
1652 printchar (buf.buffer[i]);
1653
199b2450 1654 putchar_unfiltered ('\n');
dd3b648e 1655
199b2450 1656 printf_unfiltered ("%d allocated, %d used.\n", buf.allocated, buf.used);
dd3b648e
RP
1657
1658 re_compile_fastmap (&buf);
199b2450 1659 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1660 for (i = 0; i < (1 << BYTEWIDTH); i++)
1661 if (fastmap[i]) printchar (i);
199b2450 1662 putchar_unfiltered ('\n');
dd3b648e
RP
1663 }
1664
1665 gets (pat); /* Now read the string to match against */
1666
1667 i = re_match (&buf, pat, strlen (pat), 0, 0);
199b2450 1668 printf_unfiltered ("Match value %d.\n", i);
dd3b648e
RP
1669 }
1670}
1671
1672#ifdef NOTDEF
1673print_buf (bufp)
1674 struct re_pattern_buffer *bufp;
1675{
1676 int i;
1677
199b2450 1678 printf_unfiltered ("buf is :\n----------------\n");
dd3b648e
RP
1679 for (i = 0; i < bufp->used; i++)
1680 printchar (bufp->buffer[i]);
1681
199b2450 1682 printf_unfiltered ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
dd3b648e 1683
199b2450 1684 printf_unfiltered ("Allowed by fastmap: ");
dd3b648e
RP
1685 for (i = 0; i < (1 << BYTEWIDTH); i++)
1686 if (bufp->fastmap[i])
1687 printchar (i);
199b2450 1688 printf_unfiltered ("\nAllowed by translate: ");
dd3b648e
RP
1689 if (bufp->translate)
1690 for (i = 0; i < (1 << BYTEWIDTH); i++)
1691 if (bufp->translate[i])
1692 printchar (i);
199b2450
TL
1693 printf_unfiltered ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1694 printf_unfiltered ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
dd3b648e
RP
1695}
1696#endif
1697
1698printchar (c)
1699 char c;
1700{
1701 if (c < 041 || c >= 0177)
1702 {
199b2450
TL
1703 putchar_unfiltered ('\\');
1704 putchar_unfiltered (((c >> 6) & 3) + '0');
1705 putchar_unfiltered (((c >> 3) & 7) + '0');
1706 putchar_unfiltered ((c & 7) + '0');
dd3b648e
RP
1707 }
1708 else
199b2450 1709 putchar_unfiltered (c);
dd3b648e
RP
1710}
1711
1712error (string)
1713 char *string;
1714{
199b2450 1715 puts_unfiltered (string);
dd3b648e
RP
1716 exit (1);
1717}
1718
1719#endif /* test */
This page took 0.212878 seconds and 4 git commands to generate.