* symtab.h (struct linetable), xcoffread.c (arrange_linetable):
[deliverable/binutils-gdb.git] / gdb / regex.c
CommitLineData
dd3b648e
RP
1/* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3
99a7de40
JG
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2 of the License, or
7(at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
dd3b648e
RP
17
18/* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
22
23#ifdef emacs
24
25/* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
27
28#include "config.h"
29#include "lisp.h"
30#include "buffer.h"
31#include "syntax.h"
32
33#else /* not emacs */
34
dd3b648e
RP
35#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
36#define bzero(s,n) memset((s),0,(n))
dd3b648e
RP
37
38/* Make alloca work the best possible way. */
39#ifdef __GNUC__
40#define alloca __builtin_alloca
41#else
42#ifdef sparc
43#include <alloca.h>
44#endif
45#endif
46
47/*
48 * Define the syntax stuff, so we can do the \<...\> things.
49 */
50
51#ifndef Sword /* must be non-zero in some of the tests below... */
52#define Sword 1
53#endif
54
55#define SYNTAX(c) re_syntax_table[c]
56
57#ifdef SYNTAX_TABLE
58
59char *re_syntax_table;
60
61#else
62
63static char re_syntax_table[256];
64
65static void
66init_syntax_once ()
67{
68 register int c;
69 static int done = 0;
70
71 if (done)
72 return;
73
74 bzero (re_syntax_table, sizeof re_syntax_table);
75
76 for (c = 'a'; c <= 'z'; c++)
77 re_syntax_table[c] = Sword;
78
79 for (c = 'A'; c <= 'Z'; c++)
80 re_syntax_table[c] = Sword;
81
82 for (c = '0'; c <= '9'; c++)
83 re_syntax_table[c] = Sword;
84
85 done = 1;
86}
87
88#endif /* SYNTAX_TABLE */
89#endif /* not emacs */
90
91#include "regex.h"
92
93/* Number of failure points to allocate space for initially,
94 when matching. If this number is exceeded, more space is allocated,
95 so it is not a hard limit. */
96
97#ifndef NFAILURES
98#define NFAILURES 80
99#endif /* NFAILURES */
100
101/* width of a byte in bits */
102
103#define BYTEWIDTH 8
104
105#ifndef SIGN_EXTEND_CHAR
106#define SIGN_EXTEND_CHAR(x) (x)
107#endif
108\f
109static int obscure_syntax = 0;
110
111/* Specify the precise syntax of regexp for compilation.
112 This provides for compatibility for various utilities
113 which historically have different, incompatible syntaxes.
114
115 The argument SYNTAX is a bit-mask containing the two bits
116 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
117
118int
119re_set_syntax (syntax)
51b57ded 120 int syntax;
dd3b648e
RP
121{
122 int ret;
123
124 ret = obscure_syntax;
125 obscure_syntax = syntax;
126 return ret;
127}
128\f
129/* re_compile_pattern takes a regular-expression string
130 and converts it into a buffer full of byte commands for matching.
131
132 PATTERN is the address of the pattern string
133 SIZE is the length of it.
134 BUFP is a struct re_pattern_buffer * which points to the info
135 on where to store the byte commands.
136 This structure contains a char * which points to the
137 actual space, which should have been obtained with malloc.
138 re_compile_pattern may use realloc to grow the buffer space.
139
140 The number of bytes of commands can be found out by looking in
141 the struct re_pattern_buffer that bufp pointed to,
142 after re_compile_pattern returns.
143*/
144
145#define PATPUSH(ch) (*b++ = (char) (ch))
146
147#define PATFETCH(c) \
148 {if (p == pend) goto end_of_pattern; \
149 c = * (unsigned char *) p++; \
150 if (translate) c = translate[c]; }
151
152#define PATFETCH_RAW(c) \
153 {if (p == pend) goto end_of_pattern; \
154 c = * (unsigned char *) p++; }
155
156#define PATUNFETCH p--
157
158#define EXTEND_BUFFER \
159 { char *old_buffer = bufp->buffer; \
160 if (bufp->allocated == (1<<16)) goto too_big; \
161 bufp->allocated *= 2; \
162 if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
163 if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
164 goto memory_exhausted; \
165 c = bufp->buffer - old_buffer; \
166 b += c; \
167 if (fixup_jump) \
168 fixup_jump += c; \
169 if (laststart) \
170 laststart += c; \
171 begalt += c; \
172 if (pending_exact) \
173 pending_exact += c; \
174 }
175
50e0dc41 176static void store_jump (), insert_jump ();
dd3b648e
RP
177
178char *
179re_compile_pattern (pattern, size, bufp)
180 char *pattern;
181 int size;
182 struct re_pattern_buffer *bufp;
183{
184 register char *b = bufp->buffer;
185 register char *p = pattern;
186 char *pend = pattern + size;
187 register unsigned c, c1;
188 char *p1;
189 unsigned char *translate = (unsigned char *) bufp->translate;
190
191 /* address of the count-byte of the most recently inserted "exactn" command.
192 This makes it possible to tell whether a new exact-match character
193 can be added to that command or requires a new "exactn" command. */
194
195 char *pending_exact = 0;
196
197 /* address of the place where a forward-jump should go
198 to the end of the containing expression.
199 Each alternative of an "or", except the last, ends with a forward-jump
200 of this sort. */
201
202 char *fixup_jump = 0;
203
204 /* address of start of the most recently finished expression.
205 This tells postfix * where to find the start of its operand. */
206
207 char *laststart = 0;
208
209 /* In processing a repeat, 1 means zero matches is allowed */
210
211 char zero_times_ok;
212
213 /* In processing a repeat, 1 means many matches is allowed */
214
215 char many_times_ok;
216
217 /* address of beginning of regexp, or inside of last \( */
218
219 char *begalt = b;
220
221 /* Stack of information saved by \( and restored by \).
222 Four stack elements are pushed by each \(:
223 First, the value of b.
224 Second, the value of fixup_jump.
225 Third, the value of regnum.
226 Fourth, the value of begalt. */
227
228 int stackb[40];
229 int *stackp = stackb;
230 int *stacke = stackb + 40;
231 int *stackt;
232
233 /* Counts \('s as they are encountered. Remembered for the matching \),
234 where it becomes the "register number" to put in the stop_memory command */
235
236 int regnum = 1;
237
238 bufp->fastmap_accurate = 0;
239
240#ifndef emacs
241#ifndef SYNTAX_TABLE
242 /*
243 * Initialize the syntax table.
244 */
245 init_syntax_once();
246#endif
247#endif
248
249 if (bufp->allocated == 0)
250 {
251 bufp->allocated = 28;
252 if (bufp->buffer)
253 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
254 bufp->buffer = (char *) realloc (bufp->buffer, 28);
255 else
256 /* Caller did not allocate a buffer. Do it for him */
257 bufp->buffer = (char *) malloc (28);
258 if (!bufp->buffer) goto memory_exhausted;
259 begalt = b = bufp->buffer;
260 }
261
262 while (p != pend)
263 {
264 if (b - bufp->buffer > bufp->allocated - 10)
265 /* Note that EXTEND_BUFFER clobbers c */
266 EXTEND_BUFFER;
267
268 PATFETCH (c);
269
270 switch (c)
271 {
272 case '$':
273 if (obscure_syntax & RE_TIGHT_VBAR)
274 {
275 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
276 goto normal_char;
277 /* Make operand of last vbar end before this `$'. */
278 if (fixup_jump)
279 store_jump (fixup_jump, jump, b);
280 fixup_jump = 0;
281 PATPUSH (endline);
282 break;
283 }
284
285 /* $ means succeed if at end of line, but only in special contexts.
286 If randomly in the middle of a pattern, it is a normal character. */
287 if (p == pend || *p == '\n'
288 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
289 || (obscure_syntax & RE_NO_BK_PARENS
290 ? *p == ')'
291 : *p == '\\' && p[1] == ')')
292 || (obscure_syntax & RE_NO_BK_VBAR
293 ? *p == '|'
294 : *p == '\\' && p[1] == '|'))
295 {
296 PATPUSH (endline);
297 break;
298 }
299 goto normal_char;
300
301 case '^':
302 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
303
304 if (laststart && p[-2] != '\n'
305 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
306 goto normal_char;
307 if (obscure_syntax & RE_TIGHT_VBAR)
308 {
309 if (p != pattern + 1
310 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
311 goto normal_char;
312 PATPUSH (begline);
313 begalt = b;
314 }
315 else
316 PATPUSH (begline);
317 break;
318
319 case '+':
320 case '?':
321 if (obscure_syntax & RE_BK_PLUS_QM)
322 goto normal_char;
323 handle_plus:
324 case '*':
325 /* If there is no previous pattern, char not special. */
326 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
327 goto normal_char;
328 /* If there is a sequence of repetition chars,
329 collapse it down to equivalent to just one. */
330 zero_times_ok = 0;
331 many_times_ok = 0;
332 while (1)
333 {
334 zero_times_ok |= c != '+';
335 many_times_ok |= c != '?';
336 if (p == pend)
337 break;
338 PATFETCH (c);
339 if (c == '*')
340 ;
341 else if (!(obscure_syntax & RE_BK_PLUS_QM)
342 && (c == '+' || c == '?'))
343 ;
344 else if ((obscure_syntax & RE_BK_PLUS_QM)
345 && c == '\\')
346 {
347 int c1;
348 PATFETCH (c1);
349 if (!(c1 == '+' || c1 == '?'))
350 {
351 PATUNFETCH;
352 PATUNFETCH;
353 break;
354 }
355 c = c1;
356 }
357 else
358 {
359 PATUNFETCH;
360 break;
361 }
362 }
363
364 /* Star, etc. applied to an empty pattern is equivalent
365 to an empty pattern. */
366 if (!laststart)
367 break;
368
369 /* Now we know whether 0 matches is allowed,
370 and whether 2 or more matches is allowed. */
371 if (many_times_ok)
372 {
373 /* If more than one repetition is allowed,
374 put in a backward jump at the end. */
375 store_jump (b, maybe_finalize_jump, laststart - 3);
376 b += 3;
377 }
378 insert_jump (on_failure_jump, laststart, b + 3, b);
379 pending_exact = 0;
380 b += 3;
381 if (!zero_times_ok)
382 {
383 /* At least one repetition required: insert before the loop
384 a skip over the initial on-failure-jump instruction */
385 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
386 b += 3;
387 }
388 break;
389
390 case '.':
391 laststart = b;
392 PATPUSH (anychar);
393 break;
394
395 case '[':
396 while (b - bufp->buffer
397 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
398 /* Note that EXTEND_BUFFER clobbers c */
399 EXTEND_BUFFER;
400
401 laststart = b;
402 if (*p == '^')
403 PATPUSH (charset_not), p++;
404 else
405 PATPUSH (charset);
406 p1 = p;
407
408 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
409 /* Clear the whole map */
410 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
411 /* Read in characters and ranges, setting map bits */
412 while (1)
413 {
414 PATFETCH (c);
415 if (c == ']' && p != p1 + 1) break;
416 if (*p == '-' && p[1] != ']')
417 {
418 PATFETCH (c1);
419 PATFETCH (c1);
420 while (c <= c1)
421 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
422 }
423 else
424 {
425 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
426 }
427 }
428 /* Discard any bitmap bytes that are all 0 at the end of the map.
429 Decrement the map-length byte too. */
430 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
431 b[-1]--;
432 b += b[-1];
433 break;
434
435 case '(':
436 if (! (obscure_syntax & RE_NO_BK_PARENS))
437 goto normal_char;
438 else
439 goto handle_open;
440
441 case ')':
442 if (! (obscure_syntax & RE_NO_BK_PARENS))
443 goto normal_char;
444 else
445 goto handle_close;
446
447 case '\n':
448 if (! (obscure_syntax & RE_NEWLINE_OR))
449 goto normal_char;
450 else
451 goto handle_bar;
452
453 case '|':
454 if (! (obscure_syntax & RE_NO_BK_VBAR))
455 goto normal_char;
456 else
457 goto handle_bar;
458
459 case '\\':
460 if (p == pend) goto invalid_pattern;
461 PATFETCH_RAW (c);
462 switch (c)
463 {
464 case '(':
465 if (obscure_syntax & RE_NO_BK_PARENS)
466 goto normal_backsl;
467 handle_open:
468 if (stackp == stacke) goto nesting_too_deep;
469 if (regnum < RE_NREGS)
470 {
471 PATPUSH (start_memory);
472 PATPUSH (regnum);
473 }
474 *stackp++ = b - bufp->buffer;
475 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
476 *stackp++ = regnum++;
477 *stackp++ = begalt - bufp->buffer;
478 fixup_jump = 0;
479 laststart = 0;
480 begalt = b;
481 break;
482
483 case ')':
484 if (obscure_syntax & RE_NO_BK_PARENS)
485 goto normal_backsl;
486 handle_close:
487 if (stackp == stackb) goto unmatched_close;
488 begalt = *--stackp + bufp->buffer;
489 if (fixup_jump)
490 store_jump (fixup_jump, jump, b);
491 if (stackp[-1] < RE_NREGS)
492 {
493 PATPUSH (stop_memory);
494 PATPUSH (stackp[-1]);
495 }
496 stackp -= 2;
497 fixup_jump = 0;
498 if (*stackp)
499 fixup_jump = *stackp + bufp->buffer - 1;
500 laststart = *--stackp + bufp->buffer;
501 break;
502
503 case '|':
504 if (obscure_syntax & RE_NO_BK_VBAR)
505 goto normal_backsl;
506 handle_bar:
507 insert_jump (on_failure_jump, begalt, b + 6, b);
508 pending_exact = 0;
509 b += 3;
510 if (fixup_jump)
511 store_jump (fixup_jump, jump, b);
512 fixup_jump = b;
513 b += 3;
514 laststart = 0;
515 begalt = b;
516 break;
517
518#ifdef emacs
519 case '=':
520 PATPUSH (at_dot);
521 break;
522
523 case 's':
524 laststart = b;
525 PATPUSH (syntaxspec);
526 PATFETCH (c);
527 PATPUSH (syntax_spec_code[c]);
528 break;
529
530 case 'S':
531 laststart = b;
532 PATPUSH (notsyntaxspec);
533 PATFETCH (c);
534 PATPUSH (syntax_spec_code[c]);
535 break;
536#endif /* emacs */
537
538 case 'w':
539 laststart = b;
540 PATPUSH (wordchar);
541 break;
542
543 case 'W':
544 laststart = b;
545 PATPUSH (notwordchar);
546 break;
547
548 case '<':
549 PATPUSH (wordbeg);
550 break;
551
552 case '>':
553 PATPUSH (wordend);
554 break;
555
556 case 'b':
557 PATPUSH (wordbound);
558 break;
559
560 case 'B':
561 PATPUSH (notwordbound);
562 break;
563
564 case '`':
565 PATPUSH (begbuf);
566 break;
567
568 case '\'':
569 PATPUSH (endbuf);
570 break;
571
572 case '1':
573 case '2':
574 case '3':
575 case '4':
576 case '5':
577 case '6':
578 case '7':
579 case '8':
580 case '9':
581 c1 = c - '0';
582 if (c1 >= regnum)
583 goto normal_char;
584 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
585 if (*stackt == c1)
586 goto normal_char;
587 laststart = b;
588 PATPUSH (duplicate);
589 PATPUSH (c1);
590 break;
591
592 case '+':
593 case '?':
594 if (obscure_syntax & RE_BK_PLUS_QM)
595 goto handle_plus;
596
597 default:
598 normal_backsl:
599 /* You might think it would be useful for \ to mean
600 not to translate; but if we don't translate it
601 it will never match anything. */
602 if (translate) c = translate[c];
603 goto normal_char;
604 }
605 break;
606
607 default:
608 normal_char:
609 if (!pending_exact || pending_exact + *pending_exact + 1 != b
610 || *pending_exact == 0177 || *p == '*' || *p == '^'
611 || ((obscure_syntax & RE_BK_PLUS_QM)
612 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
613 : (*p == '+' || *p == '?')))
614 {
615 laststart = b;
616 PATPUSH (exactn);
617 pending_exact = b;
618 PATPUSH (0);
619 }
620 PATPUSH (c);
621 (*pending_exact)++;
622 }
623 }
624
625 if (fixup_jump)
626 store_jump (fixup_jump, jump, b);
627
628 if (stackp != stackb) goto unmatched_open;
629
630 bufp->used = b - bufp->buffer;
631 return 0;
632
633 invalid_pattern:
634 return "Invalid regular expression";
635
636 unmatched_open:
637 return "Unmatched \\(";
638
639 unmatched_close:
640 return "Unmatched \\)";
641
642 end_of_pattern:
643 return "Premature end of regular expression";
644
645 nesting_too_deep:
646 return "Nesting too deep";
647
648 too_big:
649 return "Regular expression too big";
650
651 memory_exhausted:
652 return "Memory exhausted";
653}
654
655/* Store where `from' points a jump operation to jump to where `to' points.
656 `opcode' is the opcode to store. */
657
50e0dc41 658static void
dd3b648e
RP
659store_jump (from, opcode, to)
660 char *from, *to;
661 char opcode;
662{
663 from[0] = opcode;
664 from[1] = (to - (from + 3)) & 0377;
665 from[2] = (to - (from + 3)) >> 8;
666}
667
668/* Open up space at char FROM, and insert there a jump to TO.
669 CURRENT_END gives te end of the storage no in use,
670 so we know how much data to copy up.
671 OP is the opcode of the jump to insert.
672
673 If you call this function, you must zero out pending_exact. */
674
50e0dc41 675static void
dd3b648e
RP
676insert_jump (op, from, to, current_end)
677 char op;
678 char *from, *to, *current_end;
679{
680 register char *pto = current_end + 3;
681 register char *pfrom = current_end;
682 while (pfrom != from)
683 *--pto = *--pfrom;
684 store_jump (from, op, to);
685}
686\f
687/* Given a pattern, compute a fastmap from it.
688 The fastmap records which of the (1 << BYTEWIDTH) possible characters
689 can start a string that matches the pattern.
690 This fastmap is used by re_search to skip quickly over totally implausible text.
691
692 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
693 as bufp->fastmap.
694 The other components of bufp describe the pattern to be used. */
695
696void
697re_compile_fastmap (bufp)
698 struct re_pattern_buffer *bufp;
699{
700 unsigned char *pattern = (unsigned char *) bufp->buffer;
701 int size = bufp->used;
702 register char *fastmap = bufp->fastmap;
703 register unsigned char *p = pattern;
704 register unsigned char *pend = pattern + size;
51b57ded 705 register int j;
dd3b648e
RP
706 unsigned char *translate = (unsigned char *) bufp->translate;
707
708 unsigned char *stackb[NFAILURES];
709 unsigned char **stackp = stackb;
710
711 bzero (fastmap, (1 << BYTEWIDTH));
712 bufp->fastmap_accurate = 1;
713 bufp->can_be_null = 0;
714
715 while (p)
716 {
717 if (p == pend)
718 {
719 bufp->can_be_null = 1;
720 break;
721 }
722#ifdef SWITCH_ENUM_BUG
723 switch ((int) ((enum regexpcode) *p++))
724#else
725 switch ((enum regexpcode) *p++)
726#endif
727 {
728 case exactn:
729 if (translate)
730 fastmap[translate[p[1]]] = 1;
731 else
732 fastmap[p[1]] = 1;
733 break;
734
735 case begline:
736 case before_dot:
737 case at_dot:
738 case after_dot:
739 case begbuf:
740 case endbuf:
741 case wordbound:
742 case notwordbound:
743 case wordbeg:
744 case wordend:
745 continue;
746
747 case endline:
748 if (translate)
749 fastmap[translate['\n']] = 1;
750 else
751 fastmap['\n'] = 1;
752 if (bufp->can_be_null != 1)
753 bufp->can_be_null = 2;
754 break;
755
756 case finalize_jump:
757 case maybe_finalize_jump:
758 case jump:
759 case dummy_failure_jump:
760 bufp->can_be_null = 1;
761 j = *p++ & 0377;
762 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
763 p += j + 1; /* The 1 compensates for missing ++ above */
764 if (j > 0)
765 continue;
766 /* Jump backward reached implies we just went through
767 the body of a loop and matched nothing.
768 Opcode jumped to should be an on_failure_jump.
769 Just treat it like an ordinary jump.
770 For a * loop, it has pushed its failure point already;
771 if so, discard that as redundant. */
772 if ((enum regexpcode) *p != on_failure_jump)
773 continue;
774 p++;
775 j = *p++ & 0377;
776 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
777 p += j + 1; /* The 1 compensates for missing ++ above */
778 if (stackp != stackb && *stackp == p)
779 stackp--;
780 continue;
781
782 case on_failure_jump:
783 j = *p++ & 0377;
784 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
785 p++;
786 *++stackp = p + j;
787 continue;
788
789 case start_memory:
790 case stop_memory:
791 p++;
792 continue;
793
794 case duplicate:
795 bufp->can_be_null = 1;
796 fastmap['\n'] = 1;
797 case anychar:
798 for (j = 0; j < (1 << BYTEWIDTH); j++)
799 if (j != '\n')
800 fastmap[j] = 1;
801 if (bufp->can_be_null)
802 return;
803 /* Don't return; check the alternative paths
804 so we can set can_be_null if appropriate. */
805 break;
806
807 case wordchar:
808 for (j = 0; j < (1 << BYTEWIDTH); j++)
809 if (SYNTAX (j) == Sword)
810 fastmap[j] = 1;
811 break;
812
813 case notwordchar:
814 for (j = 0; j < (1 << BYTEWIDTH); j++)
815 if (SYNTAX (j) != Sword)
816 fastmap[j] = 1;
817 break;
818
819#ifdef emacs
820 case syntaxspec:
821 k = *p++;
822 for (j = 0; j < (1 << BYTEWIDTH); j++)
823 if (SYNTAX (j) == (enum syntaxcode) k)
824 fastmap[j] = 1;
825 break;
826
827 case notsyntaxspec:
828 k = *p++;
829 for (j = 0; j < (1 << BYTEWIDTH); j++)
830 if (SYNTAX (j) != (enum syntaxcode) k)
831 fastmap[j] = 1;
832 break;
833#endif /* emacs */
834
835 case charset:
836 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
837 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
838 {
839 if (translate)
840 fastmap[translate[j]] = 1;
841 else
842 fastmap[j] = 1;
843 }
844 break;
845
846 case charset_not:
847 /* Chars beyond end of map must be allowed */
848 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
849 if (translate)
850 fastmap[translate[j]] = 1;
851 else
852 fastmap[j] = 1;
853
854 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
855 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
856 {
857 if (translate)
858 fastmap[translate[j]] = 1;
859 else
860 fastmap[j] = 1;
861 }
862 break;
863 }
864
865 /* Get here means we have successfully found the possible starting characters
866 of one path of the pattern. We need not follow this path any farther.
867 Instead, look at the next alternative remembered in the stack. */
868 if (stackp != stackb)
869 p = *stackp--;
870 else
871 break;
872 }
873}
874\f
875/* Like re_search_2, below, but only one string is specified. */
876
877int
878re_search (pbufp, string, size, startpos, range, regs)
879 struct re_pattern_buffer *pbufp;
880 char *string;
881 int size, startpos, range;
882 struct re_registers *regs;
883{
884 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
885}
886
887/* Like re_match_2 but tries first a match starting at index STARTPOS,
888 then at STARTPOS + 1, and so on.
889 RANGE is the number of places to try before giving up.
890 If RANGE is negative, the starting positions tried are
891 STARTPOS, STARTPOS - 1, etc.
892 It is up to the caller to make sure that range is not so large
893 as to take the starting position outside of the input strings.
894
895The value returned is the position at which the match was found,
896 or -1 if no match was found,
897 or -2 if error (such as failure stack overflow). */
898
899int
900re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
901 struct re_pattern_buffer *pbufp;
902 char *string1, *string2;
903 int size1, size2;
904 int startpos;
905 register int range;
906 struct re_registers *regs;
907 int mstop;
908{
909 register char *fastmap = pbufp->fastmap;
910 register unsigned char *translate = (unsigned char *) pbufp->translate;
911 int total = size1 + size2;
912 int val;
913
914 /* Update the fastmap now if not correct already */
915 if (fastmap && !pbufp->fastmap_accurate)
916 re_compile_fastmap (pbufp);
917
918 /* Don't waste time in a long search for a pattern
919 that says it is anchored. */
920 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
921 && range > 0)
922 {
923 if (startpos > 0)
924 return -1;
925 else
926 range = 1;
927 }
928
929 while (1)
930 {
931 /* If a fastmap is supplied, skip quickly over characters
932 that cannot possibly be the start of a match.
933 Note, however, that if the pattern can possibly match
934 the null string, we must test it at each starting point
935 so that we take the first null string we get. */
936
937 if (fastmap && startpos < total && pbufp->can_be_null != 1)
938 {
939 if (range > 0)
940 {
941 register int lim = 0;
942 register unsigned char *p;
943 int irange = range;
944 if (startpos < size1 && startpos + range >= size1)
945 lim = range - (size1 - startpos);
946
947 p = ((unsigned char *)
948 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
949
950 if (translate)
951 {
952 while (range > lim && !fastmap[translate[*p++]])
953 range--;
954 }
955 else
956 {
957 while (range > lim && !fastmap[*p++])
958 range--;
959 }
960 startpos += irange - range;
961 }
962 else
963 {
964 register unsigned char c;
965 if (startpos >= size1)
966 c = string2[startpos - size1];
967 else
968 c = string1[startpos];
969 c &= 0xff;
970 if (translate ? !fastmap[translate[c]] : !fastmap[c])
971 goto advance;
972 }
973 }
974
975 if (range >= 0 && startpos == total
976 && fastmap && pbufp->can_be_null == 0)
977 return -1;
978
979 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
980 if (0 <= val)
981 {
982 if (val == -2)
983 return -2;
984 return startpos;
985 }
986
987#ifdef C_ALLOCA
988 alloca (0);
989#endif /* C_ALLOCA */
990
991 advance:
992 if (!range) break;
993 if (range > 0) range--, startpos++; else range++, startpos--;
994 }
995 return -1;
996}
997\f
998#ifndef emacs /* emacs never uses this */
999int
1000re_match (pbufp, string, size, pos, regs)
1001 struct re_pattern_buffer *pbufp;
1002 char *string;
1003 int size, pos;
1004 struct re_registers *regs;
1005{
1006 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1007}
1008#endif /* emacs */
1009
1010/* Maximum size of failure stack. Beyond this, overflow is an error. */
1011
1012int re_max_failures = 2000;
1013
1014static int bcmp_translate();
1015/* Match the pattern described by PBUFP
1016 against data which is the virtual concatenation of STRING1 and STRING2.
1017 SIZE1 and SIZE2 are the sizes of the two data strings.
1018 Start the match at position POS.
1019 Do not consider matching past the position MSTOP.
1020
1021 If pbufp->fastmap is nonzero, then it had better be up to date.
1022
1023 The reason that the data to match are specified as two components
1024 which are to be regarded as concatenated
1025 is so this function can be used directly on the contents of an Emacs buffer.
1026
1027 -1 is returned if there is no match. -2 is returned if there is
1028 an error (such as match stack overflow). Otherwise the value is the length
1029 of the substring which was matched. */
1030
1031int
1032re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1033 struct re_pattern_buffer *pbufp;
1034 unsigned char *string1, *string2;
1035 int size1, size2;
1036 int pos;
1037 struct re_registers *regs;
1038 int mstop;
1039{
1040 register unsigned char *p = (unsigned char *) pbufp->buffer;
1041 register unsigned char *pend = p + pbufp->used;
1042 /* End of first string */
1043 unsigned char *end1;
1044 /* End of second string */
1045 unsigned char *end2;
1046 /* Pointer just past last char to consider matching */
1047 unsigned char *end_match_1, *end_match_2;
1048 register unsigned char *d, *dend;
1049 register int mcnt;
1050 unsigned char *translate = (unsigned char *) pbufp->translate;
1051
1052 /* Failure point stack. Each place that can handle a failure further down the line
1053 pushes a failure point on this stack. It consists of two char *'s.
1054 The first one pushed is where to resume scanning the pattern;
1055 the second pushed is where to resume scanning the strings.
1056 If the latter is zero, the failure point is a "dummy".
1057 If a failure happens and the innermost failure point is dormant,
1058 it discards that failure point and tries the next one. */
1059
1060 unsigned char *initial_stack[2 * NFAILURES];
1061 unsigned char **stackb = initial_stack;
1062 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1063
1064 /* Information on the "contents" of registers.
1065 These are pointers into the input strings; they record
1066 just what was matched (on this attempt) by some part of the pattern.
1067 The start_memory command stores the start of a register's contents
1068 and the stop_memory command stores the end.
1069
1070 At that point, regstart[regnum] points to the first character in the register,
1071 regend[regnum] points to the first character beyond the end of the register,
1072 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1073 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1074
1075 unsigned char *regstart[RE_NREGS];
1076 unsigned char *regend[RE_NREGS];
1077 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1078
1079 /* Set up pointers to ends of strings.
1080 Don't allow the second string to be empty unless both are empty. */
1081 if (!size2)
1082 {
1083 string2 = string1;
1084 size2 = size1;
1085 string1 = 0;
1086 size1 = 0;
1087 }
1088 end1 = string1 + size1;
1089 end2 = string2 + size2;
1090
1091 /* Compute where to stop matching, within the two strings */
1092 if (mstop <= size1)
1093 {
1094 end_match_1 = string1 + mstop;
1095 end_match_2 = string2;
1096 }
1097 else
1098 {
1099 end_match_1 = end1;
1100 end_match_2 = string2 + mstop - size1;
1101 }
1102
1103 /* Initialize \) text positions to -1
1104 to mark ones that no \( or \) has been seen for. */
1105
1106 for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1107 regend[mcnt] = (unsigned char *) -1;
1108
1109 /* `p' scans through the pattern as `d' scans through the data.
1110 `dend' is the end of the input string that `d' points within.
1111 `d' is advanced into the following input string whenever necessary,
1112 but this happens before fetching;
1113 therefore, at the beginning of the loop,
1114 `d' can be pointing at the end of a string,
1115 but it cannot equal string2. */
1116
1117 if (pos <= size1)
1118 d = string1 + pos, dend = end_match_1;
1119 else
1120 d = string2 + pos - size1, dend = end_match_2;
1121
1122/* Write PREFETCH; just before fetching a character with *d. */
1123#define PREFETCH \
1124 while (d == dend) \
1125 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1126 d = string2; /* end of string1 => advance to string2. */ \
1127 dend = end_match_2; }
1128
1129 /* This loop loops over pattern commands.
1130 It exits by returning from the function if match is complete,
1131 or it drops through if match fails at this starting point in the input data. */
1132
1133 while (1)
1134 {
1135 if (p == pend)
1136 /* End of pattern means we have succeeded! */
1137 {
1138 /* If caller wants register contents data back, convert it to indices */
1139 if (regs)
1140 {
1141 regs->start[0] = pos;
1142 if (dend == end_match_1)
1143 regs->end[0] = d - string1;
1144 else
1145 regs->end[0] = d - string2 + size1;
1146 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1147 {
1148 if (regend[mcnt] == (unsigned char *) -1)
1149 {
1150 regs->start[mcnt] = -1;
1151 regs->end[mcnt] = -1;
1152 continue;
1153 }
1154 if (regstart_seg1[mcnt])
1155 regs->start[mcnt] = regstart[mcnt] - string1;
1156 else
1157 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1158 if (regend_seg1[mcnt])
1159 regs->end[mcnt] = regend[mcnt] - string1;
1160 else
1161 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1162 }
1163 }
1164 if (dend == end_match_1)
1165 return (d - string1 - pos);
1166 else
1167 return d - string2 + size1 - pos;
1168 }
1169
1170 /* Otherwise match next pattern command */
1171#ifdef SWITCH_ENUM_BUG
1172 switch ((int) ((enum regexpcode) *p++))
1173#else
1174 switch ((enum regexpcode) *p++)
1175#endif
1176 {
1177
1178 /* \( is represented by a start_memory, \) by a stop_memory.
1179 Both of those commands contain a "register number" argument.
1180 The text matched within the \( and \) is recorded under that number.
1181 Then, \<digit> turns into a `duplicate' command which
1182 is followed by the numeric value of <digit> as the register number. */
1183
1184 case start_memory:
1185 regstart[*p] = d;
1186 regstart_seg1[*p++] = (dend == end_match_1);
1187 break;
1188
1189 case stop_memory:
1190 regend[*p] = d;
1191 regend_seg1[*p++] = (dend == end_match_1);
1192 break;
1193
1194 case duplicate:
1195 {
1196 int regno = *p++; /* Get which register to match against */
1197 register unsigned char *d2, *dend2;
1198
1199 d2 = regstart[regno];
1200 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1201 ? regend[regno] : end_match_1);
1202 while (1)
1203 {
1204 /* Advance to next segment in register contents, if necessary */
1205 while (d2 == dend2)
1206 {
1207 if (dend2 == end_match_2) break;
1208 if (dend2 == regend[regno]) break;
1209 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1210 }
1211 /* At end of register contents => success */
1212 if (d2 == dend2) break;
1213
1214 /* Advance to next segment in data being matched, if necessary */
1215 PREFETCH;
1216
1217 /* mcnt gets # consecutive chars to compare */
1218 mcnt = dend - d;
1219 if (mcnt > dend2 - d2)
1220 mcnt = dend2 - d2;
1221 /* Compare that many; failure if mismatch, else skip them. */
1222 if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1223 goto fail;
1224 d += mcnt, d2 += mcnt;
1225 }
1226 }
1227 break;
1228
1229 case anychar:
1230 /* fetch a data character */
1231 PREFETCH;
1232 /* Match anything but a newline. */
1233 if ((translate ? translate[*d++] : *d++) == '\n')
1234 goto fail;
1235 break;
1236
1237 case charset:
1238 case charset_not:
1239 {
1240 /* Nonzero for charset_not */
1241 int not = 0;
1242 register int c;
1243 if (*(p - 1) == (unsigned char) charset_not)
1244 not = 1;
1245
1246 /* fetch a data character */
1247 PREFETCH;
1248
1249 if (translate)
1250 c = translate [*d];
1251 else
1252 c = *d;
1253
1254 if (c < *p * BYTEWIDTH
1255 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1256 not = !not;
1257
1258 p += 1 + *p;
1259
1260 if (!not) goto fail;
1261 d++;
1262 break;
1263 }
1264
1265 case begline:
1266 if (d == string1 || d[-1] == '\n')
1267 break;
1268 goto fail;
1269
1270 case endline:
1271 if (d == end2
1272 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1273 break;
1274 goto fail;
1275
1276 /* "or" constructs ("|") are handled by starting each alternative
1277 with an on_failure_jump that points to the start of the next alternative.
1278 Each alternative except the last ends with a jump to the joining point.
1279 (Actually, each jump except for the last one really jumps
1280 to the following jump, because tensioning the jumps is a hassle.) */
1281
1282 /* The start of a stupid repeat has an on_failure_jump that points
1283 past the end of the repeat text.
1284 This makes a failure point so that, on failure to match a repetition,
1285 matching restarts past as many repetitions have been found
1286 with no way to fail and look for another one. */
1287
1288 /* A smart repeat is similar but loops back to the on_failure_jump
1289 so that each repetition makes another failure point. */
1290
1291 case on_failure_jump:
1292 if (stackp == stacke)
1293 {
1294 unsigned char **stackx;
1295 if (stacke - stackb > re_max_failures * 2)
1296 return -2;
1297 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1298 * sizeof (char *));
ade40d31 1299 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1300 stackp = stackx + (stackp - stackb);
1301 stacke = stackx + 2 * (stacke - stackb);
1302 stackb = stackx;
1303 }
1304 mcnt = *p++ & 0377;
1305 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1306 p++;
1307 *stackp++ = mcnt + p;
1308 *stackp++ = d;
1309 break;
1310
1311 /* The end of a smart repeat has an maybe_finalize_jump back.
1312 Change it either to a finalize_jump or an ordinary jump. */
1313
1314 case maybe_finalize_jump:
1315 mcnt = *p++ & 0377;
1316 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1317 p++;
1318 {
1319 register unsigned char *p2 = p;
1320 /* Compare what follows with the begining of the repeat.
1321 If we can establish that there is nothing that they would
1322 both match, we can change to finalize_jump */
1323 while (p2 != pend
1324 && (*p2 == (unsigned char) stop_memory
1325 || *p2 == (unsigned char) start_memory))
1326 p2++;
1327 if (p2 == pend)
1328 p[-3] = (unsigned char) finalize_jump;
1329 else if (*p2 == (unsigned char) exactn
1330 || *p2 == (unsigned char) endline)
1331 {
1332 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1333 register unsigned char *p1 = p + mcnt;
1334 /* p1[0] ... p1[2] are an on_failure_jump.
1335 Examine what follows that */
1336 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1337 p[-3] = (unsigned char) finalize_jump;
1338 else if (p1[3] == (unsigned char) charset
1339 || p1[3] == (unsigned char) charset_not)
1340 {
1341 int not = p1[3] == (unsigned char) charset_not;
1342 if (c < p1[4] * BYTEWIDTH
1343 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1344 not = !not;
1345 /* not is 1 if c would match */
1346 /* That means it is not safe to finalize */
1347 if (!not)
1348 p[-3] = (unsigned char) finalize_jump;
1349 }
1350 }
1351 }
1352 p -= 2;
1353 if (p[-1] != (unsigned char) finalize_jump)
1354 {
1355 p[-1] = (unsigned char) jump;
1356 goto nofinalize;
1357 }
1358
1359 /* The end of a stupid repeat has a finalize-jump
1360 back to the start, where another failure point will be made
1361 which will point after all the repetitions found so far. */
1362
1363 case finalize_jump:
1364 stackp -= 2;
1365
1366 case jump:
1367 nofinalize:
1368 mcnt = *p++ & 0377;
1369 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1370 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1371 break;
1372
1373 case dummy_failure_jump:
1374 if (stackp == stacke)
1375 {
1376 unsigned char **stackx
1377 = (unsigned char **) alloca (2 * (stacke - stackb)
1378 * sizeof (char *));
ade40d31 1379 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
dd3b648e
RP
1380 stackp = stackx + (stackp - stackb);
1381 stacke = stackx + 2 * (stacke - stackb);
1382 stackb = stackx;
1383 }
1384 *stackp++ = 0;
1385 *stackp++ = 0;
1386 goto nofinalize;
1387
1388 case wordbound:
1389 if (d == string1 /* Points to first char */
1390 || d == end2 /* Points to end */
1391 || (d == end1 && size2 == 0)) /* Points to end */
1392 break;
1393 if ((SYNTAX (d[-1]) == Sword)
1394 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1395 break;
1396 goto fail;
1397
1398 case notwordbound:
1399 if (d == string1 /* Points to first char */
1400 || d == end2 /* Points to end */
1401 || (d == end1 && size2 == 0)) /* Points to end */
1402 goto fail;
1403 if ((SYNTAX (d[-1]) == Sword)
1404 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1405 goto fail;
1406 break;
1407
1408 case wordbeg:
1409 if (d == end2 /* Points to end */
1410 || (d == end1 && size2 == 0) /* Points to end */
1411 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1412 goto fail;
1413 if (d == string1 /* Points to first char */
1414 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1415 break;
1416 goto fail;
1417
1418 case wordend:
1419 if (d == string1 /* Points to first char */
1420 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1421 goto fail;
1422 if (d == end2 /* Points to end */
1423 || (d == end1 && size2 == 0) /* Points to end */
1424 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1425 break;
1426 goto fail;
1427
1428#ifdef emacs
1429 case before_dot:
1430 if (((d - string2 <= (unsigned) size2)
1431 ? d - bf_p2 : d - bf_p1)
1432 <= point)
1433 goto fail;
1434 break;
1435
1436 case at_dot:
1437 if (((d - string2 <= (unsigned) size2)
1438 ? d - bf_p2 : d - bf_p1)
1439 == point)
1440 goto fail;
1441 break;
1442
1443 case after_dot:
1444 if (((d - string2 <= (unsigned) size2)
1445 ? d - bf_p2 : d - bf_p1)
1446 >= point)
1447 goto fail;
1448 break;
1449
1450 case wordchar:
1451 mcnt = (int) Sword;
1452 goto matchsyntax;
1453
1454 case syntaxspec:
1455 mcnt = *p++;
1456 matchsyntax:
1457 PREFETCH;
1458 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1459 break;
1460
1461 case notwordchar:
1462 mcnt = (int) Sword;
1463 goto matchnotsyntax;
1464
1465 case notsyntaxspec:
1466 mcnt = *p++;
1467 matchnotsyntax:
1468 PREFETCH;
1469 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1470 break;
1471#else
1472 case wordchar:
1473 PREFETCH;
1474 if (SYNTAX (*d++) == 0) goto fail;
1475 break;
1476
1477 case notwordchar:
1478 PREFETCH;
1479 if (SYNTAX (*d++) != 0) goto fail;
1480 break;
1481#endif /* not emacs */
1482
1483 case begbuf:
1484 if (d == string1) /* Note, d cannot equal string2 */
1485 break; /* unless string1 == string2. */
1486 goto fail;
1487
1488 case endbuf:
1489 if (d == end2 || (d == end1 && size2 == 0))
1490 break;
1491 goto fail;
1492
1493 case exactn:
1494 /* Match the next few pattern characters exactly.
1495 mcnt is how many characters to match. */
1496 mcnt = *p++;
1497 if (translate)
1498 {
1499 do
1500 {
1501 PREFETCH;
1502 if (translate[*d++] != *p++) goto fail;
1503 }
1504 while (--mcnt);
1505 }
1506 else
1507 {
1508 do
1509 {
1510 PREFETCH;
1511 if (*d++ != *p++) goto fail;
1512 }
1513 while (--mcnt);
1514 }
1515 break;
1516 }
1517 continue; /* Successfully matched one pattern command; keep matching */
1518
1519 /* Jump here if any matching operation fails. */
1520 fail:
1521 if (stackp != stackb)
1522 /* A restart point is known. Restart there and pop it. */
1523 {
1524 if (!stackp[-2])
1525 { /* If innermost failure point is dormant, flush it and keep looking */
1526 stackp -= 2;
1527 goto fail;
1528 }
1529 d = *--stackp;
1530 p = *--stackp;
1531 if (d >= string1 && d <= end1)
1532 dend = end_match_1;
1533 }
1534 else break; /* Matching at this starting point really fails! */
1535 }
1536 return -1; /* Failure to match */
1537}
1538
1539static int
1540bcmp_translate (s1, s2, len, translate)
1541 unsigned char *s1, *s2;
1542 register int len;
1543 unsigned char *translate;
1544{
1545 register unsigned char *p1 = s1, *p2 = s2;
1546 while (len)
1547 {
1548 if (translate [*p1++] != translate [*p2++]) return 1;
1549 len--;
1550 }
1551 return 0;
1552}
1553\f
1554/* Entry points compatible with bsd4.2 regex library */
1555
1556#ifndef emacs
1557
1558static struct re_pattern_buffer re_comp_buf;
1559
1560char *
1561re_comp (s)
1562 char *s;
1563{
1564 if (!s)
1565 {
1566 if (!re_comp_buf.buffer)
1567 return "No previous regular expression";
1568 return 0;
1569 }
1570
1571 if (!re_comp_buf.buffer)
1572 {
1573 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1574 return "Memory exhausted";
1575 re_comp_buf.allocated = 200;
1576 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1577 return "Memory exhausted";
1578 }
1579 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1580}
1581
1582int
1583re_exec (s)
1584 char *s;
1585{
1586 int len = strlen (s);
1587 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1588}
1589
1590#endif /* emacs */
1591\f
1592#ifdef test
1593
1594#include <stdio.h>
1595
1596/* Indexed by a character, gives the upper case equivalent of the character */
1597
1598static char upcase[0400] =
1599 { 000, 001, 002, 003, 004, 005, 006, 007,
1600 010, 011, 012, 013, 014, 015, 016, 017,
1601 020, 021, 022, 023, 024, 025, 026, 027,
1602 030, 031, 032, 033, 034, 035, 036, 037,
1603 040, 041, 042, 043, 044, 045, 046, 047,
1604 050, 051, 052, 053, 054, 055, 056, 057,
1605 060, 061, 062, 063, 064, 065, 066, 067,
1606 070, 071, 072, 073, 074, 075, 076, 077,
1607 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1608 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1609 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1610 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1611 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1612 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1613 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1614 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1615 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1616 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1617 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1618 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1619 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1620 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1621 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1622 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1623 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1624 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1625 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1626 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1627 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1628 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1629 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1630 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1631 };
1632
1633main (argc, argv)
1634 int argc;
1635 char **argv;
1636{
1637 char pat[80];
1638 struct re_pattern_buffer buf;
1639 int i;
1640 char c;
1641 char fastmap[(1 << BYTEWIDTH)];
1642
1643 /* Allow a command argument to specify the style of syntax. */
1644 if (argc > 1)
1645 obscure_syntax = atoi (argv[1]);
1646
1647 buf.allocated = 40;
1648 buf.buffer = (char *) malloc (buf.allocated);
1649 buf.fastmap = fastmap;
1650 buf.translate = upcase;
1651
1652 while (1)
1653 {
1654 gets (pat);
1655
1656 if (*pat)
1657 {
1658 re_compile_pattern (pat, strlen(pat), &buf);
1659
1660 for (i = 0; i < buf.used; i++)
1661 printchar (buf.buffer[i]);
1662
1663 putchar ('\n');
1664
1665 printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1666
1667 re_compile_fastmap (&buf);
1668 printf ("Allowed by fastmap: ");
1669 for (i = 0; i < (1 << BYTEWIDTH); i++)
1670 if (fastmap[i]) printchar (i);
1671 putchar ('\n');
1672 }
1673
1674 gets (pat); /* Now read the string to match against */
1675
1676 i = re_match (&buf, pat, strlen (pat), 0, 0);
1677 printf ("Match value %d.\n", i);
1678 }
1679}
1680
1681#ifdef NOTDEF
1682print_buf (bufp)
1683 struct re_pattern_buffer *bufp;
1684{
1685 int i;
1686
1687 printf ("buf is :\n----------------\n");
1688 for (i = 0; i < bufp->used; i++)
1689 printchar (bufp->buffer[i]);
1690
1691 printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1692
1693 printf ("Allowed by fastmap: ");
1694 for (i = 0; i < (1 << BYTEWIDTH); i++)
1695 if (bufp->fastmap[i])
1696 printchar (i);
1697 printf ("\nAllowed by translate: ");
1698 if (bufp->translate)
1699 for (i = 0; i < (1 << BYTEWIDTH); i++)
1700 if (bufp->translate[i])
1701 printchar (i);
1702 printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1703 printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1704}
1705#endif
1706
1707printchar (c)
1708 char c;
1709{
1710 if (c < 041 || c >= 0177)
1711 {
1712 putchar ('\\');
1713 putchar (((c >> 6) & 3) + '0');
1714 putchar (((c >> 3) & 7) + '0');
1715 putchar ((c & 7) + '0');
1716 }
1717 else
1718 putchar (c);
1719}
1720
1721error (string)
1722 char *string;
1723{
1724 puts (string);
1725 exit (1);
1726}
1727
1728#endif /* test */
This page took 0.191087 seconds and 4 git commands to generate.