ref: 85824350b5f65053053245d141aaf7d668089d28
parent: 0f8168038af32828fcdc39575dea0e4de0c01122
author: ben <ben@rana>
date: Wed Apr 27 03:52:41 EDT 2016
remove ape regexp library, add utility for awk native port
--- a/sys/src/ape/lib/mkfile
+++ b/sys/src/ape/lib/mkfile
@@ -1,6 +1,6 @@
</$objtype/mkfile
-DIRS=9 ap auth bio bsd bz2 draw fmt l mp net regexp sec utf v z
+DIRS=9 ap auth bio bsd bz2 draw fmt l mp net sec utf v z
none:V:
echo mk all, install, installall, clean, or nuke
--- a/sys/src/ape/lib/regexp/mkfile
+++ /dev/null
@@ -1,15 +1,0 @@
-APE=/sys/src/ape
-<$APE/config
-
-LIB=/$objtype/lib/ape/libregexp.a
-OFILES=regcomp.$O\
- regerror.$O\
- regexec.$O\
- regsub.$O\
- regaux.$O\
- rregexec.$O\
- rregsub.$O\
-
-</sys/src/cmd/mksyslib
-
-CFLAGS=-c -DUTF -D_REGEXP_EXTENSION
--- a/sys/src/ape/lib/regexp/regaux.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-/*
- * Machine state
- */
-Relist* _relist[2];
-Relist* _reliste[2];
-int _relistsize = LISTINCREMENT;
-
-/*
- * save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
- int i;
-
- if(mp==0 || ms<=0)
- return;
- if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
- (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
- for(i=0; i<ms && i<NSUBEXP; i++)
- mp[i] = sp->m[i];
- for(; i<ms; i++)
- mp[i].s.sp = mp[i].e.ep = 0;
- }
-}
-
-/*
- * Note optimization in _renewthread:
- * *lp must be pending when _renewthread called; if *l has been looked
- * at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- Resublist *sep) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if((sep)->m[0].s.sp < p->se.m[0].s.sp)
- p->se = *sep;
- return 0;
- }
- }
- p->inst = ip;
- p->se = *sep;
- (++p)->inst = 0;
- return p;
-}
-
--- a/sys/src/ape/lib/regexp/regcomp.c
+++ /dev/null
@@ -1,560 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <string.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define TRUE 1
-#define FALSE 0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
- Reinst* first;
- Reinst* last;
-}Node;
-
-#define NSTACK 20
-static Node andstack[NSTACK];
-static Node *andp;
-static int atorstack[NSTACK];
-static int* atorp;
-static int cursubid; /* id of current subexpression */
-static int subidstack[NSTACK]; /* parallel to atorstack */
-static int* subidp;
-static int lastwasand; /* Last token was operand */
-static int nbra;
-static char* exprp; /* pointer to next character in source expression */
-static int lexdone;
-static int nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int errors;
-static wchar_t yyrune; /* last lex'd rune */
-static Reclass*yyclassp; /* last lex'd class */
-
-/* predeclared crap */
-static void operator(int);
-static void pushand(Reinst*, Reinst*);
-static void pushator(int);
-static void evaluntil(int);
-static int bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
-{
- errors++;
- regerror(s);
- longjmp(regkaboom, 1);
-}
-
-static Reinst*
-newinst(int t)
-{
- freep->type = t;
- freep->l.left = 0;
- freep->r.right = 0;
- return freep++;
-}
-
-static void
-operand(int t)
-{
- Reinst *i;
-
- if(lastwasand)
- operator(CAT); /* catenate is implicit */
- i = newinst(t);
-
- if(t == CCLASS || t == NCCLASS)
- i->r.cp = yyclassp;
- if(t == RUNE)
- i->r.r = yyrune;
-
- pushand(i, i);
- lastwasand = TRUE;
-}
-
-static void
-operator(int t)
-{
- if(t==RBRA && --nbra<0)
- rcerror("unmatched right paren");
- if(t==LBRA){
- if(++cursubid >= NSUBEXP)
- rcerror ("too many subexpressions");
- nbra++;
- if(lastwasand)
- operator(CAT);
- } else
- evaluntil(t);
- if(t != RBRA)
- pushator(t);
- lastwasand = FALSE;
- if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
- lastwasand = TRUE; /* these look like operands */
-}
-
-static void
-regerr2(char *s, int c)
-{
- char buf[100];
- char *cp = buf;
- while(*s)
- *cp++ = *s++;
- *cp++ = c;
- *cp = '\0';
- rcerror(buf);
-}
-
-static void
-cant(char *s)
-{
- char buf[100];
- strcpy(buf, "can't happen: ");
- strcat(buf, s);
- rcerror(buf);
-}
-
-static void
-pushand(Reinst *f, Reinst *l)
-{
- if(andp >= &andstack[NSTACK])
- cant("operand stack overflow");
- andp->first = f;
- andp->last = l;
- andp++;
-}
-
-static void
-pushator(int t)
-{
- if(atorp >= &atorstack[NSTACK])
- cant("operator stack overflow");
- *atorp++ = t;
- *subidp++ = cursubid;
-}
-
-static Node*
-popand(int op)
-{
- Reinst *inst;
-
- if(andp <= &andstack[0]){
- regerr2("missing operand for ", op);
- inst = newinst(NOP);
- pushand(inst,inst);
- }
- return --andp;
-}
-
-static int
-popator(void)
-{
- if(atorp <= &atorstack[0])
- cant("operator stack underflow");
- --subidp;
- return *--atorp;
-}
-
-static void
-evaluntil(int pri)
-{
- Node *op1, *op2;
- Reinst *inst1, *inst2;
-
- while(pri==RBRA || atorp[-1]>=pri){
- switch(popator()){
- default:
- rcerror("unknown operator in evaluntil");
- break;
- case LBRA: /* must have been RBRA */
- op1 = popand('(');
- inst2 = newinst(RBRA);
- inst2->r.subid = *subidp;
- op1->last->l.next = inst2;
- inst1 = newinst(LBRA);
- inst1->r.subid = *subidp;
- inst1->l.next = op1->first;
- pushand(inst1, inst2);
- return;
- case OR:
- op2 = popand('|');
- op1 = popand('|');
- inst2 = newinst(NOP);
- op2->last->l.next = inst2;
- op1->last->l.next = inst2;
- inst1 = newinst(OR);
- inst1->r.right = op1->first;
- inst1->l.left = op2->first;
- pushand(inst1, inst2);
- break;
- case CAT:
- op2 = popand(0);
- op1 = popand(0);
- op1->last->l.next = op2->first;
- pushand(op1->first, op2->last);
- break;
- case STAR:
- op2 = popand('*');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(inst1, inst1);
- break;
- case PLUS:
- op2 = popand('+');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(op2->first, inst1);
- break;
- case QUEST:
- op2 = popand('?');
- inst1 = newinst(OR);
- inst2 = newinst(NOP);
- inst1->l.left = inst2;
- inst1->r.right = op2->first;
- op2->last->l.next = inst2;
- pushand(inst1, inst2);
- break;
- }
- }
-}
-
-static Reprog*
-optimize(Reprog *pp)
-{
- Reinst *inst, *target;
- int size;
- Reprog *npp;
- int diff;
-
- /*
- * get rid of NOOP chains
- */
- for(inst=pp->firstinst; inst->type!=END; inst++){
- target = inst->l.next;
- while(target->type == NOP)
- target = target->l.next;
- inst->l.next = target;
- }
-
- /*
- * The original allocation is for an area larger than
- * necessary. Reallocate to the actual space used
- * and then relocate the code.
- */
- size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
- npp = realloc(pp, size);
- if(npp==0 || npp==pp)
- return pp;
- diff = (char *)npp - (char *)pp;
- freep = (Reinst *)((char *)freep + diff);
- for(inst=npp->firstinst; inst<freep; inst++){
- switch(inst->type){
- case OR:
- case STAR:
- case PLUS:
- case QUEST:
- case CCLASS:
- case NCCLASS:
- *(char **)&inst->r.right += diff;
- break;
- }
- *(char **)&inst->l.left += diff;
- }
- *(char **)&npp->startinst += diff;
- return npp;
-}
-
-#ifdef DEBUG
-static void
-dumpstack(void){
- Node *stk;
- int *ip;
-
- print("operators\n");
- for(ip=atorstack; ip<atorp; ip++)
- print("0%o\n", *ip);
- print("operands\n");
- for(stk=andstack; stk<andp; stk++)
- print("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static void
-dump(Reprog *pp)
-{
- Reinst *l;
- wchar_t *p;
-
- l = pp->firstinst;
- do{
- print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
- l->l.left-pp->firstinst, l->l.right-pp->firstinst);
- if(l->type == RUNE)
- print("\t%C\n", l->r);
- else if(l->type == CCLASS || l->type == NCCLASS){
- print("\t[");
- if(l->type == NCCLASS)
- print("^");
- for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
- if(p[0] == p[1])
- print("%C", p[0]);
- else
- print("%C-%C", p[0], p[1]);
- print("]\n");
- } else
- print("\n");
- }while(l++->type);
-}
-#endif
-
-static Reclass*
-newclass(void)
-{
- if(nclass >= NCLASS)
- regerr2("too many character classes; limit", NCLASS+'0');
- return &(classp[nclass++]);
-}
-
-static int
-nextc(wchar_t *rp)
-{
- int n;
-
- if(lexdone){
- *rp = 0;
- return 1;
- }
- n = mbtowc(rp, exprp, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- exprp += n;
- if(*rp == L'\\'){
- n = mbtowc(rp, exprp, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- exprp += n;
- return 1;
- }
- if(*rp == 0)
- lexdone = 1;
- return 0;
-}
-
-static int
-lex(int literal, int dot_type)
-{
- int quoted;
-
- quoted = nextc(&yyrune);
- if(literal || quoted){
- if(yyrune == 0)
- return END;
- return RUNE;
- }
-
- switch(yyrune){
- case 0:
- return END;
- case L'*':
- return STAR;
- case L'?':
- return QUEST;
- case L'+':
- return PLUS;
- case L'|':
- return OR;
- case L'.':
- return dot_type;
- case L'(':
- return LBRA;
- case L')':
- return RBRA;
- case L'^':
- return BOL;
- case L'$':
- return EOL;
- case L'[':
- return bldcclass();
- }
- return RUNE;
-}
-
-static int
-bldcclass(void)
-{
- int type;
- wchar_t r[NCCRUNE];
- wchar_t *p, *ep, *np;
- wchar_t rune;
- int quoted;
-
- /* we have already seen the '[' */
- type = CCLASS;
- yyclassp = newclass();
-
- /* look ahead for negation */
- ep = r;
- quoted = nextc(&rune);
- if(!quoted && rune == L'^'){
- type = NCCLASS;
- quoted = nextc(&rune);
- *ep++ = L'\n';
- *ep++ = L'\n';
- }
-
- /* parse class into a set of spans */
- for(; ep<&r[NCCRUNE];){
- if(rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- if(!quoted && rune == L']')
- break;
- if(!quoted && rune == L'-'){
- if(ep == r){
- rcerror("malformed '[]'");
- return 0;
- }
- quoted = nextc(&rune);
- if((!quoted && rune == L']') || rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- *(ep-1) = rune;
- } else {
- *ep++ = rune;
- *ep++ = rune;
- }
- quoted = nextc(&rune);
- }
-
- /* sort on span start */
- for(p = r; p < ep; p += 2){
- for(np = p; np < ep; np += 2)
- if(*np < *p){
- rune = np[0];
- np[0] = p[0];
- p[0] = rune;
- rune = np[1];
- np[1] = p[1];
- p[1] = rune;
- }
- }
-
- /* merge spans */
- np = yyclassp->spans;
- p = r;
- if(r == ep)
- yyclassp->end = np;
- else {
- np[0] = *p++;
- np[1] = *p++;
- for(; p < ep; p += 2)
- if(p[0] <= np[1]){
- if(p[1] > np[1])
- np[1] = p[1];
- } else {
- np += 2;
- np[0] = p[0];
- np[1] = p[1];
- }
- yyclassp->end = np+2;
- }
-
- return type;
-}
-
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
-{
- int token;
- Reprog *pp;
-
- /* get memory for the program */
- pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
- if(pp == 0){
- regerror("out of memory");
- return 0;
- }
- freep = pp->firstinst;
- classp = pp->class;
- errors = 0;
-
- if(setjmp(regkaboom))
- goto out;
-
- /* go compile the sucker */
- lexdone = 0;
- exprp = s;
- nclass = 0;
- nbra = 0;
- atorp = atorstack;
- andp = andstack;
- subidp = subidstack;
- lastwasand = FALSE;
- cursubid = 0;
-
- /* Start with a low priority operator to prime parser */
- pushator(START-1);
- while((token = lex(literal, dot_type)) != END){
- if((token&0300) == OPERATOR)
- operator(token);
- else
- operand(token);
- }
-
- /* Close with a low priority operator */
- evaluntil(START);
-
- /* Force END */
- operand(END);
- evaluntil(START);
-#ifdef DEBUG
- dumpstack();
-#endif
- if(nbra)
- rcerror("unmatched left paren");
- --andp; /* points to first and only operand */
- pp->startinst = andp->first;
-#ifdef DEBUG
- dump(pp);
-#endif
- pp = optimize(pp);
-#ifdef DEBUG
- print("start: %d\n", andp->first-pp->firstinst);
- dump(pp);
-#endif
-out:
- if(errors){
- free(pp);
- pp = 0;
- }
- return pp;
-}
-
-extern Reprog*
-regcomp(char *s)
-{
- return regcomp1(s, 0, ANY);
-}
-
-extern Reprog*
-regcomplit(char *s)
-{
- return regcomp1(s, 1, ANY);
-}
-
-extern Reprog*
-regcompnl(char *s)
-{
- return regcomp1(s, 0, ANYNL);
-}
--- a/sys/src/ape/lib/regexp/regcomp.h
+++ /dev/null
@@ -1,61 +1,0 @@
-/*
- * substitution list
- */
-typedef struct Resublist Resublist;
-struct Resublist
-{
- Resub m[32];
-};
-
-/* max subexpressions per program */
-Resublist ReSuBlIsT;
-#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
-
-/* max character classes per program */
-Reprog RePrOg;
-#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass))
-
-/* max rune ranges per character class */
-#define NCCRUNE (sizeof(Reclass)/sizeof(wchar_t))
-
-/*
- * Actions and Tokens (Reinst types)
- *
- * 02xx are operators, value == precedence
- * 03xx are tokens, i.e. operands for operators
- */
-#define RUNE 0177
-#define OPERATOR 0200 /* Bitmask of all operators */
-#define START 0200 /* Start, used for marker on stack */
-#define RBRA 0201 /* Right bracket, ) */
-#define LBRA 0202 /* Left bracket, ( */
-#define OR 0203 /* Alternation, | */
-#define CAT 0204 /* Concatentation, implicit operator */
-#define STAR 0205 /* Closure, * */
-#define PLUS 0206 /* a+ == aa* */
-#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */
-#define ANY 0300 /* Any character except newline, . */
-#define ANYNL 0301 /* Any character including newline, . */
-#define NOP 0302 /* No operation, internal use only */
-#define BOL 0303 /* Beginning of line, ^ */
-#define EOL 0304 /* End of line, $ */
-#define CCLASS 0305 /* Character class, [] */
-#define NCCLASS 0306 /* Negated character class, [] */
-#define END 0377 /* Terminate: match found */
-
-/*
- * regexec execution lists
- */
-#define LISTINCREMENT 8
-typedef struct Relist Relist;
-struct Relist
-{
- Reinst *inst; /* Reinstruction of the thread */
- Resublist se; /* matched subexpressions in this thread */
-};
-extern Relist* _relist[2];
-extern Relist* _reliste[2];
-extern int _relistsize;
-
-extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
-extern void _renewmatch(Resub*, int, Resublist*);
--- a/sys/src/ape/lib/regexp/regerror.c
+++ /dev/null
@@ -1,16 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "regexp.h"
-
-void
-regerror(char *s)
-{
- char buf[132];
-
- strcpy(buf, "regerror: ");
- strcat(buf, s);
- strcat(buf, "\n");
- fwrite(buf, 1, strlen(buf), stderr);
- exit(1);
-}
--- a/sys/src/ape/lib/regexp/regexec.c
+++ /dev/null
@@ -1,191 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty; /* empty set of matches */
-
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- char *starts,
- char *eol,
- wchar_t startchar)
-{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- char *s;
- int i, checkstart;
- wchar_t r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
-
- match = 0;
- checkstart = startchar;
- sempty.m[0].s.sp = 0;
- if(mp!=0)
- for(i=0; i<ms; i++)
- mp[i].s.sp = mp[i].e.ep = 0;
- _relist[0][0].inst = _relist[1][0].inst = 0;
-
- /* Execute machine once for each character, including terminal NUL */
- s = starts;
- do{
- /* fast check for first char */
- r = *(unsigned char*)s;
- if(checkstart && r != startchar){
- s++;
- continue;
- }
-
- if(r < Runeself)
- n = 1;
- else {
- n = mbtowc(&r, s, MB_CUR_MAX);
- if (n <= 0)
- n = 1;
- }
-
- /* switch run lists */
- tl = _relist[flag];
- tle = _reliste[flag];
- nl = _relist[flag^=1];
- nle = _reliste[flag];
- nl->inst = 0;
-
- /* Add first instruction to current list */
- if(match == 0){
- sempty.m[0].s.sp = s;
- _renewthread(tl, progp->startinst, &sempty);
- }
-
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- if(s == eol)
- break;
-
- for(inst = tlp->inst; ; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.sp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.ep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.ep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- checkstart = startchar && nl->inst==0;
- s += n;
- }while(r);
- return match;
-}
-
-extern int
-regexec(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- char *starts; /* where to start match */
- char *eol; /* where to end match */
- wchar_t startchar;
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- starts = bol;
- eol = 0;
- if(mp && ms>0){
- if(mp->s.sp)
- starts = mp->s.sp;
- if(mp->e.ep)
- eol = mp->e.ep;
- }
- startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
- ? progp->startinst->r.r : 0;
-
- /* keep trying till we have enough list space to terminate */
- for(;;){
- if(_relist[0] == 0){
- _relist[0] = malloc(2*_relistsize*sizeof(Relist));
- _relist[1] = _relist[0] + _relistsize;
- _reliste[0] = _relist[0] + _relistsize - 1;
- _reliste[1] = _relist[1] + _relistsize - 1;
- if(_relist[0] == 0)
- regerror("_relist overflow");
- }
- rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
- if(rv >= 0)
- return rv;
- free(_relist[0]);
- _relist[0] = 0;
- _relistsize += LISTINCREMENT;
- }
-}
--- a/sys/src/ape/lib/regexp/regsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp, /* source string */
- char *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
-{
- char *ssp, *ep;
- int i;
-
- ep = dp+dlen-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp[i].s.sp != 0 && mp!=0 && ms>i)
- for(ssp = mp[i].s.sp;
- ssp < mp[i].e.ep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp[0].s.sp != 0 && mp!=0 && ms>0)
- if(mp[0].s.sp != 0)
- for(ssp = mp[0].s.sp;
- ssp < mp[0].e.ep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
-}
--- a/sys/src/ape/lib/regexp/rregexec.c
+++ /dev/null
@@ -1,181 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty; /* empty set of matches */
-
-/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp, /* program to run */
- wchar_t *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- wchar_t *starts,
- wchar_t *eol,
- wchar_t startchar)
-{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- wchar_t *s;
- int i, checkstart;
- wchar_t r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
-
- match = 0;
- checkstart = startchar;
- sempty.m[0].s.rsp = 0;
- if(mp!=0)
- for(i=0; i<ms; i++)
- mp[i].s.rsp = mp[i].e.rep = 0;
- _relist[0][0].inst = _relist[1][0].inst = 0;
-
- /* Execute machine once for each character, including terminal NUL */
- s = starts;
- do{
- r = *s;
-
- /* fast check for first char */
- if(checkstart && r!=startchar){
- s++;
- continue;
- }
-
- /* switch run lists */
- tl = _relist[flag];
- tle = _reliste[flag];
- nl = _relist[flag^=1];
- nle = _reliste[flag];
- nl->inst = 0;
-
- /* Add first instruction to current list */
- sempty.m[0].s.rsp = s;
- _renewthread(tl, progp->startinst, &sempty);
-
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- if(s == eol)
- break;
-
- for(inst=tlp->inst; ; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.rsp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.rep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.rep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- checkstart = startchar && nl->inst==0;
- s++;
- }while(r);
- return match;
-}
-
-extern int
-rregexec(Reprog *progp, /* program to run */
- wchar_t *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
-{
- wchar_t *starts; /* where to start match */
- wchar_t *eol; /* where to end match */
- wchar_t startchar;
- int rv;
-
- /*
- * use user-specified starting/ending location if specified
- */
- starts = bol;
- eol = 0;
- if(mp && ms>0){
- if(mp->s.rsp)
- starts = mp->s.rsp;
- if(mp->e.rep)
- eol = mp->e.rep;
- }
- startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
-
- /* keep trying till we have enough list space to terminate */
- for(;;){
- if(_relist[0] == 0){
- _relist[0] = malloc(2*_relistsize*sizeof(Relist));
- _relist[1] = _relist[0] + _relistsize;
- _reliste[0] = _relist[0] + _relistsize - 1;
- _reliste[1] = _relist[1] + _relistsize - 1;
- if(_relist[0] == 0)
- regerror("_relist overflow");
- }
- rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
- if(rv >= 0)
- return rv;
- free(_relist[0]);
- _relist[0] = 0;
- _relistsize += LISTINCREMENT;
- }
-}
--- a/sys/src/ape/lib/regexp/rregsub.c
+++ /dev/null
@@ -1,64 +1,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(wchar_t *sp, /* source string */
- wchar_t *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
-{
- wchar_t *ssp, *ep;
- int i;
-
- ep = dp+(dlen/sizeof(wchar_t))-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp[i].s.rsp != 0 && mp!=0 && ms>i)
- for(ssp = mp[i].s.rsp;
- ssp < mp[i].e.rep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp[0].s.rsp != 0 && mp!=0 && ms>0)
- if(mp[0].s.rsp != 0)
- for(ssp = mp[0].s.rsp;
- ssp < mp[0].e.rep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
-}
--- /dev/null
+++ b/sys/src/cmd/awk/popen.c
@@ -1,0 +1,122 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "awk.h"
+
+#define MAXFORKS	20	/* number of slots in the child table */
+#define NSYSFILE	3	/* fds below this stay open in the child */
+/* yields (b) when mode is OREAD, (a) otherwise; reads the local variable mode */
+#define tst(a,b)	(mode == OREAD? (b) : (a))
+#define RDR	0
+#define WTR	1
+
+/* one slot per child started by popen; pid == 0 marks a free slot */
+struct a_fork {
+	short	done;	/* set once wait() has reported this pid */
+	short	fd;	/* our end of the pipe */
+	int	pid;
+	char	status[128];	/* exit message copied from the Waitmsg */
+};
+static struct a_fork the_fork[MAXFORKS];
+
+/*
+ * Run cmd under /bin/rc with one end of a pipe attached to it and
+ * return a Biobuf opened with mode on the other end.  With OREAD the
+ * pipe is dup'ed onto the child's fd 1; with any other mode onto
+ * fd 0.  Returns nil when the table is full or when pipe, fork or
+ * Bfdopen fails.
+ */
+Biobuf*
+popen(char *cmd, int mode)
+{
+	int p[2];
+	int myside, hisside, pid;
+	int i, ind;
+	Biobuf *bp;
+
+	for (ind = 0; ind < MAXFORKS; ind++)
+		if (the_fork[ind].pid == 0)
+			break;
+	if (ind == MAXFORKS)
+		return nil;
+	if(pipe(p) < 0)
+		return nil;
+	myside = tst(p[WTR], p[RDR]);
+	hisside = tst(p[RDR], p[WTR]);
+	switch (pid = fork()) {
+	case -1:
+		/* no child was created: give both pipe ends back */
+		close(myside);
+		close(hisside);
+		return nil;
+	case 0:
+		/* myside and hisside reverse roles in child */
+		close(myside);
+		dup(hisside, tst(0, 1));
+		for (i=NSYSFILE; i<FOPEN_MAX; i++)
+			close(i);
+		execl("/bin/rc", "rc", "-c", cmd, nil);
+		exits("exec failed");
+	}
+
+	close(hisside);
+	bp = Bfdopen(myside, mode);
+	if (bp == nil) {
+		/* the caller gets nothing to pclose, so do not occupy a slot */
+		close(myside);
+		return nil;
+	}
+	the_fork[ind].pid = pid;
+	the_fork[ind].fd = myside;
+	the_fork[ind].done = 0;
+	the_fork[ind].status[0] = '\0';
+	return bp;
+}
+
+/*
+ * Terminate a Biobuf returned by popen and collect its child's exit
+ * message.  wait() is called until the child for this Biobuf is
+ * reported; other popen children reported meanwhile have their
+ * message stored in their own slot for a later pclose.  Returns -1
+ * if ptr's fd is not in the table, 1 if the stored exit message is
+ * non-empty, 0 otherwise.
+ */
+int
+pclose(Biobuf *ptr)
+{
+	int f, r, i, ind;
+	struct a_fork *fp;
+	Waitmsg *status;
+
+	f = Bfildes(ptr);
+	Bterm(ptr);
+	for (ind = 0; ind < MAXFORKS; ind++)
+		if (the_fork[ind].fd == f && the_fork[ind].pid != 0)
+			break;
+	if (ind == MAXFORKS)
+		return -1;
+	if (!the_fork[ind].done) {
+		do {
+			if((status = wait()) == nil)
+				r = -1;
+			else
+				r = status->pid;
+			for (i = 0; i < MAXFORKS; i++) {
+				fp = &the_fork[i];
+				if (r == fp->pid) {
+					fp->done = 1;
+					/* end pointer is the real end of status[], so long messages are truncated */
+					strecpy(fp->status, fp->status+sizeof fp->status, status->msg);
+					break;
+				}
+			}
+			free(status);
+		} while(r != the_fork[ind].pid && r != -1);
+		if(r == -1)
+			strcpy(the_fork[ind].status, "No loved ones to wait for");
+	}
+	the_fork[ind].pid = 0;
+	if(the_fork[ind].status[0] != '\0')
+		return 1;
+	return 0;
+}
--- a/sys/src/libregexp/regcomp.c
+++ b/sys/src/libregexp/regcomp.c
@@ -184,12 +184,10 @@
return nil;
}
+ maxthr = regstrlen;
parsetr = node(&plex, TSUB, e0(&plex), nil);
- maxthr = maxthreads(parsetr);
- if(maxthr == -1)
- maxthr = regstrlen;
- prtree(parsetr, 0, 1);
+// prtree(parsetr, 0, 1);
reprog = malloc(sizeof(Reprog) +
sizeof(Reinst) * plex.instrs +
sizeof(Rethread) * maxthr +