ref: 13efe91101a11f41caf6321a8b2fbdd96ef9927a
dir: /parser.c/
#include <u.h> #include <libc.h> #include <bio.h> #include "dat.h" #include "fns.h" typedef struct Token Token; typedef struct OpInfo OpInfo; struct Token { int tag; Rune *text; double dval; vlong ival; }; struct OpInfo { int level; int type; }; enum { AtomTok = 1<<0, /* 1 */ FunctorTok = 1<<1, /* 2 */ VarTok = 1<<2, /* 4 */ StringTok = 1<<3, /* 8 */ IntTok = 1<<4, /* 16 */ FloatTok = 1<<5, /* 32 */ ParenLeftTok = 1<<6, /* 64 */ ParenRightTok = 1<<7, /* 128 */ SquareBracketLeftTok = 1<<8, /* 256 */ SquareBracketRightTok = 1<<9, /* 512 */ CurlyBracketLeftTok = 1<<10, /* 1024 */ CurlyBracketRightTok = 1<<11, /* 2048 */ CommaTok = 1<<12, /* 4096 */ PipeTok = 1<<13, /* 8192 */ EofTok = 1<<14, /* 16384 */ }; static Biobuf *parsein; static Token lookahead; static Module *currentmod; static VarName *varnames; void nexttoken(void); Term *fullterm(int, Rune *, Term *); Term *term(void); Term *listterm(int); Term *curlybracketterm(void); Term *compound(void); Term *parsevar(void); Term *parseoperators(Term *); void match(int); void syntaxerror_parser(char *); Term *parseterm(void); Term * parse(Biobuf *bio, Module *mod, VarName **vns) { parsein = bio; currentmod = mod; varnames = nil; nexttoken(); Term *result = parseterm(); *vns = varnames; return result; } Term * parseterm(void) { if(lookahead.tag == EofTok) return nil; Term *t = fullterm(AtomTok, L".", nil); if(lookahead.tag != AtomTok || runestrcmp(lookahead.text, L".") != 0) syntaxerror_parser("parseterm"); return t; } Term * fullterm(int stoptag, Rune *stoptext, Term *list) { if((lookahead.tag & stoptag) && (stoptext == nil || runestrcmp(stoptext, lookahead.text) == 0)) return parseoperators(list); Term *t = term(); list = appendterm(list, t); return fullterm(stoptag, stoptext, list); } Term * term(void) { Term *result; switch(lookahead.tag){ case AtomTok: result = mkatom(lookahead.text); match(AtomTok); break; case FunctorTok: result = compound(); break; case VarTok: result = parsevar(); break; case IntTok: result = mkinteger(lookahead.ival); match(IntTok); break; case FloatTok: result = mkfloat(lookahead.dval); match(FloatTok); break; case CommaTok: result = mkatom(L","); match(CommaTok); break; case SquareBracketLeftTok: result = listterm(1); break; case CurlyBracketLeftTok: result = curlybracketterm(); break; case ParenLeftTok: match(ParenLeftTok); result = fullterm(ParenRightTok, nil, nil); result->inparens = 1; match(ParenRightTok); break; case StringTok: result = mkstring(lookahead.text); match(StringTok); break; default: print("Can't parse term of token type %d\n", lookahead.tag); syntaxerror_parser("term"); result = nil; } return result; } Term * listterm(int start) { if(start) match(SquareBracketLeftTok); if(lookahead.tag == SquareBracketRightTok){ match(SquareBracketRightTok); return mkatom(L"[]"); }else if(lookahead.tag == PipeTok){ match(PipeTok); Term *t = fullterm(SquareBracketRightTok, nil, nil); match(SquareBracketRightTok); return t; }else{ Term *t = fullterm(CommaTok|PipeTok|SquareBracketRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); t->next = listterm(0); return mkcompound(L".", 2, t); } } Term * curlybracketterm(void) { match(CurlyBracketLeftTok); Term *result = fullterm(CurlyBracketRightTok, nil, nil); match(CurlyBracketRightTok); return mkcompound(L"{}", 1, result); } Term * compound(void) { Rune *name = lookahead.text; Term *args = nil; int arity = 0; match(FunctorTok); match(ParenLeftTok); while(lookahead.tag != ParenRightTok){ Term *t = fullterm(CommaTok|ParenRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); args = appendterm(args, t); arity++; } match(ParenRightTok); return mkcompound(name, arity, args); } Term * parsevar(void) { Rune *name = lookahead.text; match(VarTok); VarName *vn; uvlong i = 0; for(vn = varnames; vn != nil; vn = vn->next, i++) if(runestrcmp(vn->name, name) == 0 && !runestrcmp(vn->name, L"_") == 0){ vn->count++; return copyterm(vn->var); } VarName *new = gmalloc(sizeof(VarName)); new->name = name; new->var = mkvariable(); new->count = 1; new->next = varnames; varnames = new; return new->var; } Term * parseoperators(Term *list) { int i; int length = termslength(list); Term *t; Term **terms = gmalloc(sizeof(Term *) * length); OpInfo *infos = gmalloc(sizeof(OpInfo) * length); for(i = 0, t = list; i < length; i++){ Operator *op; if(t->tag == AtomTerm && !t->inparens && (op = getoperator(t->text, currentmod))){ infos[i].type = op->type; infos[i].level = op->level; }else{ infos[i].type = 0; infos[i].level = 0; } terms[i] = t; Term *tmp = t; t = t->next; tmp->next = nil; } while(length > 1){ int index = -1; int lowest = PrecedenceLevels + 1; for(i = 0; i < length; i++){ if(infos[i].type == 0) continue; if(infos[i].level == lowest && infos[i].type == Xfy) index = i; else if(infos[i].level < lowest){ index = i; lowest = infos[i].level; } } if(index == -1){ print("Can't parse, list of length %d contains no operators: ", length); for(i = 0; i < length; i++) print("%S(%d) ", prettyprint(terms[i], 0, 1, 0, currentmod), infos[i].level); print("\n"); syntaxerror_parser("parseoperators"); } int infixlevel = infos[index].type & (Xfx|Xfy|Yfx); int prefixlevel = infos[index].type & (Fx|Fy); int postfixlevel = infos[index].type & (Xf|Yf); if(infixlevel && index != 0 && index != length-1 && infos[index-1].type == 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; Term *args = terms[index-1]; args->next = terms[index+1]; terms[index-1] = mkcompound(terms[index]->text, 2, args); length -= 2; for(i = index; i < length; i++){ infos[i].level = infos[i+2].level; infos[i].type = infos[i+2].type; terms[i] = terms[i+2]; } }else if(prefixlevel && index != length-1 && infos[index+1].type == 0){ infos[index].type = 0; terms[index] = mkcompound(terms[index]->text, 1, terms[index+1]); length--; for(i = index+1; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else if(postfixlevel && index != 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; terms[index-1] = mkcompound(terms[index]->text, 1, terms[index-1]); length--; for(i = index; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else{ print("Parse error when parsing operator %S (prefix=%d, postfix=%d, infix=%d level=%d)\n", prettyprint(terms[index], 0, 0, 0, currentmod), prefixlevel, postfixlevel, infixlevel, infos[index].level); syntaxerror_parser("parseoperators"); } } Term *result = terms[0]; return result; } void nexttoken(void) { Rune buf[1024]; Rune peek; int i = 0; double numD; vlong numI = 0; int negative = 0; static long replaypeek = -1; if(replaypeek == -1) peek = Bgetrune(parsein); else{ peek = replaypeek; replaypeek = -1; } SkipWhite: /* Skip whitespace */ while(isspacerune(peek)) peek = Bgetrune(parsein); /* Skip single line comments */ if(peek == L'%'){ while(peek != L'\n') peek = Bgetrune(parsein); goto SkipWhite; } /* Variables */ if(peek == L'_' || isupperrune(peek)){ while(peek == L'_' || isalpharune(peek) || isdigitrune(peek)){ /* FIXME Should handle input length, also elsewhere in this function */ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = VarTok; lookahead.text = runestrdup(buf); return; } /* Strings */ if(peek == L'"'){ peek = Bgetrune(parsein); while(peek != L'"'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; lookahead.tag = StringTok; lookahead.text = runestrdup(buf); return; } /* Numbers */ if(peek == L'-'){ peek = Bgetrune(parsein); if(isdigitrune(peek)) negative = 1; else{ Bungetrune(parsein); peek = L'-'; } } if(isdigitrune(peek)){ if(peek == L'0'){ peek = Bgetrune(parsein); switch(peek){ case L'\'': /* Character code */ peek = Bgetrune(parsein); lookahead.tag = IntTok; lookahead.ival = peek; return; case L'o': /* Octal */ case L'x': /* Hexadecimal */ case L'b': /* Binary */ print("Cant lex numbers in other bases than base 10 right now\n"); exits("lexer"); } } while(isdigitrune(peek)){ numI *= 10; numI += peek - L'0'; peek = Bgetrune(parsein); } if(peek == L'.'){ int place = 1; numD = numI; peek = Bgetrune(parsein); if(!isdigitrune(peek)){ Bungetrune(parsein); replaypeek = L'.'; goto Integer; } while(isdigitrune(peek)){ double addition = (peek - L'0') / (double)(10 * place); numD += addition; peek = Bgetrune(parsein); place *= 10; } Bungetrune(parsein); /* Should also lex 123.45E10 */ lookahead.tag = FloatTok; lookahead.dval = negative ? -numD : numD; }else{ Bungetrune(parsein); Integer: lookahead.tag = IntTok; lookahead.ival = negative ? -numI : numI; } return; } /* Atoms */ /* Atoms in single quotes */ if(peek == L'\''){ peek = Bgetrune(parsein); while(peek != L'\''){ QuotedAtomLoop: buf[i++] = peek; peek = Bgetrune(parsein); } peek = Bgetrune(parsein); if(peek == L'\'') goto QuotedAtomLoop; else Bungetrune(parsein); buf[i] = '\0'; lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Special atom [] */ if(peek == L'['){ peek = Bgetrune(parsein); if(peek == L']'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"[]"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = SquareBracketLeftTok; return; } } /* Special atom {} */ if(peek == L'{'){ peek = Bgetrune(parsein); if(peek == L'}'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"{}"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = CurlyBracketLeftTok; return; } } /* Graphic atom */ Rune *graphics = L"#$&*+-./:<=>?@^~\\"; /* keep in sync with prettyprint*/ if(runestrchr(graphics, peek)){ while(runestrchr(graphics, peek)){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* simple atom */ if(islowerrune(peek)){ while(isalpharune(peek) || isdigitrune(peek) || peek == L'_'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Other */ if(runestrchr(L",.()]}|!;", peek)){ switch(peek){ case L',': lookahead.tag = CommaTok; break; case L'(': lookahead.tag = ParenLeftTok; break; case L')': lookahead.tag = ParenRightTok; break; case L']': lookahead.tag = SquareBracketRightTok; break; case L'}': lookahead.tag = CurlyBracketRightTok; break; case L'|': lookahead.tag = PipeTok; break; case L'!': lookahead.tag = AtomTok; lookahead.text = runestrdup(L"!"); break; case L';': lookahead.tag = AtomTok; lookahead.text = runestrdup(L";"); break; } return; } /* EOF */ if(peek == Beof){ lookahead.tag = EofTok; return; } print("Could not lex rune %C (%ud)\n", peek, peek); exits("lexer"); Functor: if(peek == Beof){ Bgetrune(parsein); return; } peek = Bgetrune(parsein); if(peek == L'(') lookahead.tag = FunctorTok; Bungetrune(parsein); return; } void match(int tag) { if(lookahead.tag == tag) nexttoken(); else syntaxerror_parser("match"); } void syntaxerror_parser(char *where) { print("Syntax error: Unexpected %d (%S) token in %s\n", lookahead.tag, lookahead.text, where); exits("syntax error"); }