ref: 81fa4d4a176b5e330ffe20b2ed9f92b10ffc5ba9
dir: /parser.c/
#include <u.h> #include <libc.h> #include <bio.h> #include "dat.h" #include "fns.h" #define PrecedenceLevels 1200 typedef struct Token Token; typedef struct Operator Operator; typedef struct OpInfo OpInfo; struct Token { int tag; Rune *text; double dval; vlong ival; }; struct Operator { int type; int level; Rune *spelling; Operator *next; }; struct OpInfo { int level; int type; }; enum { Xf = 1<<0, /* 1 */ Yf = 1<<1, /* 2 */ Xfx = 1<<2, /* 4 */ Xfy = 1<<3, /* 8 */ Yfx = 1<<4, /* 16 */ Fy = 1<<5, /* 32 */ Fx = 1<<6, /* 64 */ }; enum { AtomTok = 1<<0, /* 1 */ FunctorTok = 1<<1, /* 2 */ VarTok = 1<<2, /* 4 */ StringTok = 1<<3, /* 8 */ IntTok = 1<<4, /* 16 */ FloatTok = 1<<5, /* 32 */ ParenLeftTok = 1<<6, /* 64 */ ParenRightTok = 1<<7, /* 128 */ SquareBracketLeftTok = 1<<8, /* 256 */ SquareBracketRightTok = 1<<9, /* 512 */ CurlyBracketLeftTok = 1<<10, /* 1024 */ CurlyBracketRightTok = 1<<11, /* 2048 */ CommaTok = 1<<12, /* 4096 */ PipeTok = 1<<13, /* 8192 */ EofTok = 1<<14, /* 16384 */ }; static Biobuf *parsein; static Token lookahead; static Operator *operators[PrecedenceLevels]; void initoperators(void); void addoperator(int, int, Rune *); Operator *getoperator(Rune *); void nexttoken(void); Term *fullterm(int, Rune *, Term *); Term *term(void); Term *listterm(int); Term *curlybracketterm(void); Term *compound(void); Term *parseoperators(Term *); void match(int); void syntaxerror_parser(char *); Term *prologtext(int); Term * parse(int fd, Biobuf *bio, int querymode) { if(bio == nil){ fd = dup(fd, -1); parsein = Bfdopen(fd, OREAD); if(parsein == nil){ print("Could not open file\n"); return nil; } }else parsein = bio; initoperators(); nexttoken(); Term *result = prologtext(querymode); if(querymode){ uvlong id = 1; result = copyterm(result, &id); } if(!bio) Bterm(parsein); return result; } Term * prologtext(int querymode) { if(lookahead.tag == EofTok) return nil; Term *t = fullterm(AtomTok, L".", nil); if(lookahead.tag == AtomTok && runestrcmp(lookahead.text, L".") == 0){ if(!querymode) match(AtomTok); }else syntaxerror_parser("prologtext"); if(querymode) return t; if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 1){ Term *body = t->children; print("Got directive: %S\n", prettyprint(body, 0, 0, 0)); if(runestrcmp(body->text, L"module") == 0 && body->arity == 2) t->next = prologtext(querymode); else t = prologtext(querymode); }else if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 2){ t->next = prologtext(querymode); }else if(t->tag == AtomTerm || t->tag == CompoundTerm){ t->next = prologtext(querymode); }else{ print("Expected directive or clause as toplevel\n"); syntaxerror_parser("prologtext"); } return t; } Term * fullterm(int stoptag, Rune *stoptext, Term *list) { if((lookahead.tag & stoptag) && (stoptext == nil || runestrcmp(stoptext, lookahead.text) == 0)) return parseoperators(list); Term *t = term(); list = appendterm(list, t); return fullterm(stoptag, stoptext, list); } Term * term(void) { Term *result; switch(lookahead.tag){ case AtomTok: result = mkatom(lookahead.text); match(AtomTok); break; case FunctorTok: result = compound(); break; case VarTok: result = mkvariable(lookahead.text); match(VarTok); break; case IntTok: result = mkinteger(lookahead.ival); match(IntTok); break; case FloatTok: result = mkfloat(lookahead.dval); match(FloatTok); break; case CommaTok: result = mkatom(L","); match(CommaTok); break; case SquareBracketLeftTok: result = listterm(1); break; case CurlyBracketLeftTok: result = curlybracketterm(); break; case ParenLeftTok: match(ParenLeftTok); result = fullterm(ParenRightTok, nil, nil); match(ParenRightTok); break; case StringTok: result = mkstring(lookahead.text); match(StringTok); break; default: print("Can't parse term of token type %d\n", lookahead.tag); syntaxerror_parser("term"); result = nil; } return result; } Term * listterm(int start) { if(start) match(SquareBracketLeftTok); if(lookahead.tag == SquareBracketRightTok){ match(SquareBracketRightTok); return mkatom(L"[]"); }else if(lookahead.tag == PipeTok){ match(PipeTok); Term *t = fullterm(SquareBracketRightTok, nil, nil); match(SquareBracketRightTok); return t; }else{ Term *t = fullterm(CommaTok|PipeTok|SquareBracketRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); t->next = listterm(0); return mkcompound(L".", 2, t); } } Term * curlybracketterm(void) { match(CurlyBracketLeftTok); Term *result = fullterm(CurlyBracketRightTok, nil, nil); match(CurlyBracketRightTok); return mkcompound(L"{}", 1, result); } Term * compound(void) { Rune *name = lookahead.text; Term *args = nil; int arity = 0; match(FunctorTok); match(ParenLeftTok); while(lookahead.tag != ParenRightTok){ Term *t = fullterm(CommaTok|ParenRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); args = appendterm(args, t); arity++; } match(ParenRightTok); return mkcompound(name, arity, args); } Term * parseoperators(Term *list) { int i; int length = termslength(list); Term *t; Term **terms = gmalloc(sizeof(Term *) * length); OpInfo *infos = gmalloc(sizeof(OpInfo) * length); for(i = 0, t = list; i < length; i++){ Operator *op = getoperator(t->text); if(op && t->tag == AtomTerm){ infos[i].type = op->type; infos[i].level = op->level; }else{ infos[i].type = 0; infos[i].level = 0; } terms[i] = t; Term *tmp = t; t = t->next; tmp->next = nil; } while(length > 1){ int index = -1; int lowest = PrecedenceLevels + 1; for(i = 0; i < length; i++){ if(infos[i].type == 0) continue; if(infos[i].level == lowest && infos[i].type == Xfy) index = i; else if(infos[i].level < lowest){ index = i; lowest = infos[i].level; } } if(index == -1){ print("Can't parse, list of length %d contains no operators: ", length); for(i = 0; i < length; i++) print("%S(%d) ", prettyprint(terms[i], 0, 0, 0), infos[i].level); print("\n"); syntaxerror_parser("parseoperators"); } int infixlevel = infos[index].type & (Xfx|Xfy|Yfx); int prefixlevel = infos[index].type & (Fx|Fy); int postfixlevel = infos[index].type & (Xf|Yf); if(infixlevel && index != 0 && index != length-1 && infos[index-1].type == 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; Term *args = terms[index-1]; args->next = terms[index+1]; terms[index-1] = mkcompound(terms[index]->text, 2, args); length -= 2; for(i = index; i < length; i++){ infos[i].level = infos[i+2].level; infos[i].type = infos[i+2].type; terms[i] = terms[i+2]; } }else if(prefixlevel && index != length-1 && infos[index+1].type == 0){ infos[index].type = 0; terms[index] = mkcompound(terms[index]->text, 1, terms[index+1]); length--; for(i = index+1; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else if(postfixlevel && index != 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; terms[index-1] = mkcompound(terms[index]->text, 1, terms[index-1]); length--; for(i = index; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else{ print("Parse error when parsing operator %S (prefix=%d, postfix=%d, infix=%d level=%d)\n", prettyprint(terms[index], 0, 0, 0), prefixlevel, postfixlevel, infixlevel, infos[index].level); syntaxerror_parser("parseoperators"); } } Term *result = terms[0]; return result; } void initoperators(void) { Operator *op; int i; for(i = 0; i < PrecedenceLevels; i++){ while(operators[i]){ op = operators[i]; operators[i] = op->next; free(op); } } addoperator(1200, Xfx, L":-"); addoperator(1200, Xfx, L"-->"); addoperator(1200, Fx, L":-"); addoperator(1200, Fx, L"?-"); addoperator(1100, Xfy, L";"); addoperator(1050, Xfy, L"->"); addoperator(1000, Xfy, L","); addoperator(900, Fy, L"\\+"); addoperator(700, Xfx, L"="); addoperator(700, Xfx, L"\\="); addoperator(700, Xfx, L"=="); addoperator(700, Xfx, L"\\=="); addoperator(700, Xfx, L"@<"); addoperator(700, Xfx, L"@=<"); addoperator(700, Xfx, L"@>"); addoperator(700, Xfx, L"@>="); addoperator(700, Xfx, L"is"); addoperator(700, Xfx, L"=:="); addoperator(700, Xfx, L"=\\="); addoperator(700, Xfx, L"<"); addoperator(700, Xfx, L"=<"); addoperator(700, Xfx, L">"); addoperator(700, Xfx, L">="); addoperator(700, Xfx, L"=.."); addoperator(600, Xfy, L":"); addoperator(500, Yfx, L"+"); addoperator(500, Yfx, L"-"); addoperator(500, Yfx, L"/\\"); addoperator(500, Yfx, L"\\/"); addoperator(400, Yfx, L"*"); addoperator(400, Yfx, L"/"); addoperator(400, Yfx, L"//"); addoperator(400, Yfx, L"rem"); addoperator(400, Yfx, L"mod"); addoperator(400, Yfx, L"<<"); addoperator(400, Yfx, L">>"); addoperator(200, Xfx, L"**"); addoperator(200, Xfy, L"^"); addoperator(200, Fy, L"-"); addoperator(200, Fy, L"\\"); } void addoperator(int level, int type, Rune *spelling) { /* the operator table is never garbage collected, so just use normal malloc */ Operator *op = malloc(sizeof(Operator)); op->type = type; op->level = level; op->spelling = spelling; op->next = operators[level-1]; operators[level-1] = op; } Operator * getoperator(Rune *spelling) { Operator *op = nil; int level; if(spelling == nil) return nil; for(level = 0; level < PrecedenceLevels && op == nil; level++){ Operator *tmp; for(tmp = operators[level]; tmp != nil; tmp = tmp->next){ if(runestrcmp(tmp->spelling, spelling) == 0){ if(op == nil){ op = gmalloc(sizeof(Operator)); memcpy(op, tmp, sizeof(Operator)); }else op->type |= tmp->type; } } } return op; } void nexttoken(void) { Rune buf[1024]; Rune peek; int i = 0; double numD; vlong numI = 0; int negative = 0; static long replaypeek = -1; if(replaypeek == -1) peek = Bgetrune(parsein); else{ peek = replaypeek; replaypeek = -1; } SkipWhite: /* Skip whitespace */ while(isspacerune(peek)) peek = Bgetrune(parsein); /* Skip single line comments */ if(peek == L'%'){ while(peek != L'\n') peek = Bgetrune(parsein); goto SkipWhite; } /* Variables */ if(peek == L'_' || isupperrune(peek)){ while(peek == L'_' || isalpharune(peek) || isdigitrune(peek)){ /* FIXME Should handle input length, also elsewhere in this function */ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = VarTok; lookahead.text = runestrdup(buf); return; } /* Strings */ if(peek == L'"'){ peek = Bgetrune(parsein); while(peek != L'"'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; lookahead.tag = StringTok; lookahead.text = runestrdup(buf); return; } /* Numbers */ if(peek == L'-'){ peek = Bgetrune(parsein); if(isdigitrune(peek)) negative = 1; else{ Bungetrune(parsein); peek = L'-'; } } if(isdigitrune(peek)){ if(peek == L'0'){ peek = Bgetrune(parsein); switch(peek){ case L'o': /* Octal */ case L'x': /* Hexadecimal */ case L'b': /* Binary */ print("Cant lex numbers in other bases than base 10 right now\n"); exits("lexer"); } } while(isdigitrune(peek)){ numI *= 10; numI += peek - L'0'; peek = Bgetrune(parsein); } if(peek == L'.'){ int place = 1; numD = numI; peek = Bgetrune(parsein); if(!isdigitrune(peek)){ Bungetrune(parsein); replaypeek = L'.'; goto Integer; } while(isdigitrune(peek)){ double addition = (peek - L'0') / (double)(10 * place); numD += addition; peek = Bgetrune(parsein); place *= 10; } Bungetrune(parsein); /* Should also lex 123.45E10 */ lookahead.tag = FloatTok; lookahead.dval = negative ? -numD : numD; }else{ Bungetrune(parsein); Integer: lookahead.tag = IntTok; lookahead.ival = negative ? -numI : numI; } return; } /* Atoms */ /* Atoms in single quotes */ if(peek == L'\''){ peek = Bgetrune(parsein); while(peek != L'\''){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Special atom [] */ if(peek == L'['){ peek = Bgetrune(parsein); if(peek == L']'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"[]"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = SquareBracketLeftTok; return; } } /* Special atom {} */ if(peek == L'{'){ peek = Bgetrune(parsein); if(peek == L'}'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"{}"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = CurlyBracketLeftTok; return; } } /* Graphic atom */ Rune *graphics = L"#$&*+-./:<=>?@^~\\"; if(runestrchr(graphics, peek)){ while(runestrchr(graphics, peek)){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* simple atom */ if(islowerrune(peek)){ while(isalpharune(peek) || isdigitrune(peek) || peek == L'_'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Other */ if(runestrchr(L",.()]}|!;", peek)){ switch(peek){ case L',': lookahead.tag = CommaTok; break; case L'(': lookahead.tag = ParenLeftTok; break; case L')': lookahead.tag = ParenRightTok; break; case L']': lookahead.tag = SquareBracketRightTok; break; case L'}': lookahead.tag = CurlyBracketRightTok; break; case L'|': lookahead.tag = PipeTok; break; case L'!': lookahead.tag = AtomTok; lookahead.text = runestrdup(L"!"); break; case L';': lookahead.tag = AtomTok; lookahead.text = runestrdup(L";"); break; } return; } /* EOF */ if(peek == Beof){ lookahead.tag = EofTok; return; } print("Could not lex rune %C (%ud)\n", peek, peek); exits("lexer"); Functor: if(peek == Beof){ Bgetrune(parsein); return; } peek = Bgetrune(parsein); if(peek == L'(') lookahead.tag = FunctorTok; Bungetrune(parsein); return; } void match(int tag) { if(lookahead.tag == tag) nexttoken(); else syntaxerror_parser("match"); } void syntaxerror_parser(char *where) { print("Syntax error: Unexpected %d (%S) token in %s\n", lookahead.tag, lookahead.text, where); exits("syntax error"); }