ref: 0f347162b74d945f509955b6c57e506ab800db7b
dir: /parser.c/
#include <u.h> #include <libc.h> #include <bio.h> #include "dat.h" #include "fns.h" typedef struct Token Token; typedef struct OpInfo OpInfo; struct Token { int tag; Rune *text; double dval; vlong ival; }; struct OpInfo { int level; int type; }; enum { AtomTok = 1<<0, /* 1 */ FunctorTok = 1<<1, /* 2 */ VarTok = 1<<2, /* 4 */ StringTok = 1<<3, /* 8 */ IntTok = 1<<4, /* 16 */ FloatTok = 1<<5, /* 32 */ ParenLeftTok = 1<<6, /* 64 */ ParenRightTok = 1<<7, /* 128 */ SquareBracketLeftTok = 1<<8, /* 256 */ SquareBracketRightTok = 1<<9, /* 512 */ CurlyBracketLeftTok = 1<<10, /* 1024 */ CurlyBracketRightTok = 1<<11, /* 2048 */ CommaTok = 1<<12, /* 4096 */ PipeTok = 1<<13, /* 8192 */ EofTok = 1<<14, /* 16384 */ }; static Biobuf *parsein; static Token lookahead; static Module *currentmod; void nexttoken(void); Term *fullterm(int, Rune *, Term *); Term *term(void); Term *listterm(int); Term *curlybracketterm(void); Term *compound(void); Term *parseoperators(Term *); void match(int); void syntaxerror_parser(char *); Term *prologtext(int); void handlemoduledirective(Term *); void handleopdirective(Term *); Term * parse(int fd, Biobuf *bio, int querymode) { if(bio == nil){ fd = dup(fd, -1); parsein = Bfdopen(fd, OREAD); if(parsein == nil){ print("Could not open file\n"); return nil; } }else parsein = bio; nexttoken(); currentmod = usermodule; Term *result = prologtext(querymode); if(querymode && result){ result = copyterm(result, &clausenr); clausenr++; } if(!bio) Bterm(parsein); return result; } Term * prologtext(int querymode) { if(lookahead.tag == EofTok) return nil; Term *t = fullterm(AtomTok, L".", nil); if(lookahead.tag == AtomTok && runestrcmp(lookahead.text, L".") == 0){ if(!querymode) match(AtomTok); }else syntaxerror_parser("prologtext"); if(querymode) return t; if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 1){ Term *body = t->children; if(runestrcmp(body->text, L"module") == 0 && body->arity == 2){ handlemoduledirective(body->children); t->next = prologtext(querymode); return t; } else if(runestrcmp(body->text, L"op") == 0 && body->arity == 3) handleopdirective(body->children); t = prologtext(querymode); }else if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 2){ t->next = prologtext(querymode); }else if(t->tag == AtomTerm || t->tag == CompoundTerm){ t->next = prologtext(querymode); }else{ print("Expected directive or clause as toplevel\n"); syntaxerror_parser("prologtext"); } return t; } Term * fullterm(int stoptag, Rune *stoptext, Term *list) { if((lookahead.tag & stoptag) && (stoptext == nil || runestrcmp(stoptext, lookahead.text) == 0)) return parseoperators(list); Term *t = term(); list = appendterm(list, t); return fullterm(stoptag, stoptext, list); } Term * term(void) { Term *result; switch(lookahead.tag){ case AtomTok: result = mkatom(lookahead.text); match(AtomTok); break; case FunctorTok: result = compound(); break; case VarTok: result = mkvariable(lookahead.text); match(VarTok); break; case IntTok: result = mkinteger(lookahead.ival); match(IntTok); break; case FloatTok: result = mkfloat(lookahead.dval); match(FloatTok); break; case CommaTok: result = mkatom(L","); match(CommaTok); break; case SquareBracketLeftTok: result = listterm(1); break; case CurlyBracketLeftTok: result = curlybracketterm(); break; case ParenLeftTok: match(ParenLeftTok); result = fullterm(ParenRightTok, nil, nil); result->inparens = 1; match(ParenRightTok); break; case StringTok: result = mkstring(lookahead.text); match(StringTok); break; default: print("Can't parse term of token type %d\n", lookahead.tag); syntaxerror_parser("term"); result = nil; } return result; } Term * listterm(int start) { if(start) match(SquareBracketLeftTok); if(lookahead.tag == SquareBracketRightTok){ match(SquareBracketRightTok); return mkatom(L"[]"); }else if(lookahead.tag == PipeTok){ match(PipeTok); Term *t = fullterm(SquareBracketRightTok, nil, nil); match(SquareBracketRightTok); return t; }else{ Term *t = fullterm(CommaTok|PipeTok|SquareBracketRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); t->next = listterm(0); return mkcompound(L".", 2, t); } } Term * curlybracketterm(void) { match(CurlyBracketLeftTok); Term *result = fullterm(CurlyBracketRightTok, nil, nil); match(CurlyBracketRightTok); return mkcompound(L"{}", 1, result); } Term * compound(void) { Rune *name = lookahead.text; Term *args = nil; int arity = 0; match(FunctorTok); match(ParenLeftTok); while(lookahead.tag != ParenRightTok){ Term *t = fullterm(CommaTok|ParenRightTok, nil, nil); if(lookahead.tag == CommaTok) match(CommaTok); args = appendterm(args, t); arity++; } match(ParenRightTok); return mkcompound(name, arity, args); } Term * parseoperators(Term *list) { int i; int length = termslength(list); Term *t; Term **terms = gmalloc(sizeof(Term *) * length); OpInfo *infos = gmalloc(sizeof(OpInfo) * length); for(i = 0, t = list; i < length; i++){ Operator *op = getoperator(t->text, currentmod); if(op && t->tag == AtomTerm && !t->inparens){ infos[i].type = op->type; infos[i].level = op->level; }else{ infos[i].type = 0; infos[i].level = 0; } terms[i] = t; Term *tmp = t; t = t->next; tmp->next = nil; } while(length > 1){ int index = -1; int lowest = PrecedenceLevels + 1; for(i = 0; i < length; i++){ if(infos[i].type == 0) continue; if(infos[i].level == lowest && infos[i].type == Xfy) index = i; else if(infos[i].level < lowest){ index = i; lowest = infos[i].level; } } if(index == -1){ print("Can't parse, list of length %d contains no operators: ", length); for(i = 0; i < length; i++) print("%S(%d) ", prettyprint(terms[i], 0, 1, 0, currentmod), infos[i].level); print("\n"); syntaxerror_parser("parseoperators"); } int infixlevel = infos[index].type & (Xfx|Xfy|Yfx); int prefixlevel = infos[index].type & (Fx|Fy); int postfixlevel = infos[index].type & (Xf|Yf); if(infixlevel && index != 0 && index != length-1 && infos[index-1].type == 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; Term *args = terms[index-1]; args->next = terms[index+1]; terms[index-1] = mkcompound(terms[index]->text, 2, args); length -= 2; for(i = index; i < length; i++){ infos[i].level = infos[i+2].level; infos[i].type = infos[i+2].type; terms[i] = terms[i+2]; } }else if(prefixlevel && index != length-1 && infos[index+1].type == 0){ infos[index].type = 0; terms[index] = mkcompound(terms[index]->text, 1, terms[index+1]); length--; for(i = index+1; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else if(postfixlevel && index != 0 && infos[index-1].type == 0){ infos[index-1].type = 0; infos[index-1].level = infos[index].level; terms[index-1] = mkcompound(terms[index]->text, 1, terms[index-1]); length--; for(i = index; i < length; i++){ infos[i].level = infos[i+1].level; infos[i].type = infos[i+1].type; terms[i] = terms[i+1]; } }else{ print("Parse error when parsing operator %S (prefix=%d, postfix=%d, infix=%d level=%d)\n", prettyprint(terms[index], 0, 0, 0, currentmod), prefixlevel, postfixlevel, infixlevel, infos[index].level); syntaxerror_parser("parseoperators"); } } Term *result = terms[0]; return result; } void nexttoken(void) { Rune buf[1024]; Rune peek; int i = 0; double numD; vlong numI = 0; int negative = 0; static long replaypeek = -1; if(replaypeek == -1) peek = Bgetrune(parsein); else{ peek = replaypeek; replaypeek = -1; } SkipWhite: /* Skip whitespace */ while(isspacerune(peek)) peek = Bgetrune(parsein); /* Skip single line comments */ if(peek == L'%'){ while(peek != L'\n') peek = Bgetrune(parsein); goto SkipWhite; } /* Variables */ if(peek == L'_' || isupperrune(peek)){ while(peek == L'_' || isalpharune(peek) || isdigitrune(peek)){ /* FIXME Should handle input length, also elsewhere in this function */ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = VarTok; lookahead.text = runestrdup(buf); return; } /* Strings */ if(peek == L'"'){ peek = Bgetrune(parsein); while(peek != L'"'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; lookahead.tag = StringTok; lookahead.text = runestrdup(buf); return; } /* Numbers */ if(peek == L'-'){ peek = Bgetrune(parsein); if(isdigitrune(peek)) negative = 1; else{ Bungetrune(parsein); peek = L'-'; } } if(isdigitrune(peek)){ if(peek == L'0'){ peek = Bgetrune(parsein); switch(peek){ case L'\'': /* Character code */ peek = Bgetrune(parsein); lookahead.tag = IntTok; lookahead.ival = peek; return; case L'o': /* Octal */ case L'x': /* Hexadecimal */ case L'b': /* Binary */ print("Cant lex numbers in other bases than base 10 right now\n"); exits("lexer"); } } while(isdigitrune(peek)){ numI *= 10; numI += peek - L'0'; peek = Bgetrune(parsein); } if(peek == L'.'){ int place = 1; numD = numI; peek = Bgetrune(parsein); if(!isdigitrune(peek)){ Bungetrune(parsein); replaypeek = L'.'; goto Integer; } while(isdigitrune(peek)){ double addition = (peek - L'0') / (double)(10 * place); numD += addition; peek = Bgetrune(parsein); place *= 10; } Bungetrune(parsein); /* Should also lex 123.45E10 */ lookahead.tag = FloatTok; lookahead.dval = negative ? -numD : numD; }else{ Bungetrune(parsein); Integer: lookahead.tag = IntTok; lookahead.ival = negative ? -numI : numI; } return; } /* Atoms */ /* Atoms in single quotes */ if(peek == L'\''){ peek = Bgetrune(parsein); while(peek != L'\''){ QuotedAtomLoop: buf[i++] = peek; peek = Bgetrune(parsein); } peek = Bgetrune(parsein); if(peek == L'\'') goto QuotedAtomLoop; else Bungetrune(parsein); buf[i] = '\0'; lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Special atom [] */ if(peek == L'['){ peek = Bgetrune(parsein); if(peek == L']'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"[]"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = SquareBracketLeftTok; return; } } /* Special atom {} */ if(peek == L'{'){ peek = Bgetrune(parsein); if(peek == L'}'){ lookahead.tag = AtomTok; lookahead.text = runestrdup(L"{}"); goto Functor; }else{ Bungetrune(parsein); lookahead.tag = CurlyBracketLeftTok; return; } } /* Graphic atom */ Rune *graphics = L"#$&*+-./:<=>?@^~\\"; /* keep in sync with prettyprint*/ if(runestrchr(graphics, peek)){ while(runestrchr(graphics, peek)){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* simple atom */ if(islowerrune(peek)){ while(isalpharune(peek) || isdigitrune(peek) || peek == L'_'){ buf[i++] = peek; peek = Bgetrune(parsein); } buf[i] = '\0'; Bungetrune(parsein); lookahead.tag = AtomTok; lookahead.text = runestrdup(buf); goto Functor; } /* Other */ if(runestrchr(L",.()]}|!;", peek)){ switch(peek){ case L',': lookahead.tag = CommaTok; break; case L'(': lookahead.tag = ParenLeftTok; break; case L')': lookahead.tag = ParenRightTok; break; case L']': lookahead.tag = SquareBracketRightTok; break; case L'}': lookahead.tag = CurlyBracketRightTok; break; case L'|': lookahead.tag = PipeTok; break; case L'!': lookahead.tag = AtomTok; lookahead.text = runestrdup(L"!"); break; case L';': lookahead.tag = AtomTok; lookahead.text = runestrdup(L";"); break; } return; } /* EOF */ if(peek == Beof){ lookahead.tag = EofTok; return; } print("Could not lex rune %C (%ud)\n", peek, peek); exits("lexer"); Functor: if(peek == Beof){ Bgetrune(parsein); return; } peek = Bgetrune(parsein); if(peek == L'(') lookahead.tag = FunctorTok; Bungetrune(parsein); return; } void match(int tag) { if(lookahead.tag == tag) nexttoken(); else syntaxerror_parser("match"); } void syntaxerror_parser(char *where) { print("Syntax error: Unexpected %d (%S) token in %s\n", lookahead.tag, lookahead.text, where); exits("syntax error"); } void handlemoduledirective(Term *args) { Term *modulename = args; Term *publiclist = modulename->next; USED(publiclist); if(modulename->tag != AtomTerm){ print("Module name should be an atom in: %S\n", prettyprint(modulename, 0, 0, 0, currentmod)); return; } currentmod = addemptymodule(modulename->text); } void handleopdirective(Term *args) { Term *levelt = args; Term *typet = levelt->next; Term *opt = typet->next; if(levelt->tag == IntegerTerm && levelt->ival >= 0 && levelt->ival <= PrecedenceLevels && typet->tag == AtomTerm && opt->tag == AtomTerm){ int level = levelt->ival; Rune *spelling = opt->text; int type = 0; if(runestrcmp(typet->text, L"xf") == 0) type = Xf; else if(runestrcmp(typet->text, L"yf") == 0) type = Yf; else if(runestrcmp(typet->text, L"xfx") == 0) type = Xfx; else if(runestrcmp(typet->text, L"xfy") == 0) type = Xfy; else if(runestrcmp(typet->text, L"yfx") == 0) type = Yfx; else if(runestrcmp(typet->text, L"fy") == 0) type = Fy; else if(runestrcmp(typet->text, L"fx") == 0) type = Fx; if(type != 0){ addoperator(level, type, spelling, currentmod); return; } } print("Malformed op directive with level=%S, type=%S, op=%S\n", prettyprint(levelt, 0, 0, 0, currentmod), prettyprint(typet, 0, 0, 0, currentmod), prettyprint(opt, 0, 0, 0, currentmod)); }