shithub: lpa

Download patch

ref: dcb761aebd2815fdf2d04c0b05724292de9dc98b
parent: ad872eeb19b4fcc41a5d34750ca6cdcf88a39795
author: Peter Mikkelsen <peter@pmikkelsen.com>
date: Thu Jul 18 16:57:49 EDT 2024

Start working on a parser

--- a/aplan.c
+++ /dev/null
@@ -1,32 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <thread.h>
-
-#include "dat.h"
-#include "fns.h"
-
-Array *
-parseaplan(TokenList *tokens, char **errp)
-{
-	/* TODO: write a recursive descent parser for APLAN here. */
-	Array *val;
-
-	int ok = 1;
-	for(uvlong i = 0; i < tokens->count; i++)
-		ok &= tokens->tokens[i].tag == TokNumber;
-	if(!ok){
-		*errp = "can only parse simple constants";
-		return nil;
-	}
-
-	if(tokens->count == 1){
-		val = allocarray(TypeNumber, 0, 1);
-		setint(val, 0, tokens->tokens[0].num);
-	}else{
-		val = allocarray(TypeNumber, 1, tokens->count);
-		setshape(val, 0, tokens->count);
-		for(uvlong i = 0; i < tokens->count; i++)
-			setint(val, i, tokens->tokens[i].num);
-	}
-	return val;
-}
\ No newline at end of file
--- a/array.c
+++ b/array.c
@@ -12,6 +12,7 @@
 struct Array
 {
 	int rank;
+	usize size;
 	usize *shape;
 	union {
 		void *data;
@@ -31,6 +32,7 @@
 {
 	Array *a = alloc(DataArray);
 	a->rank = rank;
+	a->size = size;
 
 	switch(type){
 	case TypeNumber:
@@ -57,4 +59,19 @@
 setshape(Array *a, int dim, usize size)
 {
 	a->shape[dim] = size;
+}
+
+char *
+printarray(Array *a)
+{
+	/* TODO: this is just for debugging */
+	char buf[2048];
+	char *p = buf;
+	p += sprint(p, "type: %s shape: ", "numeric");
+	for(int i = 0; i < a->rank; i++)
+		p += sprint(p, "%lld ", a->shape[i]);
+	p += sprint(p, "data: ");
+	for(uvlong i = 0; i < a->size; i++)
+		p += sprint(p, "%lld ", a->intdata[i]);
+	return buf;
 }
\ No newline at end of file
--- a/dat.h
+++ b/dat.h
@@ -96,6 +96,12 @@
 	void **items;
 };
 
+enum Primitive
+{
+	PrimPlus,
+	PrimMinus,
+};
+
 enum TokenTag
 {
 	TokNumber,
@@ -104,8 +110,16 @@
 	TokRparen,
 	TokLbrack,
 	TokRbrack,
+	TokLbrace,
+	TokRbrace,
 	TokNewline,
 	TokDiamond,
+	TokPrimitive,
+	TokDel,
+	TokLarrow,
+	TokSemi,
+
+	TokEnd,
 };
 
 typedef struct Token Token;
@@ -112,9 +126,11 @@
 struct Token
 {
 	int tag;
+	int nameclass;
 	union {
 		vlong num; /* TokNumber */
 		char *name; /* TokName: UTF-8 encoded name */
+		int prim; /* TokPrimitive */
 	};
 };
 
@@ -123,6 +139,10 @@
 {
 	uvlong count;
 	Token *tokens;
+
+	uvlong offset;
+	jmp_buf errbuf;
+	char *err;
 };
 
 enum ArrayType
@@ -134,8 +154,47 @@
 typedef struct Array Array;
 #pragma incomplete Array
 
+enum AstTag
+{
+	AstProg,
+	AstFunc,
+	AstName,
+	AstLocals,
+	AstAssign,
+	AstMonadic,
+	AstDyadic,
+	AstConst,
+	AstPrim,
+};
+
 typedef struct Ast Ast;
 struct Ast
 {
-	int lol;
+	int tag;
+	char *name;
+	int nameclass;
+	int prim;
+
+	Ast *funcname;
+	Ast *funcresult;
+	Ast *funcleftarg;
+	Ast *funcrightarg;
+	Ast *funclocals;
+
+	Ast *func;
+	Ast *left;
+	Ast *right;
+
+	Array *val;
+
+	uvlong childcount;
+	Ast **children;
+};
+
+enum Nameclass
+{
+	NameclassUndef, /* Unknown name */
+	NameclassLocal, /* Local variable, but no value yet */
+	NameclassArray, /* Array value */
+	NameclassFunc, /* Function value */
 };
\ No newline at end of file
--- a/fns.h
+++ b/fns.h
@@ -1,6 +1,3 @@
-/* aplan.c */
-Array *parseaplan(TokenList *, char **);
-
 /* array.c */
 void initarrays(void);
 Array *allocarray(int, int, usize);
@@ -7,6 +4,8 @@
 void setint(Array *, usize, vlong);
 void setshape(Array *, int, usize);
 
+char *printarray(Array *);
+
 /* fs.c */
 Qid freshobjqid(void);
 void startfs(char *, char *);
@@ -33,12 +32,13 @@
 void appendlog(Session *s, char *data);
 
 /* symtab.c */
-Symtab *allocsymtab(int);
+Symtab *allocsymtab(void);
 uvlong sym(Symtab *, char *);
 char *symname(Symtab *, uvlong);
 void *symval(Symtab *, uvlong);
-Qid symqid(Symtab *, uvlong);
 void symset(Symtab *, uvlong, void *);
+Symbol *symptr(Symtab *, uvlong);
+
 Enumeration *enumsymbols(Symtab *);
 
 /* systemcmd.c */
@@ -47,9 +47,9 @@
 /* util.c */
 Enumeration *allocenum(uvlong);
 void trim(char *);
+void debugast(Ast *, int);
 
 /* value.c */
 char *printval(void *);
 void *parseval(char *, char **);
 
-void *init_quadio(void);
--- a/fs.c
+++ b/fs.c
@@ -556,14 +556,17 @@
 	char *err = nil;
 	Aux *aux = r->fid->aux;
 	Module *m = aux->module;
-	uvlong symb;
+	uvlong symid;
+	Symbol *symb;
 
 	switch(QID_TYPE(r->fid->qid)){
 	case Qmodule: /* create a new symbol */
-		symb = sym(m->symtab, r->ifcall.name);
-		if(symb == -1)
+		symid = sym(m->symtab, r->ifcall.name);
+		if(symid == -1)
 			err = Einvalidname;
-		r->fid->qid = r->ofcall.qid = symqid(m->symtab, symb);
+		symb = symptr(m->symtab, symid);
+		aux->symbol = symb;
+		r->fid->qid = r->ofcall.qid = symb->qsymbol;
 		break;
 	default:
 		err = Ecreate;
--- a/mkfile
+++ b/mkfile
@@ -3,7 +3,6 @@
 TARG=lpafs
 SCRIPTS=lpa
 OFILES=\
-	aplan.$O\
 	array.$O\
 	fs.$O\
 	main.$O\
--- a/module.c
+++ b/module.c
@@ -12,7 +12,7 @@
 
 	Module *m = alloc(DataModule);
 	m->name = strdup(name);
-	m->symtab = allocsymtab(1);
+	m->symtab = allocsymtab();
 	m->id = id++;
 
 	wlock(&s->modules->lock);
--- a/parse.c
+++ b/parse.c
@@ -5,11 +5,332 @@
 #include "dat.h"
 #include "fns.h"
 
+static void _Noreturn error(TokenList *, char *);
+static int peek(TokenList *);
+static int peekclass(TokenList *);
+static void match(TokenList *, int);
+static void addchild(Ast *, Ast *);
+static int issep(TokenList *t);
+static int nameclass(char *, Ast *);
+
+static void parsesep(TokenList *);
+static void parseseps(TokenList *, int);
+static Ast *parseprog(TokenList *);
+static Ast *parsefuncdef(TokenList *);
+static Ast *parsefuncheader(TokenList *);
+static Ast *parselocals(TokenList *);
+static Ast *parseexpr(TokenList *, Ast *);
+static Ast *parseexprsub(TokenList *);
+static Ast *parseline(TokenList *);
+static Ast *parsename(TokenList *);
+static Ast *parsefunc(TokenList *);
+static Ast *parseconst(TokenList *);
+
 Ast *
 parse(TokenList *tokens, char **errp)
 {
-	/* Ast *ast = alloc(DataAst); */
-	USED(tokens);
-	*errp = "parsing not implemented yet";
-	return nil;
-}
\ No newline at end of file
+	Ast *ast;
+
+	tokens->offset = 0;
+	tokens->err = nil;
+	if(setjmp(tokens->errbuf)){
+		*errp = tokens->err;
+		return nil;
+	}else{
+		ast = parseprog(tokens);
+		match(tokens, TokEnd);
+	}
+	return ast;
+}
+
+static void _Noreturn
+error(TokenList *tokens, char *msg)
+{
+	tokens->err = msg;
+	longjmp(tokens->errbuf, 1);
+}
+
+static int
+peek(TokenList *tokens)
+{
+	if(tokens->offset >= tokens->count)
+		error(tokens, "unexpected end of token stream");
+
+	return tokens->tokens[tokens->offset].tag;
+}
+
+static int
+peekclass(TokenList *tokens)
+{
+	peek(tokens); /* triggers an error if we are at the end */
+	return tokens->tokens[tokens->offset].nameclass;
+}
+
+static void
+match(TokenList *tokens, int tag)
+{
+	if(peek(tokens) != tag)
+		error(tokens, "Unexpected token (match failed)");
+	tokens->offset++;
+}
+
+static void
+addchild(Ast *ast, Ast *child)
+{
+	ast->childcount++;
+	ast->children = allocextra(ast, sizeof(Ast *) * ast->childcount);
+	ast->children[ast->childcount-1] = child;
+}
+
+static int
+issep(TokenList *t)
+{
+	switch(peek(t)){
+	case TokNewline:
+	case TokDiamond:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int
+nameclass(char *name, Ast *func)
+{
+	int class;
+
+	if(func == nil)
+		class = NameclassUndef;
+	else if(strcmp(name, func->funcname->name) == 0)
+		class = NameclassFunc;
+	else if(func->funcresult && strcmp(name, func->funcresult->name) == 0)
+		class = NameclassLocal;
+	else if(func->funcleftarg && strcmp(name, func->funcleftarg->name) == 0)
+		class = NameclassLocal;
+	else if(func->funcrightarg && strcmp(name, func->funcrightarg->name) == 0)
+		class = NameclassLocal;
+	else{
+		/* Check if the name exist in the locallist */
+		class = NameclassUndef;
+	}
+	return class;
+}
+
+static void
+parsesep(TokenList *t)
+{
+	if(issep(t))
+		match(t, peek(t));
+}
+
+static void
+parseseps(TokenList *t, int required)
+{
+	while(issep(t)){
+		match(t, peek(t));
+		required = 0;
+	}
+	if(required)
+		match(t, TokNewline);
+}
+
+static Ast *
+parseprog(TokenList *t)
+{
+	Ast *prog = alloc(DataAst);
+	prog->tag = AstProg;
+
+	while(peek(t) != TokEnd){
+		Ast *child;
+		if(peek(t) == TokDel)
+			child = parsefuncdef(t);
+		else
+			child = parseexpr(t, nil);
+		print("After expr: %d\n", peek(t));
+		if(peek(t) != TokEnd)
+			parseseps(t, 1);
+		addchild(prog, child);
+	}
+	print("got prog, peek is: %d\n", peek(t));
+	return prog;
+}
+
+static Ast *
+parsefuncdef(TokenList *t)
+{
+	Ast *func = parsefuncheader(t);
+	while(peek(t) != TokDel){
+		Ast *expr = parseexpr(t, func);
+		addchild(func, expr);
+		if(peek(t) != TokDel)
+			parseseps(t, 1);
+	}
+	match(t, TokDel);
+	return func;
+}
+
+static Ast *
+parsefuncheader(TokenList *t)
+{
+	Ast *func = alloc(DataAst);
+	func->tag = AstFunc;
+
+	match(t, TokDel);
+
+	func->funcname = parsename(t);
+	if(peek(t) == TokLarrow){
+		match(t, TokLarrow);
+		func->funcresult = func->funcname;
+		func->funcname = parsename(t);
+	}
+	if(peek(t) == TokName)
+		func->funcrightarg = parsename(t);
+	if(peek(t) == TokName){
+		func->funcleftarg = func->funcname;
+		func->funcname = func->funcrightarg;
+		func->funcrightarg = parsename(t);
+	}
+	func->funclocals = parselocals(t);
+
+	return func;
+}
+
+static Ast *
+parselocals(TokenList *t)
+{
+	Ast *locals = alloc(DataAst);
+	locals->tag = AstLocals;
+	while(peek(t) == TokSemi){
+		match(t, TokSemi);
+		Ast *name = parsename(t);
+		name->nameclass = NameclassLocal;
+		addchild(locals, name);
+	}
+	parseseps(t, 1);
+	return locals;
+}
+
+static Ast *
+parseexpr(TokenList *t, Ast *func)
+{
+	uvlong start, end;
+	vlong depth;
+
+	depth = 0;
+	start = t->offset;
+	while(!(issep(t) || (peek(t) == TokDel) || (peek(t) == TokEnd)) || depth != 0){
+		switch(peek(t)){
+		case TokLparen: depth++; break;
+		case TokRparen: depth--; break;
+		}
+		match(t, peek(t));
+	}
+	end = t->offset;
+	t->offset = start;
+
+	for(uvlong i = start; i < end; i++){
+		char *name;
+		int class;
+
+		if(t->tokens[i].tag != TokName)
+			continue;
+		name = t->tokens[i].name;
+		class = nameclass(name, func);
+		t->tokens[i].nameclass = class;
+		if(class == 0)
+			error(t, "parseexpr() can't deal with free variables yet");
+	}
+
+	/* We know the nameclass of each name, and assume that the nameclasses do not change.
+	 * Now create the AST.
+	 */
+	return parseexprsub(t);
+}
+
+static Ast *
+parseexprsub(TokenList *t)
+{
+	Ast *expr, *val;
+
+	if(peek(t) == TokLparen){
+		match(t, TokLparen);
+		val = parseexprsub(t);
+		match(t, TokRparen);
+	}else
+		val = nil;
+
+	if(peekclass(t) == NameclassFunc){
+func:
+		expr = alloc(DataAst);
+		if(val){
+			expr->tag = AstDyadic;
+			expr->left = val;
+		}else
+			expr->tag = AstMonadic;
+		expr->func = parsefunc(t);
+		expr->right = parseexprsub(t);
+		return expr;
+	}
+
+	if(peek(t) == TokName){
+		val = parsename(t);
+		if(peek(t) == TokLarrow){
+			match(t, TokLarrow);
+			expr = alloc(DataAst);
+			expr->tag = AstAssign;
+			expr->left = val;
+			expr->right = parseexprsub(t);
+			return expr;
+		}
+	}
+
+	/* We need a value now. Stranding is not implemented */
+	if(val == nil)
+		val = parseconst(t);
+
+	if(peekclass(t) == NameclassFunc)
+		goto func;
+	
+	return val;
+}
+
+static Ast *
+parsename(TokenList *t)
+{
+	Ast *name = alloc(DataAst);
+	name->tag = AstName;
+	name->name = t->tokens[t->offset].name;
+	match(t, TokName);
+	return name;
+}
+
+static Ast *
+parsefunc(TokenList *t)
+{
+	/* TODO: parse primitives as well */
+	Ast *func;
+	if(peek(t) == TokName && peekclass(t) == NameclassFunc)
+		func = parsename(t);
+	else{
+		func = alloc(DataAst);
+		func->tag = AstPrim;
+		func->prim = t->tokens[t->offset].prim;
+		match(t, TokPrimitive);
+	}
+
+	return func;
+}
+
+static Ast *
+parseconst(TokenList *t)
+{
+	Ast *val = alloc(DataAst);
+	val->tag = AstConst;
+
+	vlong num = t->tokens[t->offset].num;
+	match(t, TokNumber);
+	val->val = allocarray(TypeNumber, 0, 1);
+	setint(val->val, 0, num);
+
+	return val;
+}
--- a/scan.c
+++ b/scan.c
@@ -5,6 +5,14 @@
 #include "dat.h"
 #include "fns.h"
 
+struct {
+	Rune spelling;
+	int id;
+} primspecs[] = {
+	L'+', PrimPlus,
+	L'-', PrimMinus,
+};
+
 Token *
 newtok(TokenList *tokens, int tag)
 {
@@ -29,26 +37,32 @@
 
 	while(*cp){
 		n = chartorune(&r, cp);
+		int new = -1;
 		switch(r){
-		case '(':
-			newtok(tokens, TokLparen);
+		case L'(': new = TokLparen; break;
+		case L')': new = TokRparen; break;
+		case L'[': new = TokLbrack; break;
+		case L']': new = TokRbrack; break;
+		case L'{': new = TokLbrace; break;
+		case L'}': new = TokRbrace; break;
+		case L'\n': new = TokNewline; break;
+		case L'⋄': new = TokDiamond; break;
+		case L'∇': new = TokDel; break;
+		case L'←': new = TokLarrow; break;
+		case L';': new = TokSemi; break;
+		}
+		if(new != -1){
+			newtok(tokens, new);
 			goto next;
-		case ')':
-			newtok(tokens, TokRparen);
-			goto next;
-		case '[':
-			newtok(tokens, TokLbrack);
-			goto next;
-		case ']':
-			newtok(tokens, TokRbrack);
-			goto next;
-		case '\n':
-			newtok(tokens, TokNewline);
-			goto next;
-		case L'⋄':
-			newtok(tokens, TokDiamond);
-			goto next;
 		}
+		for(int i = 0; i < nelem(primspecs); i++){
+			if(r == primspecs[i].spelling){
+				tok = newtok(tokens, TokPrimitive);
+				tok->prim = primspecs[i].id;
+				tok->nameclass = NameclassFunc;
+				goto next;
+			}
+		}
 		if(isspacerune(r))
 			goto next;
 		if(isdigitrune(r)){
@@ -59,10 +73,30 @@
 			tok->num = num;
 			goto next;
 		}
+		if(isalpharune(r)){
+			char *start = cp;
+			do{
+				cp += n;
+				n = chartorune(&r, cp);
+			}while(isalpharune(r) || isdigitrune(r));
+			tok = newtok(tokens, TokName);
+			usize size = cp - start;
+			tok->name = malloc(size + 1);
+			memcpy(tok->name, start, size);
+			tok->name[size] = 0;
+			continue;
+		}
 		*errp = "scan error";
 		return nil;
 next:
 		cp += n;
+	}
+	newtok(tokens, TokEnd);
+	if(tokens){
+		print("token tags: ");
+		for(uvlong i = 0; i < tokens->count; i++)
+			print("%d ", tokens->tokens[i].tag);
+		print("\n");
 	}
 	return tokens;
 }
\ No newline at end of file
--- a/session.c
+++ b/session.c
@@ -56,7 +56,7 @@
 			if(err)
 				goto error;
 
-			USED(ast);
+			debugast(ast, 0);
 			appendlog(s, "got an AST but can't evaluate it yet\n");
 		}
 	}
--- a/symtab.c
+++ b/symtab.c
@@ -5,24 +5,10 @@
 #include "dat.h"
 #include "fns.h"
 
-struct {
-	char *name;
-	void *(*init)(void);
-} defaultsyms[] = {
-	{ "⎕IO", init_quadio },
-};
-
 Symtab *
-allocsymtab(int defaults)
+allocsymtab(void)
 {
 	Symtab *s = alloc(DataSymtab);
-	if(defaults){
-		for(uvlong i = 0; i < nelem(defaultsyms); i++){
-			uvlong symb = sym(s, defaultsyms[i].name);
-			symset(s, symb, defaultsyms[i].init());
-		}
-	}
-
 	return s;
 }
 
@@ -77,14 +63,14 @@
 	return value;
 }
 
-Qid
-symqid(Symtab *s, uvlong id)
+Symbol *
+symptr(Symtab *s, uvlong id)
 {
-	Qid qid;
+	Symbol *symb;
 	rlock(&s->lock);
-	qid = s->symbols[id]->qsymbol;
+	symb = s->symbols[id];
 	runlock(&s->lock);
-	return qid;
+	return symb;
 }
 
 void
--- a/util.c
+++ b/util.c
@@ -24,4 +24,90 @@
 		else
 			str[i] = 0;
 	}
+}
+
+static void
+indent(int depth)
+{
+	for(int i = 0; i < depth; i++)
+		print("  ");
+}
+
+static void
+printchild(char *desc, Ast *ast, int depth)
+{
+	indent(depth);
+	print("%s:\n", desc);
+	indent(depth+1);
+	debugast(ast, depth+1);
+}
+
+void
+debugast(Ast *ast, int depth)
+{
+	if(ast == nil){
+		print("<nil>\n");
+		return;
+	}
+
+	int printchildren = 0;
+
+	switch(ast->tag){
+	case AstProg:
+		print("prog\n");
+		printchildren = 1;
+		break;
+	case AstFunc:
+		print("func\n");
+		depth++;
+		printchild("name", ast->funcname, depth);
+		printchild("result", ast->funcresult, depth);
+		printchild("left arg", ast->funcleftarg, depth);
+		printchild("right arg", ast->funcrightarg, depth);
+		printchild("local vars", ast->funclocals, depth);
+		indent(depth); print("body\n");
+		printchildren = 1;
+		break;
+	case AstName:
+		print("\"%s\"\n", ast->name);
+		break;
+	case AstLocals:
+		print("locals\n");
+		printchildren = 1;
+		break;
+	case AstAssign:
+		print("assign\n");
+		printchild("name", ast->left, depth+1);
+		printchild("value", ast->right, depth+1);
+		break;
+	case AstMonadic:
+		print("monadic call\n");
+		printchild("func", ast->func, depth+1);
+		printchild("right", ast->right, depth+1);
+		break;
+	case AstDyadic:
+		print("dyadic call\n");
+		printchild("func", ast->func, depth+1);
+		printchild("left", ast->left, depth+1);
+		printchild("right", ast->right, depth+1);
+		break;
+	case AstConst:
+		print("const:\n");
+		indent(depth+1);
+		print("%s\n", printarray(ast->val));
+		break;
+	case AstPrim:
+		print("prim: %d\n", ast->prim);
+		break;
+	default:
+		print("<ast node %d>\n", ast->tag);
+		break;
+	}
+
+	if(printchildren){
+		for(uvlong i = 0; i < ast->childcount; i++){
+			indent(depth+1);
+			debugast(ast->children[i], depth+1);
+		}
+	}
 }
\ No newline at end of file
--- a/value.c
+++ b/value.c
@@ -18,17 +18,36 @@
 void *
 parseval(char *buf, char **errp)
 {
+	Ast *ast;
 	void *val = nil;
+
 	TokenList *tokens = scan(buf, errp);
-	if(tokens != nil){
-		/* Parse the tokens as a constant. TODO: Support function definitions as well... */
-		val = parseaplan(tokens, errp);
+	if(tokens == nil)
+		goto end;
+	ast = parse(tokens, errp);
+	if(ast == nil)
+		goto end;
+
+	/* Check that the ast is either a single function definition,
+	 * or a single constant.
+	 */
+	if(!(ast->tag == AstProg && ast->childcount == 1)){
+		*errp = "Expected single value or function definition";
+		goto end;
 	}
+
+	ast = ast->children[0];
+	switch(ast->tag){
+	case AstConst:
+		val = ast->val;
+		break;
+	case AstFunc:
+		*errp = "Functions not supported yet";
+		break;
+	default:
+		*errp = "Expected constant or function definition";
+		goto end;
+	}
+end:
 	return val;
 }
-
-void *
-init_quadio(void)
-{
-	return nil;
-}
\ No newline at end of file