shithub: lpa

Download patch

ref: 8dc7c659e34ad5d244477e01b1ebf4def63be24d
parent: ddf9cc77ac875f0045e03dd59f516e302a4fbb7a
author: Peter Mikkelsen <peter@pmikkelsen.com>
date: Sun Jul 21 18:52:38 EDT 2024

Start working on an evaluator

--- a/dat.h
+++ b/dat.h
@@ -11,6 +11,8 @@
 	DataTokenList,
 	DataArray,
 	DataAst,
+	DataByteCode,
+	DataValueStack,
 
 	DataMax,
 };
@@ -167,6 +169,7 @@
 	AstConst,
 	AstPrim,
 	AstStrand,
+	AstLater, /* parse at runtime */
 };
 
 typedef struct Ast Ast;
@@ -189,6 +192,8 @@
 
 	Array *val;
 
+	TokenList *tokens; /* for AstLater */
+
 	uvlong childcount;
 	Ast **children;
 };
@@ -199,4 +204,32 @@
 	NameclassLocal, /* Local variable, but no value yet */
 	NameclassArray, /* Array value */
 	NameclassFunc, /* Function value */
+};
+
+typedef struct ByteCode ByteCode;
+struct ByteCode
+{
+	uvlong count;
+	u8int *instrs;
+};
+
+enum Instr
+{
+	IPushConst,
+	IPushPrim,
+	ILookup,
+	IStrand,
+	IMonadic,
+	IDyadic,
+	IClear,
+	IParse,
+	IDone,
+	IJump,
+};
+
+typedef struct ValueStack ValueStack;
+struct ValueStack
+{
+	uvlong count;
+	void **values;
 };
\ No newline at end of file
--- /dev/null
+++ b/eval.c
@@ -1,0 +1,222 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+
+#include "dat.h"
+#include "fns.h"
+
+static ByteCode *codegen(Session *, Module *, Ast *);
+static void *evalbc(Session *, Module *, ByteCode *);
+
+void
+eval(Session *s, Ast *a)
+{
+	/* Evaluate some ast in module m in session s. */
+	Module *m = s->modules->modules[0]; /* TODO: this isn't nice */
+	ByteCode *code = codegen(s, m, a);
+	void *v = evalbc(s, m, code);
+
+	if(v)
+		appendlog(s, printval(v));
+}
+
+static void
+emitbyte(ByteCode *c, u8int i)
+{
+	c->count += 1;
+	c->instrs = allocextra(c, c->count);
+	c->instrs[c->count-1] = i;
+}
+
+static void
+emituvlong(ByteCode *c, uvlong v)
+{
+	for(int i = 0; i < sizeof(v); i++)
+		emitbyte(c, (v>>(8*i)) & 0xFF);
+}
+
+static void
+emitptr(ByteCode *c, void *p)
+{
+	emituvlong(c, (uvlong)p);
+}
+
+static void
+codegensub(Session *s, Module *m, ByteCode *c, Ast *a)
+{
+	char *err;
+	uvlong i;
+
+	switch(a->tag){
+	case AstProg:
+		for(i = 0; i < a->childcount; i++){
+			if(i != 0)
+				emitbyte(c, IClear);
+			codegensub(s, m, c, a->children[i]);
+		}
+		emitbyte(c, IDone);
+		break;
+	case AstName:
+		emitbyte(c, ILookup);
+		emituvlong(c, sym(m->symtab, a->name));
+		break;
+	case AstConst:
+		emitbyte(c, IPushConst);
+		emitptr(c, a->val); /* TODO: better to have consts array and emit index? */
+		break;
+	case AstStrand:
+		/* right to left */
+		for(i = a->childcount; i > 0; i--)
+			codegensub(s, m, c, a->children[i-1]);
+		emitbyte(c, IStrand);
+		emituvlong(c, a->childcount);
+		break;
+	case AstMonadic:
+		codegensub(s, m, c, a->right);
+		codegensub(s, m, c, a->func);
+		emitbyte(c, IMonadic);
+		break;
+	case AstDyadic:
+		codegensub(s, m, c, a->right);
+		codegensub(s, m, c, a->left);
+		codegensub(s, m, c, a->func);
+		emitbyte(c, IDyadic);
+		break;
+	case AstPrim:
+		emitbyte(c, IPushPrim);
+		emituvlong(c, a->prim); /* TODO: waste of space */
+		break;
+	case AstLater:
+		emitbyte(c, IParse);
+		emitptr(c, a->tokens);
+		break;
+	default:
+		err = smprint("Don't know how to do codegen for ast type %d\n", a->tag);
+		appendlog(s, err);
+		free(err);
+		break;
+	}
+
+}
+
+static ByteCode *
+codegen(Session *s, Module *m, Ast *a)
+{
+	ByteCode *c = alloc(DataByteCode);
+	codegensub(s, m, c, a);
+	return c;
+}
+
+static void
+pushval(ValueStack *s, void *v)
+{
+	s->count += 1;
+	s->values = allocextra(s, s->count * sizeof(v));
+	s->values[s->count-1] = v;
+}
+
+static void *
+popval(ValueStack *s)
+{
+	if(s->count == 0)
+		sysfatal("popval on empty value stack");
+	s->count--; /* no realloc */
+	return s->values[s->count];
+}
+
+static void *
+evalbc(Session *s, Module *m, ByteCode *c)
+{
+	ValueStack *values;
+	uvlong o, v;
+	int prim = 0;
+	void *r;
+
+	values = alloc(DataValueStack);
+	debugbc(c);
+
+	o = 0;
+	while(o < c->count){
+		int instr = c->instrs[o];
+		o++;
+
+		switch(instr){
+		case IPushConst:
+			o += getuvlong(c->instrs+o, &v);
+			pushval(values, (void*)v);
+			break;
+		case IPushPrim:
+			o += getuvlong(c->instrs+o, &v);
+			prim = v;
+			break;
+		case ILookup:
+			o += getuvlong(c->instrs+o, &v);
+			pushval(values, symval(m->symtab, v)); /* TODO: value error? */
+			break;
+		case IStrand:
+			o += getuvlong(c->instrs+o, &v);
+			{
+				Array *x = allocarray(TypeArray, 1, v);
+				setshape(x, 0, v);
+				for(uvlong i = 0; i < v; i++)
+					setarray(x, i, popval(values));
+				x = simplifyarray(x);
+				pushval(values, x);
+			}
+			break;
+		case IMonadic:
+			appendlog(s, "NOTE: monadic call acts like ⊢\n");
+			break;
+		case IDyadic:
+			USED(prim);
+			appendlog(s, "NOTE: dyadic call acts like ⊣\n");
+			popval(values);
+			break;
+		case IClear:
+			while(values->count > 0)
+				popval(values);
+			break;
+		case IParse:
+			/* parse at runtime and emit code */
+			o += getuvlong(c->instrs+o, &v);
+			{
+				char *err;
+				TokenList *t = (TokenList *)v;
+				Ast *a = parse(t, m->symtab, &err);
+				if(!a){
+					appendlog(s, "RUNTIME PARSE: ");
+					appendlog(s, err);
+					appendlog(s, "\n");
+					return nil;
+				}else{
+					uvlong next = o;
+					uvlong start = c->count;
+					codegensub(s, m, c, a);
+					emitbyte(c, IJump);
+					emituvlong(c, next);
+					o = start; /* jump to new code */
+					/* TODO: this adds code every time the instruction is run */
+					print("updated bytecode:\n");
+					debugbc(c);
+				}
+			}
+			break;
+		case IDone:
+			goto done;
+			break;
+		case IJump:
+			getuvlong(c->instrs+o, &v);
+			o = v;
+			break;
+		default:
+			appendlog(s, "unknown instruction in evalbc\n");
+			return nil;
+		}
+	}
+
+done:
+	r = nil;
+	if(values->count != 0)
+		r = popval(values);
+	return r;
+}
\ No newline at end of file
--- a/fns.h
+++ b/fns.h
@@ -5,9 +5,11 @@
 void setarray(Array *, usize, Array *);
 void setshape(Array *, int, usize);
 Array *simplifyarray(Array *);
-
 char *printarray(Array *);
 
+/* eval.c */
+void eval(Session *s, Ast *);
+
 /* fs.c */
 Qid freshobjqid(void);
 void startfs(char *, char *);
@@ -23,7 +25,7 @@
 Enumeration *enummodules(Session *s);
 
 /* parse.c */
-Ast *parse(TokenList *, char **);
+Ast *parse(TokenList *, Symtab *, char **);
 
 /* scan.c */
 TokenList *scan(char *, char **);
@@ -51,6 +53,8 @@
 Enumeration *allocenum(uvlong);
 void trim(char *);
 void debugast(Ast *, int);
+void debugbc(ByteCode *);
+int getuvlong(u8int *, uvlong *);
 
 /* value.c */
 char *printval(void *);
--- a/memory.c
+++ b/memory.c
@@ -34,6 +34,8 @@
 	[DataEnumeration] = {.size = sizeof(Enumeration) },
 	[DataTokenList] = {.size = sizeof(TokenList) },
 	[DataAst] = {.size = sizeof(Ast) },
+	[DataByteCode] = {.size = sizeof(ByteCode) },
+	[DataValueStack] = {.size = sizeof(ValueStack) },
 };
 
 void *
--- a/mkfile
+++ b/mkfile
@@ -4,6 +4,7 @@
 SCRIPTS=lpa
 OFILES=\
 	array.$O\
+	eval.$O\
 	fs.$O\
 	main.$O\
 	memory.$O\
--- a/parse.c
+++ b/parse.c
@@ -12,7 +12,7 @@
 static void addchild(Ast *, Ast *);
 static int issep(TokenList *t);
 static int isexprsep(TokenList *t);
-static int nameclass(char *, Ast *);
+static int nameclass(char *, Symtab *, Ast *);
 
 static void parsesep(TokenList *);
 static void parseseps(TokenList *, int);
@@ -20,7 +20,7 @@
 static Ast *parsefuncdef(TokenList *);
 static Ast *parsefuncheader(TokenList *);
 static Ast *parselocals(TokenList *);
-static Ast *parseexpr(TokenList *, Ast *);
+static Ast *parseexpr(TokenList *, Symtab *, Ast *);
 static Ast *parseexprsub(TokenList *);
 static Ast *parseline(TokenList *);
 static Ast *parsename(TokenList *);
@@ -28,7 +28,7 @@
 static Ast *parseconst(TokenList *);
 
 Ast *
-parse(TokenList *tokens, char **errp)
+parse(TokenList *tokens, Symtab *symtab, char **errp)
 {
 	Ast *ast;
 
@@ -38,7 +38,10 @@
 		*errp = tokens->err;
 		return nil;
 	}else{
-		ast = parseprog(tokens);
+		if(symtab)
+			ast = parseexpr(tokens, symtab, nil);
+		else
+			ast = parseprog(tokens);
 		match(tokens, TokEnd);
 	}
 	return ast;
@@ -110,10 +113,10 @@
 }
 
 static int
-nameclass(char *name, Ast *func)
+nameclass(char *name, Symtab *s, Ast *func)
 {
-	int class;
-
+	int class = NameclassUndef;
+	
 	if(func == nil)
 		class = NameclassUndef;
 	else if(strcmp(name, func->funcname->name) == 0)
@@ -124,8 +127,19 @@
 		class = NameclassLocal;
 	else if(func->funcrightarg && strcmp(name, func->funcrightarg->name) == 0)
 		class = NameclassLocal;
-	else{
-		/* Check if the name exist in the locallist */
+	
+	if(s && class == NameclassUndef){
+		uvlong symid = sym(s, name);
+		void *val = symval(s, symid);
+
+		if(val) switch(getalloctag(val)){
+		case DataArray:
+			class = NameclassArray;
+			break;
+		/* more cases here in the future */
+		}
+	}else{
+		/* TODO: Check if the name exist in the locallist */
 		class = NameclassUndef;
 	}
 	return class;
@@ -160,7 +174,7 @@
 		if(peek(t) == TokDel)
 			child = parsefuncdef(t);
 		else
-			child = parseexpr(t, nil);
+			child = parseexpr(t, nil, nil);
 		if(peek(t) != TokEnd)
 			parseseps(t, 1);
 		addchild(prog, child);
@@ -173,7 +187,7 @@
 {
 	Ast *func = parsefuncheader(t);
 	while(peek(t) != TokDel){
-		Ast *expr = parseexpr(t, func);
+		Ast *expr = parseexpr(t, nil, func);
 		addchild(func, expr);
 		if(peek(t) != TokDel)
 			parseseps(t, 1);
@@ -224,7 +238,7 @@
 }
 
 static Ast *
-parseexpr(TokenList *t, Ast *func)
+parseexpr(TokenList *t, Symtab *symtab, Ast *func)
 {
 	uvlong start, end;
 	vlong depth;
@@ -248,10 +262,25 @@
 		if(t->tokens[i].tag != TokName)
 			continue;
 		name = t->tokens[i].name;
-		class = nameclass(name, func);
+		class = nameclass(name, symtab, func);
 		t->tokens[i].nameclass = class;
-		if(class == 0)
-			error(t, "parseexpr() can't deal with free variables yet");
+		if(class == 0){ /* We don't know how to parse it until runtime */
+			if(symtab)
+				error(t, "could not resolve nameclasses");
+
+			uvlong count = end-start;
+			Ast *later = alloc(DataAst);
+			later->tag = AstLater;
+			later->tokens = alloc(DataTokenList);
+			later->tokens->count = count+1;
+			later->tokens->tokens = allocextra(later->tokens, sizeof(Token) * later->tokens->count);
+			for(i = 0; i < count; i++){
+				later->tokens->tokens[i] = t->tokens[start+i];
+				match(t, peek(t));
+			}
+			later->tokens->tokens[count].tag = TokEnd;
+			return later;
+		}
 	}
 
 	/* We know the nameclass of each name, and assume that the nameclasses do not change.
--- a/session.c
+++ b/session.c
@@ -52,12 +52,12 @@
 				continue;
 			}
 			
-			Ast *ast = parse(tokens, &err);
+			Ast *ast = parse(tokens, 0, &err);
 			if(err)
 				goto error;
 
 			debugast(ast, 0);
-			appendlog(s, "got an AST but can't evaluate it yet\n");
+			eval(s, ast);
 		}
 	}
 }
--- a/util.c
+++ b/util.c
@@ -114,4 +114,73 @@
 			debugast(ast->children[i], depth+1);
 		}
 	}
+}
+
+int
+getuvlong(u8int *p, uvlong *vp)
+{
+	int size = sizeof(uvlong);
+	uvlong v = 0;
+	for(int i = 0; i < size; i++){
+		uvlong x = *p;
+		p++;
+
+		v |= x << (8*i);
+	}
+	*vp = v;
+	return size;
+}
+
+void
+debugbc(ByteCode *c)
+{
+	uvlong o, v;
+
+	o = 0;
+	while(o < c->count){
+		int instr = c->instrs[o];
+		o++;
+
+		switch(instr){
+		case IPushConst:
+			o += getuvlong(c->instrs+o, &v);
+			print("CONST %p\n", (void*)v);
+			break;
+		case IPushPrim:
+			o += getuvlong(c->instrs+o, &v);
+			print("PRIM %p\n", v);
+			break;
+		case ILookup:
+			o += getuvlong(c->instrs+o, &v);
+			print("LOOKUP %ulld\n", v);
+			break;
+		case IStrand:
+			o += getuvlong(c->instrs+o, &v);
+			print("STRAND %ulld\n", v);
+			break;
+		case IMonadic:
+			print("MONADIC\n");
+			break;
+		case IDyadic:
+			print("DYADIC\n");
+			break;
+		case IClear:
+			print("CLEAR\n");
+			break;
+		case IParse:
+			o += getuvlong(c->instrs+o, &v);
+			print("PARSE %ulld\n", v);
+			break;
+		case IDone:
+			print("DONE\n");
+			break;
+		case IJump:
+			o += getuvlong(c->instrs+o, &v);
+			print("JUMP %ulld\n", v);
+			break;
+		default:
+			print("???");
+			return;
+		}
+	}
 }
\ No newline at end of file
--- a/value.c
+++ b/value.c
@@ -34,7 +34,7 @@
 	TokenList *tokens = scan(buf, errp);
 	if(tokens == nil)
 		goto end;
-	ast = parse(tokens, errp);
+	ast = parse(tokens, 0, errp);
 	if(ast == nil)
 		goto end;
 	debugast(ast, 0);