shithub: mc

Download patch

ref: 404ed1ddb3faa4fec7ee4fd8fddc5bbdec284bde
parent: 9a2f63d47a96bf522e6c5d39e215b2e6a9e600c7
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Jul 26 18:02:02 EDT 2017

Automatic variables

Hello,

I wrote this patch to start a conversation about resource
management in Myrddin.

~ Introduction
~~~~~~~~~~~~~~

The patch attached provides a hopefully life-improving and
surely non-invasive mechanism to handle resources that have
block-limited lifetimes: automatic variables.

This resource-management scheme can be found in multiple
languages today.  For example, C++ calls destructors
automatically at the end of the scope of a variable;
similarly, Rust automatically calls user-defined code
when a variable's scope ends; finally, also related is
Go's defer statement which ensures that resource-releasing
code is called at the end of a function.

~ Description
~~~~~~~~~~~~~

The idea is that every "binder" of the language (var/const/
fn args/match) offers the possibility to mark the variables
it binds as "automatic" using the 'auto' keyword.

An automatic variable must be of a type that implements
the new builtin 'disposable' trait below.  When the scope
of a variable 'v' marked as automatic ends, '__dispose__(v)'
is called.

That's it.

	trait disposable @a =
		__dispose__ : (val : @a -> void)
	;;

~ Example Programs
~~~~~~~~~~~~~~~~~~

The language modification is shown in action in the program
below.

	use std

	impl disposable int =
		__dispose__ = {x
			std.put("__dispose__({})\n", x)
		}
	;;

	const g = {auto x
		-> x++ - 1
	}

	const main = {
		var auto i, auto j = 42
		for i = 1; i < 6; i=i+1
			var auto z : int = 2*i
			if i == 3
				std.put("z = {} -- cont\n", z)
				continue
			;;
			std.put("z = {}\n", z)
			if i/2 == 2
				var auto inner : int = 1234
				break
			;;
		;;
		i = g(321)
	}

The output of the previous test program is:

  1:	z = 2
  2:	__dispose__(2)
  3:	z = 4
  4:	__dispose__(4)
  5:	z = 6 -- cont
  6:	__dispose__(6)
  7:	z = 8
  8:	__dispose__(1234)
  9:	__dispose__(8)
 10:	__dispose__(322)
 11:	__dispose__(42)
 12:	__dispose__(320)

Some important remarks:

  * Unlike Go, __dispose__ is called as soon as the scope
    of a variable ends, not at the end of the function.
    In particular, the variable 'z' in the example is
    always disposed of before starting the next iteration.
    (Each iteration finishes by leaving the loop body block.)

  * __dispose__ is called in reverse order of declaration.
    This allows variables to depend on resources of
    variables already in scope.

  * Regardless of how a block is exited (fallthrough,
    break, continue, return), the variables of every block
    being exited are disposed of in reverse order and
    exactly once.

  * As line 10 of the output shows, the __dispose__ calls
    happen "after" the return statement of the function.
    (It happens after the post-increment, so x's value
    when it is disposed of is 322.)

The following example shows that, using an ad hoc type,
it is possible to execute arbitrary code at the end of
a scope.

	type defer = (-> void)
	impl disposable defer =
		__dispose__ = {f: defer; f()}
	;;

	const foobar = {...
		const auto _dummy = ({
			std.put("Bye!\n")
		}: defer)

		...
	}

~ Discussion
~~~~~~~~~~~~

Multiple alternatives exist for resource management, and
hopefully this mail starts an interesting debate.
In my view, here are the pros and cons of the current
proposal:

  - PROS -

  * Opt-in
  * Backward compatible
  * Simple
  * The spirit of C's original auto
  * It has an implementation

  - CONS -

  * No safety guarantees/compiler checks whatsoever
  * Syntactic pollution: 'auto' keyword, 'disposable' trait

Finally, note that the current patch does not implement
auto support for variables bound in match statements.
This will come in a followup patch if there is sufficient
interest.  Also, the patch does not provide proper support
(or proper errors) for gotos and labels.

--- a/mi/flatten.c
+++ b/mi/flatten.c
@@ -15,7 +15,15 @@
 #include "mi.h"
 #include "../config.h"
 
+typedef struct Flattenctx Flattenctx;
+typedef struct Loop Loop;
 
+struct Loop {
+	Node *lcnt;
+	Node *lbrk;
+	Stab *body;
+};
+
 /* takes a list of nodes, and reduces it (and it's subnodes) to a list
  * following these constraints:
  *      - All nodes are expression node
@@ -22,17 +30,14 @@
  *      - Nodes with side effects are root node
  *      - All nodes operate on machine-primitive types and tuple
  */
-typedef struct Flattenctx Flattenctx;
 struct Flattenctx {
 	int isglobl;
 
-	/* return handling */
 	Node **stmts;
 	size_t nstmts;
 
 	/* return handling */
-	int hasenv;
-	int isbigret;
+	Node *tret;
 
 	/* pre/postinc handling */
 	Node **incqueue;
@@ -39,10 +44,9 @@
 	size_t nqueue;
 
 	/* break/continue handling */
-	Node **loopstep;
-	size_t nloopstep;
-	Node **loopexit;
-	size_t nloopexit;
+	Loop loop;
+	unsigned inloop;
+	Stab *curst;
 
 	/* location handling */
 	Htab *globls;
@@ -199,7 +203,51 @@
 	return r;
 }
 
+static Node *
+traitfn(Srcloc loc, Trait *tr, char *fn, Type *ty)
+{
+	Node *proto, *dcl, *var;
+	char *name;
+	size_t i;
+
+	for (i = 0; i < tr->nproto; i++) {
+		name = declname(tr->proto[i]);
+		if (!strcmp(fn, name)) {
+			proto = tr->proto[i];
+			dcl = htget(proto->decl.impls, ty);
+			var = mkexpr(loc, Ovar, dcl->decl.name, NULL);
+			var->expr.type = dcl->decl.type;
+			var->expr.did = dcl->decl.did;
+			return var;
+		}
+	}
+	return NULL;
+}
+
 static void
+dispose(Flattenctx *s, Stab *st)
+{
+	Node *d, *call, *func, *val;
+	Trait *tr;
+	Type *ty;
+	size_t i;
+
+	tr = traittab[Tcdisp];
+	/* dispose in reverse order of declaration */
+	for (i = st->nautodcl; i-- > 0;) {
+		d = st->autodcl[i];
+		ty = decltype(d);
+		val = mkexpr(Zloc, Ovar, d->decl.name, NULL);
+		val->expr.type = ty;
+		val->expr.did = d->decl.did;
+		func = traitfn(Zloc, tr, "__dispose__", ty);
+		call = mkexpr(Zloc, Ocall, func, val, NULL);
+		call->expr.type = mktype(Zloc, Tyvoid);
+		flatten(s, call);
+	}
+}
+
+static void
 flattencond(Flattenctx *s, Node *n, Node *ltrue, Node *lfalse)
 {
 	Node **args;
@@ -368,6 +416,26 @@
 	return r;
 }
 
+/* returns 1 when the exit jump needs to be emitted */
+static int
+exitscope(Flattenctx *s, Stab *stop, Srcloc loc, int x)
+{
+	Stab *st;
+
+	for (st = s->curst;; st = st->super) {
+		if (st->exit[x]) {
+			jmp(s, st->exit[x]);
+			return 0;
+		}
+		st->exit[x] = genlbl(loc);
+		flatten(s, st->exit[x]);
+		dispose(s, st);
+		if ((!stop && st->isfunc) || st == stop) {
+			return 1;
+		}
+	}
+}
+
 static Node *
 rval(Flattenctx *s, Node *n)
 {
@@ -499,7 +567,11 @@
 				append(s, s->incqueue[i]);
 			lfree(&s->incqueue, &s->nqueue);
 		}
-		append(s, mkexpr(n->loc, Oret, t, NULL));
+		if (!s->tret)
+			s->tret = temp(s, v);
+		flatten(s, asn(lval(s, s->tret), t));
+		if (exitscope(s, NULL, Zloc, Xret))
+			append(s, mkexpr(n->loc, Oret, s->tret, NULL));
 		break;
 	case Oasn:
 		r = assign(s, args[0], args[1]);
@@ -506,15 +578,17 @@
 		break;
 	case Obreak:
 		r = NULL;
-		if (s->nloopexit == 0)
+		if (s->inloop == 0)
 			fatal(n, "trying to break when not in loop");
-		jmp(s, s->loopexit[s->nloopexit - 1]);
+		if (exitscope(s, s->loop.body, n->loc, Xbrk))
+			jmp(s, s->loop.lbrk);
 		break;
 	case Ocontinue:
 		r = NULL;
-		if (s->nloopstep == 0)
+		if (s->inloop == 0)
 			fatal(n, "trying to continue when not in loop");
-		jmp(s, s->loopstep[s->nloopstep - 1]);
+		if (exitscope(s, s->loop.body, n->loc, Xcnt))
+			jmp(s, s->loop.lcnt);
 		break;
 	case Oeq: case One:
 		r = compare(s, n, 1);
@@ -569,14 +643,20 @@
 }
 
 static void
-flattenblk(Flattenctx *fc, Node *n)
+flattenblk(Flattenctx *s, Node *n)
 {
+	Stab *st;
 	size_t i;
 
+	st = s->curst;
+	s->curst = n->block.scope;
 	for (i = 0; i < n->block.nstmts; i++) {
 		n->block.stmts[i] = fold(n->block.stmts[i], 0);
-		flatten(fc, n->block.stmts[i]);
+		flatten(s, n->block.stmts[i]);
 	}
+	assert(s->curst == n->block.scope);
+	dispose(s, s->curst);
+	s->curst = st;
 }
 
 /* init; while cond; body;;
@@ -593,6 +673,8 @@
 static void
 flattenloop(Flattenctx *s, Node *n)
 {
+	Stab *b;
+	Loop l;
 	Node *lbody;
 	Node *lend;
 	Node *ldec;
@@ -606,9 +688,15 @@
 	lstep = genlbl(n->loc);
 	lend = genlbl(n->loc);
 
-	lappend(&s->loopstep, &s->nloopstep, lstep);
-	lappend(&s->loopexit, &s->nloopexit, lend);
 
+	b = s->curst;
+	s->curst = n->loopstmt.scope;
+	l = s->loop;
+	s->loop.lcnt = lstep;
+	s->loop.lbrk = lend;
+	s->loop.body = n->loopstmt.scope;
+	s->inloop++;
+
 	flatten(s, n->loopstmt.init);  /* init */
 	jmp(s, lcond);              /* goto test */
 	flatten(s, lbody);             /* body lbl */
@@ -627,8 +715,9 @@
 		append(s, s->incqueue[i]);
 	lfree(&s->incqueue, &s->nqueue);
 
-	s->nloopstep--;
-	s->nloopexit--;
+	s->inloop--;
+	s->loop = l;
+	s->curst = b;
 }
 
 /* if foo; bar; else baz;;
@@ -719,6 +808,7 @@
 static void
 flattenidxiter(Flattenctx *s, Node *n)
 {
+	Loop l;
 	Node *lbody, *lstep, *lcond, *lmatch, *lend;
 	Node *idx, *len, *dcl, *seq, *val, *done;
 	Node *zero;
@@ -730,8 +820,11 @@
 	lmatch = genlbl(n->loc);
 	lend = genlbl(n->loc);
 
-	lappend(&s->loopstep, &s->nloopstep, lstep);
-	lappend(&s->loopexit, &s->nloopexit, lend);
+	s->inloop++;
+	l = s->loop;
+	s->loop.lcnt = lstep;
+	s->loop.lbrk = lend;
+	s->loop.body = n->iterstmt.body->block.scope;
 
         /* FIXME: pass this in from main() */
         idxtype = mktype(n->loc, Tyuint64);
@@ -766,31 +859,10 @@
 	jmp(s, lbody);
 	flatten(s, lend);
 
-	s->nloopstep--;
-	s->nloopexit--;
+	s->inloop--;
+	s->loop = l;
 }
 
-static Node *
-itertraitfn(Srcloc loc, Trait *tr, char *fn, Type *ty)
-{
-	Node *proto, *dcl, *var;
-	char *name;
-	size_t i;
-
-	for (i = 0; i < tr->nproto; i++) {
-		name = declname(tr->proto[i]);
-		if (!strcmp(fn, name)) {
-			proto = tr->proto[i];
-			dcl = htget(proto->decl.impls, ty);
-			var = mkexpr(loc, Ovar, dcl->decl.name, NULL);
-			var->expr.type = dcl->decl.type;
-			var->expr.did = dcl->decl.did;
-			return var;
-		}
-	}
-	return NULL;
-}
-
 /* for pat in seq
  * 	body;;
  * =>
@@ -812,6 +884,7 @@
 static void
 flattentraititer(Flattenctx *s, Node *n)
 {
+	Loop l;
 	Node *lbody, *lclean, *lstep, *lmatch, *lend;
 	Node *done, *val, *iter, *valptr, *iterptr;
 	Node *func, *call;
@@ -831,9 +904,13 @@
 	lstep = genlbl(n->loc);
 	lmatch = genlbl(n->loc);
 	lend = genlbl(n->loc);
-	lappend(&s->loopstep, &s->nloopstep, lstep);
-	lappend(&s->loopexit, &s->nloopexit, lend);
 
+	s->inloop++;
+	l = s->loop;
+	s->loop.lcnt = lstep;
+	s->loop.lbrk = lend;
+	s->loop.body = n->iterstmt.body->block.scope;
+
 	append(s, asn(iter, n->iterstmt.seq));
 	jmp(s, lstep);
 	flatten(s, lbody);
@@ -842,7 +919,7 @@
 	flatten(s, lclean);
 
 	/* call iterator cleanup */
-	func = itertraitfn(n->loc, tr, "__iterfin__", exprtype(iter));
+	func = traitfn(n->loc, tr, "__iterfin__", exprtype(iter));
 	call = mkexpr(n->loc, Ocall, func, iterptr, valptr, NULL);
 	call->expr.type = mktype(n->loc, Tyvoid);
 	append(s, call);
@@ -849,7 +926,7 @@
 
 	flatten(s, lstep);
 	/* call iterator step */
-	func = itertraitfn(n->loc, tr, "__iternext__", exprtype(iter));
+	func = traitfn(n->loc, tr, "__iternext__", exprtype(iter));
 	call = mkexpr(n->loc, Ocall, func, iterptr, valptr, NULL);
 	done = gentemp(n->loc, mktype(n->loc, Tybool), NULL);
 	call->expr.type = exprtype(done);
@@ -862,8 +939,8 @@
 	jmp(s, lbody);
 	flatten(s, lend);
 
-	s->nloopstep--;
-	s->nloopexit--;
+	s->inloop--;
+	s->loop = l;
 }
 
 static void
@@ -954,6 +1031,7 @@
 	lit = dcl->decl.init->expr.args[0];
 	fn = lit->lit.fnval;
 	body = fn->func.body;
+	fc.curst = fn->func.scope;
 	flatten(&fc, fn->func.body);
 	blk = mkblock(fn->loc, body->block.scope);
 	blk->block.stmts = fc.stmts;
@@ -1013,4 +1091,3 @@
 		return 0;
 	return 1;
 }
-
--- a/parse/dump.c
+++ b/parse/dump.c
@@ -186,6 +186,7 @@
 		findentf(fd, depth + 1, "isimport=%d\n", n->decl.isimport);
 		findentf(fd, depth + 1, "isnoret=%d\n", n->decl.isnoret);
 		findentf(fd, depth + 1, "isexportinit=%d\n", n->decl.isexportinit);
+		findentf(fd, depth + 1, "isauto=%d\n", n->decl.isauto);
 		findentf(fd, depth, ")\n");
 		outsym(n, fd, depth + 1);
 		outnode(n->decl.init, fd, depth + 1);
--- a/parse/gram.y
+++ b/parse/gram.y
@@ -94,6 +94,7 @@
 %token<tok> Tgoto	/* goto */
 %token<tok> Tbreak	/* break */
 %token<tok> Tcontinue	/* continue */
+%token<tok> Tauto	/* auto */
 
 %token<tok> Tintlit
 %token<tok> Tstrlit
@@ -144,7 +145,7 @@
 %type<node> littok literal lorexpr landexpr borexpr strlit bandexpr
 %type<node> cmpexpr addexpr mulexpr shiftexpr prefixexpr
 %type<node> postfixexpr funclit seqlit tuplit name block stmt label
-%type<node> use fnparam declbody declcore typedeclcore structent
+%type<node> use fnparam declbody declcore typedeclcore autodecl structent
 %type<node> arrayelt structelt tuphead ifstmt forstmt whilestmt
 %type<node> matchstmt elifs optexprln loopcond optexpr match
 
@@ -366,8 +367,8 @@
 	}
 	;
 
-declbody: declcore Tasn expr {$$ = $1; $1->decl.init = $3;}
-	| declcore
+declbody: autodecl Tasn expr {$$ = $1; $1->decl.init = $3;}
+	| autodecl
 	;
 
 declcore: name {$$ = mkdecl($1->loc, $1, mktyvar($1->loc));}
@@ -378,6 +379,10 @@
 	: name Tcolon type {$$ = mkdecl($1->loc, $1, $3);}
 	;
 
+autodecl: Tauto declcore {$$ = $2; $$->decl.isauto = 1;}
+	| declcore
+	;
+
 name    : Tident {$$ = mkname($1->loc, $1->id);}
 	| Tident Tdot Tident {
 		$$ = mknsname($3->loc, $1->id, $3->id);
@@ -859,7 +864,7 @@
 	| /* empty */ {$$.nl = NULL; $$.nn = 0;}
 	;
 
-fnparam : declcore {$$ = $1;}
+fnparam : autodecl {$$ = $1;}
 	| Tgap { $$ = mkpseudodecl($1->loc, mktyvar($1->loc)); }
 	| Tgap Tcolon type { $$ = mkpseudodecl($1->loc, $3); }
 	;
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -232,6 +232,24 @@
 }
 
 static void
+adddispspecialization(Node *n, Stab *stab)
+{
+	Trait *tr;
+	Type *ty;
+
+	tr = traittab[Tcdisp];
+	ty = decltype(n);
+	if (!ty->traits || !bshas(ty->traits, Tcdisp))
+		return;
+	assert(tr->nproto == 1);
+	if (hthas(tr->proto[0]->decl.impls, ty))
+		return;
+	lappend(&specializationscope, &nspecializationscope, stab);
+	lappend(&specializations, &nspecializations, n);
+	lappend(&genericdecls, &ngenericdecls, tr->proto[0]);
+}
+
+static void
 additerspecializations(Node *n, Stab *stab)
 {
 	Trait *tr;
@@ -1876,6 +1894,8 @@
 		if (hasparams(type(n)) && !ingeneric)
 			fatal(n, "generic type %s in non-generic near %s", tystr(type(n)),
 					ctxstr(n));
+		if (n->decl.isauto)
+			constrain(n, type(n), traittab[Tcdisp]);
 		popenv(n->decl.env);
 		indentdepth--;
 		if (debugopt['u'])
@@ -2371,6 +2391,8 @@
 		if (streq(declname(n), "__init__"))
 			if (!initcompatible(tybase(decltype(n))))
 				fatal(n, "__init__ must be (->void), got %s", tystr(decltype(n)));
+		if (n->decl.isauto)
+			adddispspecialization(n, curstab());
 		popenv(n->decl.env);
 		break;
 	case Nblock:
@@ -2468,7 +2490,7 @@
 specialize(Node *f)
 {
 	Node *d, *n, *name;
-	Type *ty, *it;
+	Type *ty, *it, *dt;
 	size_t i;
 	Trait *tr;
 
@@ -2501,6 +2523,14 @@
 			it = itertype(n->iterstmt.seq, mktype(n->loc, Tyvoid));
 			d = specializedcl(tr->proto[1], ty, it, &name);
 			htput(tr->proto[1]->decl.impls, ty, d);
+		} else if (n->type == Ndecl && n->decl.isauto) {
+			tr = traittab[Tcdisp];
+			assert(tr->nproto == 1);
+			ty = decltype(n);
+			dt = mktyfunc(n->loc, NULL, 0, mktype(n->loc, Tyvoid));
+			lappend(&dt->sub, &dt->nsub, ty);
+			d = specializedcl(tr->proto[0], ty, dt, &name);
+			htput(tr->proto[0]->decl.impls, ty, d);
 		} else {
 			die("unknown node for specialization\n");
 		}
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -85,6 +85,13 @@
 	size_t nsubst;
 };
 
+typedef enum {
+	Xcnt,
+	Xbrk,
+	Xret,
+	Nexits
+} Exit;
+
 struct Stab {
 	Stab *super;
 	char *name;
@@ -100,6 +107,11 @@
 	Htab *env;	/* the syms closed over, if this is a function stab */
 	Htab *lbl;	/* labels */
 	Htab *impl;	/* trait implementations: really a set of implemented traits. */
+
+	Node **autodcl;	/* declarations in dcl marked 'auto' */
+	size_t nautodcl;
+
+	Node *exit[Nexits];
 };
 
 struct Tyenv {
@@ -306,6 +318,7 @@
 			char isnoret;
 			char isexportinit;
 			char isinit;
+			char isauto;
 		} decl;
 
 		struct {
--- a/parse/stab.c
+++ b/parse/stab.c
@@ -453,6 +453,10 @@
 
 	st = findstab(st, s->decl.name);
 	old = htget(st->dcl, s->decl.name);
+	if (s->decl.isauto) {
+		assert(!old);
+		lappend(&st->autodcl, &st->nautodcl, s);
+	}
 	if (!old)
 		forcedcl(st, s);
 	else if (!mergedecl(old, s))
--- a/parse/tok.c
+++ b/parse/tok.c
@@ -178,6 +178,7 @@
 	} kwmap[] = {
 		{"$noret", Tattr},
 		{"_", Tgap},
+		{"auto", Tauto},
 		{"break", Tbreak},
 		{"const", Tconst},
 		{"continue", Tcontinue},
--- a/parse/trait.def
+++ b/parse/trait.def
@@ -6,3 +6,4 @@
 Tc(Tcslice,	"sliceable")    /* sliceable */
 Tc(Tcfunc,	"function")     /* behaves like a function */
 Tc(Tciter,	"iterable")     /* can be iterated over */
+Tc(Tcdisp,	"disposable")   /* automatically disposable */
--- a/parse/type.c
+++ b/parse/type.c
@@ -1034,6 +1034,35 @@
 }
 
 void
+disposableinit(Stab *st, Trait *tr)
+{
+	Node *func, *arg, **args;
+	Type *ty;
+	size_t nargs;
+
+	tr->param = mktyparam(Zloc, "a");
+	tr->naux = 0;
+
+	/* __dispose__ : (val : @a -> void) */
+	args = NULL;
+	nargs = 0;
+	arg = mkdecl(Zloc, mkname(Zloc, "val"), mktyparam(Zloc, "a"));
+	lappend(&args, &nargs, arg);
+	ty = mktyfunc(Zloc, args, nargs, mktype(Zloc, Tyvoid));
+
+	func = mkdecl(Zloc, mkname(Zloc, "__dispose__"), ty);
+	func->decl.trait = tr;
+	func->decl.impls = mkht(tyhash, tyeq);
+	func->decl.isgeneric = 1;
+	func->decl.isconst = 1;
+	func->decl.isglobl = 1;
+	func->decl.isextern = 1;
+
+	lappend(&tr->proto, &tr->nproto, func);
+	putdcl(st, func);
+}
+
+void
 tyinit(Stab *st)
 {
 	int i;
@@ -1106,5 +1135,6 @@
 	 * comes last, since this needs both the types and the traits set up
 	 */
 	iterableinit(st, traittab[Tciter]);
+	disposableinit(st, traittab[Tcdisp]);
 
 }