ref: e4cb42e20e8af9db7d56082b58000bf245e47612
parent: 6df541775b1cd4eb2d9bd3698d68c5cd6d42736b
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Sun Nov 10 22:07:40 EST 2024
reader: move buffer & co onto the stack
--- a/read.c
+++ b/read.c
@@ -11,13 +11,18 @@
TOK_OPENC, TOK_CLOSEC,
};
-static value_t do_read_sexpr(value_t label);
+typedef struct Rctx Rctx;
+struct Rctx {
+ uint32_t toktype;
+ value_t tokval;
+ char buf[256];
+};
+
+static value_t do_read_sexpr(Rctx *ctx, value_t label);
+
#if defined(__plan9__)
static int errno;
-#define VLONG_MAX ~(1LL<<63)
-#define VLONG_MIN (1LL<<63)
-#define UVLONG_MAX (~0ULL)
static mpint *mp_vlong_min, *mp_vlong_max, *mp_uvlong_max;
#endif
@@ -31,17 +36,17 @@
errno = 0;
x = strtoll(nptr, rptr, base);
#if defined(__plan9__)
- if((x != VLONG_MAX && x != VLONG_MIN) || *rptr == nptr)
+ if((x != INT64_MAX && x != INT64_MIN) || *rptr == nptr)
return x;
mpint *c;
m = strtomp(nptr, rptr, base, nil);
- if(x == VLONG_MAX){
+ if(x == INT64_MAX){
if(mp_vlong_max == nil)
- mp_vlong_max = vtomp(VLONG_MAX, nil);
+ mp_vlong_max = vtomp(INT64_MAX, nil);
c = mp_vlong_max;
}else{
if(mp_vlong_min == nil)
- mp_vlong_min = vtomp(VLONG_MIN, nil);
+ mp_vlong_min = vtomp(INT64_MIN, nil);
c = mp_vlong_min;
}
if(mpcmp(c, m) == 0){
@@ -67,11 +72,11 @@
errno = 0;
x = strtoull(nptr, rptr, base);
#if defined(__plan9__)
- if(x != UVLONG_MAX || *rptr == nptr)
+ if(x != INT64_MAX || *rptr == nptr)
return x;
m = strtomp(nptr, rptr, base, nil);
if(mp_uvlong_max == nil)
- mp_uvlong_max = uvtomp(UVLONG_MAX, nil);
+ mp_uvlong_max = uvtomp(INT64_MAX, nil);
if(mpcmp(mp_uvlong_max, m) == 0){
mpfree(m);
m = nil;
@@ -159,10 +164,6 @@
return isnumtok_base(tok, pval, base);
}
-static uint32_t toktype = TOK_NONE;
-static value_t tokval;
-static char buf[256];
-
static char
nextchar(void)
{
@@ -193,22 +194,22 @@
}
static void
-take(void)
+take(Rctx *ctx)
{
- toktype = TOK_NONE;
+ ctx->toktype = TOK_NONE;
}
static void
-accumchar(char c, int *pi)
+accumchar(Rctx *ctx, char c, int *pi)
{
- buf[(*pi)++] = c;
- if(*pi >= (int)(sizeof(buf)-1))
+ ctx->buf[(*pi)++] = c;
+ if(*pi >= (int)(sizeof(ctx->buf)-1))
lerrorf(FL(ParseError), "token too long");
}
// return: 1 if escaped (forced to be symbol)
static int
-read_token(char c, int digits)
+read_token(Rctx *ctx, char c, int digits)
{
int i = 0, ch, escaped = 0, issym = 0, nc = 0;
@@ -230,11 +231,11 @@
ch = ios_peekc(RS);
if(ch == IOS_EOF)
goto terminate;
- accumchar((char)ch, &i);
+ accumchar(ctx, (char)ch, &i);
}else if(!escaped && !(symchar(c) && (!digits || isdigit(c)))){
break;
}else{
- accumchar(c, &i);
+ accumchar(ctx, c, &i);
}
nc++;
}
@@ -241,7 +242,7 @@
if(nc == 0)
ios_skip(RS, -1);
terminate:
- buf[i++] = '\0';
+ ctx->buf[i++] = '\0';
return issym;
}
@@ -254,98 +255,98 @@
}
static uint32_t
-peek(void)
+peek(Rctx *ctx)
{
char c, *end;
fixnum_t x;
int ch, base;
- if(toktype != TOK_NONE)
- return toktype;
+ if(ctx->toktype != TOK_NONE)
+ return ctx->toktype;
c = nextchar();
if(ios_eof(RS))
return TOK_NONE;
if(c == '(')
- toktype = TOK_OPEN;
+ ctx->toktype = TOK_OPEN;
else if(c == ')')
- toktype = TOK_CLOSE;
+ ctx->toktype = TOK_CLOSE;
else if(c == '[')
- toktype = TOK_OPENB;
+ ctx->toktype = TOK_OPENB;
else if(c == ']')
- toktype = TOK_CLOSEB;
+ ctx->toktype = TOK_CLOSEB;
else if(c == '{')
- toktype = TOK_OPENC;
+ ctx->toktype = TOK_OPENC;
else if(c == '}')
- toktype = TOK_CLOSEC;
+ ctx->toktype = TOK_CLOSEC;
else if(c == '\'')
- toktype = TOK_QUOTE;
+ ctx->toktype = TOK_QUOTE;
else if(c == '`')
- toktype = TOK_BQ;
+ ctx->toktype = TOK_BQ;
else if(c == '"')
- toktype = TOK_DOUBLEQUOTE;
+ ctx->toktype = TOK_DOUBLEQUOTE;
else if(c == '#'){
ch = ios_getc(RS); c = (char)ch;
if(ch == IOS_EOF)
lerrorf(FL(ParseError), "invalid read macro");
if(c == '.')
- toktype = TOK_SHARPDOT;
+ ctx->toktype = TOK_SHARPDOT;
else if(c == '\'')
- toktype = TOK_SHARPQUOTE;
+ ctx->toktype = TOK_SHARPQUOTE;
else if(c == '\\'){
Rune cval;
if(ios_getutf8(RS, &cval) == IOS_EOF)
lerrorf(FL(ParseError), "end of input in character constant");
if(cval == 'u' || cval == 'U' || cval == 'x'){
- read_token('u', 0);
- if(buf[1] != '\0'){ // not a solitary 'u','U','x'
- if(!read_numtok(&buf[1], &tokval, 16))
+ read_token(ctx, 'u', 0);
+ if(ctx->buf[1] != '\0'){ // not a solitary 'u','U','x'
+ if(!read_numtok(&ctx->buf[1], &ctx->tokval, 16))
lerrorf(FL(ParseError), "invalid hex character constant");
- cval = numval(tokval);
+ cval = numval(ctx->tokval);
}
}else if(cval >= 'a' && cval <= 'z'){
- read_token((char)cval, 0);
- tokval = symbol(buf);
- if(buf[1] == '\0') USED(cval); /* one character */
- else if(tokval == FL(nulsym)) cval = 0x00;
- else if(tokval == FL(alarmsym)) cval = 0x07;
- else if(tokval == FL(backspacesym)) cval = 0x08;
- else if(tokval == FL(tabsym)) cval = 0x09;
- else if(tokval == FL(linefeedsym)) cval = 0x0A;
- else if(tokval == FL(newlinesym)) cval = 0x0A;
- else if(tokval == FL(vtabsym)) cval = 0x0B;
- else if(tokval == FL(pagesym)) cval = 0x0C;
- else if(tokval == FL(returnsym)) cval = 0x0D;
- else if(tokval == FL(escsym)) cval = 0x1B;
- else if(tokval == FL(spacesym)) cval = 0x20;
- else if(tokval == FL(deletesym)) cval = 0x7F;
+ read_token(ctx, (char)cval, 0);
+ ctx->tokval = symbol(ctx->buf);
+ if(ctx->buf[1] == '\0') USED(cval); /* one character */
+ else if(ctx->tokval == FL(nulsym)) cval = 0x00;
+ else if(ctx->tokval == FL(alarmsym)) cval = 0x07;
+ else if(ctx->tokval == FL(backspacesym)) cval = 0x08;
+ else if(ctx->tokval == FL(tabsym)) cval = 0x09;
+ else if(ctx->tokval == FL(linefeedsym)) cval = 0x0A;
+ else if(ctx->tokval == FL(newlinesym)) cval = 0x0A;
+ else if(ctx->tokval == FL(vtabsym)) cval = 0x0B;
+ else if(ctx->tokval == FL(pagesym)) cval = 0x0C;
+ else if(ctx->tokval == FL(returnsym)) cval = 0x0D;
+ else if(ctx->tokval == FL(escsym)) cval = 0x1B;
+ else if(ctx->tokval == FL(spacesym)) cval = 0x20;
+ else if(ctx->tokval == FL(deletesym)) cval = 0x7F;
else
- lerrorf(FL(ParseError), "unknown character #\\%s", buf);
+ lerrorf(FL(ParseError), "unknown character #\\%s", ctx->buf);
}
- toktype = TOK_NUM;
- tokval = mk_rune(cval);
+ ctx->toktype = TOK_NUM;
+ ctx->tokval = mk_rune(cval);
}else if(c == '('){
- toktype = TOK_SHARPOPEN;
+ ctx->toktype = TOK_SHARPOPEN;
}else if(c == '<'){
lerrorf(FL(ParseError), "unreadable object");
}else if(isdigit(c)){
- read_token(c, 1);
+ read_token(ctx, c, 1);
c = (char)ios_getc(RS);
if(c == '#')
- toktype = TOK_BACKREF;
+ ctx->toktype = TOK_BACKREF;
else if(c == '=')
- toktype = TOK_LABEL;
+ ctx->toktype = TOK_LABEL;
else
lerrorf(FL(ParseError), "invalid label");
- x = strtoll(buf, &end, 10);
+ x = strtoll(ctx->buf, &end, 10);
if(*end != '\0')
lerrorf(FL(ParseError), "invalid label");
- tokval = fixnum(x);
+ ctx->tokval = fixnum(x);
}else if(c == '!'){
// #! single line comment for shbang script support
do{
ch = ios_getc(RS);
}while(ch != IOS_EOF && (char)ch != '\n');
- return peek();
+ return peek(ctx);
}else if(c == '|'){
// multiline comment
int commentlevel = 1;
@@ -373,62 +374,62 @@
}
}
// this was whitespace, so keep peeking
- return peek();
+ return peek(ctx);
}else if(c == ';'){
// datum comment
- (void)do_read_sexpr(UNBOUND); // skip
- return peek();
+ (void)do_read_sexpr(ctx, UNBOUND); // skip
+ return peek(ctx);
}else if(c == ':'){
// gensym
ch = ios_getc(RS);
if((char)ch == 'g')
ch = ios_getc(RS);
- read_token((char)ch, 0);
- x = strtol(buf, &end, 10);
- if(*end != '\0' || buf[0] == '\0')
+ read_token(ctx, (char)ch, 0);
+ x = strtol(ctx->buf, &end, 10);
+ if(*end != '\0' || ctx->buf[0] == '\0')
lerrorf(FL(ParseError), "invalid gensym label");
- toktype = TOK_GENSYM;
- tokval = fixnum(x);
+ ctx->toktype = TOK_GENSYM;
+ ctx->tokval = fixnum(x);
}else if(symchar(c)){
- read_token(ch, 0);
+ read_token(ctx, ch, 0);
if(((c == 'b' && (base = 2)) ||
(c == 'o' && (base = 8)) ||
(c == 'd' && (base = 10)) ||
- (c == 'x' && (base = 16))) && (isdigit_base(buf[1], base) || buf[1] == '-')){
- if(!read_numtok(&buf[1], &tokval, base))
+ (c == 'x' && (base = 16))) && (isdigit_base(ctx->buf[1], base) || ctx->buf[1] == '-')){
+ if(!read_numtok(&ctx->buf[1], &ctx->tokval, base))
lerrorf(FL(ParseError), "invalid base %d constant", base);
- return (toktype = TOK_NUM);
+ return (ctx->toktype = TOK_NUM);
}
- toktype = TOK_SHARPSYM;
- tokval = symbol(buf);
+ ctx->toktype = TOK_SHARPSYM;
+ ctx->tokval = symbol(ctx->buf);
}else{
lerrorf(FL(ParseError), "unknown read macro");
}
}else if(c == ','){
- toktype = TOK_COMMA;
+ ctx->toktype = TOK_COMMA;
ch = ios_peekc(RS);
if(ch == IOS_EOF)
- return toktype;
+ return ctx->toktype;
if((char)ch == '@')
- toktype = TOK_COMMAAT;
+ ctx->toktype = TOK_COMMAAT;
else if((char)ch == '.')
- toktype = TOK_COMMADOT;
+ ctx->toktype = TOK_COMMADOT;
else
- return toktype;
+ return ctx->toktype;
ios_getc(RS);
}else{
- if(!read_token(c, 0)){
- if(buf[0] == '.' && buf[1] == '\0')
- return (toktype = TOK_DOT);
- if(read_numtok(buf, &tokval, 0))
- return (toktype = TOK_NUM);
+ if(!read_token(ctx, c, 0)){
+ if(ctx->buf[0] == '.' && ctx->buf[1] == '\0')
+ return (ctx->toktype = TOK_DOT);
+ if(read_numtok(ctx->buf, &ctx->tokval, 0))
+ return (ctx->toktype = TOK_NUM);
}
- toktype = TOK_SYM;
- tokval = symbol(strcmp(buf, "lambda") == 0 ? "λ" : buf);
+ ctx->toktype = TOK_SYM;
+ ctx->tokval = symbol(strcmp(ctx->buf, "lambda") == 0 ? "λ" : ctx->buf);
}
- return toktype;
+ return ctx->toktype;
}
// NOTE: this is NOT an efficient operation. it is only used by the
@@ -455,7 +456,7 @@
}
static value_t
-read_vector(value_t label, uint32_t closer)
+read_vector(Rctx *ctx, value_t label, uint32_t closer)
{
value_t v = FL(the_empty_vector), elt;
uint32_t i = 0;
@@ -462,7 +463,7 @@
PUSH(v);
if(label != UNBOUND)
ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)v);
- while(peek() != closer){
+ while(peek(ctx) != closer){
if(ios_eof(RS))
lerrorf(FL(ParseError), "unexpected end of input");
if(i >= vector_size(v)){
@@ -470,13 +471,13 @@
if(label != UNBOUND)
ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)v);
}
- elt = do_read_sexpr(UNBOUND);
+ elt = do_read_sexpr(ctx, UNBOUND);
v = FL(stack)[FL(sp)-1];
assert(i < vector_size(v));
vector_elt(v, i) = elt;
i++;
}
- take();
+ take(ctx);
if(i > 0)
vector_setsize(v, i);
return POP();
@@ -572,7 +573,7 @@
// can move whenever a new cons is allocated. we have to refer to every cons
// through a handle to a relocatable pointer (i.e. a pointer on the stack).
static void
-read_list(value_t *pval, value_t label, uint32_t closer)
+read_list(Rctx *ctx, value_t *pval, value_t label, uint32_t closer)
{
value_t c, *pc;
uint32_t t;
@@ -579,7 +580,7 @@
PUSH(FL(Nil));
pc = &FL(stack)[FL(sp)-1]; // to keep track of current cons cell
- t = peek();
+ t = peek(ctx);
while(t != closer){
if(ios_eof(RS))
lerrorf(FL(ParseError), "unexpected end of input");
@@ -592,19 +593,19 @@
ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)c);
}
*pc = c;
- c = do_read_sexpr(UNBOUND); // must be on separate lines due to
+ c = do_read_sexpr(ctx, UNBOUND); // must be on separate lines due to
car_(*pc) = c; // undefined evaluation order
- t = peek();
+ t = peek(ctx);
if(t == TOK_DOT){
- take();
- c = do_read_sexpr(UNBOUND);
+ take(ctx);
+ c = do_read_sexpr(ctx, UNBOUND);
cdr_(*pc) = c;
- t = peek();
+ t = peek(ctx);
if(ios_eof(RS))
lerrorf(FL(ParseError), "unexpected end of input");
if(t != closer){
- take();
+ take(ctx);
lerrorf(
FL(ParseError),
"expected '%c'",
@@ -613,7 +614,7 @@
}
}
}
- take();
+ take(ctx);
c = POP();
USED(c);
}
@@ -620,7 +621,7 @@
// label is the backreference we'd like to fix up with this read
static value_t
-do_read_sexpr(value_t label)
+do_read_sexpr(Rctx *ctx, value_t label)
{
value_t v, sym, oldtokval, *head;
value_t *pv;
@@ -627,8 +628,8 @@
uint32_t t;
char c;
- t = peek();
- take();
+ t = peek(ctx);
+ take(ctx);
switch(t){
case TOK_CLOSE:
lerrorf(FL(ParseError), "unexpected ')'");
@@ -640,7 +641,7 @@
lerrorf(FL(ParseError), "unexpected '.'");
case TOK_SYM:
case TOK_NUM:
- return tokval;
+ return ctx->tokval;
case TOK_COMMA:
head = &FL(comma); goto listwith;
case TOK_COMMAAT:
@@ -659,26 +660,26 @@
PUSH(v);
if(label != UNBOUND)
ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)v);
- v = do_read_sexpr(UNBOUND);
+ v = do_read_sexpr(ctx, UNBOUND);
car_(cdr_(FL(stack)[FL(sp)-1])) = v;
return POP();
case TOK_SHARPQUOTE:
// femtoLisp doesn't need symbol-function, so #' does nothing
- return do_read_sexpr(label);
+ return do_read_sexpr(ctx, label);
case TOK_OPEN:
PUSH(FL(Nil));
- read_list(&FL(stack)[FL(sp)-1], label, TOK_CLOSE);
+ read_list(ctx, &FL(stack)[FL(sp)-1], label, TOK_CLOSE);
return POP();
case TOK_OPENB:
PUSH(FL(Nil));
- read_list(&FL(stack)[FL(sp)-1], label, TOK_CLOSEB);
+ read_list(ctx, &FL(stack)[FL(sp)-1], label, TOK_CLOSEB);
return POP();
case TOK_OPENC:
PUSH(FL(Nil));
- read_list(&FL(stack)[FL(sp)-1], label, TOK_CLOSEC);
+ read_list(ctx, &FL(stack)[FL(sp)-1], label, TOK_CLOSEC);
return POP();
case TOK_SHARPSYM:
- sym = tokval;
+ sym = ctx->tokval;
if(sym == FL(tsym) || sym == FL(Tsym))
return FL(t);
if(sym == FL(fsym) || sym == FL(Fsym))
@@ -686,11 +687,11 @@
// constructor notation
c = nextchar();
if(c != '('){
- take();
- lerrorf(FL(ParseError), "expected argument list for %s", symbol_name(tokval));
+ take(ctx);
+ lerrorf(FL(ParseError), "expected argument list for %s", symbol_name(ctx->tokval));
}
PUSH(FL(Nil));
- read_list(&FL(stack)[FL(sp)-1], UNBOUND, TOK_CLOSE);
+ read_list(ctx, &FL(stack)[FL(sp)-1], UNBOUND, TOK_CLOSE);
if(sym == FL(vu8sym)){
sym = FL(arraysym);
FL(stack)[FL(sp)-1] = fl_cons(FL(uint8sym), FL(stack)[FL(sp)-1]);
@@ -702,7 +703,7 @@
unbound_error(sym);
return fl_apply(v, POP());
case TOK_SHARPOPEN:
- return read_vector(label, TOK_CLOSE);
+ return read_vector(ctx, label, TOK_CLOSE);
case TOK_SHARPDOT:
// eval-when-read
// evaluated expressions can refer to existing backreferences, but they
@@ -709,7 +710,7 @@
// cannot see pending labels. in other words:
// (... #2=#.#0# ... ) OK
// (... #2=#.(#2#) ... ) DO NOT WANT
- sym = do_read_sexpr(UNBOUND);
+ sym = do_read_sexpr(ctx, UNBOUND);
if(issymbol(sym)){
v = symbol_value(sym);
if(v == UNBOUND)
@@ -719,20 +720,20 @@
return fl_toplevel_eval(sym);
case TOK_LABEL:
// create backreference label
- if(ptrhash_has(&FL(readstate)->backrefs, (void*)tokval))
- lerrorf(FL(ParseError), "label %"PRIdPTR" redefined", numval(tokval));
- oldtokval = tokval;
- v = do_read_sexpr(tokval);
+ if(ptrhash_has(&FL(readstate)->backrefs, (void*)ctx->tokval))
+ lerrorf(FL(ParseError), "label %"PRIdPTR" redefined", numval(ctx->tokval));
+ oldtokval = ctx->tokval;
+ v = do_read_sexpr(ctx, ctx->tokval);
ptrhash_put(&FL(readstate)->backrefs, (void*)oldtokval, (void*)v);
return v;
case TOK_BACKREF:
// look up backreference
- v = (value_t)ptrhash_get(&FL(readstate)->backrefs, (void*)tokval);
+ v = (value_t)ptrhash_get(&FL(readstate)->backrefs, (void*)ctx->tokval);
if(v == (value_t)HT_NOTFOUND)
- lerrorf(FL(ParseError), "undefined label %"PRIdPTR, numval(tokval));
+ lerrorf(FL(ParseError), "undefined label %"PRIdPTR, numval(ctx->tokval));
return v;
case TOK_GENSYM:
- pv = (value_t*)ptrhash_bp(&FL(readstate)->gensyms, (void*)tokval);
+ pv = (value_t*)ptrhash_bp(&FL(readstate)->gensyms, (void*)ctx->tokval);
if(*pv == (value_t)HT_NOTFOUND)
*pv = gensym();
return *pv;
@@ -745,7 +746,6 @@
value_t
fl_read_sexpr(value_t f)
{
- value_t v;
fl_readstate_t state;
state.prev = FL(readstate);
htable_new(&state.backrefs, 8);
@@ -752,10 +752,11 @@
htable_new(&state.gensyms, 8);
state.source = f;
FL(readstate) = &state;
- assert(toktype == TOK_NONE);
- fl_gc_handle(&tokval);
+ Rctx ctx;
+ ctx.toktype = TOK_NONE;
+ fl_gc_handle(&ctx.tokval);
- v = do_read_sexpr(UNBOUND);
+ value_t v = do_read_sexpr(&ctx, UNBOUND);
fl_free_gc_handles(1);
FL(readstate) = state.prev;