shithub: sl

Download patch

ref: 064fc0481ad5568482744f2d66db0980e93eea95
parent: 98dfeb8e6bdc2d672c00df97709a115c0cd14755
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Apr 2 23:18:04 EDT 2025

unbox the runes, give it its own type

References: https://todo.sr.ht/~ft/sl/46

--- a/boot/sl.boot
+++ b/boot/sl.boot
@@ -402,8 +402,8 @@
                                                   #fn("n10B3F00<20C?00T21C8072061:23061:" #(thrown-value
   ffound caddr #fn(raise))) str-join #fn(map) str reverse! "/" "λ") fname) reverse! length>
   list-tail *interactive* filter closure? #fn(map) #fn("n10Z;380420061:" #(#fn(top-level-value)))
-  #fn(environment) #fn(for-each) #fn("n17021A<0KGF52524222374051==52470257652492<El23?0770KG0EG52@30q49292<KM_:" #(princ
-  "(" #fn(for-each) #fn("n1702151472061:" #(princ " " print)) vec->list ")" *linefeed* fn-disasm))) print-stack-trace)
+  #fn(environment) #fn(for-each) #fn("n17021A<0KGF52524222374051==5247025765249292<KM_:" #(princ "("
+  #fn(for-each) #fn("n1702151472061:" #(princ " " print)) vec->list ")" *linefeed*))) print-stack-trace)
             print-to-str #fn("z02050212285>10524238561:" #(#fn(buffer)
                                                            #fn(for-each)
                                                            #fn("n1200A62:" #(#fn(write)))
@@ -439,8 +439,7 @@
             self-evaluating? #fn("n120051S;3Z040H;36040RS;JK0421051;3A040R;3:04022051Q:" #(#fn(gensym?)
   #fn(const?) #fn(top-level-value)) self-evaluating?)
             separate-doc-from-body #fn("\x8710002000W1000J60q?14I2021?65140<0=2287513F01JB0883=07388871P62:13X02487513O086258751513B07388=8788<P1P62:761510P:" #(#0#
-  #fn("n1r520051L2;3a040KG21l2;3U040r2G22l2;3H040r3G23l2;3;040r4G24l2:" #(#fn(length) #\d #\o #\c
-                                                                          #\-) doc?)
+  #fn("n1r520051L2;3]040KG21Q;3R040r2G22Q;3F040r3G23Q;3:040r4G24Q:" #(#fn(length) #\d #\o #\c #\-) doc?)
   #fn(str?) separate-doc-from-body #fn(keyword?) #fn(str) reverse) separate-doc-from-body)
             set-syntax! #fn("n220710163:" #(#fn(put!) *syntax-environment*) set-syntax!) sort
             #fn("O200010003000W2000J7071?240=J400:0<7223182870>42418287>362:" #(#(:key 0) identity
@@ -468,7 +467,7 @@
                                                                                         #fn(str-length)
                                                                                         #fn(str-sub)) str-trim)
             sym-set-doc #fn("z220151873601@401<87360q@401=21Z3\xb40883\xaf0228823528:<8:=74258<528=;3H04268=5126778=28295351~8=;3?042:2;8>>18<52718;8?P23527<02=8@534893>07<02>8953@30q^1^1^1^1^1^1^1@30q482B3[07?02@527A2B8:>182527<02@2C8:8;5253^1^1@30q47D60:" #(#fn(str?)
-  str-join #fn(str-split) "\n" any #fn("n1E20051L2;3@040EG21l2;34040:" #(#fn(length) #\space))
+  str-join #fn(str-split) "\n" any #fn("n1E20051L2;3?040EG21Q;34040:" #(#fn(length) #\space))
   #fn(length) str-trim " " "" #fn(map) #fn("n170A2105152390220A62:0:" #(<= #fn(length)
                                                                         #fn(str-sub))) putprop
   *doc* *doc-extra* getprop *formals-list* filter #fn("n1700A52S:" #(member))
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -100,12 +100,12 @@
 		n += llength(v);
 		return size_wrap(n);
 	}
+	if(isrune(a))
+		return fixnum(runelen(torune(a)));
 	if(iscprim(a)){
 		cv = ptr(a);
 		if(cp_class(cv) == sl_utf8type)
 			return fixnum(1);
-		if(cp_class(cv) == sl_runetype)
-			return fixnum(runelen(*(Rune*)cp_data(cv)));
 	}
 	if(iscvalue(a) && cv_class(ptr(a))->eltype != nil)
 		return size_wrap(cvalue_arrlen(a));
@@ -274,8 +274,10 @@
 	sl_v v = args[0];
 	if(isfixnum(v))
 		return v;
-	if(isubnum(v))
-		return isubnumu(v) ? fixnum(ubnumuval(v)) : fixnum(ubnumsval(v));
+	if(isubnumu(v))
+		return fixnum(ubnumuval(v));
+	if(isubnums(v))
+		return fixnum(ubnumsval(v));
 	if(iscprim(v)){
 		void *p = ptr(v);
 		return fixnum(conv_to_s64(cp_data(p), cp_numtype(p)));
@@ -353,9 +355,11 @@
 todouble(sl_v a)
 {
 	if(isfixnum(a))
-		return (double)numval(a);
-	if(isubnum(a))
-		return isubnumu(a) ? (double)ubnumuval(a) : (double)ubnumsval(a);
+		return numval(a);
+	if(isubnumu(a))
+		return ubnumuval(a);
+	if(isubnums(a))
+		return ubnumsval(a);
 	if(iscprim(a)){
 		sl_cprim *cp = ptr(a);
 		sl_numtype nt = cp_numtype(cp);
--- a/src/cvalues.c
+++ b/src/cvalues.c
@@ -200,6 +200,31 @@
 	cv_autorelease(cv);
 }
 
+static void
+cvalue_rune_init(sl_type *type, sl_v a, void *dest)
+{
+	Rune r;
+	USED(type);
+	if(isfixnum(a)){
+		r = numval(a);
+	}else if(isrune(a)){
+		r = torune(a);
+	}else if(isubnumu(a)){
+		uintptr v = ubnumuval(a);
+		r = conv_to_u32(&v, ubnumtype(a));
+	}else if(isubnums(a)){
+		intptr v = ubnumsval(a);
+		r = conv_to_u32(&v, ubnumtype(a));
+	}else if(iscprim(a)){
+		sl_cprim *cp = ptr(a);
+		r = conv_to_u32(cp_data(cp), cp_numtype(cp));
+	}else if(ismp(a)){
+		r = conv_to_u32(cv_data(ptr(a)), T_MP);
+	}else
+		type_error("num", a);
+	*((Rune*)dest) = r;
+}
+
 #define num_init(ctype, cnvt, tag) \
 	static void \
 	cvalue_##ctype##_init(sl_type *type, sl_v a, void *dest) \
@@ -235,6 +260,17 @@
 num_init(float, double, T_FLOAT)
 num_init(double, double, T_DOUBLE)
 
+BUILTIN("rune", rune)
+{
+	if(nargs == 0){
+		PUSH(fixnum(0));
+		args = sl.sp-1;
+	}
+	Rune r;
+	cvalue_rune_init(sl_runetype, args[0], &r);
+	return mk_rune(r);
+}
+
 #define num_ctor_init(typenam, ctype, tag) \
 	static \
 	BUILTIN(#typenam, typenam) \
@@ -258,11 +294,10 @@
 		} \
 		sl_v v; \
 		if(tag < T_UNBOXED_NUM && \
-		   sl_##typenam##type != sl_runetype && \
 		   sizeof(ctype) < sizeof(sl_v)){ \
 			ctype u; \
 			cvalue_##ctype##_init(sl_##typenam##type, args[0], &u); \
-			v = (sl_v)u<<8 | tag<<4 | TAG_UNBOXED; \
+			v = (sl_v)u<<TAG_EXT_BITS | tag<<4 | TAG_UNBOXED; \
 		}else{ \
 			v = cprim(sl_##typenam##type, sizeof(ctype)); \
 			cvalue_##ctype##_init(sl_##typenam##type, args[0], cp_data(ptr(v))); \
@@ -279,17 +314,15 @@
 		return v; \
 	}
 
-#define UNBOXED_NUM_BITS (FIXNUM_BITS+TAG_BITS-8)
-
 #define num_ctor_ctor_unboxed(typenam, ctype, tag) \
+	sl_constfn \
 	sl_v \
 	mk_##typenam(ctype n) \
 	{ \
 		sl_v v; \
 		if(tag < T_UNBOXED_NUM && \
-		   sl_##typenam##type != sl_runetype && \
-		   (sizeof(n) < sizeof(sl_v) || fits_bits(n, UNBOXED_NUM_BITS))){ \
-			v = n<<8 | tag<<4 | TAG_UNBOXED; \
+		   (sizeof(n) < sizeof(sl_v) || fits_bits(n, UNBOXED_BITS))){ \
+			v = n<<TAG_EXT_BITS | tag<<4 | TAG_UNBOXED; \
 		}else{ \
 			v = cprim(sl_##typenam##type, sizeof(n)); \
 			*(ctype*)cp_data(ptr(v)) = n; \
@@ -316,7 +349,6 @@
 num_ctor_init(utf8, u8int, T_U8)
 num_ctor(float, float, T_FLOAT)
 num_ctor(double, double, T_DOUBLE)
-num_ctor(rune, u32int, T_U32)
 
 static void
 cvalue_mp_init(sl_type *type, sl_v a, void *dest)
@@ -376,6 +408,7 @@
 
 static sl_cvtable mp_vtable = { nil, nil, free_mp, nil };
 
+sl_constfn
 sl_v
 size_wrap(usize sz)
 {
@@ -568,7 +601,9 @@
 {
 	sl_sym *s;
 
-	if(issym(type) && (s = ptr(type)) != nil && valid_numtype(s->numtype))
+	if(issym(type) &&
+	   (s = ptr(type)) != nil &&
+	   (valid_numtype(s->numtype) || type == sl_runesym))
 		return s->size;
 
 	if(iscons(type)){
@@ -602,6 +637,12 @@
 		*psz = unboxedtypes[ubnumtype(v)]->size;
 		return;
 	}
+	if(isrune(v)){
+		Rune r = torune(v);
+		*pdata = (u8int*)u;
+		*psz = runetochar((char*)u, &r);
+		return;
+	}
 	if(iscvalue(v)){
 		sl_cv *pcv = ptr(v);
 		sl_ios *x;
@@ -640,27 +681,33 @@
 sl_purefn
 BUILTIN("typeof", typeof)
 {
+	sl_v v = args[0];
 	argcount(nargs, 1);
-	switch(tag(args[0])){
+	switch(tag(v)){
 	case TAG_CONS: return sl_conssym;
 	case TAG_FIXNUM: return sl_fixnumsym;
-	case TAG_UNBOXED: return unboxedtypesyms[ubnumtype(args[0])];
+	case TAG_UNBOXED:
+		if(isubnum(v))
+			return unboxedtypesyms[ubnumtype(v)];
+		if(isrune(v))
+			return sl_runesym;
+		abort();
 	case TAG_SYM: return sl_symsym;
 	case TAG_VEC:return sl_vecsym;
 	case TAG_FN:
-		if(args[0] == sl_t)
+		if(v == sl_t)
 			return sl_booleansym;
-		if(args[0] == sl_nil)
+		if(v == sl_nil)
 			return sl_nullsym;
-		if(args[0] == sl_eof)
+		if(v == sl_eof)
 			return sl_eof;
-		if(args[0] == sl_void)
+		if(v == sl_void)
 			return sl_void;
-		if(isbuiltin(args[0]))
+		if(isbuiltin(v))
 			return sl_builtinsym;
 		return sl_fnsym;
 	}
-	return cv_type(ptr(args[0]));
+	return cv_type(ptr(v));
 }
 
 sl_v
@@ -729,6 +776,7 @@
 	argcount(nargs, 1);
 	return (isubnum(args[0]) ||
 	        iscprim(args[0]) ||
+	        isrune(args[0]) ||
 			(iscvalue(args[0]) && cv_isPOD(ptr(args[0])))) ?
 		sl_t : sl_nil;
 }
@@ -906,6 +954,7 @@
 
 #define RETURN_NUM_AS(var, type) return(mk_##type(var))
 
+sl_constfn
 sl_v
 return_from_u64(u64int Uaccum)
 {
@@ -1016,6 +1065,11 @@
 		*pp = pi;
 		*pt = ubnumtype(a);
 		return true;
+	}else if(isrune(a)){
+		*pi = torune(a);
+		*pp = pi;
+		*pt = T_U32;
+		return true;
 	}else if(iscprim(a)){
 		cp = ptr(a);
 		*pp = cp_data(cp);
@@ -1407,6 +1461,13 @@
 	htable_new(&slg.types, 256);
 	htable_new(&slg.reverse_dlsym_lookup, 256);
 
+	ctor_cv_intern(rune, NONNUMERIC, u32int);
+	sl_sym *sym = ptr(sl_runesym);
+	sym->numtype = NONNUMERIC;
+	sym->size = sizeof(Rune);
+	sl_runetype = get_type(sl_runesym);
+	sl_runetype->init = cvalue_rune_init;
+
 	sl_builtintype = define_opaque_type(sl_builtinsym, sizeof(builtin_t), nil, nil);
 
 	ctor_cv_intern(s8, T_S8, s8int);
@@ -1418,7 +1479,6 @@
 	ctor_cv_intern(s64, T_S64, s64int);
 	ctor_cv_intern(u64, T_U64, u64int);
 	ctor_cv_intern(utf8, T_U8, u8int);
-	ctor_cv_intern(rune, T_U32, u32int);
 	ctor_cv_intern(float, T_FLOAT, float);
 	ctor_cv_intern(double, T_DOUBLE, double);
 
@@ -1433,7 +1493,6 @@
 	mk_primtype(s64, s64int);
 	mk_primtype(u64, u64int);
 	mk_primtype(utf8, u8int);
-	mk_primtype(rune, u32int);
 	mk_primtype(float, float);
 	mk_primtype(double, double);
 
--- a/src/cvalues.h
+++ b/src/cvalues.h
@@ -50,7 +50,6 @@
 sl_v mk_u32(u32int n);
 sl_v mk_s64(s64int n);
 sl_v mk_u64(u64int n);
-sl_v mk_rune(Rune n);
 sl_v mk_mp(mpint *n);
 
 usize llength(sl_v v) sl_purefn;
--- a/src/equal.c
+++ b/src/equal.c
@@ -80,32 +80,18 @@
 	case TAG_FIXNUM:
 		if(isfixnum(b))
 			return (sl_fx)a < (sl_fx)b ? fixnum(-1) : fixnum(1);
-		if(isubnum(b))
+		if(isubnum(b) || iscprim(b) || (iscvalue(b) && (cv = ptr(b), valid_numtype(cv_numtype(cv)))))
 			return fixnum(numeric_compare(a, b, eq, true, false));
-		if(iscprim(b)){
-			if(cp_class(ptr(b)) == sl_runetype)
+		if(isrune(b))
 				return fixnum(1);
-			return fixnum(numeric_compare(a, b, eq, true, false));
-		}
-		if(iscvalue(b)){
-			cv = ptr(b);
-			if(valid_numtype(cv_numtype(cv)))
-				return fixnum(numeric_compare(a, b, eq, true, false));
-		}
 		return fixnum(-1);
 	case TAG_UNBOXED:
-		if(isfixnum(b) || isubnum(b))
-			return fixnum(numeric_compare(a, b, eq, true, false));
-		if(iscprim(b)){
-			if(cp_class(ptr(b)) == sl_runetype)
+		if(isrune(a))
+			return fixnum(isrune(b) && a == b ? 0 : -1);
+		if(isrune(b))
 				return fixnum(1);
+		if(isfixnum(b) || isubnum(b) || iscprim(b) || (iscvalue(b) && (cv = ptr(b), valid_numtype(cv_numtype(cv)))))
 			return fixnum(numeric_compare(a, b, eq, true, false));
-		}
-		if(iscvalue(b)){
-			cv = ptr(b);
-			if(valid_numtype(cv_numtype(cv)))
-				return fixnum(numeric_compare(a, b, eq, true, false));
-		}
 		return fixnum(-1);
 	case TAG_SYM:
 		if(eq || tagb < TAG_SYM)
@@ -118,11 +104,6 @@
 			return bounded_vec_compare(a, b, bound, eq);
 		break;
 	case TAG_CPRIM:
-		if(cp_class(ptr(a)) == sl_runetype){
-			if(!iscprim(b) || cp_class(ptr(b)) != sl_runetype)
-				return fixnum(-1);
-		}else if(iscprim(b) && cp_class(ptr(b)) == sl_runetype)
-			return fixnum(1);
 		c = numeric_compare(a, b, eq, true, false);
 		if(c != 2)
 			return fixnum(c);
@@ -352,6 +333,8 @@
 			u.d = ubnumuval(a);
 		else if(isubnums(a))
 			u.d = ubnumsval(a);
+		else if(isrune(a))
+			return inthash(torune(a));
 		else // FIXME(sigrid): unboxed
 			u.d = 0;
 		return doublehash(u.i64);
@@ -364,8 +347,6 @@
 	case TAG_CPRIM:
 		cp = ptr(a);
 		data = cp_data(cp);
-		if(cp_class(cp) == sl_runetype)
-			return inthash(*(Rune*)data);
 		nt = cp_numtype(cp);
 		u.d = conv_to_double(data, nt);
 		return doublehash(u.i64);
--- a/src/io.c
+++ b/src/io.c
@@ -196,11 +196,9 @@
 {
 	argcount(nargs, 2);
 	sl_ios *s = toio(args[0]);
-	sl_cprim *cp = ptr(args[1]);
-	if(!iscprim(args[1]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[1]))
 		type_error("rune", args[1]);
-	Rune r = *(Rune*)cp_data(cp);
-	return fixnum(ios_putrune(s, r));
+	return fixnum(ios_putrune(s, torune(args[1])));
 }
 
 BUILTIN("io-skip", io_skip)
@@ -325,12 +323,10 @@
 		argcount(nargs, 2);
 	sl_ios *s = toio(args[0]);
 	sl_v v = args[1];
-	sl_cprim *cp = ptr(v);
-	if(iscprim(args[1]) && cp_class(cp) == sl_runetype){
+	if(isrune(v)){
 		if(nargs > 2)
 			lerrorf(sl_errarg, "offset argument not supported for characters");
-		Rune r = *(Rune*)cp_data(ptr(args[1]));
-		return fixnum(ios_putrune(s, r));
+		return fixnum(ios_putrune(s, torune(v)));
 	}
 	u8int *data;
 	usize sz, offs = 0;
@@ -347,10 +343,10 @@
 static u8int
 get_delim_arg(sl_v arg)
 {
-	usize uldelim = tosize(arg);
+	usize uldelim = isrune(arg) ? torune(arg) : tosize(arg);
 	if(uldelim > 0x7f){
 		// runes > 0x7f, or anything else > 0xff, are out of range
-		if((iscprim(arg) && cp_class(ptr(arg)) == sl_runetype) || uldelim > 0xff)
+		if(isrune(arg) || uldelim > 0xff)
 			lerrorf(sl_errarg, "delimiter out of range");
 	}
 	return (u8int)uldelim;
--- a/src/print.c
+++ b/src/print.c
@@ -233,7 +233,7 @@
 		const char *s = sym_name(v);
 		return u8_strwidth(s, strlen(s));
 	}
-	if(iscprim(v) && ptr(v) != nil && cp_class(ptr(v)) == sl_runetype)
+	if(isrune(v))
 		return 4;
 	return -1;
 }
@@ -387,6 +387,7 @@
 
 static void unboxed_print(sl_ios *f, sl_v v);
 static void cvalue_print(sl_ios *f, sl_v v);
+static void rune_print(sl_ios *f, Rune r);
 
 static bool
 print_circle_prefix(sl_ios *f, sl_v v)
@@ -661,6 +662,45 @@
 	}
 }
 
+static void
+rune_print(sl_ios *f, Rune r)
+{
+	char seq[UTFmax+1];
+	int n, nb = runetochar(seq, &r);
+	seq[nb] = '\0';
+	if(sl.print_princ){
+		outsn(f, seq, nb);
+	}else{
+		outsc(f, "#\\");
+		switch(r){
+		case 0x00: outsc(f, "nul"); break;
+		case 0x07: outsc(f, "alarm"); break;
+		case 0x08: outsc(f, "backspace"); break;
+		case 0x09: outsc(f, "tab"); break;
+		case 0x0a: outsc(f, "newline"); break;
+		case 0x0b: outsc(f, "vtab"); break;
+		case 0x0c: outsc(f, "page"); break;
+		case 0x0d: outsc(f, "return"); break;
+		case 0x1b: outsc(f, "esc"); break;
+		case ' ':  outsc(f, "space"); break;
+		case 0x7f: outsc(f, "delete"); break;
+		default:
+			if(sl_iswprint(r))
+				outs(f, seq);
+			else{
+				n = ios_printf(f, "x%04"PRIx32, r);
+				if(n < 1)
+					goto err;
+				sl.hpos += n;
+			}
+			break;
+		}
+	}
+	return;
+err:
+	lerrorf(sl_errio, "write failed");
+}
+
 // 'weak' means we don't need to accurately reproduce the type, so
 // for example #s32(0) can be printed as just 0. this is used
 // printing in a context where a type is already implied, e.g. inside
@@ -680,38 +720,7 @@
 			sl.hpos += n;
 		}
 	}else if(type == sl_runesym){
-		Rune r = *(Rune*)data;
-		char seq[UTFmax+1];
-		int nb = runetochar(seq, &r);
-		seq[nb] = '\0';
-		if(sl.print_princ){
-			outsn(f, seq, nb);
-		}else{
-			outsc(f, "#\\");
-			switch(r){
-			case 0x00: outsc(f, "nul"); break;
-			case 0x07: outsc(f, "alarm"); break;
-			case 0x08: outsc(f, "backspace"); break;
-			case 0x09: outsc(f, "tab"); break;
-			case 0x0a: outsc(f, "newline"); break;
-			case 0x0b: outsc(f, "vtab"); break;
-			case 0x0c: outsc(f, "page"); break;
-			case 0x0d: outsc(f, "return"); break;
-			case 0x1b: outsc(f, "esc"); break;
-			case ' ':  outsc(f, "space"); break;
-			case 0x7f: outsc(f, "delete"); break;
-			default:
-				if(sl_iswprint(r))
-					outs(f, seq);
-				else{
-					n = ios_printf(f, "x%04"PRIx32, r);
-					if(n < 1)
-						goto err;
-					sl.hpos += n;
-				}
-				break;
-			}
-		}
+		rune_print(f, *(Rune*)data);
 	}else if(type == sl_floatsym || type == sl_doublesym){
 		char buf[64];
 		double d;
@@ -858,7 +867,10 @@
 		u = ubnumuval(v);
 	else if(isubnums(v))
 		u = ubnumsval(v);
-	else // FIXME(sigrid): unboxed
+	else if(isrune(v)){
+		rune_print(f, torune(v));
+		return;
+	}else // FIXME(sigrid): unboxed
 		u = 0;
 	int numtype = ubnumtype(v);
 	sl_v typesym = unboxedtypesyms[numtype];
--- a/src/sl.c
+++ b/src/sl.c
@@ -698,7 +698,7 @@
 		return true;
 	if(iscprim(v)){
 		sl_cprim *c = ptr(v);
-		return c->type != sl_runetype && valid_numtype(c->type->numtype);
+		return valid_numtype(c->type->numtype);
 	}
 	if(ismp(v))
 		return true;
--- a/src/sl.h
+++ b/src/sl.h
@@ -21,6 +21,7 @@
 	TAG_CONS,
 
 	TAG_BITS = 3,
+	TAG_EXT_BITS = 8,
 
 	/* those were set to 7 and 3 strategically on purpose */
 	TAG_NONLEAF_MASK = TAG_CONS & TAG_VEC,
@@ -46,6 +47,7 @@
 #if defined(BITS64)
 typedef s64int sl_fx;
 #define FIXNUM_BITS (64-TAG_BITS)
+#define UNBOXED_BITS (64-TAG_EXT_BITS)
 #define TOP_BIT (1ULL<<63)
 #define T_FIXNUM T_S64
 #define PRIdFIXNUM PRId64
@@ -52,6 +54,7 @@
 #else
 typedef s32int sl_fx;
 #define FIXNUM_BITS (32-TAG_BITS)
+#define UNBOXED_BITS (32-TAG_EXT_BITS)
 #define TOP_BIT (1U<<31)
 #define T_FIXNUM T_S32
 #define PRIdFIXNUM PRId32
@@ -127,14 +130,18 @@
  * strings:  ...|xxxxxxxx|xxxxxxxx|xxxxxxxx|sss01100|
  * runes:    ...|xxxxxxxx|xxxxxxxx|xxxxxxxx|00011100|
  */
-#define isubnum(x)  ((tagext(x) & ((1<<(TAG_BITS+1))-1)) == (0<<TAG_BITS | TAG_UNBOXED))
-#define isubnums(x) ((tagext(x) & ((2<<(TAG_BITS+1))-1)) == (0<<TAG_BITS | TAG_UNBOXED))
-#define isubnumu(x) ((tagext(x) & ((2<<(TAG_BITS+1))-1)) == (2<<TAG_BITS | TAG_UNBOXED))
-#define ubnumtype(x) ((tagext(x)>>(TAG_BITS+1))&(0xff>>(TAG_BITS+1)))
+#define TAG_UBNUM_SHIFT (TAG_BITS+1)
+#define isubnum(x)  ((tagext(x) & ((1<<TAG_UBNUM_SHIFT)-1)) == (0<<TAG_BITS | TAG_UNBOXED))
+#define isubnums(x) ((tagext(x) & ((2<<TAG_UBNUM_SHIFT)-1)) == (0<<TAG_BITS | TAG_UNBOXED))
+#define isubnumu(x) ((tagext(x) & ((2<<TAG_UBNUM_SHIFT)-1)) == (2<<TAG_BITS | TAG_UNBOXED))
+#define ubnumtype(x) ((tagext(x)>>TAG_UBNUM_SHIFT) & 0xf)
+#define numtofx(v) (sl_fx)tooffset(v)
 extern sl_type *unboxedtypes[T_UNBOXED_NUM];
 extern sl_v unboxedtypesyms[T_UNBOXED_NUM];
 
-#define numtofx(v) (sl_fx)tooffset(v)
+#define mk_rune(r) ((r)<<TAG_EXT_BITS | 0x1c)
+#define isrune(v) (((v) & 0xff) == 0x1c)
+#define torune(v) ((v)>>8)
 
 // allocate n consecutive conses
 #define cons_reserve(n) tagptr(alloc_words((n)*2), TAG_CONS)
--- a/src/str.c
+++ b/src/str.c
@@ -44,12 +44,9 @@
 BUILTIN("str-width", str_width)
 {
 	argcount(nargs, 1);
-	if(iscprim(args[0])){
-		sl_cprim *cp = ptr(args[0]);
-		if(cp_class(cp) == sl_runetype){
-			int w = sl_wcwidth(*(Rune*)cp_data(cp));
-			return w < 0 ? sl_nil : fixnum(w);
-		}
+	if(isrune(args[0])){
+		int w = sl_wcwidth(torune(args[0]));
+		return w < 0 ? sl_nil : fixnum(w);
 	}
 	if(!sl_isstr(args[0]))
 		type_error("str", args[0]);
@@ -200,31 +197,31 @@
 	return mk_rune(r);
 }
 
+sl_purefn
 BUILTIN("rune-upcase", rune_upcase)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
+	return mk_rune(toupperrune(torune(args[0])));
 }
 
+sl_purefn
 BUILTIN("rune-downcase", rune_downcase)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
+	return mk_rune(tolowerrune(torune(args[0])));
 }
 
+sl_purefn
 BUILTIN("rune-titlecase", rune_titlecase)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return mk_rune(totitlerune(*(Rune*)cp_data(cp)));
+	return mk_rune(totitlerune(torune(args[0])));
 }
 
 sl_purefn
@@ -231,10 +228,9 @@
 BUILTIN("rune-alphabetic?", rune_alphabeticp)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return isalpharune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return isalpharune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 sl_purefn
@@ -241,10 +237,9 @@
 BUILTIN("rune-lower-case?", rune_lower_casep)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return islowerrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return islowerrune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 sl_purefn
@@ -251,10 +246,9 @@
 BUILTIN("rune-upper-case?", rune_upper_casep)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return isupperrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return isupperrune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 sl_purefn
@@ -261,10 +255,9 @@
 BUILTIN("rune-title-case?", rune_title_casep)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return istitlerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return istitlerune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 sl_purefn
@@ -271,10 +264,9 @@
 BUILTIN("rune-numeric?", rune_numericp)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return isdigitrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return isdigitrune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 sl_purefn
@@ -281,10 +273,9 @@
 BUILTIN("rune-whitespace?", rune_whitespacep)
 {
 	argcount(nargs, 1);
-	sl_cprim *cp = ptr(args[0]);
-	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
+	if(!isrune(args[0]))
 		type_error("rune", args[0]);
-	return isspacerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
+	return isspacerune(torune(args[0])) ? sl_t : sl_nil;
 }
 
 BUILTIN("str-find", str_find)
@@ -304,9 +295,8 @@
 	usize ndbytes;
 	sl_v v = args[1];
 	char rbuf[UTFmax+1];
-	sl_cprim *cp = ptr(v);
-	if(iscprim(v) && cp_class(cp) == sl_runetype){
-		Rune r = *(Rune*)cp_data(cp);
+	if(isrune(v)){
+		Rune r = torune(v);
 		ndbytes = runetochar(rbuf, &r);
 		nd = rbuf;
 		nd[ndbytes] = 0;
--- a/src/system.sl
+++ b/src/system.sl
@@ -140,10 +140,10 @@
   :doc-group doc
   (def (doc? kw)
     (and (> (length kw) 5)
-         (= (aref kw 1) #\d)
-         (= (aref kw 2) #\o)
-         (= (aref kw 3) #\c)
-         (= (aref kw 4) #\-)))
+         (eq? (aref kw 1) #\d)
+         (eq? (aref kw 2) #\o)
+         (eq? (aref kw 3) #\c)
+         (eq? (aref kw 4) #\-)))
   (let {[hd (car body)]
         [tl (cdr body)]}
     (cond [(and (str? hd) (not doc) tl)
@@ -163,7 +163,7 @@
              [hd (car lines)]
              [tl (cdr lines)]
              [snd (any (λ (s) (and (> (length s) 0)
-                                   (= (aref s 0) #\space)
+                                   (eq? (aref s 0) #\space)
                                    s))
                        tl)]
              [indent (and snd
@@ -1424,7 +1424,7 @@
        (for-each (λ (p) (princ " ") (print p))
                  (cdr (cdr (vec->list f))))
        (princ ")" *linefeed*)
-       (when (= n 0)
+       #;(when (= n 0)
          (fn-disasm (aref f 1) (aref f 0)))
        (set! n (+ n 1)))
      st)))
--- a/src/types.c
+++ b/src/types.c
@@ -33,7 +33,7 @@
 	ft->numtype = NONNUMERIC;
 	if(issym(t)){
 		ft->numtype = sym_to_numtype(t);
-		assert(valid_numtype(ft->numtype));
+		assert(valid_numtype(ft->numtype) || t == sl_runesym);
 		((sl_sym*)ptr(t))->type = ft;
 	}
 	ft->size = sz;
--- a/src/vm.h
+++ b/src/vm.h
@@ -423,13 +423,26 @@
 LABEL(apply_aref):;
 	sl_v v = sp[-n];
 	for(int i = n-1; i > 0; i--){
+		sl_v e = sp[-i];
+		usize isz = tosize(e);
+		if(sl_isstr(v)){
+			char *s = tostr(v);
+			usize sz = cv_len(ptr(v)), b, k;
+			for(b = k = 0; k < isz && b < sz; k++)
+				b += u8_seqlen(s+b);
+			if(k == isz && b < sz){
+				Rune r;
+				chartorune(&r, s+b);
+				v = mk_rune(r);
+				continue;
+			}
+			bounds_error(v, e);
+		}
 		if(isarr(v)){
 			sp[-i-1] = v;
 			v = cvalue_arr_aref(sp-i-1);
 			continue;
 		}
-		sl_v e = sp[-i];
-		usize isz = tosize(e);
 		if(isvec(v)){
 			if(sl_unlikely(isz >= vec_size(v))){
 				sl.sp = sp;
--- a/test/unittest.sl
+++ b/test/unittest.sl
@@ -83,7 +83,7 @@
 
 (assert (> 9223372036854775808 9223372036854775807))
 
-(assert (fixnum? (- (aref "0" 0) #\0)))
+(assert-fail (fixnum? (- (aref "0" 0) #\0)))
 
 (assert (= (ash #bignum(1) -9999) 0))
 
@@ -215,6 +215,7 @@
 
 ; long argument lists
 (assert (= (apply + (iota 100000)) 4999950000))
+
 (def ones (map (λ (x) 1) (iota 80000)))
 (assert (= (eval `(if (< 2 1)
                       (+ ,@ones)
@@ -440,8 +441,8 @@
 (assert (equal? 7 (aref a 1 2)))
 (assert (equal? 5 (aref a 1 (1+ 0) 1)))
 (assert-fail (aref a 1 1 3) bounds-error)
-(assert (equal? (fixnum #\l) (aref #("hello") 0 2)))
-(assert (equal? (fixnum #\o) (aref #("hello") 0 (1+ 3))))
+(assert (equal? #\l (aref #("hello") 0 2)))
+(assert (equal? #\o (aref #("hello") 0 (1+ 3))))
 (assert-fail (aref #("hello") 0 5))
 (assert-fail (aref #("hello") 1 0))
 (assert-fail (aref '(NIL) 0 0))
--- a/tools/bootstrap.sh
+++ b/tools/bootstrap.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+set -x
 test -e
 P="$(pwd)"
 F="$P/build/sl"
@@ -5,10 +6,10 @@
 test -x $F || { meson setup -Dbuildtype=debug build . && ninja -C build || exit 1; }
 test -x $F || { echo no $F found; exit 1; }
 cd src && \
-$F ../tools/gen.sl && \
+$WRAPPER $F ../tools/gen.sl && \
 cp ../boot/sl.boot ../boot/sl.boot.bak && \
-$F ../tools/mkboot0.sl builtins.sl instructions.sl system.sl compiler.sl > ../boot/sl.boot && \
+$WRAPPER $F ../tools/mkboot0.sl builtins.sl instructions.sl system.sl compiler.sl > ../boot/sl.boot && \
 ninja -C ../build && \
 cd ../boot && \
-$F ../tools/mkboot1.sl && \
+$WRAPPER $F -S 8m ../tools/mkboot1.sl && \
 ninja -C ../build || { cp "$P/boot/sl.boot.bak" "$P/boot/sl.boot"; exit 1; }
--- a/tools/gen.sl
+++ b/tools/gen.sl
@@ -10,8 +10,8 @@
       (λ (i) (let {[r (rune (aref name i))]}
                (io-write cname
                          (cond [(rune-alphanumeric? r) (rune-upcase r)]
-                               [(= r #\?) #\P]
-                               [(= r #\_) #\_]
+                               [(eq? r #\?) #\P]
+                               [(eq? r #\_) #\_]
                                [else ""])))))
     (io->str cname)))
 
--