shithub: femtolisp

Download patch

ref: 10d16c10bed72a177a99e0a9a3f3caca6c38ce94
parent: c9541daf008fa80b06f04cd0177e6c0268965810
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Jan 13 15:28:37 EST 2025

changes to satisfy m68k builds more

* certain structs have to be aligned by 8 bytes
* qp tries don't like when custom allocator (mem.c) is used

TODO:

* mpint is still broken

EXTRA:

* add attributes to certain functions

--- a/3rd/brieflz/brieflz.h
+++ b/3rd/brieflz/brieflz.h
@@ -79,7 +79,7 @@
  * @return maximum size of compressed data
  */
 BLZ_API size_t
-blz_max_packed_size(size_t src_size);
+blz_max_packed_size(size_t src_size) fl_constfn;
 
 /**
  * Get required size of `workmem` buffer.
@@ -90,7 +90,7 @@
  * @return required size in bytes of `workmem` buffer
  */
 BLZ_API size_t
-blz_workmem_size(size_t src_size);
+blz_workmem_size(size_t src_size) fl_constfn;
 
 /**
  * Compress `src_size` bytes of data from `src` to `dst`.
@@ -114,7 +114,7 @@
  * @return required size in bytes of `workmem` buffer
  */
 BLZ_API size_t
-blz_workmem_size_level(size_t src_size, int level);
+blz_workmem_size_level(size_t src_size, int level) fl_constfn;
 
 /**
  * Compress `src_size` bytes of data from `src` to `dst`.
--- a/3rd/fn.c
+++ b/3rd/fn.c
@@ -14,7 +14,7 @@
 	if(t == nil)
 		return false;
 	while(isbranch(t)){
-		__builtin_prefetch(t->ptr);
+		fl_prefetch(t->ptr);
 		Tindex i = t->index;
 		Tbitmap b = twigbit(i, key, len);
 		if(!hastwig(i, b))
@@ -80,7 +80,7 @@
 	Tindex i = 0;
 	Tbitmap b = 0;
 	while(isbranch(t)){
-		__builtin_prefetch(t->ptr);
+		fl_prefetch(t->ptr);
 		i = t->index;
 		b = twigbit(i, key, len);
 		if(!hastwig(i, b))
@@ -92,24 +92,24 @@
 	*pkey = Tleaf_key(t);
 	*pval = Tleaf_val(t);
 	if(p == nil){
-		MEM_FREE(tbl);
+		free(tbl);
 		return nil;
 	}
 	Trie *twigs = Tbranch_twigs(p);
-	uint32_t m = __builtin_popcount(Tindex_bitmap(i));
+	uint32_t m = fl_popcount(Tindex_bitmap(i));
 	assert(twigs <= t && t < twigs+m);
 	if(m == 2){
 		// Move the other twig to the parent branch.
 		*p = twigs[twigs == t];
-		MEM_FREE(twigs);
+		free(twigs);
 		return tbl;
 	}
 	memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie));
 	p->index = Tbitmap_del(i, b);
 	// We have now correctly removed the twig from the trie, so if
-	// MEM_REALLOC() fails we can ignore it and continue to use the
+	// realloc() fails we can ignore it and continue to use the
 	// slightly oversized twig array.
-	twigs = MEM_REALLOC(twigs, sizeof(Trie) * (m - 1));
+	twigs = realloc(twigs, sizeof(Trie) * (m - 1));
 	if(twigs != nil)
 		Tset_twigs(p, twigs);
 	return tbl;
@@ -123,7 +123,7 @@
 		return Tdell(tbl, key, len);
 	// First leaf in an empty tbl?
 	if(tbl == nil){
-		tbl = MEM_ALLOC(sizeof(*tbl));
+		tbl = malloc(sizeof(*tbl));
 		if(tbl == nil)
 			return nil;
 		Tset_key(tbl, key);
@@ -136,7 +136,7 @@
 	// which can be at a lower index than the point at which we
 	// detect a difference.
 	while(isbranch(t)){
-		__builtin_prefetch(t->ptr);
+		fl_prefetch(t->ptr);
 		Tindex i = t->index;
 		Tbitmap b = twigbit(i, key, len);
 		// Even if our key is missing from this branch we need to
@@ -158,7 +158,7 @@
 	Tset_val(t, val);
 	return tbl;
 newkey:; // We have the branch's byte index; what is its chunk index?
-	uint32_t bit = off * 8 + __builtin_clz(xor) + 8 - sizeof(uint32_t) * 8;
+	uint32_t bit = off * 8 + fl_clz(xor) + 8 - sizeof(uint32_t) * 8;
 	uint32_t qo = bit / 5;
 	off = qo * 5 / 8;
 	shf = qo * 5 % 8;
@@ -173,7 +173,7 @@
 	t = tbl;
 	Tindex i;
 	while(isbranch(t)){
-		__builtin_prefetch(t->ptr);
+		fl_prefetch(t->ptr);
 		i = t->index;
 		if(off == Tindex_offset(i) && shf == Tindex_shift(i))
 			goto growbranch;
@@ -186,7 +186,7 @@
 		t = Tbranch_twigs(t) + twigoff(i, b);
 	}
 newbranch:;
-	Trie *twigs = MEM_ALLOC(sizeof(Trie) * 2);
+	Trie *twigs = malloc(sizeof(Trie) * 2);
 	if(twigs == nil)
 		return nil;
 	i = Tindex_new(shf, off, nb | tb);
@@ -198,7 +198,7 @@
 growbranch:
 	assert(!hastwig(i, nb));
 	uint32_t s, m; TWIGOFFMAX(s, m, i, nb);
-	twigs = MEM_REALLOC(Tbranch_twigs(t), sizeof(Trie) * (m + 1));
+	twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1));
 	if(twigs == nil)
 		return nil;
 	memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s));
--- a/3rd/fn.h
+++ b/3rd/fn.h
@@ -22,18 +22,6 @@
 typedef uint32_t Tbitmap;
 typedef uint64_t Tindex;
 
-#if defined(__plan9__)
-static inline uint32_t
-__builtin_popcount(Tbitmap w)
-{
-	w -= (w >> 1) & 0x55555555U;
-	w = (w & 0x33333333U) + ((w >> 2) & 0x33333333U);
-	w = (w + (w >> 4)) & 0x0F0F0F0FU;
-	w = (w * 0x01010101U) >> 24;
-	return w;
-}
-#endif
-
 typedef struct Tbl {
 	Tindex index;
 	void *ptr;
@@ -93,7 +81,7 @@
 
 #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL)
 
-#define Tunmask(field,index) \
+#define Tunmask(field, index) \
 	((uint32_t)(((index) >> Tix_base_##field) & Tix_mask(field)))
 
 #define Tmaxlen Tix_mask(offset)
@@ -158,9 +146,9 @@
 static inline uint8_t
 knybble(const char *key, uint32_t off, uint32_t shift)
 {
-	uint32_t word = (uint8_t)key[off]<<8;
+	uint32_t word = ((uint8_t*)key)[off]<<8;
 	if(word)
-		word |= key[off+1];
+		word |= ((uint8_t*)key)[off+1];
 	uint32_t right = 16 - 5 - shift;
 	return (word >> right) & 0x1FU;
 }
@@ -189,10 +177,10 @@
 static inline uint32_t
 twigoff(Tindex i, Tbitmap bit)
 {
-	return __builtin_popcount(Tindex_bitmap(i) & (bit-1));
+	return fl_popcount(Tindex_bitmap(i) & (bit-1));
 }
 
 #define TWIGOFFMAX(off, max, i, b) do{ \
 		off = twigoff(i, b); \
-		max = __builtin_popcount(Tindex_bitmap(i)); \
+		max = fl_popcount(Tindex_bitmap(i)); \
 	}while(0)
--- a/3rd/mp/mp.h
+++ b/3rd/mp/mp.h
@@ -26,8 +26,8 @@
 
 extern	int	dec16(uint8_t*, int, char*, int);
 extern	int	enc16(char*, int, uint8_t*, int);
-extern	mpdigit	dec16chr(int);
-extern	int	enc16chr(int);
+extern	mpdigit	dec16chr(int) fl_constfn;
+extern	int	enc16chr(int) fl_constfn;
 
 /*
  * the code assumes mpdigit to be at least an int
--- a/3rd/utf/utf.h
+++ b/3rd/utf/utf.h
@@ -14,14 +14,14 @@
 int runenlen(const Rune *r, int nrune);
 int fullrune(const char *str, int n);
 int runelen(Rune c);
-Rune tolowerrune(Rune c);
-Rune toupperrune(Rune c);
-Rune totitlerune(Rune c);
-int islowerrune(Rune c);
-int isupperrune(Rune c);
-int isalpharune(Rune c);
-int istitlerune(Rune c);
-int isspacerune(Rune c);
-int isdigitrune(Rune c);
+Rune tolowerrune(Rune c) fl_constfn;
+Rune toupperrune(Rune c) fl_constfn;
+Rune totitlerune(Rune c) fl_constfn;
+int islowerrune(Rune c) fl_constfn;
+int isupperrune(Rune c) fl_constfn;
+int isalpharune(Rune c) fl_constfn;
+int istitlerune(Rune c) fl_constfn;
+int isspacerune(Rune c) fl_constfn;
+int isdigitrune(Rune c) fl_constfn;
 
 int utfnlen(const char *s, long m);
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 This is a reanimation of
 https://github.com/lambdaconservatory/femtolisp with bigger plans.
 
-Supported OS: [9front](http://9front.org), Unix-like operating systems (OpenBSD, NetBSD, Linux, etc), MacOS [89].x.
+Supported OS: [9front](http://9front.org), Unix-like operating systems (OpenBSD, NetBSD, Linux, etc), MacOS 7.x-9.x.
 
 Supported CPUs: any decent 32 or 64-bit, little or big endian.
 
@@ -43,7 +43,7 @@
 
 	mk all test
 
-### MacOS 9.x (PowerPC) or MacOS 8.x (m68k)
+### MacOS 7.x-9.x (PowerPC or m68k)
 
 Install and build [Retro68](https://github.com/autc04/Retro68).
 
--- a/builtins.c
+++ b/builtins.c
@@ -145,7 +145,7 @@
 BUILTIN("symbol", symbol)
 {
 	argcount(nargs, 1);
-	if(__unlikely(!fl_isstring(args[0])))
+	if(fl_unlikely(!fl_isstring(args[0])))
 		type_error("string", args[0]);
 	return symbol(cvalue_data(args[0]), true);
 }
--- a/builtins_plan9.c
+++ b/builtins_plan9.c
@@ -1,9 +1,9 @@
 #include "platform.h"
 
 int
-__builtin_clz(unsigned int x)
+fl_clz(uint32_t x)
 {
-	unsigned int r;
+	uint32_t r;
 	if(x == 0)
 		return 32;
 	for(r = 0; (x & (1UL<<31)) == 0; x <<= 1, r++);
--- a/builtins_plan9_amd64.s
+++ b/builtins_plan9_amd64.s
@@ -1,4 +1,4 @@
-TEXT __builtin_clz(SB),1,$0
+TEXT fl_clz(SB),1,$0
 	BYTE $0x0F; BYTE $0xBD; BYTE $0xC5 /* BSRL RARG, AX */
 	XORL $31, AX
 	RET
--- a/builtins_plan9_arm64.s
+++ b/builtins_plan9_arm64.s
@@ -1,3 +1,3 @@
-TEXT __builtin_clz(SB),1,$0
+TEXT fl_clz(SB),1,$0
 	CLZW R0, R0
 	RETURN
--- /dev/null
+++ b/cc.h
@@ -1,0 +1,54 @@
+#pragma once
+
+#ifdef __GNUC__
+
+#define fl_unlikely(x) __builtin_expect(!!(x), 0)
+#define fl_likely(x) __builtin_expect(!!(x), 1)
+#define fl_printfmt(x, y) __attribute__((format(printf, x, y)))
+#define fl_thread __thread
+#define fl_prefetch(x) __builtin_prefetch(x)
+#define fl_constfn __attribute__((const))
+#define fl_purefn __attribute__((pure))
+#define fl_hotfn __attribute__((hot))
+#define fl_aligned(x) __attribute__((aligned(x)))
+#define fl_popcount(x) __builtin_popcount(x)
+#define fl_clz(x) __builtin_clz(x)
+#define sadd_overflow __builtin_add_overflow
+#define sadd_overflow_64 __builtin_add_overflow
+#define smul_overflow_64 __builtin_mul_overflow
+
+#else
+
+#define fl_unlikely(x) (x)
+#define fl_likely(x) (x)
+#define fl_printfmt(x, y)
+#define fl_thread
+#define fl_prefetch(x)
+#define fl_constfn
+#define fl_purefn
+#define fl_hotfn
+#define fl_aligned(x)
+
+/* FIXME(sigrid): s*_overflow_* can be more optimal */
+#define sadd_overflow_64(a, b, c) ( \
+  (b < 1) ? \
+  ((INT64_MAX-(b) <= (a)) ? ((*(c)=(a)+(b)), 0) : 1) : \
+  ((INT64_MAX-(b) >= (a)) ? ((*(c)=(a)+(b)), 0) : 1) \
+)
+#define smul_overflow_64(a, b, c) ( \
+	((a)>0 ? ((b)>0 ? (a)>INT64_MAX/(b) : (b)<INT64_MIN/(a)) \
+	       : ((b)>0 ? (a)<INT64_MIN/(b) : ((a)!=0 && (b)<INT64_MAX/(a)))) \
+	? 1 \
+	: ((*(c)=(a)*(b)), 0) \
+)
+#if defined(BITS64)
+#define sadd_overflow(a, b, c) sadd_overflow_64(a, b, c)
+#else
+#define sadd_overflow(a, b, c) ( \
+  (b < 1) ? \
+  ((INT32_MAX-(b) <= (a)) ? ((*(c)=(a)+(b)), 0) : 1) : \
+  ((INT32_MAX-(b) >= (a)) ? ((*(c)=(a)+(b)), 0) : 1) \
+)
+#endif
+
+#endif
--- a/cvalues.h
+++ b/cvalues.h
@@ -25,7 +25,7 @@
 value_t cvalue_static_cstring(const char *str);
 value_t string_from_cstrn(char *str, size_t n);
 value_t string_from_cstr(char *str);
-int fl_isstring(value_t v);
+int fl_isstring(value_t v) fl_purefn;
 void cv_pin(cvalue_t *cv);
 value_t mk_mpint(mpint *n);
 value_t size_wrap(size_t sz);
@@ -32,14 +32,14 @@
 size_t tosize(value_t n);
 off_t tooffset(value_t n);
 int cvalue_enum_init(fltype_t *ft, value_t arg, void *dest);
-int isarray(value_t v);
+int isarray(value_t v) fl_purefn;
 int cvalue_array_init(fltype_t *ft, value_t arg, void *dest);
-size_t cvalue_arraylen(value_t v);
+size_t cvalue_arraylen(value_t v) fl_purefn;
 size_t ctype_sizeof(value_t type);
 void to_sized_ptr(value_t v, uint8_t **pdata, size_t *psz);
 value_t cvalue_relocate(value_t v);
 value_t cvalue_copy(value_t v);
-value_t cvalue_compare(value_t a, value_t b);
+value_t cvalue_compare(value_t a, value_t b) fl_purefn;
 value_t cvalue_array_aref(value_t *args);
 value_t cvalue_array_aset(value_t *args);
 value_t cbuiltin(const char *name, builtin_t f);
@@ -63,4 +63,4 @@
 value_t mk_rune(Rune n);
 
 /* builtins.c */
-size_t llength(value_t v);
+size_t llength(value_t v) fl_purefn;
--- a/flisp.c
+++ b/flisp.c
@@ -46,7 +46,7 @@
 }builtinspec_t;
 
 #if defined(NDEBUG)
-__thread
+fl_thread
 #endif
 Fl *fl;
 
@@ -57,7 +57,7 @@
 	return tag(x) == TAG_FUNCTION && (i = uintval(x)) < nelem(builtins) && builtins[i].name != nil;
 }
 
-static value_t apply_cl(uint32_t nargs);
+static value_t apply_cl(uint32_t nargs) fl_hotfn;
 
 // error utilities ------------------------------------------------------------
 
@@ -174,7 +174,7 @@
 #define SAFECAST_OP(type, ctype, cnvt) \
 	ctype to##type(value_t v) \
 	{ \
-		if(__likely(is##type(v))) \
+		if(fl_likely(is##type(v))) \
 			return (ctype)cnvt(v); \
 		type_error(#type, v); \
 	}
@@ -196,17 +196,7 @@
 static symbol_t *
 mk_symbol(const char *str, int len, bool copy)
 {
-	symbol_t *sym;
-	int sz = sizeof(*sym) + (copy ? len+1 : 0);
-#ifdef BITS64
-	sym = MEM_ALLOC(sz);
-#else
-	sym = MEM_ALLOC(sz+7);
-	if((uintptr_t)sym & 7)
-		sym = (symbol_t*)(((uintptr_t)sym + 7U) & ~7U);
-#endif
-	assert(((uintptr_t)sym & 7) == 0);
-
+	symbol_t *sym = MEM_ALLOC(sizeof(*sym) + (copy ? len+1 : 0));
 	sym->numtype = NONNUMERIC;
 	if(fl_is_keyword_name(str, len)){
 		value_t s = tagptr(sym, TAG_SYM);
@@ -305,7 +295,7 @@
 {
 	cons_t *c;
 
-	if(__unlikely(FL(curheap) > FL(lim)))
+	if(fl_unlikely(FL(curheap) > FL(lim)))
 		gc(0);
 	c = (cons_t*)FL(curheap);
 	FL(curheap) += sizeof(cons_t);
@@ -319,7 +309,7 @@
 
 	assert(n > 0);
 	n = ALIGNED(n, 2);   // only allocate multiples of 2 words
-	if(__unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)+2-n)){
+	if(fl_unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)+2-n)){
 		gc(0);
 		while((value_t*)FL(curheap) > ((value_t*)FL(lim))+2-n)
 			gc(1);
@@ -350,7 +340,7 @@
 void
 fl_gc_handle(value_t *pv)
 {
-	if(__unlikely(FL(ngchandles) >= N_GC_HANDLES))
+	if(fl_unlikely(FL(ngchandles) >= N_GC_HANDLES))
 		lerrorf(FL_MemoryError, "out of gc handles");
 	FL(gchandles)[FL(ngchandles)++] = pv;
 }
@@ -450,7 +440,7 @@
 		ng->binding = gs->binding;
 		nc = tagptr(ng, TAG_SYM);
 		forward(v, nc);
-		if(__likely(ng->binding != UNBOUND))
+		if(fl_likely(ng->binding != UNBOUND))
 			ng->binding = relocate(ng->binding);
 		return nc;
 	}
@@ -537,19 +527,19 @@
 	// grow the other half of the heap this time to catch up.
 	if(FL(grew) || ((FL(lim)-FL(curheap)) < (int)(FL(heapsize)/5)) || mustgrow){
 		temp = MEM_REALLOC(FL(tospace), FL(heapsize)*2);
-		if(__unlikely(temp == nil))
+		if(fl_unlikely(temp == nil))
 			fl_raise(FL(memory_exception_value));
 		FL(tospace) = temp;
 		if(FL(grew)){
 			FL(heapsize) *= 2;
 			temp = bitvector_resize(FL(consflags), 0, FL(heapsize)/sizeof(cons_t), 1);
-			if(__unlikely(temp == nil))
+			if(fl_unlikely(temp == nil))
 				fl_raise(FL(memory_exception_value));
 			FL(consflags) = (uint32_t*)temp;
 		}
 		FL(grew) = !FL(grew);
 	}
-	if(__unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)-2)){
+	if(fl_unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)-2)){
 		// all data was live; gc again and grow heap.
 		// but also always leave at least 4 words available, so a closure
 		// can be allocated without an extra check.
@@ -562,7 +552,7 @@
 {
 	size_t newsz = FL(nstack) * 2;
 	value_t *ns = MEM_REALLOC(FL(stack), newsz*sizeof(value_t));
-	if(__unlikely(ns == nil))
+	if(fl_unlikely(ns == nil))
 		lerrorf(FL_MemoryError, "stack overflow");
 	FL(stack) = ns;
 	FL(nstack) = newsz;
@@ -571,6 +561,7 @@
 // utils ----------------------------------------------------------------------
 
 // apply function with n args on the stack
+fl_hotfn
 static value_t
 _applyn(uint32_t n)
 {
@@ -581,9 +572,9 @@
 		v = ((builtin_t*)ptr(f))[3](&FL(stack)[FL(sp)-n], n);
 	else if(isfunction(f))
 		v = apply_cl(n);
-	else if(__likely(isbuiltin(f))){
+	else if(fl_likely(isbuiltin(f))){
 		value_t tab = symbol_value(FL_builtins_table_sym);
-		if(__unlikely(ptr(tab) == nil))
+		if(fl_unlikely(ptr(tab) == nil))
 			unbound_error(tab);
 		FL(stack)[FL(sp)-n-1] = vector_elt(tab, uintval(f));
 		v = apply_cl(n);
@@ -707,6 +698,7 @@
 
 // eval -----------------------------------------------------------------------
 
+fl_hotfn
 static value_t
 list(value_t *args, uint32_t nargs, int star)
 {
@@ -791,9 +783,9 @@
 	value_t s1 = FL(stack)[FL(sp)-1];
 	value_t s3 = FL(stack)[FL(sp)-3];
 	value_t s4 = FL(stack)[FL(sp)-4];
-	if(__unlikely(nargs < nreq))
+	if(fl_unlikely(nargs < nreq))
 		lerrorf(FL_ArgError, "too few arguments");
-	if(__unlikely(extr > nelem(args)))
+	if(fl_unlikely(extr > nelem(args)))
 		lerrorf(FL_ArgError, "too many arguments");
 	for(i = 0; i < extr; i++)
 		args[i] = UNBOUND;
@@ -811,12 +803,12 @@
 	uintptr_t n = vector_size(kwtable)/2;
 	do{
 		i++;
-		if(__unlikely(i >= nargs))
+		if(fl_unlikely(i >= nargs))
 			lerrorf(FL_ArgError, "keyword %s requires an argument", symbol_name(v));
 		value_t hv = fixnum(((symbol_t*)ptr(v))->hash);
 		fixnum_t lx = numval(hv);
 		uintptr_t x = 2*((lx < 0 ? -lx : lx) % n);
-		if(__likely(vector_elt(kwtable, x) == v)){
+		if(fl_likely(vector_elt(kwtable, x) == v)){
 			uintptr_t idx = numval(vector_elt(kwtable, x+1));
 			assert(idx < nkw);
 			idx += nopt;
@@ -834,7 +826,7 @@
 	}while(issymbol(v) && iskeyword((symbol_t*)ptr(v)));
 no_kw:
 	nrestargs = nargs - i;
-	if(__unlikely(!va && nrestargs > 0))
+	if(fl_unlikely(!va && nrestargs > 0))
 		lerrorf(FL_ArgError, "too many arguments");
 	nargs = ntot + nrestargs;
 	if(nrestargs)
@@ -1024,9 +1016,9 @@
 		return fn_builtin_builtin(args, nargs);
 	if(nargs < 2 || nargs > 4)
 		argcount(nargs, 2);
-	if(__unlikely(!fl_isstring(args[0])))
+	if(fl_unlikely(!fl_isstring(args[0])))
 		type_error("string", args[0]);
-	if(__unlikely(!isvector(args[1])))
+	if(fl_unlikely(!isvector(args[1])))
 		type_error("vector", args[1]);
 	cvalue_t *arr = (cvalue_t*)ptr(args[0]);
 	cv_pin(arr);
@@ -1063,12 +1055,12 @@
 		}else{
 			fn->env = args[2];
 			if(nargs > 3){
-				if(__unlikely(!issymbol(args[3])))
+				if(fl_unlikely(!issymbol(args[3])))
 					type_error("symbol", args[3]);
 				fn->name = args[3];
 			}
 		}
-		if(__unlikely(isgensym(fn->name)))
+		if(fl_unlikely(isgensym(fn->name)))
 			lerrorf(FL_ArgError, "name should not be a gensym");
 	}
 	return fv;
@@ -1078,7 +1070,7 @@
 {
 	argcount(nargs, 1);
 	value_t v = args[0];
-	if(__unlikely(!isclosure(v)))
+	if(fl_unlikely(!isclosure(v)))
 		type_error("function", v);
 	return fn_bcode(v);
 }
@@ -1087,7 +1079,7 @@
 {
 	argcount(nargs, 1);
 	value_t v = args[0];
-	if(__unlikely(!isclosure(v)))
+	if(fl_unlikely(!isclosure(v)))
 		type_error("function", v);
 	return fn_vals(v);
 }
@@ -1096,7 +1088,7 @@
 {
 	argcount(nargs, 1);
 	value_t v = args[0];
-	if(__unlikely(!isclosure(v)))
+	if(fl_unlikely(!isclosure(v)))
 		type_error("function", v);
 	return fn_env(v);
 }
@@ -1167,7 +1159,7 @@
 
 BUILTIN("map", map)
 {
-	if(__unlikely(nargs < 2))
+	if(fl_unlikely(nargs < 2))
 		lerrorf(FL_ArgError, "too few arguments");
 	intptr_t argSP = args-FL(stack);
 	assert(argSP >= 0 && argSP < (intptr_t)FL(nstack));
@@ -1202,7 +1194,7 @@
 
 BUILTIN("for-each", for_each)
 {
-	if(__unlikely(nargs < 2))
+	if(fl_unlikely(nargs < 2))
 		lerrorf(FL_ArgError, "too few arguments");
 	intptr_t argSP = args-FL(stack);
 	assert(argSP >= 0 && argSP < (intptr_t)FL(nstack));
--- a/flisp.h
+++ b/flisp.h
@@ -61,7 +61,7 @@
 typedef struct {
 	value_t car;
 	value_t cdr;
-}cons_t;
+}fl_aligned(8) cons_t;
 
 // NOTE: symbol_t MUST have the same fields as gensym_t first
 // there are places where gensyms are treated as normal symbols
@@ -74,13 +74,13 @@
 	uint8_t flags;
 	uint8_t _dummy;
 	const char *name;
-}symbol_t;
+}fl_aligned(8) symbol_t;
 
 typedef struct {
 	fltype_t *type;
 	value_t binding;   // global value binding
 	uint32_t id;
-}gensym_t;
+}fl_aligned(8) gensym_t;
 
 typedef struct Builtin Builtin;
 
@@ -97,15 +97,15 @@
 #define ANYARGS -10000
 #define NONNUMERIC (0xff)
 #define valid_numtype(v) ((v) <= T_DOUBLE)
-#define UNBOUND ((value_t)0x1) // an invalid value
+#define UNBOUND ((value_t)1) // an invalid value
 #define TAG_FWD UNBOUND
-#define tag(x) ((x) & 0x7)
-#define ptr(x) ((void*)((x) & (~(value_t)0x7)))
+#define tag(x) ((x) & 7)
+#define ptr(x) ((void*)((uintptr_t)(x) & (~(uintptr_t)7)))
 #define tagptr(p, t) ((value_t)(p) | (t))
 #define fixnum(x) ((value_t)(x)<<2)
 #define numval(x)  ((fixnum_t)(x)>>2)
 #define uintval(x) (((unsigned int)(x))>>3)
-#define builtin(n) tagptr(((int)n<<3), TAG_FUNCTION)
+#define builtin(n) tagptr(((value_t)n<<3), TAG_FUNCTION)
 #define iscons(x) (tag(x) == TAG_CONS)
 #define issymbol(x) (tag(x) == TAG_SYM)
 #define isfixnum(x) (((x)&3) == TAG_NUM)
@@ -175,7 +175,7 @@
 	}while(0)
 #define POP() (FL(stack)[--FL(sp)])
 
-bool isbuiltin(value_t x);
+bool isbuiltin(value_t x) fl_constfn fl_hotfn;
 int fl_init(size_t initial_heapsize);
 int fl_load_system_image(value_t ios);
 
@@ -182,7 +182,7 @@
 _Noreturn void fl_exit(int status);
 
 /* collector */
-value_t relocate(value_t v);
+value_t relocate(value_t v) fl_hotfn;
 void gc(int mustgrow);
 void fl_gc_handle(value_t *pv);
 void fl_free_gc_handles(uint32_t n);
@@ -189,7 +189,7 @@
 
 /* symbol table */
 value_t gensym(void);
-value_t symbol(const char *str, bool copy);
+value_t symbol(const char *str, bool copy) fl_hotfn;
 const char *symbol_name(value_t v);
 
 /* read, eval, print main entry points */
@@ -201,8 +201,8 @@
 value_t fl_cons(value_t a, value_t b);
 value_t fl_list2(value_t a, value_t b);
 value_t fl_listn(size_t n, ...);
-bool fl_is_keyword_name(const char *str, size_t len);
-bool fl_isnumber(value_t v);
+bool fl_is_keyword_name(const char *str, size_t len) fl_purefn fl_hotfn;
+bool fl_isnumber(value_t v) fl_purefn;
 value_t alloc_vector(size_t n, int init);
 
 /* safe casts */
@@ -213,8 +213,8 @@
 double todouble(value_t a);
 
 /* conses */
-value_t mk_cons(void);
-void *alloc_words(uint32_t n);
+value_t mk_cons(void) fl_hotfn;
+void *alloc_words(uint32_t n) fl_hotfn;
 
 char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
 
@@ -251,7 +251,7 @@
 	else \
 		for(l__ca=1; l__ca; l__ca=0, fl_restorestate(&_ctx))
 
-_Noreturn void lerrorf(value_t e, const char *format, ...) __printfmt(2, 3);
+_Noreturn void lerrorf(value_t e, const char *format, ...) fl_printfmt(2, 3);
 void fl_savestate(fl_exception_context_t *_ctx);
 void fl_restorestate(fl_exception_context_t *_ctx);
 _Noreturn void fl_raise(value_t e);
@@ -261,7 +261,7 @@
 
 #define argcount(nargs, c) \
 	do{ \
-		if(__unlikely(nargs != c)) \
+		if(fl_unlikely(nargs != c)) \
 			lerrorf(FL_ArgError, "arity mismatch: wanted %"PRIu32", got %"PRIu32, (uint32_t)c, nargs); \
 	}while(0)
 
@@ -293,12 +293,12 @@
 		value_t parent;	// optional
 		uint8_t _space[1];	// variable size
 	};
-}cvalue_t;
+}fl_aligned(8) cvalue_t;
 
 typedef struct {
 	fltype_t *type;
 	uint8_t _space[];
-}cprim_t;
+}fl_aligned(8) cprim_t;
 
 typedef struct {
 	value_t bcode;
@@ -305,7 +305,7 @@
 	value_t vals;
 	value_t env;
 	value_t name;
-}function_t;
+}fl_aligned(8) function_t;
 
 #define CPRIM_NWORDS 2
 #define cv_class(cv) ((fltype_t*)(((uintptr_t)((cvalue_t*)cv)->type)&~(uintptr_t)3))
@@ -420,7 +420,7 @@
 
 extern
 #if defined(NDEBUG)
-__thread
+fl_thread
 #endif
 Fl *fl;
 
--- a/hashing.h
+++ b/hashing.h
@@ -1,8 +1,8 @@
 #pragma once
 
-value_t nextipow2(value_t i);
-uint32_t int32hash(uint32_t a);
-uint64_t int64hash(uint64_t key);
-uint32_t int64to32hash(uint64_t key);
+value_t nextipow2(value_t i) fl_constfn;
+uint32_t int32hash(uint32_t a) fl_constfn;
+uint64_t int64hash(uint64_t key) fl_constfn;
+uint32_t int64to32hash(uint64_t key) fl_constfn;
 uint64_t memhash(const char* buf, size_t n);
 uint32_t memhash32(const char* buf, size_t n);
--- a/ios.h
+++ b/ios.h
@@ -103,8 +103,8 @@
 
 /* high-level functions - output */
 int ios_pututf8(ios_t *s, Rune r);
-int ios_printf(ios_t *s, const char *format, ...) __printfmt(2, 3);
-int ios_vprintf(ios_t *s, const char *format, va_list args) __printfmt(2, 0);
+int ios_printf(ios_t *s, const char *format, ...) fl_printfmt(2, 3);
+int ios_vprintf(ios_t *s, const char *format, va_list args) fl_printfmt(2, 0);
 
 void hexdump(ios_t *dest, const uint8_t *buffer, size_t len, size_t startoffs);
 
--- a/iostream.c
+++ b/iostream.c
@@ -62,7 +62,7 @@
 ios_t *
 toiostream(value_t v)
 {
-	if(__unlikely(!isiostream(v)))
+	if(fl_unlikely(!isiostream(v)))
 		type_error("iostream", v);
 	return value2c(ios_t*, v);
 }
--- a/macos/platform.h
+++ b/macos/platform.h
@@ -25,24 +25,13 @@
 #include <unistd.h>
 #include <wctype.h>
 #include <wchar.h>
-#include "mem.h"
 
 #define __os_name__ "macos"
 
-#define sadd_overflow __builtin_add_overflow
-#define sadd_overflow_64 __builtin_add_overflow
-#define smul_overflow_64 __builtin_mul_overflow
-
 #define nil NULL
 #define USED(x) ((void)(x))
 #define nelem(x) (int)(sizeof(x)/sizeof((x)[0]))
 
-#ifdef __GNUC__
-#define __unlikely(x) __builtin_expect(!!(x), 0)
-#define __likely(x) __builtin_expect(!!(x), 1)
-#define __printfmt(x, y) __attribute__((format(printf, x, y)))
-#endif
-
 #define PATHSEP '/'
 #define PATHSEPSTRING "/"
 #define PATHLISTSEP ':'
@@ -52,3 +41,6 @@
 #ifndef BYTE_ORDER
 #error unknown byte order
 #endif
+
+#include "cc.h"
+#include "mem.h"
--- a/mem.c
+++ b/mem.c
@@ -5,6 +5,7 @@
 fl_malloc(size_t sz)
 {
 	uint8_t *p = malloc(sz+1+7);
+	assert(p != nil);
 	uint8_t *a = (uint8_t*)(((uintptr_t)p+1+7) & ~(uintptr_t)7);
 	a[-1] = a-p;
 	return a;
@@ -25,10 +26,10 @@
 {
 	uint8_t *a = a_;
 	assert(((uintptr_t)a & 7) == 0);
-	uint8_t *p = a == nil ? nil : a-a[-1];
-	if((p = realloc(p, sz+1+7)) == nil)
-		return nil;
-	a = (void*)(((uintptr_t)p+1+7) & ~(uintptr_t)7);
+	uint8_t *p = a != nil ? a - a[-1] : nil;
+	p = realloc(p, sz+1+7);
+	assert(p != nil);
+	a = (uint8_t*)(((uintptr_t)p+1+7) & ~(uintptr_t)7);
 	a[-1] = a-p;
 	return a;
 }
--- a/meson.build
+++ b/meson.build
@@ -17,7 +17,13 @@
 	#'-Wcast-qual',
 	#'-Wconversion',
 	#'-Wfloat-equal',
+	#'-Wmissing-noreturn',
 	#'-Wsign-conversion',
+	#'-Wsuggest-attribute=const',
+	#'-Wsuggest-attribute=malloc',
+	#'-Wsuggest-attribute=noreturn',
+	#'-Wsuggest-attribute=pure',
+	#'-Wsuggest-attribute=returns_nonnull',
 	'-Waggregate-return',
 	'-Werror=odr',
 	'-Werror=strict-aliasing',
@@ -37,10 +43,10 @@
 	'-Wwrite-strings',
 	'-D_DEFAULT_SOURCE',
 	'-DBLZ_NO_LUT=1', # breaks on m68k - disable for everything
+	'-DCOMPUTED_GOTO',
 	language: 'c',
 )
 
-
 cc = meson.get_compiler('c')
 
 if cc.get_id() == 'clang'
@@ -82,11 +88,6 @@
 			'-DMEM_NEED_ALIGNED',
 			language: 'c',
 		)
-	else
-		add_project_arguments(
-			'-DCOMPUTED_GOTO',
-			language: 'c',
-		)
 	endif
 	extras = [
 		cpp.find_library('RetroConsole', required: true),
@@ -96,7 +97,6 @@
 	add_project_arguments(
 		'-DINITIAL_HEAP_SIZE=8*1024*1024',
 		'-DALLOC_LIMIT_TRIGGER=1*1024*1024',
-		'-DCOMPUTED_GOTO',
 		language: 'c',
 	)
 	extras = []
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -4,7 +4,6 @@
 #include <libc.h>
 #include <ctype.h>
 #include <mp.h>
-#include "mem.h"
 
 #ifdef NDEBUG
 #undef assert
@@ -22,26 +21,19 @@
 #endif
 
 #define __os_name__ "plan9"
-#define __thread
 
-#define __builtin_prefetch(x)
-int __builtin_clz(unsigned int x);
+static int
+fl_popcount(u32int w)
+{
+	w -= (w >> 1) & 0x55555555U;
+	w = (w & 0x33333333U) + ((w >> 2) & 0x33333333U);
+	w = (w + (w >> 4)) & 0x0F0F0F0FU;
+	w = (w * 0x01010101U) >> 24;
+	return w;
+}
 
-/* FIXME(sigrid): s*_overflow_* can be more optimal */
+int fl_clz(unsigned int x);
 
-#define sadd_overflow_64(a, b, c) ( \
-  (b < 1) ? \
-  ((INT64_MAX-(b) <= (a)) ? ((*(c)=(a)+(b)), 0) : 1) : \
-  ((INT64_MAX-(b) >= (a)) ? ((*(c)=(a)+(b)), 0) : 1) \
-)
-
-#define smul_overflow_64(a, b, c) ( \
-	((a)>0 ? ((b)>0 ? (a)>INT64_MAX/(b) : (b)<INT64_MIN/(a)) \
-	       : ((b)>0 ? (a)<INT64_MIN/(b) : ((a)!=0 && (b)<INT64_MAX/(a)))) \
-	? 1 \
-	: ((*(c)=(a)*(b)), 0) \
-)
-
 #if defined(__amd64__) || \
     defined(__arm64__) || \
     defined(__mips64__) || \
@@ -49,14 +41,8 @@
     defined(__sparc64__)
 #define BITS64
 #define PRIdPTR PRId64
-#define sadd_overflow(a, b, c) sadd_overflow_64(a, b, c)
 #else
 #define PRIdPTR "ld"
-#define sadd_overflow(a, b, c) ( \
-  (b < 1) ? \
-  ((INT32_MAX-(b) <= (a)) ? ((*(c)=(a)+(b)), 0) : 1) : \
-  ((INT32_MAX-(b) >= (a)) ? ((*(c)=(a)+(b)), 0) : 1) \
-)
 #endif
 
 #define unsetenv(name) putenv(name, "")
@@ -123,10 +109,6 @@
 #define BYTE_ORDER LITTLE_ENDIAN
 #endif
 
-#define __unlikely(x) (x)
-#define __likely(x) (x)
-#define __printfmt(x, y)
-
 typedef s8int int8_t;
 typedef s16int int16_t;
 typedef s32int int32_t;
@@ -144,3 +126,6 @@
 
 int wcwidth(Rune c);
 int ftruncate(int f, off_t sz);
+
+#include "cc.h"
+#include "mem.h"
--- a/posix/platform.h
+++ b/posix/platform.h
@@ -26,7 +26,6 @@
 #include <unistd.h>
 #include <wctype.h>
 #include <wchar.h>
-#include "mem.h"
 
 #if defined(__linux__)
 #define __os_name__ "linux"
@@ -48,20 +47,10 @@
 #define BITS64
 #endif
 
-#define sadd_overflow __builtin_add_overflow
-#define sadd_overflow_64 __builtin_add_overflow
-#define smul_overflow_64 __builtin_mul_overflow
-
 #define nil NULL
 #define USED(x) ((void)(x))
 #define nelem(x) (int)(sizeof(x)/sizeof((x)[0]))
 
-#ifdef __GNUC__
-#define __unlikely(x) __builtin_expect(!!(x), 0)
-#define __likely(x) __builtin_expect(!!(x), 1)
-#define __printfmt(x, y) __attribute__((format(printf, x, y)))
-#endif
-
 #define PATHSEP '/'
 #define PATHSEPSTRING "/"
 #define PATHLISTSEP ':'
@@ -73,3 +62,6 @@
 #define BIG_ENDIAN __BIG_ENDIAN
 #define BYTE_ORDER __BYTE_ORDER
 #endif
+
+#include "cc.h"
+#include "mem.h"
--- a/read.c
+++ b/read.c
@@ -112,7 +112,7 @@
 	ctx->toktype = TOK_NONE;
 }
 
-static _Noreturn void __printfmt(2, 3)
+static _Noreturn void fl_printfmt(2, 3)
 parse_error(ios_loc_t *loc, const char *format, ...)
 {
 	char msgbuf[512];
--- a/table.h
+++ b/table.h
@@ -1,3 +1,3 @@
-bool ishashtable(value_t v);
-htable_t *totable(value_t v);
+bool ishashtable(value_t v) fl_purefn;
+htable_t *totable(value_t v) fl_purefn;
 void table_init(void);
--- a/utf8.c
+++ b/utf8.c
@@ -89,13 +89,13 @@
 	return ch - offsetsFromUTF8[sz];
 }
 
-int
+bool
 octal_digit(char c)
 {
 	return c >= '0' && c <= '7';
 }
 
-int
+bool
 hex_digit(char c)
 {
 	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
--- a/utf8.h
+++ b/utf8.h
@@ -3,18 +3,18 @@
 /* is c the start of a utf8 sequence? */
 #define isutf(c) (((c)&0xC0) != 0x80)
 
-int u8_iswprint(Rune c);
+int u8_iswprint(Rune c) fl_constfn;
 
 /* byte offset to character number */
-size_t u8_charnum(const char *s, size_t offset);
+size_t u8_charnum(const char *s, size_t offset) fl_purefn;
 
 /* next character without NUL character terminator */
 Rune u8_nextmemchar(const char *s, size_t *i);
 
 /* returns length of next utf-8 sequence */
-size_t u8_seqlen(const char *s);
+size_t u8_seqlen(const char *s) fl_purefn;
 
-char read_escape_control_char(char c);
+char read_escape_control_char(char c) fl_constfn;
 
 /* given a wide character, convert it to an ASCII escape sequence stored in
    buf, where buf is "sz" bytes. returns the number of characters output.
@@ -40,8 +40,8 @@
                  bool escape_quotes, bool ascii);
 
 /* utility predicates used by the above */
-int octal_digit(char c);
-int hex_digit(char c);
+bool octal_digit(char c) fl_constfn;
+bool hex_digit(char c) fl_constfn;
 
 /* same as the above, but searches a buffer of a given size instead of
    a NUL-terminated string. */
@@ -48,10 +48,10 @@
 char *u8_memchr(char *s, Rune ch, size_t sz, size_t *charn);
 
 /* number of columns occupied by a string */
-size_t u8_strwidth(const char *s);
+size_t u8_strwidth(const char *s) fl_purefn;
 
 /* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
-int u8_isvalid(const char *str, int length);
+int u8_isvalid(const char *str, int length) fl_purefn;
 
 /* reverse a UTF-8 string. len is length in bytes. dest and src must both
    be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
--- a/vm.inc
+++ b/vm.inc
@@ -84,7 +84,7 @@
 				}
 			}
 		}
-	}else if(__likely(iscbuiltin(func))){
+	}else if(fl_likely(iscbuiltin(func))){
 		s = FL(sp) - n;
 		v = (((builtin_t*)ptr(func))[3])(&FL(stack)[s], n);
 		FL(sp) = s;
@@ -106,7 +106,7 @@
 	}
 	assert(issymbol(v));
 	sym = (symbol_t*)ptr(v);
-	if(__unlikely(sym->binding == UNBOUND)){
+	if(fl_unlikely(sym->binding == UNBOUND)){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		unbound_error(v);
 	}
@@ -170,9 +170,9 @@
 
 OP(OP_CAR)
 	v = FL(stack)[FL(sp)-1];
-	if(__likely(iscons(v)))
+	if(fl_likely(iscons(v)))
 		v = car_(v);
-	else if(__unlikely(v != FL_nil)){
+	else if(fl_unlikely(v != FL_nil)){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		type_error("cons", v);
 	}
@@ -181,9 +181,9 @@
 
 OP(OP_CDR)
 	v = FL(stack)[FL(sp)-1];
-	if(__likely(iscons(v)))
+	if(fl_likely(iscons(v)))
 		v = cdr_(v);
-	else if(__unlikely(v != FL_nil)){
+	else if(fl_unlikely(v != FL_nil)){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		type_error("cons", v);
 	}
@@ -203,7 +203,7 @@
 	}while(i <= n);
 	POPN(n);
 	PUSH(v);
-	if(__unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)-2))
+	if(fl_unlikely((value_t*)FL(curheap) > (value_t*)FL(lim)-2))
 		gc(0);
 	pv = (value_t*)FL(curheap);
 	FL(curheap) += 4*sizeof(value_t);
@@ -274,7 +274,7 @@
 		e = FL(stack)[FL(sp)-i];
 		isz = tosize(e);
 		if(isvector(v)){
-			if(__unlikely(isz >= vector_size(v)))
+			if(fl_unlikely(isz >= vector_size(v)))
 				bounds_error(v, e);
 			v = vector_elt(v, isz);
 			continue;
@@ -287,7 +287,7 @@
 				break;
 			}
 			v = cdr_(v);
-			if(__unlikely(!iscons(v)))
+			if(fl_unlikely(!iscons(v)))
 				bounds_error(v0, e);
 		}
 	}
@@ -365,7 +365,7 @@
 
 OP(OP_SETCDR)
 	v = FL(stack)[FL(sp)-2];
-	if(__unlikely(!iscons(v))){
+	if(fl_unlikely(!iscons(v))){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		type_error("cons", v);
 	}
@@ -403,15 +403,15 @@
 
 OP(OP_CADR)
 	v = FL(stack)[FL(sp)-1];
-	if(__likely(iscons(v))){
+	if(fl_likely(iscons(v))){
 		v = cdr_(v);
-		if(__likely(iscons(v)))
+		if(fl_likely(iscons(v)))
 			v = car_(v);
 		else
 			goto LABEL(cadr_nil);
 	}else{
 LABEL(cadr_nil):
-		if(__unlikely(v != FL_nil)){
+		if(fl_unlikely(v != FL_nil)){
 			FL(stack)[ipd] = (uintptr_t)ip;
 			type_error("cons", v);
 		}
@@ -540,7 +540,7 @@
 
 OP(OP_SETCAR)
 	v = FL(stack)[FL(sp)-2];
-	if(__unlikely(!iscons(v))){
+	if(fl_unlikely(!iscons(v))){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		type_error("cons", v);
 	}
@@ -729,7 +729,7 @@
 			e = FL(stack)[FL(sp)-i];
 			isz = tosize(e);
 			if(isvector(v)){
-				if(__unlikely(isz >= vector_size(v)))
+				if(fl_unlikely(isz >= vector_size(v)))
 					bounds_error(v, e);
 				v = vector_elt(v, isz);
 				continue;
@@ -742,7 +742,7 @@
 					break;
 				}
 				v = cdr_(v);
-				if(__unlikely(!iscons(v)))
+				if(fl_unlikely(!iscons(v)))
 					bounds_error(v0, e);
 			}
 		}
@@ -751,7 +751,7 @@
 	e = FL(stack)[FL(sp)-2];
 	isz = tosize(e);
 	if(isvector(v)){
-		if(__unlikely(isz >= vector_size(v)))
+		if(fl_unlikely(isz >= vector_size(v)))
 			bounds_error(v, e);
 		vector_elt(v, isz) = (e = FL(stack)[FL(sp)-1]);
 	}else if(iscons(v) || v == FL_nil){
@@ -761,7 +761,7 @@
 				break;
 			}
 			v = cdr_(v);
-			if(__unlikely(!iscons(v)))
+			if(fl_unlikely(!iscons(v)))
 				bounds_error(v0, e);
 		}
 	}else if(isarray(v)){
@@ -865,7 +865,7 @@
 			FL(sp) =  bp+i+5;
 			FL(curr_frame) = FL(sp);
 		}
-	}else if(__unlikely(s < 0)){
+	}else if(fl_unlikely(s < 0)){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		lerrorf(FL_ArgError, "too few arguments");
 	}else{
@@ -892,18 +892,18 @@
 	ip += 4;
 	n = GET_INT32(ip);
 	ip += 4;
-	if(__unlikely(nargs < i)){
+	if(fl_unlikely(nargs < i)){
 		FL(stack)[ipd] = (uintptr_t)ip;
 		lerrorf(FL_ArgError, "too few arguments");
 	}
 	if((int32_t)n > 0){
-		if(__unlikely(nargs > n)){
+		if(fl_unlikely(nargs > n)){
 			FL(stack)[ipd] = (uintptr_t)ip;
 			lerrorf(FL_ArgError, "too many arguments");
 		}
 	}else
 		n = -n;
-	if(__likely(n > nargs)){
+	if(fl_likely(n > nargs)){
 		n -= nargs;
 		FL(sp) += n;
 		FL(stack)[FL(sp)-1] = FL(stack)[FL(sp)-n-1];