ref: c89111f7cb0844696014e6061bc843c3cf315344
parent: 120522c2123c09b68539ae064734ce5000b3fc1e
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Wed Nov 5 23:04:04 EST 2008
refactored escape sequence handling a bit, added error for invalid hex discarding rest of input line after a parse error made compare() do less work for unordered comparison added peekc and purge to ios
--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@@ -30,10 +30,8 @@
ptrhash_put(table, (void*)b, (void*)ca);
}
-// ordered comparison
-
// a is a fixnum, b is a cvalue
-static value_t compare_num_cvalue(value_t a, value_t b)
+static value_t compare_num_cvalue(value_t a, value_t b, int eq)
{
cvalue_t *bcv = (cvalue_t*)ptr(b);
numerictype_t bt;
@@ -42,6 +40,7 @@
void *bptr = cv_data(bcv);
if (cmp_eq(&ia, T_FIXNUM, bptr, bt))
return fixnum(0);
+ if (eq) return fixnum(1);
if (cmp_lt(&ia, T_FIXNUM, bptr, bt))
return fixnum(-1);
}
@@ -51,17 +50,19 @@
return fixnum(1);
}
-static value_t bounded_compare(value_t a, value_t b, int bound);
-static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table);
+static value_t bounded_compare(value_t a, value_t b, int bound, int eq);
+static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq);
-static value_t bounded_vector_compare(value_t a, value_t b, int bound)
+static value_t bounded_vector_compare(value_t a, value_t b, int bound, int eq)
{
size_t la = vector_size(a);
size_t lb = vector_size(b);
size_t m, i;
+ if (eq && (la!=lb)) return fixnum(1);
m = la < lb ? la : lb;
for (i = 0; i < m; i++) {
- value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i), bound-1);
+ value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i),
+ bound-1, eq);
if (d==NIL || numval(d)!=0) return d;
}
if (la < lb) return fixnum(-1);
@@ -71,7 +72,7 @@
// strange comparisons are resolved arbitrarily but consistently.
// ordering: number < builtin < cvalue < vector < symbol < cons
-static value_t bounded_compare(value_t a, value_t b, int bound)
+static value_t bounded_compare(value_t a, value_t b, int bound, int eq)
{
value_t d;
@@ -88,16 +89,17 @@
return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1);
}
if (iscvalue(b)) {
- return compare_num_cvalue(a, b);
+ return compare_num_cvalue(a, b, eq);
}
return fixnum(-1);
case TAG_SYM:
+ if (eq) return fixnum(1);
if (tagb < TAG_SYM) return fixnum(1);
if (tagb > TAG_SYM) return fixnum(-1);
return fixnum(strcmp(symbol_name(a), symbol_name(b)));
case TAG_VECTOR:
if (isvector(b))
- return bounded_vector_compare(a, b, bound);
+ return bounded_vector_compare(a, b, bound, eq);
break;
case TAG_CVALUE:
if (iscvalue(b)) {
@@ -109,6 +111,7 @@
void *bptr = cv_data(bcv);
if (cmp_eq(aptr, at, bptr, bt))
return fixnum(0);
+ if (eq) return fixnum(1);
if (cmp_lt(aptr, at, bptr, bt))
return fixnum(-1);
return fixnum(1);
@@ -116,7 +119,7 @@
return cvalue_compare(a, b);
}
else if (isfixnum(b)) {
- return fixnum(-numval(compare_num_cvalue(b, a)));
+ return fixnum(-numval(compare_num_cvalue(b, a, eq)));
}
break;
case TAG_BUILTIN:
@@ -126,7 +129,7 @@
break;
case TAG_CONS:
if (tagb < TAG_CONS) return fixnum(1);
- d = bounded_compare(car_(a), car_(b), bound-1);
+ d = bounded_compare(car_(a), car_(b), bound-1, eq);
if (d==NIL || numval(d) != 0) return d;
a = cdr_(a); b = cdr_(b);
bound--;
@@ -135,7 +138,8 @@
return (taga < tagb) ? fixnum(-1) : fixnum(1);
}
-static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
+static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table,
+ int eq)
{
size_t la = vector_size(a);
size_t lb = vector_size(b);
@@ -143,12 +147,13 @@
value_t d, xa, xb, ca, cb;
// first try to prove them different with no recursion
+ if (eq && (la!=lb)) return fixnum(1);
m = la < lb ? la : lb;
for (i = 0; i < m; i++) {
xa = vector_elt(a,i);
xb = vector_elt(b,i);
if (leafp(xa) || leafp(xb)) {
- d = bounded_compare(xa, xb, 1);
+ d = bounded_compare(xa, xb, 1, eq);
if (numval(d)!=0) return d;
}
else if (cmptag(xa) < cmptag(xb)) {
@@ -170,7 +175,7 @@
xa = vector_elt(a,i);
xb = vector_elt(b,i);
if (!leafp(xa) && !leafp(xb)) {
- d = cyc_compare(xa, xb, table);
+ d = cyc_compare(xa, xb, table, eq);
if (numval(d)!=0)
return d;
}
@@ -181,7 +186,7 @@
return fixnum(0);
}
-static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
+static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq)
{
if (a==b)
return fixnum(0);
@@ -193,7 +198,7 @@
int tagab = cmptag(ab); int tagdb = cmptag(db);
value_t d, ca, cb;
if (leafp(aa) || leafp(ab)) {
- d = bounded_compare(aa, ab, 1);
+ d = bounded_compare(aa, ab, 1, eq);
if (numval(d)!=0) return d;
}
else if (tagaa < tagab)
@@ -201,7 +206,7 @@
else if (tagaa > tagab)
return fixnum(1);
if (leafp(da) || leafp(db)) {
- d = bounded_compare(da, db, 1);
+ d = bounded_compare(da, db, 1, eq);
if (numval(d)!=0) return d;
}
else if (tagda < tagdb)
@@ -215,9 +220,9 @@
return fixnum(0);
eq_union(table, a, b, ca, cb);
- d = cyc_compare(aa, ab, table);
+ d = cyc_compare(aa, ab, table, eq);
if (numval(d)!=0) return d;
- return cyc_compare(da, db, table);
+ return cyc_compare(da, db, table, eq);
}
else {
return fixnum(1);
@@ -224,9 +229,9 @@
}
}
else if (isvector(a) && isvector(b)) {
- return cyc_vector_compare(a, b, table);
+ return cyc_vector_compare(a, b, table, eq);
}
- return bounded_compare(a, b, 1);
+ return bounded_compare(a, b, 1, eq);
}
static ptrhash_t equal_eq_hashtable;
@@ -235,21 +240,27 @@
ptrhash_new(&equal_eq_hashtable, 512);
}
-value_t compare(value_t a, value_t b)
+// 'eq' means unordered comparison is sufficient
+static value_t compare_(value_t a, value_t b, int eq)
{
- value_t guess = bounded_compare(a, b, 2048);
+ value_t guess = bounded_compare(a, b, 2048, eq);
if (guess == NIL) {
- guess = cyc_compare(a, b, &equal_eq_hashtable);
+ guess = cyc_compare(a, b, &equal_eq_hashtable, eq);
ptrhash_reset(&equal_eq_hashtable, 512);
}
return guess;
}
+value_t compare(value_t a, value_t b)
+{
+ return compare_(a, b, 0);
+}
+
value_t equal(value_t a, value_t b)
{
if (eq_comparable(a, b))
return (a == b) ? T : NIL;
- return (numval(compare(a,b))==0 ? T : NIL);
+ return (numval(compare_(a,b,1))==0 ? T : NIL);
}
/*
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -1509,7 +1509,13 @@
repl:
while (1) {
ios_puts("> ", ios_stdout); ios_flush(ios_stdout);
- v = read_sexpr(ios_stdin);
+ FL_TRY {
+ v = read_sexpr(ios_stdin);
+ }
+ FL_CATCH {
+ ios_purge(ios_stdin);
+ raise(lasterror);
+ }
if (ios_eof(ios_stdin)) break;
print(ios_stdout, v=toplevel_eval(v), 0);
set(symbol("that"), v);
--- a/femtolisp/read.c
+++ b/femtolisp/read.c
@@ -357,7 +357,6 @@
else if ((c=='x' && (ndig=2)) ||
(c=='u' && (ndig=4)) ||
(c=='U' && (ndig=8))) {
- wc = c;
c = ios_getc(f);
while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
eseq[j++] = c;
@@ -366,24 +365,15 @@
if (c!=IOS_EOF) ios_ungetc(c, f);
eseq[j] = '\0';
if (j) wc = strtol(eseq, NULL, 16);
+ else {
+ free(buf);
+ lerror(ParseError, "read: invalid escape sequence");
+ }
i += u8_wc_toutf8(&buf[i], wc);
}
- else if (c == 'n')
- buf[i++] = '\n';
- else if (c == 't')
- buf[i++] = '\t';
- else if (c == 'r')
- buf[i++] = '\r';
- else if (c == 'b')
- buf[i++] = '\b';
- else if (c == 'f')
- buf[i++] = '\f';
- else if (c == 'v')
- buf[i++] = '\v';
- else if (c == 'a')
- buf[i++] = '\a';
- else
- buf[i++] = c;
+ else {
+ buf[i++] = read_escape_control_char((char)c);
+ }
}
else {
buf[i++] = c;
--- a/femtolisp/system.lsp
+++ b/femtolisp/system.lsp
@@ -47,11 +47,10 @@
(defun nconc lsts
(cond ((null lsts) ())
((null (cdr lsts)) (car lsts))
- (T ((lambda (l d) (if (null l) d
- (prog1 l
- (while (consp (cdr l)) (setq l (cdr l)))
- (rplacd l d))))
- (car lsts) (apply nconc (cdr lsts))))))
+ ((null (car lsts)) (apply nconc (cdr lsts)))
+ (T (prog1 (car lsts)
+ (rplacd (last (car lsts))
+ (apply nconc (cdr lsts)))))))
(defun append lsts
(cond ((null lsts) ())
@@ -211,10 +210,21 @@
(defun transpose (M) (apply mapcar (cons list M)))
-(defun filter (pred lst)
- (cond ((null lst) ())
- ((pred (car lst)) (cons (car lst) (filter pred (cdr lst))))
- (T (filter pred (cdr lst)))))
+(defun filter (pred lst) (filter- pred lst nil))
+(defun filter- (pred lst accum)
+ (cond ((null lst) accum)
+ ((pred (car lst))
+ (filter- pred (cdr lst) (cons (car lst) accum)))
+ (T
+ (filter- pred (cdr lst) accum))))
+
+(defun separate (pred lst) (separate- pred lst nil nil))
+(defun separate- (pred lst yes no)
+ (cond ((null lst) (cons yes no))
+ ((pred (car lst))
+ (separate- pred (cdr lst) (cons (car lst) yes) no))
+ (T
+ (separate- pred (cdr lst) yes (cons (car lst) no)))))
(define (foldr f zero lst)
(if (null lst) zero
--- a/femtolisp/table.c
+++ b/femtolisp/table.c
@@ -79,22 +79,30 @@
return NIL;
}
+// (put table key value)
value_t fl_hash_put(value_t *args, u_int32_t nargs)
{
+ argcount("put", nargs, 3);
return NIL;
}
+// (get table key)
value_t fl_hash_get(value_t *args, u_int32_t nargs)
{
+ argcount("get", nargs, 2);
return NIL;
}
+// (has table key)
value_t fl_hash_has(value_t *args, u_int32_t nargs)
{
+ argcount("has", nargs, 2);
return NIL;
}
+// (del table key)
value_t fl_hash_delete(value_t *args, u_int32_t nargs)
{
+ argcount("del", nargs, 2);
return NIL;
}
--- a/femtolisp/test.lsp
+++ b/femtolisp/test.lsp
@@ -43,10 +43,11 @@
(defun sort (l)
(if (or (null l) (null (cdr l))) l
- (let ((piv (car l)))
- (nconc (sort (filter (lambda (x) (<= x piv)) (cdr l)))
+ (let* ((piv (car l))
+ (halves (separate (lambda (x) (< x piv)) (cdr l))))
+ (nconc (sort (car halves))
(list piv)
- (sort (filter (lambda (x) (> x piv)) (cdr l)))))))
+ (sort (cdr halves))))))
(defmacro dotimes (var . body)
(let ((v (car var))
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -928,3 +928,35 @@
- remaining cvalues functions
- special efficient reader for #array
- finish ios
+
+-----------------------------------------------------------------------------
+
+cvalues redesign
+
+goals:
+. allow custom types with vtables
+. use less space, share types more
+. simplify access to important metadata like length
+. unify vectors and arrays
+
+typedef struct {
+ fltype_t *type;
+ void *data;
+ size_t len; // length of *data in bytes
+
+ value_t parent; // optional
+ char data[1]; // variable size
+} cvalue_t;
+
+typedef struct {
+ fltype_t *type;
+ void *data;
+} cprim_t;
+
+typedef struct _fltype_t {
+ value_t type;
+ int numtype;
+ size_t sz;
+ cvtable_t *vtable;
+ struct _fltype_t *eltype; // for arrays
+} fltype_t;
--- a/llt/ios.c
+++ b/llt/ios.c
@@ -715,6 +715,16 @@
return (int)ch;
}
+int ios_peekc(ios_t *s)
+{
+ if (s->bpos < s->size)
+ return s->buf[s->bpos];
+ if (s->_eof) return IOS_EOF;
+ size_t n = ios_readprep(s, 1);
+ if (n == 0) return IOS_EOF;
+ return s->buf[s->bpos];
+}
+
int ios_ungetc(int c, ios_t *s)
{
if (s->state == bst_wr)
@@ -759,6 +769,13 @@
*pwc = u8_nextchar(s->buf, &i);
ios_read(s, buf, sz+1);
return 1;
+}
+
+void ios_purge(ios_t *s)
+{
+ if (s->state == bst_rd) {
+ s->bpos = s->size;
+ }
}
int ios_printf(ios_t *s, char *format, ...)
--- a/llt/ios.h
+++ b/llt/ios.h
@@ -112,6 +112,9 @@
int ios_readline(ios_t *dest, ios_t *s, char delim);
int ios_getline(ios_t *s, char **pbuf, size_t *psz);
+// discard data buffered for reading
+void ios_purge(ios_t *s);
+
// seek by utf8 sequence increments
int ios_nextutf8(ios_t *s);
int ios_prevutf8(ios_t *s);
@@ -121,6 +124,7 @@
int ios_putc(int c, ios_t *s);
//wint_t ios_putwc(ios_t *s, wchar_t wc);
int ios_getc(ios_t *s);
+int ios_peekc(ios_t *s);
//wint_t ios_getwc(ios_t *s);
int ios_ungetc(int c, ios_t *s);
//wint_t ios_ungetwc(ios_t *s, wint_t wc);
--- a/llt/ptrhash.c
+++ b/llt/ptrhash.c
@@ -70,7 +70,7 @@
orig = index;
do {
- if (tab[index] == PH_NOTFOUND) {
+ if (tab[index+1] == PH_NOTFOUND) {
tab[index] = key;
return &tab[index+1];
}
--- a/llt/utf8.c
+++ b/llt/utf8.c
@@ -313,57 +313,57 @@
(c >= 'a' && c <= 'f'));
}
+char read_escape_control_char(char c)
+{
+ if (c == 'n')
+ return '\n';
+ else if (c == 't')
+ return '\t';
+ else if (c == 'r')
+ return '\r';
+ else if (c == 'b')
+ return '\b';
+ else if (c == 'f')
+ return '\f';
+ else if (c == 'v')
+ return '\v';
+ else if (c == 'a')
+ return '\a';
+ return c;
+}
+
/* assumes that src points to the character after a backslash
- returns number of input characters processed */
-int u8_read_escape_sequence(const char *str, u_int32_t *dest)
+ returns number of input characters processed, 0 if error */
+size_t u8_read_escape_sequence(const char *str, size_t ssz, u_int32_t *dest)
{
+ assert(ssz > 0);
u_int32_t ch;
- char digs[9]="\0\0\0\0\0\0\0\0\0";
- int dno=0, i=1;
+ char digs[10];
+ int dno=0, ndig;
+ size_t i=1;
+ char c0 = str[0];
- ch = (u_int32_t)str[0]; /* take literal character */
- if (str[0] == 'n')
- ch = L'\n';
- else if (str[0] == 't')
- ch = L'\t';
- else if (str[0] == 'r')
- ch = L'\r';
- else if (str[0] == 'b')
- ch = L'\b';
- else if (str[0] == 'f')
- ch = L'\f';
- else if (str[0] == 'v')
- ch = L'\v';
- else if (str[0] == 'a')
- ch = L'\a';
- else if (octal_digit(str[0])) {
+ if (octal_digit(c0)) {
i = 0;
do {
digs[dno++] = str[i++];
- } while (octal_digit(str[i]) && dno < 3);
+ } while (i<ssz && octal_digit(str[i]) && dno<3);
+ digs[dno] = '\0';
ch = strtol(digs, NULL, 8);
}
- else if (str[0] == 'x') {
- while (hex_digit(str[i]) && dno < 2) {
+ else if ((c0=='x' && (ndig=2)) ||
+ (c0=='u' && (ndig=4)) ||
+ (c0=='U' && (ndig=8))) {
+ while (i<ssz && hex_digit(str[i]) && dno<ndig) {
digs[dno++] = str[i++];
}
- if (dno > 0)
- ch = strtol(digs, NULL, 16);
+ if (dno == 0) return 0;
+ digs[dno] = '\0';
+ ch = strtol(digs, NULL, 16);
}
- else if (str[0] == 'u') {
- while (hex_digit(str[i]) && dno < 4) {
- digs[dno++] = str[i++];
- }
- if (dno > 0)
- ch = strtol(digs, NULL, 16);
+ else {
+ ch = (u_int32_t)read_escape_control_char(c0);
}
- else if (str[0] == 'U') {
- while (hex_digit(str[i]) && dno < 8) {
- digs[dno++] = str[i++];
- }
- if (dno > 0)
- ch = strtol(digs, NULL, 16);
- }
*dest = ch;
return i;
@@ -381,7 +381,7 @@
while (*src && c < sz) {
if (*src == '\\') {
src++;
- amt = u8_read_escape_sequence(src, &ch);
+ amt = u8_read_escape_sequence(src, 1000, &ch);
}
else {
ch = (u_int32_t)*src;
--- a/llt/utf8.h
+++ b/llt/utf8.h
@@ -55,10 +55,12 @@
/* computes the # of bytes needed to encode a WC string as UTF-8 */
size_t u8_codingsize(u_int32_t *wcstr, size_t n);
+char read_escape_control_char(char c);
+
/* assuming src points to the character after a backslash, read an
escape sequence, storing the result in dest and returning the number of
input characters processed */
-int u8_read_escape_sequence(const char *src, u_int32_t *dest);
+size_t u8_read_escape_sequence(const char *src, size_t ssz, u_int32_t *dest);
/* given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output.