shithub: femtolisp

Download patch

ref: c89111f7cb0844696014e6061bc843c3cf315344
parent: 120522c2123c09b68539ae064734ce5000b3fc1e
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Wed Nov 5 23:04:04 EST 2008

refactored escape sequence handling a bit, added error for invalid hex
discarding rest of input line after a parse error
made compare() do less work for unordered comparison
added peekc and purge to ios


--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@@ -30,10 +30,8 @@
     ptrhash_put(table, (void*)b, (void*)ca);
 }
 
-// ordered comparison
-
 // a is a fixnum, b is a cvalue
-static value_t compare_num_cvalue(value_t a, value_t b)
+static value_t compare_num_cvalue(value_t a, value_t b, int eq)
 {
     cvalue_t *bcv = (cvalue_t*)ptr(b);
     numerictype_t bt;
@@ -42,6 +40,7 @@
         void *bptr = cv_data(bcv);
         if (cmp_eq(&ia, T_FIXNUM, bptr, bt))
             return fixnum(0);
+        if (eq) return fixnum(1);
         if (cmp_lt(&ia, T_FIXNUM, bptr, bt))
             return fixnum(-1);
     }
@@ -51,17 +50,19 @@
     return fixnum(1);
 }
 
-static value_t bounded_compare(value_t a, value_t b, int bound);
-static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table);
+static value_t bounded_compare(value_t a, value_t b, int bound, int eq);
+static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq);
 
-static value_t bounded_vector_compare(value_t a, value_t b, int bound)
+static value_t bounded_vector_compare(value_t a, value_t b, int bound, int eq)
 {
     size_t la = vector_size(a);
     size_t lb = vector_size(b);
     size_t m, i;
+    if (eq && (la!=lb)) return fixnum(1);
     m = la < lb ? la : lb;
     for (i = 0; i < m; i++) {
-        value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i), bound-1);
+        value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i),
+                                    bound-1, eq);
         if (d==NIL || numval(d)!=0) return d;
     }
     if (la < lb) return fixnum(-1);
@@ -71,7 +72,7 @@
 
 // strange comparisons are resolved arbitrarily but consistently.
 // ordering: number < builtin < cvalue < vector < symbol < cons
-static value_t bounded_compare(value_t a, value_t b, int bound)
+static value_t bounded_compare(value_t a, value_t b, int bound, int eq)
 {
     value_t d;
 
@@ -88,16 +89,17 @@
             return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1);
         }
         if (iscvalue(b)) {
-            return compare_num_cvalue(a, b);
+            return compare_num_cvalue(a, b, eq);
         }
         return fixnum(-1);
     case TAG_SYM:
+        if (eq) return fixnum(1);
         if (tagb < TAG_SYM) return fixnum(1);
         if (tagb > TAG_SYM) return fixnum(-1);
         return fixnum(strcmp(symbol_name(a), symbol_name(b)));
     case TAG_VECTOR:
         if (isvector(b))
-            return bounded_vector_compare(a, b, bound);
+            return bounded_vector_compare(a, b, bound, eq);
         break;
     case TAG_CVALUE:
         if (iscvalue(b)) {
@@ -109,6 +111,7 @@
                 void *bptr = cv_data(bcv);
                 if (cmp_eq(aptr, at, bptr, bt))
                     return fixnum(0);
+                if (eq) return fixnum(1);
                 if (cmp_lt(aptr, at, bptr, bt))
                     return fixnum(-1);
                 return fixnum(1);
@@ -116,7 +119,7 @@
             return cvalue_compare(a, b);
         }
         else if (isfixnum(b)) {
-            return fixnum(-numval(compare_num_cvalue(b, a)));
+            return fixnum(-numval(compare_num_cvalue(b, a, eq)));
         }
         break;
     case TAG_BUILTIN:
@@ -126,7 +129,7 @@
         break;
     case TAG_CONS:
         if (tagb < TAG_CONS) return fixnum(1);
-        d = bounded_compare(car_(a), car_(b), bound-1);
+        d = bounded_compare(car_(a), car_(b), bound-1, eq);
         if (d==NIL || numval(d) != 0) return d;
         a = cdr_(a); b = cdr_(b);
         bound--;
@@ -135,7 +138,8 @@
     return (taga < tagb) ? fixnum(-1) : fixnum(1);
 }
 
-static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
+static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table,
+                                  int eq)
 {
     size_t la = vector_size(a);
     size_t lb = vector_size(b);
@@ -143,12 +147,13 @@
     value_t d, xa, xb, ca, cb;
 
     // first try to prove them different with no recursion
+    if (eq && (la!=lb)) return fixnum(1);
     m = la < lb ? la : lb;
     for (i = 0; i < m; i++) {
         xa = vector_elt(a,i);
         xb = vector_elt(b,i);
         if (leafp(xa) || leafp(xb)) {
-            d = bounded_compare(xa, xb, 1);
+            d = bounded_compare(xa, xb, 1, eq);
             if (numval(d)!=0) return d;
         }
         else if (cmptag(xa) < cmptag(xb)) {
@@ -170,7 +175,7 @@
         xa = vector_elt(a,i);
         xb = vector_elt(b,i);
         if (!leafp(xa) && !leafp(xb)) {
-            d = cyc_compare(xa, xb, table);
+            d = cyc_compare(xa, xb, table, eq);
             if (numval(d)!=0)
                 return d;
         }
@@ -181,7 +186,7 @@
     return fixnum(0);
 }
 
-static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
+static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq)
 {
     if (a==b)
         return fixnum(0);
@@ -193,7 +198,7 @@
             int tagab = cmptag(ab); int tagdb = cmptag(db);
             value_t d, ca, cb;
             if (leafp(aa) || leafp(ab)) {
-                d = bounded_compare(aa, ab, 1);
+                d = bounded_compare(aa, ab, 1, eq);
                 if (numval(d)!=0) return d;
             }
             else if (tagaa < tagab)
@@ -201,7 +206,7 @@
             else if (tagaa > tagab)
                 return fixnum(1);
             if (leafp(da) || leafp(db)) {
-                d = bounded_compare(da, db, 1);
+                d = bounded_compare(da, db, 1, eq);
                 if (numval(d)!=0) return d;
             }
             else if (tagda < tagdb)
@@ -215,9 +220,9 @@
                 return fixnum(0);
 
             eq_union(table, a, b, ca, cb);
-            d = cyc_compare(aa, ab, table);
+            d = cyc_compare(aa, ab, table, eq);
             if (numval(d)!=0) return d;
-            return cyc_compare(da, db, table);
+            return cyc_compare(da, db, table, eq);
         }
         else {
             return fixnum(1);
@@ -224,9 +229,9 @@
         }
     }
     else if (isvector(a) && isvector(b)) {
-        return cyc_vector_compare(a, b, table);
+        return cyc_vector_compare(a, b, table, eq);
     }
-    return bounded_compare(a, b, 1);
+    return bounded_compare(a, b, 1, eq);
 }
 
 static ptrhash_t equal_eq_hashtable;
@@ -235,21 +240,27 @@
     ptrhash_new(&equal_eq_hashtable, 512);
 }
 
-value_t compare(value_t a, value_t b)
+// 'eq' means unordered comparison is sufficient
+static value_t compare_(value_t a, value_t b, int eq)
 {
-    value_t guess = bounded_compare(a, b, 2048);
+    value_t guess = bounded_compare(a, b, 2048, eq);
     if (guess == NIL) {
-        guess = cyc_compare(a, b, &equal_eq_hashtable);
+        guess = cyc_compare(a, b, &equal_eq_hashtable, eq);
         ptrhash_reset(&equal_eq_hashtable, 512);
     }
     return guess;
 }
 
+value_t compare(value_t a, value_t b)
+{
+    return compare_(a, b, 0);
+}
+
 value_t equal(value_t a, value_t b)
 {
     if (eq_comparable(a, b))
         return (a == b) ? T : NIL;
-    return (numval(compare(a,b))==0 ? T : NIL);
+    return (numval(compare_(a,b,1))==0 ? T : NIL);
 }
 
 /*
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -1509,7 +1509,13 @@
  repl:
     while (1) {
         ios_puts("> ", ios_stdout); ios_flush(ios_stdout);
-        v = read_sexpr(ios_stdin);
+        FL_TRY {
+            v = read_sexpr(ios_stdin);
+        }
+        FL_CATCH {
+            ios_purge(ios_stdin);
+            raise(lasterror);
+        }
         if (ios_eof(ios_stdin)) break;
         print(ios_stdout, v=toplevel_eval(v), 0);
         set(symbol("that"), v);
--- a/femtolisp/read.c
+++ b/femtolisp/read.c
@@ -357,7 +357,6 @@
             else if ((c=='x' && (ndig=2)) ||
                      (c=='u' && (ndig=4)) ||
                      (c=='U' && (ndig=8))) {
-                wc = c;
                 c = ios_getc(f);
                 while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
                     eseq[j++] = c;
@@ -366,24 +365,15 @@
                 if (c!=IOS_EOF) ios_ungetc(c, f);
                 eseq[j] = '\0';
                 if (j) wc = strtol(eseq, NULL, 16);
+                else {
+                    free(buf);
+                    lerror(ParseError, "read: invalid escape sequence");
+                }
                 i += u8_wc_toutf8(&buf[i], wc);
             }
-            else if (c == 'n')
-                buf[i++] = '\n';
-            else if (c == 't')
-                buf[i++] = '\t';
-            else if (c == 'r')
-                buf[i++] = '\r';
-            else if (c == 'b')
-                buf[i++] = '\b';
-            else if (c == 'f')
-                buf[i++] = '\f';
-            else if (c == 'v')
-                buf[i++] = '\v';
-            else if (c == 'a')
-                buf[i++] = '\a';
-            else
-                buf[i++] = c;
+            else {
+                buf[i++] = read_escape_control_char((char)c);
+            }
         }
         else {
             buf[i++] = c;
--- a/femtolisp/system.lsp
+++ b/femtolisp/system.lsp
@@ -47,11 +47,10 @@
 (defun nconc lsts
   (cond ((null lsts) ())
         ((null (cdr lsts)) (car lsts))
-        (T ((lambda (l d) (if (null l) d
-                            (prog1 l
-                              (while (consp (cdr l)) (setq l (cdr l)))
-                              (rplacd l d))))
-            (car lsts) (apply nconc (cdr lsts))))))
+        ((null (car lsts)) (apply nconc (cdr lsts)))
+        (T (prog1 (car lsts)
+             (rplacd (last (car lsts))
+                     (apply nconc (cdr lsts)))))))
 
 (defun append lsts
   (cond ((null lsts) ())
@@ -211,10 +210,21 @@
 
 (defun transpose (M) (apply mapcar (cons list M)))
 
-(defun filter (pred lst)
-  (cond ((null lst) ())
-        ((pred (car lst)) (cons (car lst) (filter pred (cdr lst))))
-        (T (filter pred (cdr lst)))))
+(defun filter (pred lst) (filter- pred lst nil))
+(defun filter- (pred lst accum)
+  (cond ((null lst) accum)
+        ((pred (car lst))
+         (filter- pred (cdr lst) (cons (car lst) accum)))
+        (T
+         (filter- pred (cdr lst) accum))))
+
+(defun separate (pred lst) (separate- pred lst nil nil))
+(defun separate- (pred lst yes no)
+  (cond ((null lst) (cons yes no))
+        ((pred (car lst))
+         (separate- pred (cdr lst) (cons (car lst) yes) no))
+        (T
+         (separate- pred (cdr lst) yes (cons (car lst) no)))))
 
 (define (foldr f zero lst)
   (if (null lst) zero
--- a/femtolisp/table.c
+++ b/femtolisp/table.c
@@ -79,22 +79,30 @@
     return NIL;
 }
 
+// (put table key value)
 value_t fl_hash_put(value_t *args, u_int32_t nargs)
 {
+    argcount("put", nargs, 3);
     return NIL;
 }
 
+// (get table key)
 value_t fl_hash_get(value_t *args, u_int32_t nargs)
 {
+    argcount("get", nargs, 2);
     return NIL;
 }
 
+// (has table key)
 value_t fl_hash_has(value_t *args, u_int32_t nargs)
 {
+    argcount("has", nargs, 2);
     return NIL;
 }
 
+// (del table key)
 value_t fl_hash_delete(value_t *args, u_int32_t nargs)
 {
+    argcount("del", nargs, 2);
     return NIL;
 }
--- a/femtolisp/test.lsp
+++ b/femtolisp/test.lsp
@@ -43,10 +43,11 @@
 
 (defun sort (l)
   (if (or (null l) (null (cdr l))) l
-    (let ((piv (car l)))
-      (nconc (sort (filter (lambda (x) (<= x piv)) (cdr l)))
+    (let* ((piv (car l))
+           (halves (separate (lambda (x) (< x piv)) (cdr l))))
+      (nconc (sort (car halves))
              (list piv)
-             (sort (filter (lambda (x) (>  x piv)) (cdr l)))))))
+             (sort (cdr halves))))))
 
 (defmacro dotimes (var . body)
   (let ((v   (car var))
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -928,3 +928,35 @@
 - remaining cvalues functions
 - special efficient reader for #array
 - finish ios
+
+-----------------------------------------------------------------------------
+
+cvalues redesign
+
+goals:
+. allow custom types with vtables
+. use less space, share types more
+. simplify access to important metadata like length
+. unify vectors and arrays
+
+typedef struct {
+    fltype_t *type;
+    void *data;
+    size_t len;      // length of *data in bytes
+
+    value_t parent;  // optional
+    char data[1];    // variable size
+} cvalue_t;
+
+typedef struct {
+    fltype_t *type;
+    void *data;
+} cprim_t;
+
+typedef struct _fltype_t {
+    value_t type;
+    int numtype;
+    size_t sz;
+    cvtable_t *vtable;
+    struct _fltype_t *eltype;  // for arrays
+} fltype_t;
--- a/llt/ios.c
+++ b/llt/ios.c
@@ -715,6 +715,16 @@
     return (int)ch;
 }
 
+int ios_peekc(ios_t *s)
+{
+    if (s->bpos < s->size)
+        return s->buf[s->bpos];
+    if (s->_eof) return IOS_EOF;
+    size_t n = ios_readprep(s, 1);
+    if (n == 0)  return IOS_EOF;
+    return s->buf[s->bpos];
+}
+
 int ios_ungetc(int c, ios_t *s)
 {
     if (s->state == bst_wr)
@@ -759,6 +769,13 @@
     *pwc = u8_nextchar(s->buf, &i);
     ios_read(s, buf, sz+1);
     return 1;
+}
+
+void ios_purge(ios_t *s)
+{
+    if (s->state == bst_rd) {
+        s->bpos = s->size;
+    }
 }
 
 int ios_printf(ios_t *s, char *format, ...)
--- a/llt/ios.h
+++ b/llt/ios.h
@@ -112,6 +112,9 @@
 int ios_readline(ios_t *dest, ios_t *s, char delim);
 int ios_getline(ios_t *s, char **pbuf, size_t *psz);
 
+// discard data buffered for reading
+void ios_purge(ios_t *s);
+
 // seek by utf8 sequence increments
 int ios_nextutf8(ios_t *s);
 int ios_prevutf8(ios_t *s);
@@ -121,6 +124,7 @@
 int ios_putc(int c, ios_t *s);
 //wint_t ios_putwc(ios_t *s, wchar_t wc);
 int ios_getc(ios_t *s);
+int ios_peekc(ios_t *s);
 //wint_t ios_getwc(ios_t *s);
 int ios_ungetc(int c, ios_t *s);
 //wint_t ios_ungetwc(ios_t *s, wint_t wc);
--- a/llt/ptrhash.c
+++ b/llt/ptrhash.c
@@ -70,7 +70,7 @@
     orig = index;
 
     do {
-        if (tab[index] == PH_NOTFOUND) {
+        if (tab[index+1] == PH_NOTFOUND) {
             tab[index] = key;
             return &tab[index+1];
         }
--- a/llt/utf8.c
+++ b/llt/utf8.c
@@ -313,57 +313,57 @@
             (c >= 'a' && c <= 'f'));
 }
 
+char read_escape_control_char(char c)
+{
+    if (c == 'n')
+        return '\n';
+    else if (c == 't')
+        return '\t';
+    else if (c == 'r')
+        return '\r';
+    else if (c == 'b')
+        return '\b';
+    else if (c == 'f')
+        return '\f';
+    else if (c == 'v')
+        return '\v';
+    else if (c == 'a')
+        return '\a';
+    return c;
+}
+
 /* assumes that src points to the character after a backslash
-   returns number of input characters processed */
-int u8_read_escape_sequence(const char *str, u_int32_t *dest)
+   returns number of input characters processed, 0 if error */
+size_t u8_read_escape_sequence(const char *str, size_t ssz, u_int32_t *dest)
 {
+    assert(ssz > 0);
     u_int32_t ch;
-    char digs[9]="\0\0\0\0\0\0\0\0\0";
-    int dno=0, i=1;
+    char digs[10];
+    int dno=0, ndig;
+    size_t i=1;
+    char c0 = str[0];
 
-    ch = (u_int32_t)str[0];    /* take literal character */
-    if (str[0] == 'n')
-        ch = L'\n';
-    else if (str[0] == 't')
-        ch = L'\t';
-    else if (str[0] == 'r')
-        ch = L'\r';
-    else if (str[0] == 'b')
-        ch = L'\b';
-    else if (str[0] == 'f')
-        ch = L'\f';
-    else if (str[0] == 'v')
-        ch = L'\v';
-    else if (str[0] == 'a')
-        ch = L'\a';
-    else if (octal_digit(str[0])) {
+    if (octal_digit(c0)) {
         i = 0;
         do {
             digs[dno++] = str[i++];
-        } while (octal_digit(str[i]) && dno < 3);
+        } while (i<ssz && octal_digit(str[i]) && dno<3);
+        digs[dno] = '\0';
         ch = strtol(digs, NULL, 8);
     }
-    else if (str[0] == 'x') {
-        while (hex_digit(str[i]) && dno < 2) {
+    else if ((c0=='x' && (ndig=2)) ||
+             (c0=='u' && (ndig=4)) ||
+             (c0=='U' && (ndig=8))) {
+        while (i<ssz && hex_digit(str[i]) && dno<ndig) {
             digs[dno++] = str[i++];
         }
-        if (dno > 0)
-            ch = strtol(digs, NULL, 16);
+        if (dno == 0) return 0;
+        digs[dno] = '\0';
+        ch = strtol(digs, NULL, 16);
     }
-    else if (str[0] == 'u') {
-        while (hex_digit(str[i]) && dno < 4) {
-            digs[dno++] = str[i++];
-        }
-        if (dno > 0)
-            ch = strtol(digs, NULL, 16);
+    else {
+        ch = (u_int32_t)read_escape_control_char(c0);
     }
-    else if (str[0] == 'U') {
-        while (hex_digit(str[i]) && dno < 8) {
-            digs[dno++] = str[i++];
-        }
-        if (dno > 0)
-            ch = strtol(digs, NULL, 16);
-    }
     *dest = ch;
 
     return i;
@@ -381,7 +381,7 @@
     while (*src && c < sz) {
         if (*src == '\\') {
             src++;
-            amt = u8_read_escape_sequence(src, &ch);
+            amt = u8_read_escape_sequence(src, 1000, &ch);
         }
         else {
             ch = (u_int32_t)*src;
--- a/llt/utf8.h
+++ b/llt/utf8.h
@@ -55,10 +55,12 @@
 /* computes the # of bytes needed to encode a WC string as UTF-8 */
 size_t u8_codingsize(u_int32_t *wcstr, size_t n);
 
+char read_escape_control_char(char c);
+
 /* assuming src points to the character after a backslash, read an
    escape sequence, storing the result in dest and returning the number of
    input characters processed */
-int u8_read_escape_sequence(const char *src, u_int32_t *dest);
+size_t u8_read_escape_sequence(const char *src, size_t ssz, u_int32_t *dest);
 
 /* given a wide character, convert it to an ASCII escape sequence stored in
    buf, where buf is "sz" bytes. returns the number of characters output.