ref: 830e1c986ce5347fe7d80fc47ea7973aab0bfcb2
parent: 8e4ba69a7bfc6aa49f0b33ff098869204b1487e1
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Tue Dec 23 23:43:36 EST 2008
renaming 'char' type to 'byte' to avoid confusion wchar will be used for all individual characters adding string.find function fixing bug in #sym(...) if sym was undefined
--- a/femtolisp/cvalues.c
+++ b/femtolisp/cvalues.c
@@ -9,7 +9,7 @@
value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
value_t int64sym, uint64sym;
-value_t longsym, ulongsym, charsym, wcharsym;
+value_t longsym, ulongsym, bytesym, wcharsym;
value_t floatsym, doublesym;
value_t gftypesym, stringtypesym, wcstringtypesym;
value_t emptystringsym;
@@ -25,7 +25,7 @@
static fltype_t *int64type, *uint64type;
static fltype_t *longtype, *ulongtype;
static fltype_t *floattype, *doubletype;
- fltype_t *chartype, *wchartype;
+ fltype_t *bytetype, *wchartype;
fltype_t *stringtype, *wcstringtype;
fltype_t *builtintype;
@@ -231,11 +231,11 @@
}
*/
-#define num_ctor(typenam, cnvt, tag) \
+#define num_ctor(typenam, ctype, cnvt, tag) \
static void cvalue_##typenam##_init(fltype_t *type, value_t arg, \
void *dest) \
{ \
- typenam##_t n=0; \
+ ctype##_t n=0; \
(void)type; \
if (isfixnum(arg)) { \
n = numval(arg); \
@@ -244,7 +244,7 @@
cvalue_t *cv = (cvalue_t*)ptr(arg); \
void *p = cv_data(cv); \
if (valid_numtype(cv_numtype(cv))) \
- n = (typenam##_t)conv_to_##cnvt(p, cv_numtype(cv)); \
+ n = (ctype##_t)conv_to_##cnvt(p, cv_numtype(cv)); \
else \
goto cnvt_error; \
} \
@@ -251,7 +251,7 @@
else { \
goto cnvt_error; \
} \
- *((typenam##_t*)dest) = n; \
+ *((ctype##_t*)dest) = n; \
return; \
cnvt_error: \
type_error(#typenam, "number", arg); \
@@ -259,37 +259,37 @@
value_t cvalue_##typenam(value_t *args, u_int32_t nargs) \
{ \
if (nargs==0) { PUSH(fixnum(0)); args = &Stack[SP-1]; } \
- value_t cv = cvalue(typenam##type, sizeof(typenam##_t)); \
+ value_t cv = cvalue(typenam##type, sizeof(ctype##_t)); \
cvalue_##typenam##_init(typenam##type, \
args[0], &((cvalue_t*)ptr(cv))->_space[0]); \
return cv; \
} \
-value_t mk_##typenam(typenam##_t n) \
+value_t mk_##typenam(ctype##_t n) \
{ \
- value_t cv = cvalue(typenam##type, sizeof(typenam##_t)); \
- *(typenam##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n; \
+ value_t cv = cvalue(typenam##type, sizeof(ctype##_t)); \
+ *(ctype##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n; \
return cv; \
}
-num_ctor(int8, int32, T_INT8)
-num_ctor(uint8, uint32, T_UINT8)
-num_ctor(int16, int32, T_INT16)
-num_ctor(uint16, uint32, T_UINT16)
-num_ctor(int32, int32, T_INT32)
-num_ctor(uint32, uint32, T_UINT32)
-num_ctor(int64, int64, T_INT64)
-num_ctor(uint64, uint64, T_UINT64)
-num_ctor(char, uint32, T_UINT8)
-num_ctor(wchar, int32, T_INT32)
+num_ctor(int8, int8, int32, T_INT8)
+num_ctor(uint8, uint8, uint32, T_UINT8)
+num_ctor(int16, int16, int32, T_INT16)
+num_ctor(uint16, uint16, uint32, T_UINT16)
+num_ctor(int32, int32, int32, T_INT32)
+num_ctor(uint32, uint32, uint32, T_UINT32)
+num_ctor(int64, int64, int64, T_INT64)
+num_ctor(uint64, uint64, uint64, T_UINT64)
+num_ctor(byte, uint8, uint32, T_UINT8)
+num_ctor(wchar, int32, int32, T_INT32)
#ifdef BITS64
-num_ctor(long, int64, T_INT64)
-num_ctor(ulong, uint64, T_UINT64)
+num_ctor(long, long, int64, T_INT64)
+num_ctor(ulong, ulong, uint64, T_UINT64)
#else
-num_ctor(long, int32, T_INT32)
-num_ctor(ulong, uint32, T_UINT32)
+num_ctor(long, long, int32, T_INT32)
+num_ctor(ulong, ulong, uint32, T_UINT32)
#endif
-num_ctor(float, double, T_FLOAT)
-num_ctor(double, double, T_DOUBLE)
+num_ctor(float, float, double, T_FLOAT)
+num_ctor(double, double, double, T_DOUBLE)
value_t size_wrap(size_t sz)
{
@@ -313,14 +313,6 @@
return 0;
}
-value_t char_from_code(uint32_t code)
-{
- value_t ccode = fixnum(code);
- if (code > 0x7f)
- return cvalue_wchar(&ccode, 1);
- return cvalue_char(&ccode, 1);
-}
-
static void cvalue_enum_init(fltype_t *ft, value_t arg, void *dest)
{
int n=0;
@@ -457,7 +449,7 @@
static value_t alloc_array(fltype_t *type, size_t sz)
{
value_t cv;
- if (type->eltype == chartype) {
+ if (type->eltype == bytetype) {
cv = cvalue_string(sz);
}
else {
@@ -556,7 +548,7 @@
// *palign is an output argument giving the alignment required by type
size_t ctype_sizeof(value_t type, int *palign)
{
- if (type == int8sym || type == uint8sym || type == charsym) {
+ if (type == int8sym || type == uint8sym || type == bytesym) {
*palign = 1;
return 1;
}
@@ -672,7 +664,7 @@
{
if (type == int8sym)
return T_INT8;
- else if (type == uint8sym || type == charsym)
+ else if (type == uint8sym || type == bytesym)
return T_UINT8;
else if (type == int16sym)
return T_INT16;
@@ -868,7 +860,7 @@
ctor_cv_intern(uint32);
ctor_cv_intern(int64);
ctor_cv_intern(uint64);
- ctor_cv_intern(char);
+ ctor_cv_intern(byte);
ctor_cv_intern(wchar);
ctor_cv_intern(long);
ctor_cv_intern(ulong);
@@ -890,7 +882,7 @@
// todo: autorelease
stringtypesym = symbol("*string-type*");
- setc(stringtypesym, list2(arraysym, charsym));
+ setc(stringtypesym, list2(arraysym, bytesym));
wcstringtypesym = symbol("*wcstring-type*");
setc(wcstringtypesym, list2(arraysym, wcharsym));
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -904,7 +904,7 @@
v = size_wrap(cvalue_arraylen(Stack[SP-1]));
break;
}
- else if (v == charsym) {
+ else if (v == bytesym) {
v = fixnum(1);
break;
}
--- a/femtolisp/flisp.h
+++ b/femtolisp/flisp.h
@@ -212,7 +212,7 @@
#define cv_type(cv) (cv_class(cv)->type)
#define cv_data(cv) ((cv)->data)
#define cv_numtype(cv) (cv_class(cv)->numtype)
-#define cv_isstr(cv) (cv_class(cv)->eltype == chartype)
+#define cv_isstr(cv) (cv_class(cv)->eltype == bytetype)
#define cvalue_data(v) cv_data((cvalue_t*)ptr(v))
@@ -232,11 +232,11 @@
extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
extern value_t int64sym, uint64sym;
-extern value_t longsym, ulongsym, charsym, ucharsym, wcharsym;
+extern value_t longsym, ulongsym, bytesym, wcharsym;
extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
extern value_t stringtypesym, wcstringtypesym, emptystringsym;
extern value_t unionsym, floatsym, doublesym, builtinsym;
-extern fltype_t *chartype, *wchartype;
+extern fltype_t *bytetype, *wchartype;
extern fltype_t *stringtype, *wcstringtype;
extern fltype_t *builtintype;
@@ -266,9 +266,9 @@
value_t mk_float(float_t n);
value_t mk_uint32(uint32_t n);
value_t mk_uint64(uint64_t n);
+value_t mk_wchar(int32_t n);
value_t return_from_uint64(uint64_t Uaccum);
value_t return_from_int64(int64_t Saccum);
-value_t char_from_code(uint32_t code);
typedef struct {
char *name;
@@ -279,7 +279,7 @@
/* builtins */
value_t fl_hash(value_t *args, u_int32_t nargs);
-value_t cvalue_char(value_t *args, uint32_t nargs);
+value_t cvalue_byte(value_t *args, uint32_t nargs);
value_t cvalue_wchar(value_t *args, uint32_t nargs);
#endif
--- a/femtolisp/print.c
+++ b/femtolisp/print.c
@@ -411,39 +411,19 @@
{
int64_t tmp=0;
- if (type == charsym) {
- // print chars as characters when possible
+ if (type == bytesym) {
unsigned char ch = *(unsigned char*)data;
if (princ)
outc(ch, f);
else if (weak)
- HPOS+=ios_printf(f, "%hhu", ch);
- else if (isprint(ch))
- HPOS+=ios_printf(f, "#\\%c", ch);
+ HPOS+=ios_printf(f, "0x%hhx", ch);
else
- HPOS+=ios_printf(f, "#char(%hhu)", ch);
+ HPOS+=ios_printf(f, "#byte(0x%hhx)", ch);
}
- /*
- else if (type == ucharsym) {
- uchar ch = *(uchar*)data;
- if (princ)
- outc(ch, f);
- else {
- if (!weak)
- ios_printf(f, "#uchar(");
- ios_printf(f, "%hhu", ch);
- if (!weak)
- outs(")", f);
- }
- }
- */
else if (type == wcharsym) {
uint32_t wc = *(uint32_t*)data;
char seq[8];
- if (weak)
- HPOS+=ios_printf(f, "%d", (int)wc);
- else if (princ || (iswprint(wc) && wc>0x7f)) {
- // reader only reads #\c syntax as wchar if the code is >0x7f
+ if (princ || iswprint(wc)) {
size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1);
seq[nb] = '\0';
// TODO: better multibyte handling
@@ -450,6 +430,9 @@
if (!princ) outs("#\\", f);
outs(seq, f);
}
+ else if (weak) {
+ HPOS+=ios_printf(f, "%d", (int)wc);
+ }
else {
HPOS+=ios_printf(f, "#%s(%d)", symbol_name(type), (int)wc);
}
@@ -544,7 +527,7 @@
elsize = ctype_sizeof(eltype, &junk);
cnt = elsize ? len/elsize : 0;
}
- if (eltype == charsym) {
+ if (eltype == bytesym) {
if (princ) {
ios_write(f, data, len);
}
--- a/femtolisp/read.c
+++ b/femtolisp/read.c
@@ -189,13 +189,7 @@
if (ios_getutf8(f, &cval) == IOS_EOF)
lerror(ParseError, "read: end of input in character constant");
toktype = TOK_NUM;
- tokval = fixnum(cval);
- if (cval > 0x7f) {
- tokval = cvalue_wchar(&tokval, 1);
- }
- else {
- tokval = cvalue_char(&tokval, 1);
- }
+ tokval = mk_wchar(cval);
}
else if ((char)ch == '(') {
toktype = TOK_SHARPOPEN;
@@ -501,7 +495,7 @@
PUSH(NIL);
read_list(f, &Stack[SP-1], UNBOUND);
v = POP();
- return apply(symbol_value(sym), v);
+ return apply(toplevel_eval(sym), v);
case TOK_OPENB:
return read_vector(f, label, TOK_CLOSEB);
case TOK_SHARPOPEN:
--- a/femtolisp/string.c
+++ b/femtolisp/string.c
@@ -109,7 +109,7 @@
else if (iscvalue(args[i])) {
temp = (cvalue_t*)ptr(args[i]);
t = cv_type(temp);
- if (t == charsym) {
+ if (t == bytesym) {
sz++;
continue;
}
@@ -136,7 +136,7 @@
temp = (cvalue_t*)ptr(args[i]);
t = cv_type(temp);
data = cvalue_data(args[i]);
- if (t == charsym) {
+ if (t == bytesym) {
*ptr++ = *(char*)data;
}
else if (t == wcharsym) {
@@ -225,9 +225,61 @@
size_t sl = u8_seqlen(&s[i]);
if (sl > len || i > len-sl)
bounds_error("string.char", args[0], args[1]);
- return char_from_code(u8_nextchar(s, &i));
+ return mk_wchar(u8_nextchar(s, &i));
}
+static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
+{
+ char *p = memchr(s+start, c, len-start);
+ if (p == NULL)
+ return NIL;
+ return size_wrap((size_t)(p - s));
+}
+
+value_t fl_string_find(value_t *args, u_int32_t nargs)
+{
+ char cbuf[8];
+ size_t start = 0;
+ if (nargs == 3)
+ start = toulong(args[2], "string.find");
+ else
+ argcount("string.find", nargs, 2);
+ char *s = tostring(args[0], "string.find");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ if (start > len)
+ bounds_error("string.find", args[0], args[2]);
+ char *needle=NULL; size_t needlesz=0;
+ if (!iscvalue(args[1]))
+ type_error("string.find", "string", args[1]);
+ cvalue_t *cv = (cvalue_t*)ptr(args[1]);
+ if (isstring(args[1])) {
+ needlesz = cv_len(cv);
+ needle = (char*)cv_data(cv);
+ }
+ else if (cv_class(cv) == wchartype) {
+ uint32_t c = *(uint32_t*)cv_data(cv);
+ if (c <= 0x7f)
+ return mem_find_byte(s, (char)c, start, len);
+ needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
+ needle = cbuf;
+ }
+ else if (cv_class(cv) == bytetype) {
+ return mem_find_byte(s, *(char*)cv_data(cv), start, len);
+ }
+ if (needlesz == 0)
+ return fixnum(start);
+ if (needlesz > len-start)
+ return NIL;
+ size_t i;
+ for(i=start; i < len; i++) {
+ if (s[i] == needle[0]) {
+ if (!memcmp(&s[i], needle, needlesz))
+ return size_wrap(i);
+ }
+ }
+ return NIL;
+}
+
value_t fl_string_inc(value_t *args, u_int32_t nargs)
{
if (nargs < 2 || nargs > 3)
@@ -274,6 +326,7 @@
{ "string.length", fl_string_length },
{ "string.split", fl_string_split },
{ "string.sub", fl_string_sub },
+ { "string.find", fl_string_find },
{ "string.char", fl_string_char },
{ "string.inc", fl_string_inc },
{ "string.dec", fl_string_dec },
--- a/femtolisp/system.lsp
+++ b/femtolisp/system.lsp
@@ -87,8 +87,8 @@
(define (cadr x) (car (cdr x)))
-;(setq *special-forms* '(quote cond if and or while lambda label trycatch
-; %top progn))
+;(setq *special-forms* '(quote cond if and or while lambda trycatch
+; setq progn))
(defun macroexpand (e)
((label mexpand
@@ -101,8 +101,7 @@
(cond ((and (consp e)
(not (eq (car e) 'quote)))
(let ((newenv
- (if (and (or (eq (car e) 'lambda)
- (eq (car e) 'label))
+ (if (and (eq (car e) 'lambda)
(consp (cdr e)))
(append.2 (cadr e) env)
env)))
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -818,7 +818,8 @@
*string.split - (string.split s sep-chars)
string.trim - (string.trim s chars-at-start chars-at-end)
*string.reverse
- string.find - (string.find s str|char), or nil if not found
+*string.find - (string.find s str|char [offs]), or nil if not found
+ string.rfind
string.map - (string.map f s)
*string.encode - to utf8
*string.decode - from utf8 to UCS
--- a/femtolisp/types.c
+++ b/femtolisp/types.c
@@ -114,7 +114,7 @@
mk_primtype(uint64);
mk_primtype(long);
mk_primtype(ulong);
- mk_primtype(char);
+ mk_primtype(byte);
mk_primtype(wchar);
mk_primtype(float);
mk_primtype(double);
--- a/femtolisp/unittest.lsp
+++ b/femtolisp/unittest.lsp
@@ -64,7 +64,7 @@
(assert (equal (uint64 (double -123)) #uint64(0xffffffffffffff85)))
-(assert (equal (string 'sym #char(65) #wchar(945) "blah") "symA\u03B1blah"))
+(assert (equal (string 'sym #byte(65) #wchar(945) "blah") "symA\u03B1blah"))
; this crashed once
(for 1 10 (lambda (i) 0))