ref: 62e5c359d0101a763613f294f4847d3f7c8d012b
parent: 46f2f47b1405c0f644e6d3dd5b8cdf458c458814
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Tue Aug 5 00:34:14 EDT 2008
adding string.inc and string.dec moving string functions to their own file
--- a/femtolisp/Makefile
+++ b/femtolisp/Makefile
@@ -1,7 +1,7 @@
CC = gcc
NAME = flisp
-SRCS = $(NAME).c builtins.c equal.c
+SRCS = $(NAME).c equal.c builtins.c string.c
OBJS = $(SRCS:%.c=%.o)
DOBJS = $(SRCS:%.c=%.do)
EXENAME = $(NAME)
--- a/femtolisp/builtins.c
+++ b/femtolisp/builtins.c
@@ -234,226 +234,6 @@
return v;
}
-int isstring(value_t v)
-{
- return (iscvalue(v) && ((cvalue_t*)ptr(v))->flags.cstring);
-}
-
-value_t fl_intern(value_t *args, u_int32_t nargs)
-{
- argcount("intern", nargs, 1);
- if (!isstring(args[0]))
- type_error("intern", "string", args[0]);
- return symbol(cvalue_data(args[0]));
-}
-
-value_t fl_stringp(value_t *args, u_int32_t nargs)
-{
- argcount("stringp", nargs, 1);
- return isstring(args[0]) ? T : NIL;
-}
-
-value_t fl_string_length(value_t *args, u_int32_t nargs)
-{
- argcount("string.length", nargs, 1);
- if (!isstring(args[0]))
- type_error("string.length", "string", args[0]);
- size_t len = cv_len((cvalue_t*)ptr(args[0]));
- return size_wrap(u8_charnum(cvalue_data(args[0]), len));
-}
-
-value_t fl_string_reverse(value_t *args, u_int32_t nargs)
-{
- argcount("string.reverse", nargs, 1);
- if (!isstring(args[0]))
- type_error("string.reverse", "string", args[0]);
- size_t len = cv_len((cvalue_t*)ptr(args[0]));
- value_t ns = cvalue_string(len);
- u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
- return ns;
-}
-
-value_t fl_string_encode(value_t *args, u_int32_t nargs)
-{
- argcount("string.encode", nargs, 1);
- if (iscvalue(args[0])) {
- cvalue_t *cv = (cvalue_t*)ptr(args[0]);
- value_t t = cv_type(cv);
- if (iscons(t) && car_(t) == arraysym &&
- iscons(cdr_(t)) && car_(cdr_(t)) == wcharsym) {
- size_t nc = cv_len(cv) / sizeof(uint32_t);
- uint32_t *ptr = (uint32_t*)cv_data(cv);
- size_t nbytes = u8_codingsize(ptr, nc);
- value_t str = cvalue_string(nbytes);
- ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
- u8_toutf8(cvalue_data(str), nbytes, ptr, nc);
- return str;
- }
- }
- type_error("string.encode", "wide character array", args[0]);
-}
-
-value_t fl_string_decode(value_t *args, u_int32_t nargs)
-{
- int term=0;
- if (nargs == 2) {
- term = (POP() != NIL);
- nargs--;
- }
- argcount("string.decode", nargs, 1);
- if (!isstring(args[0]))
- type_error("string.decode", "string", args[0]);
- cvalue_t *cv = (cvalue_t*)ptr(args[0]);
- char *ptr = (char*)cv_data(cv);
- size_t nb = cv_len(cv);
- size_t nc = u8_charnum(ptr, nb);
- size_t newsz = nc*sizeof(uint32_t);
- if (term) newsz += sizeof(uint32_t);
- value_t wcstr = cvalue(symbol_value(wcstringtypesym), newsz);
- ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
- uint32_t *pwc = cvalue_data(wcstr);
- u8_toucs(pwc, nc, ptr, nb);
- if (term) pwc[nc] = 0;
- return wcstr;
-}
-
-value_t fl_string(value_t *args, u_int32_t nargs)
-{
- value_t cv, t;
- u_int32_t i;
- size_t len, sz = 0;
- cvalue_t *temp;
- char *data;
- wchar_t wc;
-
- for(i=0; i < nargs; i++) {
- if (issymbol(args[i])) {
- sz += strlen(symbol_name(args[i]));
- continue;
- }
- else if (iscvalue(args[i])) {
- temp = (cvalue_t*)ptr(args[i]);
- t = cv_type(temp);
- if (t == charsym) {
- sz++;
- continue;
- }
- else if (t == wcharsym) {
- wc = *(wchar_t*)cv_data(temp);
- sz += u8_charlen(wc);
- continue;
- }
- else if (temp->flags.cstring) {
- sz += cv_len(temp);
- continue;
- }
- }
- lerror(ArgError, "string: expected string, symbol or character");
- }
- cv = cvalue_string(sz);
- char *ptr = cvalue_data(cv);
- for(i=0; i < nargs; i++) {
- if (issymbol(args[i])) {
- char *name = symbol_name(args[i]);
- while (*name) *ptr++ = *name++;
- }
- else {
- temp = (cvalue_t*)ptr(args[i]);
- t = cv_type(temp);
- data = cvalue_data(args[i]);
- if (t == charsym) {
- *ptr++ = *(char*)data;
- }
- else if (t == wcharsym) {
- ptr += u8_wc_toutf8(ptr, *(wchar_t*)data);
- }
- else {
- len = cv_len(temp);
- memcpy(ptr, data, len);
- ptr += len;
- }
- }
- }
- return cv;
-}
-
-value_t fl_string_split(value_t *args, u_int32_t nargs)
-{
- argcount("string.split", nargs, 2);
- char *s = tostring(args[0], "string.split");
- char *delim = tostring(args[1], "string.split");
- size_t len = cv_len((cvalue_t*)ptr(args[0]));
- size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
- PUSH(NIL);
- size_t ssz, tokend=0, tokstart=0, i=0;
- value_t c=NIL;
- size_t junk;
- do {
- // find and allocate next token
- tokstart = tokend = i;
- while (i < len &&
- !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
- tokend = i;
- ssz = tokend - tokstart;
- PUSH(c); // save previous cons cell
- c = fl_cons(cvalue_string(ssz), NIL);
-
- // we've done allocation; reload movable pointers
- s = cv_data((cvalue_t*)ptr(args[0]));
- delim = cv_data((cvalue_t*)ptr(args[1]));
-
- if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);
-
- // link new cell
- if (Stack[SP-1] == NIL) {
- Stack[SP-2] = c; // first time, save first cons
- (void)POP();
- }
- else {
- ((cons_t*)ptr(POP()))->cdr = c;
- }
-
- // note this tricky condition: if the string ends with a
- // delimiter, we need to go around one more time to add an
- // empty string. this happens when (i==len && tokend<i)
- } while (i < len || (i==len && (tokend!=i)));
- return POP();
-}
-
-value_t fl_string_sub(value_t *args, u_int32_t nargs)
-{
- argcount("string.sub", nargs, 3);
- char *s = tostring(args[0], "string.sub");
- size_t len = cv_len((cvalue_t*)ptr(args[0]));
- size_t i1, i2;
- i1 = toulong(args[1], "string.sub");
- if (i1 > len)
- bounds_error("string.sub", args[0], args[1]);
- i2 = toulong(args[2], "string.sub");
- if (i2 > len)
- bounds_error("string.sub", args[0], args[2]);
- if (i2 <= i1)
- return cvalue_string(0);
- value_t ns = cvalue_string(i2-i1);
- memcpy(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1);
- return ns;
-}
-
-value_t fl_string_char(value_t *args, u_int32_t nargs)
-{
- argcount("string.char", nargs, 2);
- char *s = tostring(args[0], "string.char");
- size_t len = cv_len((cvalue_t*)ptr(args[0]));
- size_t i;
- i = toulong(args[1], "string.char");
- if (i > len)
- bounds_error("string.char", args[0], args[1]);
- size_t sl = u8_seqlen(&s[i]);
- if (sl > len || i > len-sl)
- bounds_error("string.char", args[0], args[1]);
- return char_from_code(u8_nextchar(s, &i));
-}
-
value_t fl_time_now(value_t *args, u_int32_t nargs)
{
argcount("time.now", nargs, 0);
@@ -559,6 +339,8 @@
return mk_double(rand_double());
}
+extern void stringfuncs_init();
+
void builtins_init()
{
set(symbol("set-syntax"), guestfunc(fl_setsyntax));
@@ -572,22 +354,11 @@
set(symbol("read"), guestfunc(fl_read));
set(symbol("load"), guestfunc(fl_load));
set(symbol("exit"), guestfunc(fl_exit));
- set(symbol("intern"), guestfunc(fl_intern));
set(symbol("fixnum"), guestfunc(fl_fixnum));
set(symbol("truncate"), guestfunc(fl_truncate));
set(symbol("vector.alloc"), guestfunc(fl_vector_alloc));
- set(symbol("string"), guestfunc(fl_string));
- set(symbol("stringp"), guestfunc(fl_stringp));
- set(symbol("string.length"), guestfunc(fl_string_length));
- set(symbol("string.split"), guestfunc(fl_string_split));
- set(symbol("string.sub"), guestfunc(fl_string_sub));
- set(symbol("string.char"), guestfunc(fl_string_char));
- set(symbol("string.reverse"), guestfunc(fl_string_reverse));
- set(symbol("string.encode"), guestfunc(fl_string_encode));
- set(symbol("string.decode"), guestfunc(fl_string_decode));
-
set(symbol("time.now"), guestfunc(fl_time_now));
set(symbol("time.string"), guestfunc(fl_time_string));
@@ -600,4 +371,6 @@
set(symbol("os.getenv"), guestfunc(fl_os_getenv));
set(symbol("os.setenv"), guestfunc(fl_os_setenv));
+
+ stringfuncs_init();
}
--- a/femtolisp/cvalues.c
+++ b/femtolisp/cvalues.c
@@ -183,6 +183,11 @@
return v;
}
+int isstring(value_t v)
+{
+ return (iscvalue(v) && ((cvalue_t*)ptr(v))->flags.cstring);
+}
+
// convert to malloc representation (fixed address)
/*
static void cv_pin(cvalue_t *cv)
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -397,7 +397,8 @@
else if (iscvalue(v)) {
return cvalue_relocate(v);
}
- else if (ismanaged(v) && issymbol(v)) {
+ else if (ismanaged(v)) {
+ assert(issymbol(v));
gensym_t *gs = (gensym_t*)ptr(v);
if (gs->id == 0xffffffff)
return gs->binding;
--- /dev/null
+++ b/femtolisp/string.c
@@ -1,0 +1,285 @@
+/*
+ string functions
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <errno.h>
+#include "llt.h"
+#include "flisp.h"
+
+value_t fl_intern(value_t *args, u_int32_t nargs)
+{
+ argcount("intern", nargs, 1);
+ if (!isstring(args[0]))
+ type_error("intern", "string", args[0]);
+ return symbol(cvalue_data(args[0]));
+}
+
+value_t fl_stringp(value_t *args, u_int32_t nargs)
+{
+ argcount("stringp", nargs, 1);
+ return isstring(args[0]) ? T : NIL;
+}
+
+value_t fl_string_length(value_t *args, u_int32_t nargs)
+{
+ argcount("string.length", nargs, 1);
+ if (!isstring(args[0]))
+ type_error("string.length", "string", args[0]);
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ return size_wrap(u8_charnum(cvalue_data(args[0]), len));
+}
+
+value_t fl_string_reverse(value_t *args, u_int32_t nargs)
+{
+ argcount("string.reverse", nargs, 1);
+ if (!isstring(args[0]))
+ type_error("string.reverse", "string", args[0]);
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ value_t ns = cvalue_string(len);
+ u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
+ return ns;
+}
+
+value_t fl_string_encode(value_t *args, u_int32_t nargs)
+{
+ argcount("string.encode", nargs, 1);
+ if (iscvalue(args[0])) {
+ cvalue_t *cv = (cvalue_t*)ptr(args[0]);
+ value_t t = cv_type(cv);
+ if (iscons(t) && car_(t) == arraysym &&
+ iscons(cdr_(t)) && car_(cdr_(t)) == wcharsym) {
+ size_t nc = cv_len(cv) / sizeof(uint32_t);
+ uint32_t *ptr = (uint32_t*)cv_data(cv);
+ size_t nbytes = u8_codingsize(ptr, nc);
+ value_t str = cvalue_string(nbytes);
+ ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
+ u8_toutf8(cvalue_data(str), nbytes, ptr, nc);
+ return str;
+ }
+ }
+ type_error("string.encode", "wide character array", args[0]);
+}
+
+value_t fl_string_decode(value_t *args, u_int32_t nargs)
+{
+ int term=0;
+ if (nargs == 2) {
+ term = (POP() != NIL);
+ nargs--;
+ }
+ argcount("string.decode", nargs, 1);
+ if (!isstring(args[0]))
+ type_error("string.decode", "string", args[0]);
+ cvalue_t *cv = (cvalue_t*)ptr(args[0]);
+ char *ptr = (char*)cv_data(cv);
+ size_t nb = cv_len(cv);
+ size_t nc = u8_charnum(ptr, nb);
+ size_t newsz = nc*sizeof(uint32_t);
+ if (term) newsz += sizeof(uint32_t);
+ value_t wcstr = cvalue(symbol_value(wcstringtypesym), newsz);
+ ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
+ uint32_t *pwc = cvalue_data(wcstr);
+ u8_toucs(pwc, nc, ptr, nb);
+ if (term) pwc[nc] = 0;
+ return wcstr;
+}
+
+value_t fl_string(value_t *args, u_int32_t nargs)
+{
+ value_t cv, t;
+ u_int32_t i;
+ size_t len, sz = 0;
+ cvalue_t *temp;
+ char *data;
+ wchar_t wc;
+
+ for(i=0; i < nargs; i++) {
+ if (issymbol(args[i])) {
+ sz += strlen(symbol_name(args[i]));
+ continue;
+ }
+ else if (iscvalue(args[i])) {
+ temp = (cvalue_t*)ptr(args[i]);
+ t = cv_type(temp);
+ if (t == charsym) {
+ sz++;
+ continue;
+ }
+ else if (t == wcharsym) {
+ wc = *(wchar_t*)cv_data(temp);
+ sz += u8_charlen(wc);
+ continue;
+ }
+ else if (temp->flags.cstring) {
+ sz += cv_len(temp);
+ continue;
+ }
+ }
+ lerror(ArgError, "string: expected string, symbol or character");
+ }
+ cv = cvalue_string(sz);
+ char *ptr = cvalue_data(cv);
+ for(i=0; i < nargs; i++) {
+ if (issymbol(args[i])) {
+ char *name = symbol_name(args[i]);
+ while (*name) *ptr++ = *name++;
+ }
+ else {
+ temp = (cvalue_t*)ptr(args[i]);
+ t = cv_type(temp);
+ data = cvalue_data(args[i]);
+ if (t == charsym) {
+ *ptr++ = *(char*)data;
+ }
+ else if (t == wcharsym) {
+ ptr += u8_wc_toutf8(ptr, *(wchar_t*)data);
+ }
+ else {
+ len = cv_len(temp);
+ memcpy(ptr, data, len);
+ ptr += len;
+ }
+ }
+ }
+ return cv;
+}
+
+value_t fl_string_split(value_t *args, u_int32_t nargs)
+{
+ argcount("string.split", nargs, 2);
+ char *s = tostring(args[0], "string.split");
+ char *delim = tostring(args[1], "string.split");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
+ PUSH(NIL);
+ size_t ssz, tokend=0, tokstart=0, i=0;
+ value_t c=NIL;
+ size_t junk;
+ do {
+ // find and allocate next token
+ tokstart = tokend = i;
+ while (i < len &&
+ !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
+ tokend = i;
+ ssz = tokend - tokstart;
+ PUSH(c); // save previous cons cell
+ c = fl_cons(cvalue_string(ssz), NIL);
+
+ // we've done allocation; reload movable pointers
+ s = cv_data((cvalue_t*)ptr(args[0]));
+ delim = cv_data((cvalue_t*)ptr(args[1]));
+
+ if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);
+
+ // link new cell
+ if (Stack[SP-1] == NIL) {
+ Stack[SP-2] = c; // first time, save first cons
+ (void)POP();
+ }
+ else {
+ ((cons_t*)ptr(POP()))->cdr = c;
+ }
+
+ // note this tricky condition: if the string ends with a
+ // delimiter, we need to go around one more time to add an
+ // empty string. this happens when (i==len && tokend<i)
+ } while (i < len || (i==len && (tokend!=i)));
+ return POP();
+}
+
+value_t fl_string_sub(value_t *args, u_int32_t nargs)
+{
+ argcount("string.sub", nargs, 3);
+ char *s = tostring(args[0], "string.sub");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ size_t i1, i2;
+ i1 = toulong(args[1], "string.sub");
+ if (i1 > len)
+ bounds_error("string.sub", args[0], args[1]);
+ i2 = toulong(args[2], "string.sub");
+ if (i2 > len)
+ bounds_error("string.sub", args[0], args[2]);
+ if (i2 <= i1)
+ return cvalue_string(0);
+ value_t ns = cvalue_string(i2-i1);
+ memcpy(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1);
+ return ns;
+}
+
+value_t fl_string_char(value_t *args, u_int32_t nargs)
+{
+ argcount("string.char", nargs, 2);
+ char *s = tostring(args[0], "string.char");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ size_t i = toulong(args[1], "string.char");
+ if (i >= len)
+ bounds_error("string.char", args[0], args[1]);
+ size_t sl = u8_seqlen(&s[i]);
+ if (sl > len || i > len-sl)
+ bounds_error("string.char", args[0], args[1]);
+ return char_from_code(u8_nextchar(s, &i));
+}
+
+value_t fl_string_inc(value_t *args, u_int32_t nargs)
+{
+ if (nargs < 2 || nargs > 3)
+ argcount("string.inc", nargs, 2);
+ char *s = tostring(args[0], "string.inc");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ size_t i = toulong(args[1], "string.inc");
+ size_t cnt = 1;
+ if (nargs == 3)
+ cnt = toulong(args[2], "string.inc");
+ while (cnt--) {
+ if (i >= len)
+ bounds_error("string.inc", args[0], args[1]);
+ u8_inc(s, &i);
+ }
+ return size_wrap(i);
+}
+
+value_t fl_string_dec(value_t *args, u_int32_t nargs)
+{
+ if (nargs < 2 || nargs > 3)
+ argcount("string.dec", nargs, 2);
+ char *s = tostring(args[0], "string.dec");
+ size_t len = cv_len((cvalue_t*)ptr(args[0]));
+ size_t i = toulong(args[1], "string.dec");
+ size_t cnt = 1;
+ if (nargs == 3)
+ cnt = toulong(args[2], "string.dec");
+ // note: i is allowed to start at index len
+ if (i > len)
+ bounds_error("string.dec", args[0], args[1]);
+ while (cnt--) {
+ if (i == 0)
+ bounds_error("string.dec", args[0], args[1]);
+ u8_dec(s, &i);
+ }
+ return size_wrap(i);
+}
+
+void stringfuncs_init()
+{
+ set(symbol("intern"), guestfunc(fl_intern));
+
+ set(symbol("string"), guestfunc(fl_string));
+ set(symbol("stringp"), guestfunc(fl_stringp));
+ set(symbol("string.length"), guestfunc(fl_string_length));
+ set(symbol("string.split"), guestfunc(fl_string_split));
+ set(symbol("string.sub"), guestfunc(fl_string_sub));
+ set(symbol("string.char"), guestfunc(fl_string_char));
+ set(symbol("string.inc"), guestfunc(fl_string_inc));
+ set(symbol("string.dec"), guestfunc(fl_string_dec));
+ set(symbol("string.reverse"), guestfunc(fl_string_reverse));
+ set(symbol("string.encode"), guestfunc(fl_string_encode));
+ set(symbol("string.decode"), guestfunc(fl_string_decode));
+}
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -768,8 +768,8 @@
String API
*string - append/construct
- string.inc - (string.inc s i [nchars])
- string.dec
+*string.inc - (string.inc s i [nchars])
+*string.dec
string.count - # of chars between 2 byte offsets
string.width - # columns
*string.char - char at byte offset
@@ -798,8 +798,8 @@
stream.copy - (stream.copy to from [nbytes])
stream.copyuntil - (stream.copy to from byte)
stream.flush
- stream.pos
- stream.seek
+ stream.pos - (stream.pos s [set-pos])
+ stream.seek - (stream.seek s offset)
stream.trunc
stream.getc - get utf8 character(s)