ref: ffd3421cf0fbc4120923ea41f71fc6eb184e258d
parent: abb8ce3878cd18cb37548ee1305b626bcfc09b80
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Sun Oct 27 21:19:01 EDT 2013
libjson: fix for 21 bit runes, implement utf-16 surrogates
--- a/sys/src/libjson/json.c
+++ b/sys/src/libjson/json.c
@@ -7,7 +7,7 @@
enum {TEOF,
- TSTRING = (1<<(8*sizeof(Rune)))+1,
+ TSTRING = Runemax+1,
TNUM,
TNULL,
TFALSE,
@@ -48,9 +48,32 @@
}
static int
+fixsurrogate(Rune *rp, Rune r2)	/* pair the UTF-16 surrogate in *rp with the other half in r2; returns 1 if *rp is a surrogate that r2 does not complete, else 0 */
{+ Rune r1;	/* the rune passed in through rp */
+
+ r1 = *rp;
+ if(r1 >= 0xD800 && r1 <= 0xDBFF){+ if(r2 >= 0xDC00 && r2 <= 0xDFFF){+ *rp = 0x10000 + (((r1 - 0xD800)<<10) | (r2 - 0xDC00));	/* *rp is a high surrogate, r2 a low one: r1 supplies the upper 10 bits, r2 the lower 10 */
+ return 0;	/* *rp now holds the combined code point */
+ }
+ return 1;	/* high surrogate, but r2 is not a low surrogate; the caller in lex() keeps it in r2 and reads on */
+ } else
+ if(r1 >= 0xDC00 && r1 <= 0xDFFF){+ if(r2 >= 0xD800 && r2 <= 0xDBFF){+ *rp = 0x10000 + (((r2 - 0xD800)<<10) | (r1 - 0xDC00));	/* *rp is a low surrogate, r2 a high one: same formula with the roles swapped */
+ return 0;	/* *rp now holds the combined code point */
+ }
+ return 1;	/* low surrogate, but r2 is not a high surrogate */
+ }
+ return 0;	/* *rp is not a surrogate: left unchanged */
+}
+
+static int
lex(Lex *l)
{- Rune r;
+ Rune r, r2;
char *t;
int i;
char c;
@@ -101,6 +124,7 @@
return 0;
}
if(r == '"'){+ r2 = 0;
t = l->buf;
for(;;){r = getch(l);
@@ -127,10 +151,17 @@
c = getch(l);
r *= 16;
- if(c >= '0' && c <= '9') r += c - '0';
- else if(c >= 'a' && c <= 'f') r += c - 'a' + 10;
- else if(c >= 'A' && c <= 'F') r += c - 'A' + 10;
+ if(c >= '0' && c <= '9')
+ r += c - '0';
+ if(c >= 'a' && c <= 'f')
+ r += c - 'a' + 10;
+ else if(c >= 'A' && c <= 'F')
+ r += c - 'A' + 10;
}
+ if(fixsurrogate(&r, r2)){+ r2 = r;
+ continue;
+ }
break;
case 't':
r = '\t';
@@ -148,6 +179,7 @@
return -1;
}
}
+ r2 = 0;
t += runetochar(t, &r);
if(t >= l->buf + sizeof(l->buf)){ werrstr("json: string too long");--
⑨