ref: 91d5706eb0db886724e5635c97d1a3be09ca29be
parent: 0fbac08cd028f95c5f5e99e3f3dfd27a2f2858ab
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Dec 31 06:38:01 EST 2017
libc: update various libc functions (from 9front)
--- a/libc/cleanname.c
+++ b/libc/cleanname.c
@@ -9,9 +9,10 @@
cleanname(char *name)
{
char *p, *q, *dotdot;
- int rooted;
+ int rooted, erasedprefix;
rooted = name[0] == '/';
+ erasedprefix = 0;
/*
* invariants:
@@ -24,9 +25,11 @@
while(*p) {
if(p[0] == '/') /* null element */
p++;
- else if(p[0] == '.' && SEP(p[1]))
+ else if(p[0] == '.' && SEP(p[1])) {
+ if(p == name)
+ erasedprefix = 1;
p += 1; /* don't count the separator in case it is nul */
- else if(p[0] == '.' && p[1] == '.' && SEP(p[2])) {
+ } else if(p[0] == '.' && p[1] == '.' && SEP(p[2])) {
p += 2;
if(q > dotdot) { /* can backtrack */
while(--q > dotdot && *q != '/')
@@ -38,6 +41,8 @@
*q++ = '.';
dotdot = q;
}
+ if(q == name)
+ erasedprefix = 1; /* erased entire path via dotdot */
} else { /* real path element */
if(q != name+rooted)
*q++ = '/';
@@ -48,5 +53,11 @@
if(q == name) /* empty string is really ``.'' */
*q++ = '.';
*q = '\0';
+ if(erasedprefix && name[0] == '#'){
+ /* this was not a #x device path originally - make it not one now */
+ memmove(name+2, name, strlen(name)+1);
+ name[0] = '.';
+ name[1] = '/';
+ }
return name;
}
--- a/libc/crypt.c
+++ b/libc/crypt.c
@@ -9,7 +9,6 @@
*/
#include <u.h>
#include <libc.h>
-#include <auth.h>
#include <libsec.h>
/*
--- a/libc/nrand.c
+++ b/libc/nrand.c
@@ -10,6 +10,9 @@
if(n < 0)
return n;
+ if(n == 1)
+ return 0;
+ /* and if n == 0, you deserve what you get */
slop = MASK % n;
do
v = lrand();
--- a/libc/rune.c
+++ b/libc/rune.c
@@ -1,24 +1,29 @@
#include <u.h>
#include <libc.h>
-#define Bit(i) (7-(i))
-/* N 0's preceded by i 1's, T(Bit(2)) is 1100 0000 */
-#define T(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
-/* 0000 0000 0000 0111 1111 1111 */
-#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
-
enum
{
- Bitx = Bit(1),
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
- Tx = T(1), /* 1000 0000 */
- Rune1 = (1<<(Bit(0)+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
- Maskx = (1<<Bitx)-1, /* 0011 1111 */
- Testx = Maskx ^ 0xFF, /* 1100 0000 */
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
- SurrogateMin = 0xD800,
- SurrogateMax = 0xDFFF,
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
Bad = Runeerror,
};
@@ -26,41 +31,67 @@
int
chartorune(Rune *rune, char *str)
{
- int c[UTFmax], i;
- Rune l;
+ int c, c1, c2, c3;
+ long l;
/*
- * N character sequence
+ * one character sequence
* 00000-0007F => T1
- * 00080-007FF => T2 Tx
- * 00800-0FFFF => T3 Tx Tx
- * 10000-10FFFF => T4 Tx Tx Tx
*/
-
- c[0] = *(uchar*)(str);
- if(c[0] < Tx){
- *rune = c[0];
+ c = *(uchar*)str;
+ if(c < Tx) {
+ *rune = c;
return 1;
}
- l = c[0];
- for(i = 1; i < UTFmax; i++) {
- c[i] = *(uchar*)(str+i);
- c[i] ^= Tx;
- if(c[i] & Testx)
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(uchar*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
goto bad;
- l = (l << Bitx) | c[i];
- if(c[0] < T(i + 2)) {
- l &= RuneX(i + 1);
- if(i == 1) {
- if(c[0] < T(2) || l <= Rune1)
- goto bad;
- } else if(l <= RuneX(i) || l > Runemax)
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(uchar*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ /*
+ * four character sequence
+ * 10000-10FFFF => T4 Tx Tx Tx
+ */
+ if(UTFmax >= 4) {
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if(c3 & Testx)
+ goto bad;
+ if(c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if(l <= Rune3)
goto bad;
- if (i == 2 && SurrogateMin <= l && l <= SurrogateMax)
+ if(l > Runemax)
goto bad;
*rune = l;
- return i + 1;
+ return 4;
}
}
@@ -75,10 +106,16 @@
int
runetochar(char *str, Rune *rune)
{
- int i, j;
- Rune c;
+ long c;
c = *rune;
+ if(c > Runemax)
+ c = Runeerror;
+
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
if(c <= Rune1) {
str[0] = c;
return 1;
@@ -85,35 +122,35 @@
}
/*
- * one character sequence
- * 00000-0007F => 00-7F
* two character sequence
* 0080-07FF => T2 Tx
+ */
+ if(c <= Rune2) {
+ str[0] = T2 | (c >> 1*Bitx);
+ str[1] = Tx | (c & Maskx);
+ return 2;
+ }
+
+ /*
* three character sequence
* 0800-FFFF => T3 Tx Tx
- * four character sequence (21-bit value)
- * 10000-1FFFFF => T4 Tx Tx Tx
- * If the Rune is out of range or a surrogate half,
- * convert it to the error rune.
- * Do this test when i==3 because the error rune encodes to three bytes.
- * Doing it earlier would duplicate work, since an out of range
- * Rune wouldn't have fit in one or two bytes.
*/
- for(i = 2; i < UTFmax + 1; i++){
- if(i == 3){
- if(c > Runemax)
- c = Runeerror;
- if(SurrogateMin <= c && c <= SurrogateMax)
- c = Runeerror;
- }
- if (c <= RuneX(i) || i == UTFmax ) {
- str[0] = T(i) | (c >> (i - 1)*Bitx);
- for(j = 1; j < i; j++)
- str[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
- return i;
- }
+ if(c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
}
- return UTFmax;
+
+ /*
+ * four character sequence
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
}
int
@@ -120,7 +157,7 @@
runelen(long c)
{
Rune rune;
- char str[10];
+ char str[UTFmax];
rune = c;
return runetochar(str, &rune);
@@ -129,21 +166,21 @@
int
runenlen(Rune *r, int nrune)
{
- int nb, i;
- Rune c;
+ int nb, c;
nb = 0;
while(nrune--) {
c = *r++;
- if(c <= Rune1){
+ if(c <= Rune1)
nb++;
- } else {
- for(i = 2; i < UTFmax + 1; i++)
- if(c <= RuneX(i) || i == UTFmax){
- nb += i;
- break;
- }
- }
+ else
+ if(c <= Rune2)
+ nb += 2;
+ else
+ if(c <= Rune3 || c > Runemax)
+ nb += 3;
+ else
+ nb += 4;
}
return nb;
}
@@ -151,8 +188,7 @@
int
fullrune(char *str, int n)
{
- int i;
- Rune c;
+ int c;
if(n <= 0)
return 0;
@@ -159,8 +195,10 @@
c = *(uchar*)str;
if(c < Tx)
return 1;
- for(i = 3; i < UTFmax + 1; i++)
- if(c < T(i))
- return n >= i - 1;
- return n >= UTFmax;
+ if(c < T3)
+ return n >= 2;
+ if(UTFmax == 3 || c < T4)
+ return n >= 3;
+ return n >= 4;
}
+
--- a/libc/runestrdup.c
+++ b/libc/runestrdup.c
@@ -2,13 +2,13 @@
#include <libc.h>
Rune*
-runestrdup(Rune *s)
-{
+runestrdup(Rune *s)
+{
Rune *ns;
ns = malloc(sizeof(Rune)*(runestrlen(s) + 1));
if(ns == 0)
return 0;
-
+ setmalloctag(ns, getcallerpc(&s));
return runestrcpy(ns, s);
}
--- a/libc/runestrecpy.c
+++ b/libc/runestrecpy.c
@@ -9,9 +9,9 @@
while(*s1++ = *s2++){
if(s1 == es1){
- *--s1 = '\0';
+ s1[-1] = '\0';
break;
}
}
- return s1;
+ return s1-1;
}
--- a/libc/runetype.c
+++ b/libc/runetype.c
@@ -211,9 +211,14 @@
{
0x0009, 0x000a, /* tab and newline */
0x0020, 0x0020, /* space */
+ 0x0085, 0x0085,
0x00a0, 0x00a0, /* */
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
0x2000, 0x200b, /* - */
0x2028, 0x2029, /* - */
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
0x3000, 0x3000, /* */
0xfeff, 0xfeff, /* */
};
@@ -611,6 +616,34 @@
0x1ff3, 509, /* ῳ ῼ */
};
+static Rune __isdigitr[] = {
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x1b50, 0x1b59,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff,
+};
+
/*
* upper case ranges
* 3rd col is conversion excess 500
@@ -1132,6 +1165,17 @@
Rune *p;
p = bsearch(c, _space2, nelem(_space2)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+int
+isdigitrune(Rune c)
+{
+ Rune *p;
+
+ p = bsearch(c, __isdigitr, nelem(__isdigitr)/2, 2);
if(p && c >= p[0] && c <= p[1])
return 1;
return 0;
--- a/libc/utfecpy.c
+++ b/libc/utfecpy.c
@@ -10,7 +10,7 @@
return to;
end = memccpy(to, from, '\0', e - to);
if(end == nil){
- end = e-1;
+ end = e;
while(end>to && (*--end&0xC0)==0x80)
;
*end = '\0';