ref: dfec09c436a8a328fc97007cf2dc3cd5476cc748
parent: dbfb76673539e5b59dac437c6b2a2159c896731c
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Tue Sep 25 16:14:25 EDT 2018
libc: add utf2idn() and idn2utf() functions to deal with internationalized domain names
--- a/sys/include/libc.h
+++ b/sys/include/libc.h
@@ -527,6 +527,9 @@
extern NetConnInfo* getnetconninfo(char*, int);
extern void freenetconninfo(NetConnInfo*);
+extern char* idn2utf(char*, char*, int);
+extern char* utf2idn(char*, char*, int);
+
/*
* system calls
*
--- /dev/null
+++ b/sys/src/libc/9sys/idn.c
@@ -1,0 +1,262 @@
+#include <u.h>
+#include <libc.h>
+
+enum {	/* punycode parameters; the values are the ones listed in RFC 3492 */
+ base = 36,	/* number of digit values: letters are 0..25, '0'..'9' are 26..35 */
+ tmin = 1,	/* smallest threshold t used for a digit position */
+ tmax = 26,	/* largest threshold t used for a digit position */
+ skew = 38,	/* added to delta in the divisor of adapt()'s result */
+ damp = 700,	/* adapt() divides delta by this on its first call */
+ initial_bias = 72,	/* bias before the first adapt() call */
+ initial_n = 0x80,	/* first code point that is not copied literally */
+
+ Domlen = 256,	/* number of runes in the per-label buffer rb */
+};
+
+static uint maxint = ~0;	/* all bits set: the largest uint, used by the overflow checks */
+
+/* digit value of basic code point cp, or base when cp is not a punycode digit */
+static uint
+decode_digit(uint cp)
+{
+	/* cp is unsigned, so cp - lo wraps to a huge value whenever cp < lo */
+	if(cp - 'a' < 26)
+		return cp - 'a';
+	if(cp - 'A' < 26)
+		return cp - 'A';
+	return cp - '0' < 10 ? cp - '0' + 26 : base;
+}
+
+/* character for digit d: below 26 a letter (upper case if flag), otherwise '0' + d - 26 */
+static char
+encode_digit(uint d, int flag)
+{
+	char a = flag ? 'A' : 'a';
+	return d < 26 ? d + a : d - 26 + '0';
+}
+
+/* next bias from delta; firsttime picks the damp divisor, numpoints is a divisor and must not be 0 */
+static uint
+adapt(uint delta, uint numpoints, int firsttime)
+{
+	uint k;
+	delta /= firsttime ? damp : 2;
+	delta += delta / numpoints;
+	for(k = 0; delta > ((base - tmin) * tmax) / 2; delta /= base - tmin)
+		k += base;
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+static int
+punyencode(uint input_length, Rune input[], uint max_out, char output[])
+{
+ uint n, delta, h, b, out, bias, j, m, q, k, t;
+ /* encode input[0..input_length) into at most max_out chars of output; returns the count written (no NUL is added) or -1 */
+ n = initial_n;
+ delta = out = 0;
+ bias = initial_bias;
+ /* first pass: copy the code points below 0x80 unchanged */
+ for (j = 0; j < input_length; ++j) {
+ if ((uint)input[j] < 0x80) {
+ if (max_out - out < 2)	/* also leaves room for the '-' written below */
+ return -1;
+ output[out++] = input[j];
+ }
+ }
+ /* h counts the code points handled so far, b the ones copied above */
+ h = b = out;
+ /* a '-' separates the copied part from the encoded part */
+ if (b > 0)
+ output[out++] = '-';
+ /* each round handles every occurrence of the smallest code point >= n */
+ while (h < input_length) {
+ for (m = maxint, j = 0; j < input_length; ++j) {
+ if (input[j] >= n && input[j] < m)
+ m = input[j];
+ }
+ /* give up if advancing delta by (m - n) * (h + 1) would wrap */
+ if (m - n > (maxint - delta) / (h + 1))
+ return -1;
+
+ delta += (m - n) * (h + 1);
+ n = m;
+
+ for (j = 0; j < input_length; ++j) {
+ if (input[j] < n) {
+ if (++delta == 0)	/* delta wrapped around */
+ return -1;
+ }
+
+ if (input[j] == n) {
+ for (q = delta, k = base;; k += base) {	/* write delta as a variable-length number */
+ if (out >= max_out)
+ return -1;
+ if (k <= bias)	/* t is k - bias clamped to tmin..tmax */
+ t = tmin;
+ else if (k >= bias + tmax)
+ t = tmax;
+ else
+ t = k - bias;
+ if (q < t)	/* what is left fits in the final digit */
+ break;
+ output[out++] = encode_digit(t + (q - t) % (base - t), 0);
+ q = (q - t) / (base - t);
+ }
+ output[out++] = encode_digit(q, isupperrune(input[j]));	/* final digit; upper case when the rune is */
+ bias = adapt(delta, h + 1, h == b);	/* h == b only for the first encoded code point */
+ delta = 0;
+ ++h;
+ }
+ }
+
+ ++delta, ++n;	/* continue with the next code point value */
+ }
+
+ return (int)out;
+}
+
+static int
+punydecode(uint input_length, char input[], uint max_out, Rune output[])
+{
+ uint n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+ /* decode input[0..input_length) into at most max_out runes of output; returns the rune count or -1 */
+ n = initial_n;
+ out = i = 0;
+ bias = initial_bias;
+ /* b = index of the last '-', or 0 when there is none */
+ for (b = j = 0; j < input_length; ++j)
+ if (input[j] == '-')
+ b = j;
+
+ if (b > max_out)
+ return -1;
+ /* everything before that '-' is copied as is and must be 7-bit */
+ for (j = 0; j < b; ++j) {
+ if (input[j] & 0x80)
+ return -1;
+ output[out++] = input[j];
+ }
+ /* the rest is a sequence of numbers, each one inserting a single rune */
+ for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) {
+ for (oldi = i, w = 1, k = base;; k += base) {	/* add one variable-length number to i */
+ if (in >= input_length)	/* input ended inside a number */
+ return -1;
+ digit = decode_digit(input[in++]);
+ if (digit >= base)	/* not a digit character */
+ return -1;
+ if (digit > (maxint - i) / w)	/* i += digit * w would wrap */
+ return -1;
+ i += digit * w;
+ if (k <= bias)	/* t is k - bias clamped to tmin..tmax */
+ t = tmin;
+ else if (k >= bias + tmax)
+ t = tmax;
+ else
+ t = k - bias;
+ if (digit < t)	/* a digit below t ends the number */
+ break;
+ if (w > maxint / (base - t))	/* w *= (base - t) would wrap */
+ return -1;
+ w *= (base - t);
+ }
+
+ bias = adapt(i - oldi, out + 1, oldi == 0);
+ /* i / (out + 1) advances the code point n, i % (out + 1) is the insert position */
+ if (i / (out + 1) > maxint - n)
+ return -1;
+ n += i / (out + 1);
+ i %= (out + 1);
+
+ if (out >= max_out)
+ return -1;
+ /* open a gap at i; an upper case final digit selects the upper case rune */
+ memmove(output + i + 1, output + i, (out - i) * sizeof *output);
+ if(((uint)input[in-1] - 'A') < 26)
+ output[i++] = toupperrune(n);
+ else
+ output[i++] = tolowerrune(n);
+ }
+
+ return (int)out;
+}
+
+/*
+ * convert punycode encoded internationalized domain name to unicode
+ * string in buf[nbuf]; nil if a label is malformed or does not fit
+ */
+char*
+idn2utf(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;	/* the last byte is kept for the final NUL */
+	for(cp = name;; cp += nc+1){	/* one label per pass; +1 skips the '.' */
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			if(nr >= nelem(rb))	/* label longer than rb: fail instead of overrunning it */
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(cistrncmp(cp, "xn--", 4) == 0)	/* encoded label: rb is replaced by the decoded runes */
+			if((nr = punydecode(nc-4, cp+4, nelem(rb), rb)) < 0)
+				return nil;
+		dp = seprint(dp, de, "%.*S", nr, rb);
+		if(dp == nil || dp >= de)	/* NOTE(review): nil check assumes seprint can return nil once dp reaches de - confirm */
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+	}
+	*dp = 0;
+	return buf;
+}
+
+/*
+ * convert unicode string to punycode encoded internationalized
+ * domain name in buf[nbuf]; nil if a label is too long or does not fit
+ */
+char*
+utf2idn(char *name, char *buf, int nbuf)
+{
+	char *dp, *de, *cp;
+	Rune rb[Domlen], r;
+	int nc, nr, n;
+
+	dp = buf;
+	de = dp+nbuf-1;	/* the last byte is kept for the final NUL */
+	for(cp = name;; cp += nc+1){	/* one label per pass; +1 skips the '.' */
+		nc = nr = 0;
+		while(cp[nc] != 0){
+			n = chartorune(&r, cp+nc);
+			if(r == '.')
+				break;
+			if(nr >= nelem(rb))	/* too long for rb: fail rather than split the label */
+				return nil;
+			rb[nr++] = r;
+			nc += n;
+		}
+		if(nc == nr)	/* as many bytes as runes: copy the label as is */
+			dp = seprint(dp, de, "%.*s", nc, cp);
+		else {
+			dp = seprint(dp, de, "xn--");
+			if(dp == nil || dp >= de || (n = punyencode(nr, rb, de - dp, dp)) < 0)
+				return nil;	/* de - dp becomes an unsigned length, so dp must be below de */
+			dp += n;
+		}
+		if(dp == nil || dp >= de)	/* NOTE(review): nil check assumes seprint can return nil once dp reaches de - confirm */
+			return nil;
+		if(cp[nc] == 0)
+			break;
+		*dp++ = '.';
+	}
+	*dp = 0;
+	return buf;
+}
+
--- a/sys/src/libc/9sys/mkfile
+++ b/sys/src/libc/9sys/mkfile
@@ -25,6 +25,7 @@
getpid.$O\
getppid.$O\
getwd.$O\
+ idn.$O\
iounit.$O\
nsec.$O\
nulldir.$O\