ref: d2753b4d5f877b14426b55554945863364b0fbbf
parent: e0d114547c7f54ebd32b69f922e7d6538a63636e
author: Jacob Moody <moody@posixcafe.org>
date: Sun Mar 26 23:45:32 EDT 2023
tcs: add nfc and nfd output formats
--- a/sys/man/1/tcs
+++ b/sys/man/1/tcs
@@ -144,6 +144,12 @@
.TP
.B atari
Atari-ST character set
+.TP
+.B nfd
+Unicode Normalization Form D
+.TP
+.B nfc
+Unicode Normalization Form C
.SH EXAMPLES
.TP
.B tcs -f 8859-1
--- a/sys/src/cmd/tcs/hdr.h
+++ b/sys/src/cmd/tcs/hdr.h
@@ -23,6 +23,8 @@
void utf_in(int, long *, struct convert *);
void utf_out(Rune *, int, long *);
+void utfnfc_out(Rune *, int, long *);
+void utfnfd_out(Rune *, int, long *);
void isoutf_in(int, long *, struct convert *);
void isoutf_out(Rune *, int, long *);
--- a/sys/src/cmd/tcs/tcs.c
+++ b/sys/src/cmd/tcs/tcs.c
@@ -613,6 +613,10 @@
{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
+ { "nfc", "Unicode Normalization Form C", From|Func, 0, (Fnptr)utf_in },
+ { "nfc", "Unicode Normalization Form C", Func, 0, (Fnptr)utfnfc_out },
+ { "nfd", "Unicode Normalization Form D", From|Func, 0, (Fnptr)utf_in },
+ { "nfd", "Unicode Normalization Form D", Func, 0, (Fnptr)utfnfd_out },
{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
--- a/sys/src/cmd/tcs/utf.c
+++ b/sys/src/cmd/tcs/utf.c
@@ -69,6 +69,46 @@
}
+/*
+ * Back end shared by utfnfc_out and utfnfd_out.
+ *
+ * Normalizes the n runes at base with fn and writes the result through
+ * utf_out.  Only the leading part of the input that fullrunenorm accepts
+ * is normalized now; the tail it rejects is kept in the static rbuf and
+ * placed in front of the input of the next call.
+ *
+ * Assumes n <= N so that the carried runes plus the input fit in src
+ * (the buffer sizes imply this) -- TODO confirm against the callers.
+ * Assumes fn takes its limit in runes, NUL-terminates dst and returns the
+ * number of runes the full result needs, as rune(2) describes for
+ * runecomp/runedecomp -- TODO confirm.
+ */
void
+utfnorm_out(Rune *base, int n, int (*fn)(Rune*,Rune*,int))
+{
+	static Rune rbuf[32];	/* tail carried over from the previous call */
+	static int nremain = 0;	/* number of runes held in rbuf */
+	Rune src[N + 1 + nelem(rbuf)];
+	Rune dst[N + 1 + nelem(rbuf)];
+	Rune *p, *p2, *e, *out;
+	int i, len, max;
+
+	e = base+n;
+
+	/*
+	 * The carried runes go into src only.  n keeps counting the runes
+	 * left in base, because it is the read limit given to fullrunenorm.
+	 */
+	for(i = 0; i < nremain; i++)
+		src[i] = rbuf[i];
+	nremain = 0;
+
+	for(p2 = p = base; n > 0;){
+		p2 = fullrunenorm(p, n);
+		if(p == p2){
+			/* nothing accepted: carry the tail if rbuf can hold it */
+			if(n <= (int)nelem(rbuf))
+				break;
+			/* too long to carry; normalize it now rather than overflow rbuf */
+			p2 = e;
+		}
+		n -= p2-p;
+		for(; p < p2; p++)
+			src[i++] = *p;
+	}
+	src[i] = 0;
+
+	/* the limit is a count of runes, not of bytes */
+	out = dst;
+	max = nelem(dst);
+	len = fn(out, src, max);
+	if(len >= max){
+		/*
+		 * The result may not have fit in dst.  Retry in a buffer sized
+		 * from the length fn reported; if that cannot be allocated,
+		 * emit only what dst holds.
+		 */
+		out = malloc((len+1) * sizeof(Rune));
+		if(out == nil){
+			out = dst;
+			len = max-1;
+		}else{
+			max = len+1;
+			len = fn(out, src, max);
+			if(len >= max)
+				len = max-1;
+		}
+	}
+	utf_out(out, len, nil);
+	if(out != dst)
+		free(out);
+
+	/* at most nelem(rbuf) runes remain here, see the check in the loop */
+	for(; p2 < e; p2++)
+		rbuf[nremain++] = *p2;
+}
+
+void
+utfnfc_out(Rune *base, int n, long *)	/* output routine listed for "nfc" in the tcs.c table; the long* argument is unnamed and unused */
+{
+ utfnorm_out(base, n, runecomp);	/* hand the n runes at base to utfnorm_out with runecomp as the normalizer */
+}
+
+void
+utfnfd_out(Rune *base, int n, long *)	/* output routine listed for "nfd" in the tcs.c table; the long* argument is unnamed and unused */
+{
+ utfnorm_out(base, n, runedecomp);	/* hand the n runes at base to utfnorm_out with runedecomp as the normalizer */
+}
+
+void
isoutf_in(int fd, long *, struct convert *out)
{
char buf[N];