shithub: riscv

Download patch

ref: d2753b4d5f877b14426b55554945863364b0fbbf
parent: e0d114547c7f54ebd32b69f922e7d6538a63636e
author: Jacob Moody <moody@posixcafe.org>
date: Sun Mar 26 23:45:32 EDT 2023

tcs: add nfc and nfd output formats

--- a/sys/man/1/tcs
+++ b/sys/man/1/tcs
@@ -144,6 +144,12 @@
 .TP
 .B atari
 Atari-ST character set
+.TP
+.B nfd
+Unicode Normalization Form D
+.TP
+.B nfc
+Unicode Normalization Form C
 .SH EXAMPLES
 .TP
 .B tcs -f 8859-1
--- a/sys/src/cmd/tcs/hdr.h
+++ b/sys/src/cmd/tcs/hdr.h
@@ -23,6 +23,8 @@
 
 void utf_in(int, long *, struct convert *);
 void utf_out(Rune *, int, long *);
+void utfnfc_out(Rune *, int, long *);
+void utfnfd_out(Rune *, int, long *);
 void isoutf_in(int, long *, struct convert *);
 void isoutf_out(Rune *, int, long *);
 
--- a/sys/src/cmd/tcs/tcs.c
+++ b/sys/src/cmd/tcs/tcs.c
@@ -613,6 +613,10 @@
 	{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
 	{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
 	{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
+	{ "nfc", "Unicode Normalization Form C", From|Func, 0, (Fnptr)utf_in },
+	{ "nfc", "Unicode Normalization Form C", Func, 0, (Fnptr)utfnfc_out },
+	{ "nfd", "Unicode Normalization Form D", From|Func, 0, (Fnptr)utf_in },
+	{ "nfd", "Unicode Normalization Form D", Func, 0, (Fnptr)utfnfd_out },
 	{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
 	{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
 	{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
--- a/sys/src/cmd/tcs/utf.c
+++ b/sys/src/cmd/tcs/utf.c
@@ -69,6 +69,46 @@
 }
 
 void
+utfnorm_out(Rune *base, int n, int (*fn)(Rune*,Rune*,int))	/* normalize the n runes at base with fn and write the result with utf_out */
+{
+	static Rune rbuf[32];	/* tail of base that fullrunenorm did not release; prepended on the next call */
+	static int nremain = 0;	/* number of runes currently held in rbuf */
+	Rune src[N + 1 + nelem(rbuf)];	/* carry-over + released runes of base + terminating 0 */
+	Rune dst[4*nelem(src)];	/* canonical decomposition can lengthen the text (Unicode bound: at most 4 code points per code point) */
+	Rune *p, *p2, *e;
+	int i;
+	e = base+n;
+	for(i = 0; i < nremain; i++)	/* n must stay the rune count of base: the carry-over is in rbuf, not in base */
+		src[i] = rbuf[i];
+	nremain = 0;
+	for(p2 = p = base; n > 0;){
+		p2 = fullrunenorm(p, n);
+		if(p == p2)	/* no progress: what is left of base is held back for the next call */
+			break;
+		n -= p2-p;
+		for(;p < p2; p++)
+			src[i++] = *p;
+	}
+	src[i] = 0;
+	i = fn(dst, src, nelem(dst));	/* NOTE(review): limit given in runes, not bytes; assumes fn returns the length needed and 0-terminates a cut result, per runecomp(2) - confirm */
+	utf_out(dst, i < nelem(dst) ? i : nelem(dst)-1, nil);	/* never hand utf_out more runes than dst holds */
+	for(; p2 < e && nremain < nelem(rbuf); p2++)	/* never store past the end of rbuf */
+		rbuf[nremain++] = *p2;
+}
+
+void
+utfnfc_out(Rune *base, int n, long *)	/* "nfc" output routine: passes the n runes at base to utfnorm_out with runecomp as the normalizer; the third argument is unused */
+{
+	utfnorm_out(base, n, runecomp);
+}
+
+void
+utfnfd_out(Rune *base, int n, long *)	/* "nfd" output routine: passes the n runes at base to utfnorm_out with runedecomp as the normalizer; the third argument is unused */
+{
+	utfnorm_out(base, n, runedecomp);
+}
+
+void
 isoutf_in(int fd, long *, struct convert *out)
 {
 	char buf[N];