shithub: rd

Download patch

ref: 753c11b08d2adde70a23505f683dacfbf92c9e1a
parent: a417c3351cf35b6005ff5324b84caa5a1ec8097a
author: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
date: Tue Mar 31 15:01:00 EDT 2026

Add utf16_test.c with tests for toutf16 and fromutf16

Agent-Logs-Url: https://github.com/yrk-lab/rd/sessions/79734dde-a721-443f-b82b-9cf0d926d284

Co-authored-by: yrk-lab <11537279+yrk-lab@users.noreply.github.com>

--- a/efs_test.c
+++ b/efs_test.c
@@ -13,6 +13,7 @@
 int msgtests(void);
 int mppctests(void);
 int rletests(void);
+int utf16tests(void);
 
 void
 testsannrq()
@@ -130,6 +131,7 @@
 	msgtests();
 	mppctests();
 	rletests();
+	utf16tests();
 	print("ok\n");
 	exits(nil);
 }
--- a/mkfile
+++ b/mkfile
@@ -42,6 +42,7 @@
 	msg_test.$O	x224.$O mcs.$O ele.$O mpas.$O alloc.$O cap.$O egdi.$O \
 	mppc_test.$O	\
 	rle_test.$O	\
+	utf16_test.$O	\
 
 </sys/src/cmd/mkone
 #<$PLAN9/src/mkone
--- /dev/null
+++ b/utf16_test.c
@@ -1,0 +1,234 @@
+#include <u.h>
+#include <libc.h>
+#include "dat.h"
+#include "fns.h"
+
+int utf16tests(void);
+
+static int
+testtoutf161(void)
+{
+	/* toutf16, ASCII case: the one byte 'A' must yield 2 bytes, 41 00 (UTF-16LE); returns 0 when both checks pass */
+	char s[] = "A";
+	uchar buf[2];	/* room for exactly one 2-byte UTF-16 unit */
+	int n;
+
+	n = toutf16(buf, sizeof buf, s, 1);	/* ns=1: only the 'A' is offered, not the terminating NUL */
+	if(n != 2)
+		sysfatal("testtoutf161: len: want 2, got %d", n);
+	if(buf[0] != 0x41 || buf[1] != 0x00)
+		sysfatal("testtoutf161: bytes: want 41 00, got %02x %02x",
+			buf[0], buf[1]);
+	return 0;
+}
+
+static int
+testtoutf162(void)
+{
+	/*
+	 * Newline case: toutf16 is expected to insert a 0D 00 word before the
+	 * 0A 00 word, so the 1-byte input "\n" yields 4 bytes: 0D 00 0A 00.
+	 */
+	char s[] = "\n";
+	uchar buf[4];	/* two UTF-16 units: CR then LF */
+	int n;
+
+	n = toutf16(buf, sizeof buf, s, 1);
+	if(n != 4)
+		sysfatal("testtoutf162: len: want 4, got %d", n);
+	if(buf[0]!=0x0D || buf[1]!=0x00 || buf[2]!=0x0A || buf[3]!=0x00)
+		sysfatal("testtoutf162: bytes: want 0D 00 0A 00, got %02x %02x %02x %02x",
+			buf[0], buf[1], buf[2], buf[3]);
+	return 0;
+}
+
+static int
+testtoutf163(void)
+{
+	/* Empty input: with ns=0 toutf16 must report 0 bytes written; only the return value is checked */
+	uchar buf[2];	/* contents are never inspected by this case */
+	int n;
+
+	n = toutf16(buf, sizeof buf, "", 0);
+	if(n != 0)
+		sysfatal("testtoutf163: len: want 0, got %d", n);
+	return 0;
+}
+
+static int
+testtoutf164(void)
+{
+	/* Output too small: a 1-byte buf cannot hold one 2-byte UTF-16 unit, so toutf16 must return 0 */
+	char s[] = "A";
+	uchar buf[1];	/* deliberately one byte short of a full unit */
+	int n;
+
+	n = toutf16(buf, sizeof buf, s, 1);
+	if(n != 0)
+		sysfatal("testtoutf164: len: want 0, got %d", n);
+	return 0;
+}
+
+static int
+testtoutf165(void)
+{
+	/*
+	 * Two-byte UTF-8 input: U+00E9 'é' (C3 A9) must become the single
+	 * UTF-16LE unit E9 00; s is a raw byte array with no NUL, hence ns=2.
+	 */
+	uchar s[] = {0xC3, 0xA9};
+	uchar buf[2];
+	int n;
+
+	n = toutf16(buf, sizeof buf, (char*)s, 2);	/* cast: s is uchar but the call site passes char* */
+	if(n != 2)
+		sysfatal("testtoutf165: len: want 2, got %d", n);
+	if(buf[0] != 0xE9 || buf[1] != 0x00)
+		sysfatal("testtoutf165: bytes: want E9 00, got %02x %02x",
+			buf[0], buf[1]);
+	return 0;
+}
+
+static int
+testtoutf166(void)
+{
+	/*
+	 * Four-byte UTF-8 input: U+1F600 😀 (F0 9F 98 80) lies outside the BMP,
+	 * so the expected output is the surrogate pair D83D DE00, LE: 3D D8 00 DE.
+	 */
+	uchar s[] = {0xF0, 0x9F, 0x98, 0x80};
+	uchar buf[4];	/* two UTF-16 units: high surrogate then low surrogate */
+	int n;
+
+	n = toutf16(buf, sizeof buf, (char*)s, 4);
+	if(n != 4)
+		sysfatal("testtoutf166: len: want 4, got %d", n);
+	if(buf[0]!=0x3D || buf[1]!=0xD8 || buf[2]!=0x00 || buf[3]!=0xDE)
+		sysfatal("testtoutf166: bytes: want 3D D8 00 DE, got %02x %02x %02x %02x",
+			buf[0], buf[1], buf[2], buf[3]);
+	return 0;
+}
+
+static int
+testfromutf161(void)
+{
+	/* fromutf16, ASCII case: the 2-byte unit 41 00 (nw=2) must decode to the single byte 'A' */
+	uchar ws[] = {0x41, 0x00};
+	char buf[1];	/* exactly the one output byte expected */
+	int n;
+
+	n = fromutf16(buf, sizeof buf, ws, 2);
+	if(n != 1)
+		sysfatal("testfromutf161: len: want 1, got %d", n);
+	if(buf[0] != 'A')
+		sysfatal("testfromutf161: byte: want 'A', got %02x", (uchar)buf[0]);
+	return 0;
+}
+
+static int
+testfromutf162(void)
+{
+	/*
+	 * CR LF case: for UTF-16LE input 0D 00 0A 00, fromutf16 is expected to
+	 * discard the CR, so the 1-byte buf receives only '\n' and n is 1.
+	 */
+	uchar ws[] = {0x0D, 0x00, 0x0A, 0x00};
+	char buf[1];	/* one byte: the test relies on CR producing no output */
+	int n;
+
+	n = fromutf16(buf, sizeof buf, ws, 4);
+	if(n != 1)
+		sysfatal("testfromutf162: len: want 1, got %d", n);
+	if(buf[0] != '\n')
+		sysfatal("testfromutf162: byte: want '\\n', got %02x", (uchar)buf[0]);
+	return 0;
+}
+
+static int
+testfromutf163(void)
+{
+	/* Empty input: with nw=0 fromutf16 must report 0 bytes written; only the return value is checked */
+	uchar ws[1];	/* left uninitialized: nw=0 below offers none of it as input */
+	char buf[4];
+	int n;
+
+	n = fromutf16(buf, sizeof buf, ws, 0);
+	if(n != 0)
+		sysfatal("testfromutf163: len: want 0, got %d", n);
+	return 0;
+}
+
+static int
+testfromutf164(void)
+{
+	/*
+	 * Truncation case: input is "AB" in UTF-16LE (41 00 42 00) but buf holds
+	 * 1 byte, so fromutf16 is expected to write only 'A' and return 1.
+	 */
+	uchar ws[] = {0x41, 0x00, 0x42, 0x00};
+	char buf[1];	/* deliberately too small for both characters */
+	int n;
+
+	n = fromutf16(buf, sizeof buf, ws, 4);
+	if(n != 1)
+		sysfatal("testfromutf164: len: want 1, got %d", n);
+	if(buf[0] != 'A')
+		sysfatal("testfromutf164: byte: want 'A', got %02x", (uchar)buf[0]);
+	return 0;
+}
+
+static int
+testfromutf165(void)
+{
+	/*
+	 * BMP, non-ASCII: unit E9 00 (U+00E9 'é') must decode to 2 UTF-8 bytes, C3 A9.
+	 */
+	uchar ws[] = {0xE9, 0x00};
+	uchar buf[2];	/* uchar so the comparisons with C3/A9 do not depend on char signedness */
+	int n;
+
+	n = fromutf16((char*)buf, sizeof buf, ws, 2);
+	if(n != 2)
+		sysfatal("testfromutf165: len: want 2, got %d", n);
+	if(buf[0] != 0xC3 || buf[1] != 0xA9)
+		sysfatal("testfromutf165: bytes: want C3 A9, got %02x %02x",
+			buf[0], buf[1]);
+	return 0;
+}
+
+static int
+testfromutf166(void)
+{
+	/*
+	 * Surrogate pair: bytes 3D D8 00 DE (units D83D DE00) must combine into U+1F600 😀, UTF-8 F0 9F 98 80.
+	 */
+	uchar ws[] = {0x3D, 0xD8, 0x00, 0xDE};
+	uchar buf[4];	/* uchar so the comparisons with bytes above 7F do not depend on char signedness */
+	int n;
+
+	n = fromutf16((char*)buf, sizeof buf, ws, 4);
+	if(n != 4)
+		sysfatal("testfromutf166: len: want 4, got %d", n);
+	if(buf[0]!=0xF0 || buf[1]!=0x9F || buf[2]!=0x98 || buf[3]!=0x80)
+		sysfatal("testfromutf166: bytes: want F0 9F 98 80, got %02x %02x %02x %02x",
+			buf[0], buf[1], buf[2], buf[3]);
+	return 0;
+}
+
+int
+utf16tests(void)
+{
+	testtoutf161();	/* toutf16 cases first; each case calls sysfatal on a mismatch */
+	testtoutf162();
+	testtoutf163();
+	testtoutf164();
+	testtoutf165();
+	testtoutf166();
+	testfromutf161();	/* then the fromutf16 cases */
+	testfromutf162();
+	testfromutf163();
+	testfromutf164();
+	testfromutf165();
+	testfromutf166();
+	return 0;	/* per-case return values are ignored; the result here is always 0 */
+}
--