ref: d4df22aa1e8e2ee8d389be9fb485985c4d80d47e
parent: d0c89e469d1523eae383ff5b2dcf9d3bd758dc16
author: Simon Howard <fraggle@gmail.com>
date: Thu Feb 2 18:35:16 EST 2012
Beginnings of limited textscreen UTF-8 support. Subversion-branch: /trunk/chocolate-doom Subversion-revision: 2487
--- a/textscreen/txt_gui.c
+++ b/textscreen/txt_gui.c
@@ -28,6 +28,51 @@
typedef struct txt_cliparea_s txt_cliparea_t;
+// Mapping table that converts from the Extended ASCII codes in the
+// CP437 codepage to Unicode character numbers.  Entry N holds the
+// Unicode code point for CP437 byte 0x80 + N.
+static const uint16_t cp437_unicode[] = {
+    0x00c7, 0x00fc, 0x00e9, 0x00e2,         // 80-8f
+    0x00e4, 0x00e0, 0x00e5, 0x00e7,         // 87 is U+00E7; U+00E6 is 91
+    0x00ea, 0x00eb, 0x00e8, 0x00ef,
+    0x00ee, 0x00ec, 0x00c4, 0x00c5,
+
+    0x00c9, 0x00e6, 0x00c6, 0x00f4,         // 90-9f
+    0x00f6, 0x00f2, 0x00fb, 0x00f9,
+    0x00ff, 0x00d6, 0x00dc, 0x00a2,
+    0x00a3, 0x00a5, 0x20a7, 0x0192,
+
+    0x00e1, 0x00ed, 0x00f3, 0x00fa,         // a0-af
+    0x00f1, 0x00d1, 0x00aa, 0x00ba,
+    0x00bf, 0x2310, 0x00ac, 0x00bd,
+    0x00bc, 0x00a1, 0x00ab, 0x00bb,
+
+    0x2591, 0x2592, 0x2593, 0x2502,         // b0-bf
+    0x2524, 0x2561, 0x2562, 0x2556,
+    0x2555, 0x2563, 0x2551, 0x2557,
+    0x255D, 0x255C, 0x255B, 0x2510,
+
+    0x2514, 0x2534, 0x252C, 0x251C,         // c0-cf
+    0x2500, 0x253C, 0x255E, 0x255F,
+    0x255A, 0x2554, 0x2569, 0x2566,
+    0x2560, 0x2550, 0x256C, 0x2567,
+
+    0x2568, 0x2564, 0x2565, 0x2559,         // d0-df
+    0x2558, 0x2552, 0x2553, 0x256B,
+    0x256A, 0x2518, 0x250C, 0x2588,
+    0x2584, 0x258C, 0x2590, 0x2580,
+
+    0x03B1, 0x00DF, 0x0393, 0x03C0,         // e0-ef
+    0x03A3, 0x03C3, 0x00B5, 0x03C4,
+    0x03A6, 0x0398, 0x03A9, 0x03B4,
+    0x221E, 0x03C6, 0x03B5, 0x2229,
+
+    0x2261, 0x00B1, 0x2265, 0x2264,         // f0-ff
+    0x2320, 0x2321, 0x00F7, 0x2248,
+    0x00B0, 0x2219, 0x00B7, 0x221A,
+    0x207F, 0x00B2, 0x25A0, 0x00A0,
+};
+
struct txt_cliparea_s
{
int x1, x2;
@@ -242,6 +287,119 @@
{
TXT_GotoXY(x1, y);
TXT_PutChar(*p);
+ }
+
+ x1 += 1;
+ }
+ }
+
+ TXT_GotoXY(x + strlen(s), y);
+}
+
+// Decode one UTF-8 encoded character starting at *ptr.
+//
+// On success *ptr is stepped past the bytes consumed.  A malformed
+// sequence yields 0 and leaves *ptr where it was.
+static unsigned int DecodeUTF8(const char **ptr)
+{
+    const unsigned char *b = (const unsigned char *) *ptr;
+
+    // One byte (ASCII): 0xxxxxxx
+    if (b[0] < 0x80)
+    {
+        *ptr += 1;
+        return b[0];
+    }
+
+    // Two bytes: 110xxxxx 10xxxxxx
+    if ((b[0] & 0xe0) == 0xc0
+     && (b[1] & 0xc0) == 0x80)
+    {
+        *ptr += 2;
+        return ((b[0] & 0x1f) << 6)
+             | (b[1] & 0x3f);
+    }
+
+    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    if ((b[0] & 0xf0) == 0xe0
+     && (b[1] & 0xc0) == 0x80
+     && (b[2] & 0xc0) == 0x80)
+    {
+        *ptr += 3;
+        return ((b[0] & 0x0f) << 12)
+             | ((b[1] & 0x3f) << 6)
+             | (b[2] & 0x3f);
+    }
+
+    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    if ((b[0] & 0xf8) == 0xf0
+     && (b[1] & 0xc0) == 0x80
+     && (b[2] & 0xc0) == 0x80
+     && (b[3] & 0xc0) == 0x80)
+    {
+        *ptr += 4;
+        return ((b[0] & 0x07) << 18)
+             | ((b[1] & 0x3f) << 12)
+             | ((b[2] & 0x3f) << 6)
+             | (b[3] & 0x3f);
+    }
+
+    // Decode failure.  Don't bother with 5/6 byte sequences.
+    return 0;
+}
+
+// Output Unicode character c through TXT_PutChar as a CP437 code.
+static void PutUnicodeChar(unsigned int c)
+{
+    unsigned int slot = 0;
+
+    // Code points below 128 are passed through unchanged.
+    if (c < 128)
+    {
+        TXT_PutChar(c);
+        return;
+    }
+    // Anything else is only displayable if cp437_unicode lists it.
+    while (slot < 128 && cp437_unicode[slot] != c)
+    {
+        ++slot;
+    }
+
+    if (slot < 128)
+    {
+        TXT_PutChar(128 + slot);
+        return;
+    }
+    // Otherwise, print a fallback character (inverted question mark):
+    TXT_PutChar('\xa8');
+}
+
+void TXT_DrawUTF8String(const char *s)
+{
+ int x, y;
+ int x1;
+ const char *p;
+ unsigned int c;
+
+ TXT_GetXY(&x, &y);
+
+ if (VALID_Y(y))
+ {
+ x1 = x;
+
+ for (p = s; *p != '\0'; )
+ {
+ c = DecodeUTF8(&p);
+
+ if (c == 0)
+ {
+ break;
+ }
+
+ if (VALID_X(x1))
+ {
+ TXT_GotoXY(x1, y);
+ PutUnicodeChar(c);
}
x1 += 1;