ref: 55edec2b8a9cdb40176a8e278a32ade57a6bd092
parent: 5db80edcd16092125f53ec247433eb14919982f9
author: aiju <devnull@localhost>
date: Fri Jun 1 20:02:29 EDT 2018
tcs: support EUC-JP JIS X 0212 codes
--- a/sys/src/cmd/tcs/conv_jis.c
+++ b/sys/src/cmd/tcs/conv_jis.c
@@ -10,6 +10,7 @@
#include "hdr.h"
#include "conv.h"
#include "kuten208.h"
+#include "kuten212.h"
#include "jis.h"
/*
@@ -195,7 +196,7 @@
static void
ujis(int c, Rune **r, long input_loc)
{
- static enum { state0, state1 } state = state0;
+ static enum { state0, state1, state2, state3 } state = state0;
static int lastc;
int n;
long l;
@@ -216,16 +217,12 @@
emit(BADMAP);
return;
}
- if(c == 0x8f){ /* codeset 3 */
- nerrors++;
- if(squawk)
- EPR "%s: unknown codeset 3 near byte %ld in %s\n", argv0, input_loc, file);
- if(!clean)
- emit(BADMAP);
- return;
+ if(c == 0x8f) /* codeset 3 */
+ state = state2;
+ else{
+ lastc = c;
+ state = state1;
}
- lastc = c;
- state = state1;
return;
case state1: /* two part char */
@@ -250,6 +247,56 @@
emit(l);
}
state = state0;
+ return;
+
+ case state2: /* three part char, part #2 */
+ if(c < 0){
+ if(squawk)
+ EPR "%s: unexpected EOF in %s\n", argv0, file);
+ c = 0xA1;
+ }
+ if(c < 0xa1 || c > 0xfe){
+ if(squawk)
+ EPR "%s: invalid byte 0x%x in codeset 3\n", argv0, c);
+ state = state0;
+ }else{
+ lastc = c;
+ state = state3;
+ }
+ return;
+
+ case state3: /* three part char, part #3 */
+ if(c < 0){
+ if(squawk)
+ EPR "%s: unexpected EOF in %s\n", argv0, file);
+ c = 0xA1;
+ }
+ if(c < 0xa1 || c > 0xfe){
+ if(squawk)
+ EPR "%s: invalid byte 0x%x in codeset 3\n", argv0, c);
+ state = state0;
+ return;
+ }
+
+ n = (lastc&0x7F)*100 + (c&0x7F) - 3232; /* kuten212 */
+ if((n >= KUTEN212MAX) || ((l = tabkuten212[n]) == -1)){
+ nerrors++;
+ if(squawk)
+ EPR "%s: unknown kuten212 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file);
+ if(!clean)
+ emit(BADMAP);
+ } else {
+ if(l < 0){
+ l = -l;
+ if(squawk)
+ EPR "%s: ambiguous kuten212 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file);
+ }
+ emit(l);
+ }
+ state = state0;
+ return;
+
+
}
}
--- a/sys/src/cmd/tcs/mkfile
+++ b/sys/src/cmd/tcs/mkfile
@@ -10,6 +10,7 @@
utf.$O\
html.$O\
kuten208.$O\
+ kuten212.$O\
gb.$O\
gbk.$O\
ksc.$O\