shithub: misc

ref: 001e60ea4b45fec9cb40ab8fbbebc334b4d3e9f4
dir: /9legacy/9front_libsec.diff/

View raw version
--- /sys/src/libc/9sys/pushtls.c	Thu Jul 15 09:41:49 2021
+++ /sys/src/libc/9sys/pushtls.c	Mon Jul 12 10:56:52 2021
@@ -42,14 +42,14 @@
 pushtls(int fd, char *hashalg, char *encalg, int isclient, char *secret, char *dir)
 {
 	char buf[8];
-	char dname[64];
+	char dname[32];
 	int n, data, ctl, hand;
 
 	// open a new filter; get ctl fd
 	data = hand = -1;
 	// /net/tls uses decimal file descriptors to name channels, hence a
 	// user-level file server can't stand in for #a; may as well hard-code it.
-	ctl = open("#a/tls/clone", ORDWR);
+	ctl = open("#a/tls/clone", ORDWR|OCEXEC);
 	if(ctl < 0)
 		goto error;
 	n = read(ctl, buf, sizeof(buf)-1);
@@ -60,14 +60,14 @@
 		sprint(dir, "#a/tls/%s", buf);
 
 	// get application fd
-	snprint(dname, sizeof dname, "#a/tls/%s/data", buf);
+	snprint(dname, sizeof(dname), "#a/tls/%s/data", buf);
 	data = open(dname, ORDWR);
 	if(data < 0)
 		goto error;
 
 	// get handshake fd
-	snprint(dname, sizeof dname, "#a/tls/%s/hand", buf);
-	hand = open(dname, ORDWR);
+	snprint(dname, sizeof(dname), "#a/tls/%s/hand", buf);
+	hand = open(dname, ORDWR|OCEXEC);
 	if(hand < 0)
 		goto error;
 
--- sys/src/ape/lib/mp/port/libc.h	Thu Jul  1 17:29:16 2021
+++ /sys/src/ape/lib/mp/port/libc.h	Tue Jun 29 01:46:14 2021
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <utf.h>
 #include <fmt.h>
+#include <assert.h>
 #ifndef _SUSV2_SOURCE
 #define _SUSV2_SOURCE
 #include <inttypes.h>
--- sys/src/ape/lib/9/mkfile	Wed May 29 01:18:14 2013
+++ /sys/src/ape/lib/9/mkfile	Tue Jun 29 23:03:17 2021
@@ -14,6 +14,7 @@
 	segflush.$O\
 	segfree.$O\
 	setmalloctag.$O\
+	setrealloctag.$O\
 	sysfatal.$O\
 	tokenize.$O\
 	truerand.$O\
--- sys/src/ape/lib/9/libc.h	Thu Jul  1 17:29:16 2021
+++ /sys/src/ape/lib/9/libc.h	Tue Jun 29 23:29:25 2021
@@ -145,7 +145,8 @@
 
 #define ERRMAX 128
 
-extern	void	setmalloctag(void*, ulong);
+extern	void		setmalloctag(void*, ulong);
+extern	void		setrealloctag(void*, uintptr_t);
 extern	uintptr_t getcallerpc(void*);
 
 /* Used in libsec.h and not picked up in earlier type definitions */
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/ape/lib/9/setrealloctag.c	Tue Jun 29 23:29:29 2021
@@ -0,0 +1,12 @@
+#ifndef _SUSV2_SOURCE
+#define _SUSV2_SOURCE
+#include <inttypes.h>
+#undef	_SUSV2_SOURCE
+#else
+#include <inttypes.h>
+#endif
+
+void
+setrealloctag(void*, uintptr_t)
+{
+}
--- sys/src/ape/lib/sec/port/libc.h	Thu Sep 19 23:24:49 2013
+++ /sys/src/ape/lib/sec/port/libc.h	Tue Jun 29 23:31:40 2021
@@ -2,6 +2,14 @@
 #define _QLOCK_EXTENSION
 #define _BSD_EXTENSION
 #include <sys/types.h>
+#ifndef _SUSV2_SOURCE
+#define _SUSV2_SOURCE
+#include <inttypes.h>
+#undef	_SUSV2_SOURCE
+#else
+#include <inttypes.h>
+#endif
+
 #include <lock.h>
 #include <qlock.h>
 #include <lib9.h>
@@ -170,6 +178,7 @@
 #define ERRMAX 128
 
 extern	void	setmalloctag(void*, ulong);
+extern	void	setrealloctag(void*, uintptr_t);
 extern	ulong	getcallerpc(void*);
 
 /* Used in libsec.h and not picked up in earlier type definitions */
--- sys/include/ape/libsec.h	Wed Jan 30 22:18:23 2013
+++ /sys/include/ape/libsec.h	Mon Jul 12 11:08:00 2021
@@ -26,28 +26,47 @@
 struct AESstate
 {
 	ulong	setup;
+	ulong	offset;
 	int	rounds;
 	int	keybytes;
 	uint	ctrsz;
+	void	*ekey;				/* expanded encryption round key */
+	void	*dkey;				/* expanded decryption round key */
 	uchar	key[AESmaxkey];			/* unexpanded key */
-	ulong	ekey[4*(AESmaxrounds + 1)];	/* encryption key */
-	ulong	dkey[4*(AESmaxrounds + 1)];	/* decryption key */
 	uchar	ivec[AESbsize];			/* initialization vector */
-	uchar	mackey[3 * AESbsize];		/* 3 XCBC mac 96 keys */
+	uchar	storage[512];			/* storage for expanded keys */
 };
 
 /* block ciphers */
-void	aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
-void	aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
+extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
+extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
 
-void	setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
+void	setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
+
+void	aesCTRencrypt(uchar *p, int len, AESstate *s);
+void	aesCTRdecrypt(uchar *p, int len, AESstate *s);
 void	aesCBCencrypt(uchar *p, int len, AESstate *s);
 void	aesCBCdecrypt(uchar *p, int len, AESstate *s);
-void	aesCTRdecrypt(uchar *p, int len, AESstate *s);
-void	aesCTRencrypt(uchar *p, int len, AESstate *s);
+void	aesCFBencrypt(uchar *p, int len, AESstate *s);
+void	aesCFBdecrypt(uchar *p, int len, AESstate *s);
+void	aesOFBencrypt(uchar *p, int len, AESstate *s);
+
+void	aes_xts_encrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len);
+void	aes_xts_decrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len);
+
+typedef struct AESGCMstate AESGCMstate;
+struct AESGCMstate
+{
+	AESstate;
+
+	ulong	H[4];
+	ulong	M[16][256][4];
+};
 
-void	setupAESXCBCstate(AESstate *s);
-uchar*	aesXCBCmac(uchar *p, int len, AESstate *s);
+void	setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen);
+void	aesgcm_setiv(AESGCMstate *s, uchar *iv, int ivlen);
+void	aesgcm_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s);
+int	aesgcm_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s);
 
 /*
  * Blowfish Definitions
@@ -79,6 +98,76 @@
 void	bfECBdecrypt(uchar*, int, BFstate*);
 
 /*
+ * Chacha definitions
+ */
+
+enum
+{
+	ChachaBsize=	64,
+	ChachaKeylen=	256/8,
+	ChachaIVlen=	96/8,
+	XChachaIVlen=	192/8,
+};
+
+typedef struct Chachastate Chachastate;
+struct Chachastate
+{
+	union{
+		u32int	input[16];
+		struct {
+			u32int	constant[4];
+			u32int	key[8];
+			u32int	counter;
+			u32int	iv[3];
+		};
+	};
+	u32int	xkey[8];
+	int	rounds;
+	int	ivwords;
+};
+
+void	setupChachastate(Chachastate*, uchar*, ulong, uchar*, ulong, int);
+void	chacha_setiv(Chachastate *, uchar*);
+void	chacha_setblock(Chachastate*, u64int);
+void	chacha_encrypt(uchar*, ulong, Chachastate*);
+void	chacha_encrypt2(uchar*, uchar*, ulong, Chachastate*);
+
+void	hchacha(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds);
+
+void	ccpoly_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
+int	ccpoly_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
+
+/*
+ * Salsa definitions
+ */
+enum
+{
+	SalsaBsize=	64,
+	SalsaKeylen=	256/8,
+	SalsaIVlen=	64/8,
+	XSalsaIVlen=	192/8,
+};
+
+typedef struct Salsastate Salsastate;
+struct Salsastate
+{
+	u32int	input[16];
+	u32int	xkey[8];
+	int	rounds;
+	int	ivwords;
+};
+
+void	setupSalsastate(Salsastate*, uchar*, ulong, uchar*, ulong, int);
+void	salsa_setiv(Salsastate*, uchar*);
+void	salsa_setblock(Salsastate*, u64int);
+void	salsa_encrypt(uchar*, ulong, Salsastate*);
+void	salsa_encrypt2(uchar*, uchar*, ulong, Salsastate*);
+
+void	salsa_core(u32int in[16], u32int out[16], int rounds);
+
+void	hsalsa(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds);
+
+/*
  * DES definitions
  */
 
@@ -149,7 +238,8 @@
 	SHA2_512dlen=	64,	/* SHA-512 digest length */
 	MD4dlen=	16,	/* MD4 digest length */
 	MD5dlen=	16,	/* MD5 digest length */
-	AESdlen=	16,	/* TODO: see rfc */
+	RIPEMD160dlen=	20,	/* RIPEMD-160 digest length */
+	Poly1305dlen=	16,	/* Poly1305 digest length */
 
 	Hmacblksz	= 64,	/* in bytes; from rfc2104 */
 };
@@ -159,7 +249,7 @@
 {
 	uvlong	len;
 	union {
-		u32int	state[8];
+		u32int	state[16];
 		u64int	bstate[8];
 	};
 	uchar	buf[256];
@@ -175,16 +265,15 @@
 typedef struct DigestState SHA2_512state;
 typedef struct DigestState MD5state;
 typedef struct DigestState MD4state;
-typedef struct DigestState AEShstate;
 
 DigestState*	md4(uchar*, ulong, uchar*, DigestState*);
 DigestState*	md5(uchar*, ulong, uchar*, DigestState*);
+DigestState*	ripemd160(uchar *, ulong, uchar *, DigestState *);
 DigestState*	sha1(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_224(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_256(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_384(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_512(uchar*, ulong, uchar*, DigestState*);
-DigestState*	aes(uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_x(uchar *p, ulong len, uchar *key, ulong klen,
 			uchar *digest, DigestState *s,
 			DigestState*(*x)(uchar*, ulong, uchar*, DigestState*),
@@ -195,7 +284,7 @@
 DigestState*	hmac_sha2_256(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_sha2_384(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_sha2_512(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
-DigestState*	hmac_aes(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+DigestState*	poly1305(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 char*		md5pickle(MD5state*);
 MD5state*	md5unpickle(char*);
 char*		sha1pickle(SHA1state*);
@@ -280,15 +369,27 @@
 void		rsaprivfree(RSApriv*);
 RSApub*		rsaprivtopub(RSApriv*);
 RSApub*		X509toRSApub(uchar*, int, char*, int);
+RSApub*		asn1toRSApub(uchar*, int);
 RSApriv*	asn1toRSApriv(uchar*, int);
 void		asn1dump(uchar *der, int len);
 uchar*		decodePEM(char *s, char *type, int *len, char **new_s);
 PEMChain*	decodepemchain(char *s, char *type);
-uchar*		X509gen(RSApriv *priv, char *subj, ulong valid[2], int *certlen);
-uchar*		X509req(RSApriv *priv, char *subj, int *certlen);
-char*		X509verify(uchar *cert, int ncert, RSApub *pk);
+uchar*		X509rsagen(RSApriv *priv, char *subj, ulong valid[2], int *certlen);
+uchar*		X509rsareq(RSApriv *priv, char *subj, int *certlen);
+char*		X509rsaverify(uchar *cert, int ncert, RSApub *pk);
+char*		X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk);
+
 void		X509dump(uchar *cert, int ncert);
 
+mpint*		pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		asn1encodeRSApub(RSApub *pk, uchar *buf, int len);
+int		asn1encodeRSApriv(RSApriv *k, uchar *buf, int len);
+int		asn1encodedigest(DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*),
+			uchar *digest, uchar *buf, int len);
+
+int		X509digestSPKI(uchar *, int, DigestState* (*)(uchar*, ulong, uchar*, DigestState*), uchar *);
+
 /*
  * elgamal
  */
@@ -376,21 +477,26 @@
  */
 typedef struct Thumbprint{
 	struct Thumbprint *next;
-	uchar	sha1[SHA1dlen];
+	uchar	hash[SHA2_256dlen];
+	uchar	len;
 } Thumbprint;
 
 typedef struct TLSconn{
 	char	dir[40];	/* connection directory */
 	uchar	*cert;	/* certificate (local on input, remote on output) */
 	uchar	*sessionID;
+	uchar	*psk;
 	int	certlen;
 	int	sessionIDlen;
+	int	psklen;
 	int	(*trace)(char*fmt, ...);
 	PEMChain*chain;	/* optional extra certificate evidence for servers to present */
 	char	*sessionType;
 	uchar	*sessionKey;
 	int	sessionKeylen;
 	char	*sessionConst;
+	char	*serverName;
+	char	*pskID;
 } TLSconn;
 
 /* tlshand.c */
@@ -398,12 +504,106 @@
 int tlsServer(int fd, TLSconn *c);
 
 /* thumb.c */
-Thumbprint* initThumbprints(char *ok, char *crl);
+Thumbprint* initThumbprints(char *ok, char *crl, char *tag);
 void	freeThumbprints(Thumbprint *ok);
-int	okThumbprint(uchar *sha1, Thumbprint *ok);
+int	okThumbprint(uchar *hash, int len, Thumbprint *ok);
+int	okCertificate(uchar *cert, int len, Thumbprint *ok);
 
 /* readcert.c */
 uchar	*readcert(char *filename, int *pcertlen);
 PEMChain*readcertchain(char *filename);
+
+typedef struct ECpoint{
+	int inf;
+	mpint *x;
+	mpint *y;
+	mpint *z;	/* nil when using affine coordinates */
+} ECpoint;
+
+typedef ECpoint ECpub;
+typedef struct ECpriv{
+	ECpoint;
+	mpint *d;
+} ECpriv;
+
+typedef struct ECdomain{
+	mpint *p;
+	mpint *a;
+	mpint *b;
+	ECpoint G;
+	mpint *n;
+	mpint *h;
+} ECdomain;
+
+void	ecdominit(ECdomain *, void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h));
+void	ecdomfree(ECdomain *);
+
+void	ecassign(ECdomain *, ECpoint *old, ECpoint *new);
+void	ecadd(ECdomain *, ECpoint *a, ECpoint *b, ECpoint *s);
+void	ecmul(ECdomain *, ECpoint *a, mpint *k, ECpoint *s);
+ECpoint*	strtoec(ECdomain *, char *, char **, ECpoint *);
+ECpriv*	ecgen(ECdomain *, ECpriv*);
+int	ecverify(ECdomain *, ECpoint *);
+int	ecpubverify(ECdomain *, ECpub *);
+void	ecdsasign(ECdomain *, ECpriv *, uchar *, int, mpint *, mpint *);
+int	ecdsaverify(ECdomain *, ECpub *, uchar *, int, mpint *, mpint *);
+void	base58enc(uchar *, char *, int);
+int	base58dec(char *, uchar *, int);
+
+ECpub*	ecdecodepub(ECdomain *dom, uchar *, int);
+int	ecencodepub(ECdomain *dom, ECpub *, uchar *, int);
+void	ecpubfree(ECpub *);
+
+ECpub*	X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom);
+char*	X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pub);
+char*	X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub);
+
+/* curves */
+void	secp256r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp256k1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp384r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+
+/*
+ * Diffie-Hellman key exchange
+ */
+
+typedef struct DHstate DHstate;
+struct DHstate
+{
+	mpint	*g;	/* base g */
+	mpint	*p;	/* large prime */
+	mpint	*q;	/* subgroup prime */
+	mpint	*x;	/* random secret */
+	mpint	*y;	/* public key y = g**x % p */
+};
+
+/* generate new public key: y = g**x % p */
+mpint* dh_new(DHstate *dh, mpint *p, mpint *q, mpint *g);
+
+/* calculate shared key: k = y**x % p */
+mpint* dh_finish(DHstate *dh, mpint *y);
+
+/* Curve25519 elliptic curve, public key function */
+void curve25519(uchar mypublic[32], uchar secret[32], uchar basepoint[32]);
+
+/* Curve25519 diffie hellman */
+void curve25519_dh_new(uchar x[32], uchar y[32]);
+int curve25519_dh_finish(uchar x[32], uchar y[32], uchar z[32]);
+
+/* password-based key derivation function 2 (rfc2898) */
+void pbkdf2_x(uchar *p, ulong plen, uchar *s, ulong slen, ulong rounds, uchar *d, ulong dlen,
+	DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*), int xlen);
+
+/* scrypt password-based key derivation function */
+char* scrypt(uchar *p, ulong plen, uchar *s, ulong slen,
+	ulong N, ulong R, ulong P,
+	uchar *d, ulong dlen);
+
+/* hmac-based key derivation function (rfc5869) */
+void hkdf_x(uchar *salt, ulong nsalt, uchar *info, ulong ninfo, uchar *key, ulong nkey, uchar *d, ulong dlen,
+	DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*), int xlen);
+
+/* timing safe memcmp() */
+int tsmemcmp(void*, void*, ulong);
 
 #endif
--- sys/include/ape/mp.h	Mon Feb  4 20:12:44 2013
+++ /sys/include/ape/mp.h	Tue Jun 29 01:45:21 2021
@@ -32,6 +32,10 @@
 enum
 {
 	MPstatic=	0x01,
+	MPnorm=		0x02,	/* normalization status */
+	MPtimesafe=	0x04,	/* request time invariant computation */
+	MPfield=	0x08,	/* this mpint is a field modulus */
+
 	Dbytes=		sizeof(mpdigit),	/* bytes per digit */
 	Dbits=		Dbytes*8		/* bits per digit */
 };
@@ -41,12 +45,14 @@
 mpint*	mpnew(int n);		/* create a new mpint with at least n bits */
 void	mpfree(mpint *b);
 void	mpbits(mpint *b, int n);	/* ensure that b has at least n bits */
-void	mpnorm(mpint *b);		/* dump leading zeros */
+mpint*	mpnorm(mpint *b);		/* dump leading zeros */
 mpint*	mpcopy(mpint *b);
 void	mpassign(mpint *old, mpint *new);
 
 /* random bits */
 mpint*	mprand(int bits, void (*gen)(uchar*, int), mpint *b);
+/* return uniform random [0..n-1] */
+mpint*	mpnrand(mpint *n, void (*gen)(uchar*, int), mpint *b);
 
 /* conversion */
 mpint*	strtomp(char*, char**, int, mpint*);	/* ascii */
@@ -56,6 +62,7 @@
 int	mptole(mpint*, uchar*, uint, uchar**);
 mpint*	betomp(uchar*, uint, mpint*);	/* byte array, little-endian */
 int	mptobe(mpint*, uchar*, uint, uchar**);
+void	mptober(mpint *b, uchar *p, int n);
 uint	mptoui(mpint*);			/* unsigned int */
 mpint*	uitomp(uint, mpint*);
 int	mptoi(mpint*);			/* int */
@@ -78,6 +85,11 @@
 void	mpexp(mpint *b, mpint *e, mpint *m, mpint *res);	/* res = b**e mod m */
 void	mpmod(mpint *b, mpint *m, mpint *remainder);	/* remainder = b mod m */
 
+/* modular arithmetic, time invariant when 0≤b1≤m-1 and 0≤b2≤m-1 */
+void	mpmodadd(mpint *b1, mpint *b2, mpint *m, mpint *sum);	/* sum = b1+b2 % m */
+void	mpmodsub(mpint *b1, mpint *b2, mpint *m, mpint *diff);	/* diff = b1-b2 % m */
+void	mpmodmul(mpint *b1, mpint *b2, mpint *m, mpint *prod);	/* prod = b1*b2 % m */
+
 /* quotient = dividend/divisor, remainder = dividend % divisor */
 void	mpdiv(mpint *dividend, mpint *divisor,  mpint *quotient, mpint *remainder);
 
@@ -148,6 +160,19 @@
 void	crtprefree(CRTpre*);
 void	crtresfree(CRTres*);
 
+/* fast field arithmetic */
+typedef struct Mfield	Mfield;
+
+struct Mfield
+{
+	mpint;
+	int	(*reduce)(Mfield*, mpint*, mpint*);
+};
+
+mpint *mpfield(mpint*);
+
+Mfield *gmfield(mpint*);
+Mfield *cnfield(mpint*);
 
 #pragma	varargck	type	"B"	mpint*
 #endif
--- sys/include/libsec.h	Thu Jul 15 08:07:50 2021
+++ /sys/include/libsec.h	Mon Jul 12 11:08:58 2021
@@ -21,28 +21,47 @@
 struct AESstate
 {
 	ulong	setup;
+	ulong	offset;
 	int	rounds;
 	int	keybytes;
 	uint	ctrsz;
+	void	*ekey;				/* expanded encryption round key */
+	void	*dkey;				/* expanded decryption round key */
 	uchar	key[AESmaxkey];			/* unexpanded key */
-	ulong	ekey[4*(AESmaxrounds + 1)];	/* encryption key */
-	ulong	dkey[4*(AESmaxrounds + 1)];	/* decryption key */
 	uchar	ivec[AESbsize];			/* initialization vector */
-	uchar	mackey[3 * AESbsize];		/* 3 XCBC mac 96 keys */
+	uchar	storage[512];			/* storage for expanded keys */
 };
 
 /* block ciphers */
-void	aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
-void	aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
+extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
+extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
 
-void	setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
+void	setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
+
+void	aesCTRencrypt(uchar *p, int len, AESstate *s);
+void	aesCTRdecrypt(uchar *p, int len, AESstate *s);
 void	aesCBCencrypt(uchar *p, int len, AESstate *s);
 void	aesCBCdecrypt(uchar *p, int len, AESstate *s);
-void	aesCTRdecrypt(uchar *p, int len, AESstate *s);
-void	aesCTRencrypt(uchar *p, int len, AESstate *s);
+void	aesCFBencrypt(uchar *p, int len, AESstate *s);
+void	aesCFBdecrypt(uchar *p, int len, AESstate *s);
+void	aesOFBencrypt(uchar *p, int len, AESstate *s);
+
+void	aes_xts_encrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len);
+void	aes_xts_decrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len);
+
+typedef struct AESGCMstate AESGCMstate;
+struct AESGCMstate
+{
+	AESstate;
+
+	ulong	H[4];
+	ulong	M[16][256][4];
+};
 
-void	setupAESXCBCstate(AESstate *s);
-uchar*	aesXCBCmac(uchar *p, int len, AESstate *s);
+void	setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen);
+void	aesgcm_setiv(AESGCMstate *s, uchar *iv, int ivlen);
+void	aesgcm_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s);
+int	aesgcm_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s);
 
 /*
  * Blowfish Definitions
@@ -77,37 +96,71 @@
  * Chacha definitions
  */
 
-enum{
+enum
+{
 	ChachaBsize=	64,
 	ChachaKeylen=	256/8,
-	ChachaIVlen=	96/8
+	ChachaIVlen=	96/8,
+	XChachaIVlen=	192/8,
 };
 
 typedef struct Chachastate Chachastate;
 struct Chachastate
 {
-	/*
-	 * 0-3:	a constant (sigma or tau)
-	 * 4-11:	the key
-	 * 12:	block counter
-	 * 13-15:	IV
-	 */
 	union{
 		u32int	input[16];
-		struct{
+		struct {
 			u32int	constant[4];
 			u32int	key[8];
 			u32int	counter;
 			u32int	iv[3];
 		};
 	};
+	u32int	xkey[8];
 	int	rounds;
+	int	ivwords;
 };
 
-void	setupChachastate(Chachastate*, uchar*, usize, uchar*, int);
-void	chacha_setblock(Chachastate*, u32int);
-void	chacha_encrypt(uchar*, usize, Chachastate*);
-void	chacha_encrypt2(uchar*, uchar*, usize, Chachastate*);
+void	setupChachastate(Chachastate*, uchar*, ulong, uchar*, ulong, int);
+void	chacha_setiv(Chachastate *, uchar*);
+void	chacha_setblock(Chachastate*, u64int);
+void	chacha_encrypt(uchar*, ulong, Chachastate*);
+void	chacha_encrypt2(uchar*, uchar*, ulong, Chachastate*);
+
+void	hchacha(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds);
+
+void	ccpoly_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
+int	ccpoly_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
+
+/*
+ * Salsa definitions
+ */
+enum
+{
+	SalsaBsize=	64,
+	SalsaKeylen=	256/8,
+	SalsaIVlen=	64/8,
+	XSalsaIVlen=	192/8,
+};
+
+typedef struct Salsastate Salsastate;
+struct Salsastate
+{
+	u32int	input[16];
+	u32int	xkey[8];
+	int	rounds;
+	int	ivwords;
+};
+
+void	setupSalsastate(Salsastate*, uchar*, ulong, uchar*, ulong, int);
+void	salsa_setiv(Salsastate*, uchar*);
+void	salsa_setblock(Salsastate*, u64int);
+void	salsa_encrypt(uchar*, ulong, Salsastate*);
+void	salsa_encrypt2(uchar*, uchar*, ulong, Salsastate*);
+
+void	salsa_core(u32int in[16], u32int out[16], int rounds);
+
+void	hsalsa(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds);
 
 /*
  * DES definitions
@@ -180,7 +233,8 @@
 	SHA2_512dlen=	64,	/* SHA-512 digest length */
 	MD4dlen=	16,	/* MD4 digest length */
 	MD5dlen=	16,	/* MD5 digest length */
-	AESdlen=	16,	/* TODO: see rfc */
+	RIPEMD160dlen=	20,	/* RIPEMD-160 digest length */
+	Poly1305dlen=	16,	/* Poly1305 digest length */
 
 	Hmacblksz	= 64,	/* in bytes; from rfc2104 */
 };
@@ -190,7 +244,7 @@
 {
 	uvlong	len;
 	union {
-		u32int	state[8];
+		u32int	state[16];
 		u64int	bstate[8];
 	};
 	uchar	buf[256];
@@ -206,16 +260,15 @@
 typedef struct DigestState SHA2_512state;
 typedef struct DigestState MD5state;
 typedef struct DigestState MD4state;
-typedef struct DigestState AEShstate;
 
 DigestState*	md4(uchar*, ulong, uchar*, DigestState*);
 DigestState*	md5(uchar*, ulong, uchar*, DigestState*);
+DigestState*	ripemd160(uchar *, ulong, uchar *, DigestState *);
 DigestState*	sha1(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_224(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_256(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_384(uchar*, ulong, uchar*, DigestState*);
 DigestState*	sha2_512(uchar*, ulong, uchar*, DigestState*);
-DigestState*	aes(uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_x(uchar *p, ulong len, uchar *key, ulong klen,
 			uchar *digest, DigestState *s,
 			DigestState*(*x)(uchar*, ulong, uchar*, DigestState*),
@@ -226,7 +279,7 @@
 DigestState*	hmac_sha2_256(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_sha2_384(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 DigestState*	hmac_sha2_512(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
-DigestState*	hmac_aes(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+DigestState*	poly1305(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
 char*		md5pickle(MD5state*);
 MD5state*	md5unpickle(char*);
 char*		sha1pickle(SHA1state*);
@@ -311,17 +364,27 @@
 void		rsaprivfree(RSApriv*);
 RSApub*		rsaprivtopub(RSApriv*);
 RSApub*		X509toRSApub(uchar*, int, char*, int);
-uchar*		RSApubtoasn1(RSApub*, int*);
 RSApub*		asn1toRSApub(uchar*, int);
 RSApriv*	asn1toRSApriv(uchar*, int);
 void		asn1dump(uchar *der, int len);
 uchar*		decodePEM(char *s, char *type, int *len, char **new_s);
 PEMChain*	decodepemchain(char *s, char *type);
-uchar*		X509gen(RSApriv *priv, char *subj, ulong valid[2], int *certlen);
-uchar*		X509req(RSApriv *priv, char *subj, int *certlen);
-char*		X509verify(uchar *cert, int ncert, RSApub *pk);
+uchar*		X509rsagen(RSApriv *priv, char *subj, ulong valid[2], int *certlen);
+uchar*		X509rsareq(RSApriv *priv, char *subj, int *certlen);
+char*		X509rsaverify(uchar *cert, int ncert, RSApub *pk);
+char*		X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk);
+
 void		X509dump(uchar *cert, int ncert);
 
+mpint*		pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		asn1encodeRSApub(RSApub *pk, uchar *buf, int len);
+int		asn1encodeRSApriv(RSApriv *k, uchar *buf, int len);
+int		asn1encodedigest(DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*),
+			uchar *digest, uchar *buf, int len);
+
+int		X509digestSPKI(uchar *, int, DigestState* (*)(uchar*, ulong, uchar*, DigestState*), uchar *);
+
 /*
  * elgamal
  */
@@ -409,21 +472,26 @@
  */
 typedef struct Thumbprint{
 	struct Thumbprint *next;
-	uchar	sha1[SHA1dlen];
+	uchar	hash[SHA2_256dlen];
+	uchar	len;
 } Thumbprint;
 
 typedef struct TLSconn{
 	char	dir[40];	/* connection directory */
 	uchar	*cert;	/* certificate (local on input, remote on output) */
 	uchar	*sessionID;
+	uchar	*psk;
 	int	certlen;
 	int	sessionIDlen;
+	int	psklen;
 	int	(*trace)(char*fmt, ...);
 	PEMChain*chain;	/* optional extra certificate evidence for servers to present */
 	char	*sessionType;
 	uchar	*sessionKey;
 	int	sessionKeylen;
 	char	*sessionConst;
+	char	*serverName;
+	char	*pskID;
 } TLSconn;
 
 /* tlshand.c */
@@ -431,14 +499,104 @@
 int tlsServer(int fd, TLSconn *c);
 
 /* thumb.c */
-Thumbprint* initThumbprints(char *ok, char *crl);
+Thumbprint* initThumbprints(char *ok, char *crl, char *tag);
 void	freeThumbprints(Thumbprint *ok);
-int	okThumbprint(uchar *sha1, Thumbprint *ok);
+int	okThumbprint(uchar *hash, int len, Thumbprint *ok);
+int	okCertificate(uchar *cert, int len, Thumbprint *ok);
 
 /* readcert.c */
 uchar	*readcert(char *filename, int *pcertlen);
 PEMChain*readcertchain(char *filename);
 
+typedef struct ECpoint{
+	int inf;
+	mpint *x;
+	mpint *y;
+	mpint *z;	/* nil when using affine coordinates */
+} ECpoint;
+
+typedef ECpoint ECpub;
+typedef struct ECpriv{
+	ECpoint;
+	mpint *d;
+} ECpriv;
+
+typedef struct ECdomain{
+	mpint *p;
+	mpint *a;
+	mpint *b;
+	ECpoint G;
+	mpint *n;
+	mpint *h;
+} ECdomain;
+
+void	ecdominit(ECdomain *, void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h));
+void	ecdomfree(ECdomain *);
+
+void	ecassign(ECdomain *, ECpoint *old, ECpoint *new);
+void	ecadd(ECdomain *, ECpoint *a, ECpoint *b, ECpoint *s);
+void	ecmul(ECdomain *, ECpoint *a, mpint *k, ECpoint *s);
+ECpoint*	strtoec(ECdomain *, char *, char **, ECpoint *);
+ECpriv*	ecgen(ECdomain *, ECpriv*);
+int	ecverify(ECdomain *, ECpoint *);
+int	ecpubverify(ECdomain *, ECpub *);
+void	ecdsasign(ECdomain *, ECpriv *, uchar *, int, mpint *, mpint *);
+int	ecdsaverify(ECdomain *, ECpub *, uchar *, int, mpint *, mpint *);
+void	base58enc(uchar *, char *, int);
+int	base58dec(char *, uchar *, int);
+
+ECpub*	ecdecodepub(ECdomain *dom, uchar *, int);
+int	ecencodepub(ECdomain *dom, ECpub *, uchar *, int);
+void	ecpubfree(ECpub *);
+
+ECpub*	X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom);
+char*	X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pub);
+char*	X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub);
+
+/* curves */
+void	secp256r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp256k1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp384r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+
+/*
+ * Diffie-Hellman key exchange
+ */
+
+typedef struct DHstate DHstate;
+struct DHstate
+{
+	mpint	*g;	/* base g */
+	mpint	*p;	/* large prime */
+	mpint	*q;	/* subgroup prime */
+	mpint	*x;	/* random secret */
+	mpint	*y;	/* public key y = g**x % p */
+};
+
+/* generate new public key: y = g**x % p */
+mpint* dh_new(DHstate *dh, mpint *p, mpint *q, mpint *g);
+
+/* calculate shared key: k = y**x % p */
+mpint* dh_finish(DHstate *dh, mpint *y);
+
+/* Curve25519 elliptic curve, public key function */
+void curve25519(uchar mypublic[32], uchar secret[32], uchar basepoint[32]);
+
+/* Curve25519 diffie hellman */
+void curve25519_dh_new(uchar x[32], uchar y[32]);
+int curve25519_dh_finish(uchar x[32], uchar y[32], uchar z[32]);
+
 /* password-based key derivation function 2 (rfc2898) */
 void pbkdf2_x(uchar *p, ulong plen, uchar *s, ulong slen, ulong rounds, uchar *d, ulong dlen,
 	DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*), int xlen);
+
+/* scrypt password-based key derivation function */
+char* scrypt(uchar *p, ulong plen, uchar *s, ulong slen,
+	ulong N, ulong R, ulong P,
+	uchar *d, ulong dlen);
+
+/* hmac-based key derivation function (rfc5869) */
+void hkdf_x(uchar *salt, ulong nsalt, uchar *info, ulong ninfo, uchar *key, ulong nkey, uchar *d, ulong dlen,
+	DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*), int xlen);
+
+/* timing safe memcmp() */
+int tsmemcmp(void*, void*, ulong);
--- sys/include/mp.h	Tue Sep  8 01:29:15 2009
+++ /sys/include/mp.h	Mon Jun 28 17:35:52 2021
@@ -23,6 +23,10 @@
 enum
 {
 	MPstatic=	0x01,
+	MPnorm=		0x02,	/* normalization status */
+	MPtimesafe=	0x04,	/* request time invariant computation */
+	MPfield=	0x08,	/* this mpint is a field modulus */
+
 	Dbytes=		sizeof(mpdigit),	/* bytes per digit */
 	Dbits=		Dbytes*8		/* bits per digit */
 };
@@ -32,12 +36,14 @@
 mpint*	mpnew(int n);		/* create a new mpint with at least n bits */
 void	mpfree(mpint *b);
 void	mpbits(mpint *b, int n);	/* ensure that b has at least n bits */
-void	mpnorm(mpint *b);		/* dump leading zeros */
+mpint*	mpnorm(mpint *b);		/* dump leading zeros */
 mpint*	mpcopy(mpint *b);
 void	mpassign(mpint *old, mpint *new);
 
 /* random bits */
 mpint*	mprand(int bits, void (*gen)(uchar*, int), mpint *b);
+/* return uniform random [0..n-1] */
+mpint*	mpnrand(mpint *n, void (*gen)(uchar*, int), mpint *b);
 
 /* conversion */
 mpint*	strtomp(char*, char**, int, mpint*);	/* ascii */
@@ -47,6 +53,7 @@
 int	mptole(mpint*, uchar*, uint, uchar**);
 mpint*	betomp(uchar*, uint, mpint*);	/* byte array, little-endian */
 int	mptobe(mpint*, uchar*, uint, uchar**);
+void	mptober(mpint *b, uchar *p, int n);
 uint	mptoui(mpint*);			/* unsigned int */
 mpint*	uitomp(uint, mpint*);
 int	mptoi(mpint*);			/* int */
@@ -69,6 +76,11 @@
 void	mpexp(mpint *b, mpint *e, mpint *m, mpint *res);	/* res = b**e mod m */
 void	mpmod(mpint *b, mpint *m, mpint *remainder);	/* remainder = b mod m */
 
+/* modular arithmetic, time invariant when 0≤b1≤m-1 and 0≤b2≤m-1 */
+void	mpmodadd(mpint *b1, mpint *b2, mpint *m, mpint *sum);	/* sum = b1+b2 % m */
+void	mpmodsub(mpint *b1, mpint *b2, mpint *m, mpint *diff);	/* diff = b1-b2 % m */
+void	mpmodmul(mpint *b1, mpint *b2, mpint *m, mpint *prod);	/* prod = b1*b2 % m */
+
 /* quotient = dividend/divisor, remainder = dividend % divisor */
 void	mpdiv(mpint *dividend, mpint *divisor,  mpint *quotient, mpint *remainder);
 
@@ -139,5 +151,18 @@
 void	crtprefree(CRTpre*);
 void	crtresfree(CRTres*);
 
+/* fast field arithmetic */
+typedef struct Mfield	Mfield;
+
+struct Mfield
+{
+	mpint;
+	int	(*reduce)(Mfield*, mpint*, mpint*);
+};
+
+mpint *mpfield(mpint*);
+
+Mfield *gmfield(mpint*);
+Mfield *cnfield(mpint*);
 
 #pragma	varargck	type	"B"	mpint*
--- sys/src/9/port/alloc.c	Sat Jan 28 15:52:57 2006
+++ /sys/src/9/port/alloc.c	Sat Jun 26 21:21:40 2021
@@ -53,8 +53,27 @@
 	.private=	&pimagpriv,
 };
 
+static Private psecrpriv;
+static Pool psecrmem = {
+	.name=	"Secrets",
+	.maxsize=	16*1024*1024,
+	.minarena=	64*1024,
+	.quantum=	32,
+	.alloc=	xalloc,
+	.merge=	xmerge,
+	.flags=	POOL_ANTAGONISM,
+
+	.lock=	plock,
+	.unlock=	punlock,
+	.print=	poolprint,
+	.panic=	ppanic,
+
+	.private=	&psecrpriv,
+};
+
 Pool*	mainmem = &pmainmem;
 Pool*	imagmem = &pimagmem;
+Pool*	secrmem = &psecrmem;
 
 /*
  * because we can't print while we're holding the locks, 
@@ -263,6 +282,34 @@
 msize(void *v)
 {
 	return poolmsize(mainmem, (ulong*)v-Npadlong)-Npadlong*sizeof(ulong);
+}
+
+/* secret memory, used to back cryptographic keys and cipher states */
+void*
+secalloc(ulong size)
+{
+	void *v;
+
+	while((v = poolalloc(secrmem, size+Npadlong*sizeof(ulong))) == nil){
+		if(!waserror()){
+			resrcwait(nil);
+			poperror();
+		}
+	}
+	if(Npadlong){
+		v = (ulong*)v+Npadlong;
+		setmalloctag(v, getcallerpc(&size));
+		setrealloctag(v, 0);
+	}
+	memset(v, 0, size);
+	return v;
+}
+
+void
+secfree(void *v)
+{
+	if(v != nil)
+		poolfree(secrmem, (ulong*)v-Npadlong);
 }
 
 void*
--- sys/src/9/port/devtls.c	Thu Jul  1 17:28:54 2021
+++ /sys/src/9/port/devtls.c	Sat Jun 26 21:21:40 2021
@@ -1,5 +1,5 @@
 /*
- *  devtls - record layer for transport layer security 1.0, 1.1, 1.2 and secure sockets layer 3.0
+ *  devtls - record layer for transport layer security 1.2 and secure sockets layer 3.0
  */
 #include	"u.h"
 #include	"../port/lib.h"
@@ -11,23 +11,23 @@
 #include	<libsec.h>
 
 typedef struct OneWay	OneWay;
-typedef struct Secret		Secret;
+typedef struct Secret	Secret;
 typedef struct TlsRec	TlsRec;
 typedef struct TlsErrs	TlsErrs;
 
 enum {
 	Statlen=	1024,		/* max. length of status or stats message */
 	/* buffer limits */
-	MaxRecLen		= 1<<14,	/* max payload length of a record layer message */
+	MaxRecLen	= 1<<14,	/* max payload length of a record layer message */
 	MaxCipherRecLen	= MaxRecLen + 2048,
-	RecHdrLen		= 5,
-	MaxMacLen		= SHA2_256dlen,
+	RecHdrLen	= 5,
+	MaxMacLen	= SHA2_256dlen,
 
 	/* protocol versions we can accept */
-	SSL3Version		= 0x0300,
-	TLS10Version		= 0x0301,
-	TLS11Version		= 0x0302,
-	TLS12Version		= 0x0303,
+	SSL3Version	= 0x0300,
+	TLS10Version	= 0x0301,
+	TLS11Version	= 0x0302,
+	TLS12Version	= 0x0303,
 	MinProtoVersion	= 0x0300,	/* limits on version we accept */
 	MaxProtoVersion	= 0x03ff,
 
@@ -83,21 +83,27 @@
 {
 	char		*encalg;	/* name of encryption alg */
 	char		*hashalg;	/* name of hash alg */
+
+	int		(*aead_enc)(Secret*, uchar*, int, uchar*, uchar*, int);
+	int		(*aead_dec)(Secret*, uchar*, int, uchar*, uchar*, int);
+
 	int		(*enc)(Secret*, uchar*, int);
 	int		(*dec)(Secret*, uchar*, int);
 	int		(*unpad)(uchar*, int, int);
-	DigestState	*(*mac)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+	DigestState*	(*mac)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+
 	int		block;		/* encryption block len, 0 if none */
-	int		maclen;
+	int		maclen;		/* # bytes of record mac / authentication tag */
+	int		recivlen;	/* # bytes of record iv for AEAD ciphers */
 	void		*enckey;
-	uchar	mackey[MaxMacLen];
+	uchar		mackey[MaxMacLen];
 };
 
 struct OneWay
 {
 	QLock		io;		/* locks io access */
 	QLock		seclock;	/* locks secret paramaters */
-	ulong		seq;
+	u64int		seq;
 	Secret		*sec;		/* cipher in use */
 	Secret		*new;		/* cipher waiting for enable */
 };
@@ -119,8 +125,11 @@
 	int		state;
 	int		debug;
 
-	/* record layer mac functions for different protocol versions */
-	void		(*packMac)(Secret*, uchar*, uchar*, uchar*, uchar*, int, uchar*);
+	/*
+	 * function to genrate authenticated data blob for different
+	 * protocol versions
+	 */
+	int		(*packAAD)(u64int, uchar*, uchar*);
 
 	/* input side -- protected by in.io */
 	OneWay		in;
@@ -180,7 +189,7 @@
 enum
 {
 	/* max. open tls connections */
-	MaxTlsDevs	= 16*1024
+	MaxTlsDevs	= 1024
 };
 
 static	Lock	tdlock;
@@ -221,13 +230,13 @@
 static DigestState*sslmac_md5(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s);
 static DigestState*sslmac_sha1(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s);
 static DigestState*nomac(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s);
-static void	sslPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac);
-static void	tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac);
-static void	put64(uchar *p, vlong x);
+static int	sslPackAAD(u64int, uchar*, uchar*);
+static int	tlsPackAAD(u64int, uchar*, uchar*);
+static void	packMac(Secret*, uchar*, int, uchar*, int, uchar*);
+static void	put64(uchar *p, u64int);
 static void	put32(uchar *p, u32int);
 static void	put24(uchar *p, int);
 static void	put16(uchar *p, int);
-static u32int	get32(uchar *p);
 static int	get16(uchar *p);
 static void	tlsSetState(TlsRec *tr, int new, int old);
 static void	rcvAlert(TlsRec *tr, int err);
@@ -238,6 +247,10 @@
 static int	des3dec(Secret *sec, uchar *buf, int n);
 static int	aesenc(Secret *sec, uchar *buf, int n);
 static int	aesdec(Secret *sec, uchar *buf, int n);
+static int	ccpoly_aead_enc(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len);
+static int	ccpoly_aead_dec(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len);
+static int	aesgcm_aead_enc(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len);
+static int	aesgcm_aead_dec(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len);
 static int	noenc(Secret *sec, uchar *buf, int n);
 static int	sslunpad(uchar *buf, int n, int block);
 static int	tlsunpad(uchar *buf, int n, int block);
@@ -326,7 +339,7 @@
 			nm = eve;
 		if((name = trnames[s]) == nil){
 			name = trnames[s] = smalloc(16);
-			snprint(name, 16, "%d", s);
+			sprint(name, "%d", s);
 		}
 		devdir(c, q, name, 0, nm, 0555, dp);
 		unlock(&tdlock);
@@ -410,21 +423,7 @@
 tlsopen(Chan *c, int omode)
 {
 	TlsRec *tr, **pp;
-	int t, perm;
-
-	perm = 0;
-	omode &= 3;
-	switch(omode) {
-	case OREAD:
-		perm = 4;
-		break;
-	case OWRITE:
-		perm = 2;
-		break;
-	case ORDWR:
-		perm = 6;
-		break;
-	}
+	int t;
 
 	t = TYPE(c->qid);
 	switch(t) {
@@ -457,10 +456,7 @@
 		tr = *pp;
 		if(tr == nil)
 			error("must open connection using clone");
-		if((perm & (tr->perm>>6)) != perm
-		&& (strcmp(up->user, tr->user) != 0
-		    || (perm & tr->perm) != perm))
-			error(Eperm);
+		devpermcheck(tr->user, tr->perm, omode);
 		if(t == Qhand){
 			if(waserror()){
 				unlock(&tr->hqlock);
@@ -489,7 +485,7 @@
 	c->mode = openmode(omode);
 	c->flag |= COPEN;
 	c->offset = 0;
-	c->iounit = qiomaxatomic;
+	c->iounit = MaxRecLen;
 	return c;
 }
 
@@ -520,7 +516,7 @@
 		error(Eshortstat);
 	if(!emptystr(d->uid))
 		kstrdup(&tr->user, d->uid);
-	if(d->mode != ~0UL)
+	if(d->mode != -1)
 		tr->perm = d->mode;
 
 	free(d);
@@ -734,9 +730,10 @@
 {
 	OneWay *volatile in;
 	Block *volatile b;
-	uchar *p, seq[8], header[RecHdrLen], hmac[MaxMacLen];
+	uchar *p, aad[8+RecHdrLen], header[RecHdrLen], hmac[MaxMacLen];
 	int volatile nconsumed;
-	int len, type, ver, unpad_len;
+	int len, type, ver, unpad_len, aadlen, ivlen;
+	Secret *sec;
 
 	nconsumed = 0;
 	if(waserror()){
@@ -798,35 +795,46 @@
 	}
 	qlock(&in->seclock);
 	p = b->rp;
-	if(in->sec != nil) {
+	sec = in->sec;
+	if(sec != nil) {
 		/* to avoid Canvel-Hiltgen-Vaudenay-Vuagnoux attack, all errors here
 		        should look alike, including timing of the response. */
-		unpad_len = (*in->sec->dec)(in->sec, p, len);
+		if(sec->aead_dec != nil)
+			unpad_len = len;
+		else {
+			unpad_len = (*sec->dec)(sec, p, len);
+if(tr->debug) pprint("decrypted %d\n", unpad_len);
+if(tr->debug) pdump(unpad_len, p, "decrypted:");
+		}
 
-		/* excplicit iv */
+		ivlen = sec->recivlen;
 		if(tr->version >= TLS11Version){
-			len -= in->sec->block;
-			if(len < 0)
-				rcvError(tr, EDecodeError, "runt record message");
-
-			unpad_len -= in->sec->block;
-			p += in->sec->block;
+			if(ivlen == 0)
+				ivlen = sec->block;
 		}
+		len -= ivlen;
+		if(len < 0)
+			rcvError(tr, EDecodeError, "runt record message");
+		unpad_len -= ivlen;
+		p += ivlen;
 
-		if(unpad_len >= in->sec->maclen)
-			len = unpad_len - in->sec->maclen;
-if(tr->debug) pprint("decrypted %d\n", unpad_len);
-if(tr->debug) pdump(unpad_len, p, "decrypted:");
+		if(unpad_len >= sec->maclen)
+			len = unpad_len - sec->maclen;
 
 		/* update length */
 		put16(header+3, len);
-		put64(seq, in->seq);
-		in->seq++;
-		(*tr->packMac)(in->sec, in->sec->mackey, seq, header, p, len, hmac);
-		if(unpad_len < in->sec->maclen)
-			rcvError(tr, EBadRecordMac, "short record mac");
-		if(memcmp(hmac, p+len, in->sec->maclen) != 0)
-			rcvError(tr, EBadRecordMac, "record mac mismatch");
+		aadlen = (*tr->packAAD)(in->seq++, header, aad);
+		if(sec->aead_dec != nil) {
+			len = (*sec->aead_dec)(sec, aad, aadlen, p - ivlen, p, unpad_len);
+			if(len < 0)
+				rcvError(tr, EBadRecordMac, "record mac mismatch");
+		} else {
+			packMac(sec, aad, aadlen, p, len, hmac);
+			if(unpad_len < sec->maclen)
+				rcvError(tr, EBadRecordMac, "short record mac");
+			if(tsmemcmp(hmac, p + len, sec->maclen) != 0)
+				rcvError(tr, EBadRecordMac, "record mac mismatch");
+		}
 		b->rp = p;
 		b->wp = p+len;
 	}
@@ -862,7 +870,7 @@
 			rcvError(tr, EIllegalParameter, "invalid alert fatal code");
 
 		/*
-		 * propate non-fatal alerts to handshaker
+		 * propagate non-fatal alerts to handshaker
 		 */
 		switch(p[1]){
 		case ECloseNotify:
@@ -1171,7 +1179,9 @@
 		if(tr->out.sec != nil)
 			s = seprint(s, e, "EncOut: %s\nHashOut: %s\n", tr->out.sec->encalg, tr->out.sec->hashalg);
 		if(tr->out.new != nil)
-			seprint(s, e, "NewEncOut: %s\nNewHashOut: %s\n", tr->out.new->encalg, tr->out.new->hashalg);
+			s = seprint(s, e, "NewEncOut: %s\nNewHashOut: %s\n", tr->out.new->encalg, tr->out.new->hashalg);
+		if(tr->c != nil)
+			seprint(s, e, "Chan: %s\n", chanpath(tr->c));
 		qunlock(&tr->in.seclock);
 		qunlock(&tr->out.seclock);
 		n = readstr(offset, a, n, buf);
@@ -1223,13 +1233,6 @@
 	return n;
 }
 
-static void
-randfill(uchar *buf, int len)
-{
-	while(len-- > 0)
-		*buf++ = nrand(256);
-}
-
 /*
  *  write a block in tls records
  */
@@ -1238,9 +1241,10 @@
 {
 	Block *volatile bb;
 	Block *nb;
-	uchar *p, seq[8];
+	uchar *p, aad[8+RecHdrLen];
 	OneWay *volatile out;
-	int n, ivlen, maclen, pad, ok;
+	int n, ivlen, maclen, aadlen, pad, ok;
+	Secret *sec;
 
 	out = &tr->out;
 	bb = b;
@@ -1274,11 +1278,15 @@
 		maclen = 0;
 		pad = 0;
 		ivlen = 0;
-		if(out->sec != nil){
-			maclen = out->sec->maclen;
-			pad = maclen + out->sec->block;
-			if(tr->version >= TLS11Version)
-				ivlen = out->sec->block;
+		sec = out->sec;
+		if(sec != nil){
+			maclen = sec->maclen;
+			pad = maclen + sec->block;
+			ivlen = sec->recivlen;
+			if(tr->version >= TLS11Version){
+				if(ivlen == 0)
+					ivlen = sec->block;
+			}
 		}
 		n = BLEN(bb);
 		if(n > MaxRecLen){
@@ -1303,20 +1311,16 @@
 		put16(p+1, tr->version);
 		put16(p+3, n);
 
-		if(out->sec != nil){
-			put64(seq, out->seq);
-			out->seq++;
-			(*tr->packMac)(out->sec, out->sec->mackey, seq, p, p + RecHdrLen + ivlen, n, p + RecHdrLen + ivlen + n);
-			n += maclen;
-
-			/* explicit iv */
-			if(ivlen > 0){
-				randfill(p + RecHdrLen, ivlen);
-				n += ivlen;
+		if(sec != nil){
+			aadlen = (*tr->packAAD)(out->seq++, p, aad);
+			if(sec->aead_enc != nil)
+				n = (*sec->aead_enc)(sec, aad, aadlen, p + RecHdrLen, p + RecHdrLen + ivlen, n) + ivlen;
+			else {
+				if(ivlen > 0)
+					prng(p + RecHdrLen, ivlen);
+				packMac(sec, aad, aadlen, p + RecHdrLen + ivlen, n, p + RecHdrLen + ivlen + n);
+				n = (*sec->enc)(sec, p + RecHdrLen, ivlen + n + maclen);
 			}
-
-			/* encrypt */
-			n = (*out->sec->enc)(out->sec, p + RecHdrLen, n);
 			nb->wp = p + RecHdrLen + n;
 
 			/* update length */
@@ -1402,7 +1406,6 @@
 static void
 initclearmac(Hashalg *, int, Secret *s, uchar *)
 {
-	s->maclen = 0;
 	s->mac = nomac;
 }
 
@@ -1429,10 +1432,10 @@
 
 static Hashalg hashtab[] =
 {
-	{ "clear", 0, initclearmac, },
-	{ "md5", MD5dlen, initmd5key, },
-	{ "sha1", SHA1dlen, initsha1key, },
-	{ "sha256", SHA2_256dlen, initsha2_256key, },
+	{ "clear",	0,		initclearmac, },
+	{ "md5",	MD5dlen,	initmd5key, },
+	{ "sha1",	SHA1dlen,	initsha1key, },
+	{ "sha256",	SHA2_256dlen,	initsha2_256key, },
 	{ 0 }
 };
 
@@ -1460,17 +1463,16 @@
 static void
 initRC4key(Encalg *ea, Secret *s, uchar *p, uchar *)
 {
-	s->enckey = smalloc(sizeof(RC4state));
+	s->enckey = secalloc(sizeof(RC4state));
 	s->enc = rc4enc;
 	s->dec = rc4enc;
-	s->block = 0;
 	setupRC4state(s->enckey, p, ea->keylen);
 }
 
 static void
 initDES3key(Encalg *, Secret *s, uchar *p, uchar *iv)
 {
-	s->enckey = smalloc(sizeof(DES3state));
+	s->enckey = secalloc(sizeof(DES3state));
 	s->enc = des3enc;
 	s->dec = des3dec;
 	s->block = 8;
@@ -1480,7 +1482,7 @@
 static void
 initAESkey(Encalg *ea, Secret *s, uchar *p, uchar *iv)
 {
-	s->enckey = smalloc(sizeof(AESstate));
+	s->enckey = secalloc(sizeof(AESstate));
 	s->enc = aesenc;
 	s->dec = aesdec;
 	s->block = 16;
@@ -1488,11 +1490,40 @@
 }
 
 static void
+initccpolykey(Encalg *ea, Secret *s, uchar *p, uchar *iv)
+{
+	s->enckey = secalloc(sizeof(Chachastate));
+	s->aead_enc = ccpoly_aead_enc;
+	s->aead_dec = ccpoly_aead_dec;
+	s->maclen = Poly1305dlen;
+	if(ea->ivlen == 0) {
+		/* older draft version, iv is 64-bit sequence number */
+		setupChachastate(s->enckey, p, ea->keylen, nil, 64/8, 20);
+	} else {
+		/* IETF standard, 96-bit iv xored with sequence number */
+		memmove(s->mackey, iv, ea->ivlen);
+		setupChachastate(s->enckey, p, ea->keylen, iv, ea->ivlen, 20);
+	}
+}
+
+static void
+initaesgcmkey(Encalg *ea, Secret *s, uchar *p, uchar *iv)
+{
+	s->enckey = secalloc(sizeof(AESGCMstate));
+	s->aead_enc = aesgcm_aead_enc;
+	s->aead_dec = aesgcm_aead_dec;
+	s->maclen = 16;
+	s->recivlen = 8;
+	memmove(s->mackey, iv, ea->ivlen);
+	prng(s->mackey + ea->ivlen, s->recivlen);
+	setupAESGCMstate(s->enckey, p, ea->keylen, nil, 0);
+}
+
+static void
 initclearenc(Encalg *, Secret *s, uchar *, uchar *)
 {
 	s->enc = noenc;
 	s->dec = noenc;
-	s->block = 0;
 }
 
 static Encalg encrypttab[] =
@@ -1502,6 +1533,10 @@
 	{ "3des_ede_cbc", 3 * 8, 8, initDES3key },
 	{ "aes_128_cbc", 128/8, 16, initAESkey },
 	{ "aes_256_cbc", 256/8, 16, initAESkey },
+	{ "ccpoly64_aead", 256/8, 0, initccpolykey },
+	{ "ccpoly96_aead", 256/8, 96/8, initccpolykey },
+	{ "aes_128_gcm_aead", 128/8, 4, initaesgcmkey },
+	{ "aes_256_gcm_aead", 256/8, 4, initaesgcmkey },
 	{ 0 }
 };
 
@@ -1543,8 +1578,8 @@
 		e = p + n;
 		do{
 			m = e - p;
-			if(m > MaxRecLen)
-				m = MaxRecLen;
+			if(m > c->iounit)
+				m = c->iounit;
 
 			b = allocb(m);
 			if(waserror()){
@@ -1606,9 +1641,9 @@
 		if(m < MinProtoVersion || m > MaxProtoVersion)
 			error("unsupported version");
 		if(m == SSL3Version)
-			tr->packMac = sslPackMac;
+			tr->packAAD = sslPackAAD;
 		else
-			tr->packMac = tlsPackMac;
+			tr->packAAD = tlsPackAAD;
 		tr->verset = 1;
 		tr->version = m;
 	}else if(strcmp(cb->f[0], "secret") == 0){
@@ -1630,32 +1665,34 @@
 		ea = parseencalg(cb->f[2]);
 
 		p = cb->f[4];
-		m = (strlen(p)*3)/2;
-		x = smalloc(m);
-		tos = nil;
-		toc = nil;
+		m = (strlen(p)*3)/2 + 1;
+		x = secalloc(m);
+		tos = secalloc(sizeof(Secret));
+		toc = secalloc(sizeof(Secret));
 		if(waserror()){
+			secfree(x);
 			freeSec(tos);
 			freeSec(toc);
-			free(x);
 			nexterror();
 		}
+
 		m = dec64(x, m, p, strlen(p));
+		memset(p, 0, strlen(p));
 		if(m < 2 * ha->maclen + 2 * ea->keylen + 2 * ea->ivlen)
 			error("not enough secret data provided");
 
-		tos = smalloc(sizeof(Secret));
-		toc = smalloc(sizeof(Secret));
 		if(!ha->initkey || !ea->initkey)
 			error("misimplemented secret algorithm");
+
 		(*ha->initkey)(ha, tr->version, tos, &x[0]);
 		(*ha->initkey)(ha, tr->version, toc, &x[ha->maclen]);
 		(*ea->initkey)(ea, tos, &x[2 * ha->maclen], &x[2 * ha->maclen + 2 * ea->keylen]);
 		(*ea->initkey)(ea, toc, &x[2 * ha->maclen + ea->keylen], &x[2 * ha->maclen + 2 * ea->keylen + ea->ivlen]);
 
-		if(!tos->mac || !tos->enc || !tos->dec
-		|| !toc->mac || !toc->enc || !toc->dec)
-			error("missing algorithm implementations");
+		if(!tos->aead_enc || !tos->aead_dec || !toc->aead_enc || !toc->aead_dec)
+			if(!tos->mac || !tos->enc || !tos->dec || !toc->mac || !toc->enc || !toc->dec)
+				error("missing algorithm implementations");
+
 		if(strtol(cb->f[3], nil, 0) == 0){
 			tr->in.new = tos;
 			tr->out.new = toc;
@@ -1675,7 +1712,7 @@
 		tos->encalg = ea->name;
 		tos->hashalg = ha->name;
 
-		free(x);
+		secfree(x);
 		poperror();
 	}else if(strcmp(cb->f[0], "changecipher") == 0){
 		if(cb->nf != 1)
@@ -1756,12 +1793,8 @@
 	struct Hashalg *h;
 	int n;
 	char *cp;
-	static int already;
 
-	if(!already){
-		fmtinstall('H', encodefmt);
-		already = 1;
-	}
+	fmtinstall('H', encodefmt);
 
 	tlsdevs = smalloc(sizeof(TlsRec*) * maxtlsdevs);
 	trnames = smalloc((sizeof *trnames) * maxtlsdevs);
@@ -1828,7 +1861,7 @@
 	fd = strtoul(p, 0, 0);
 	if(fd < 0)
 		error(Ebadarg);
-	c = fdtochan(fd, -1, 0, 1);	/* error check and inc ref */
+	c = fdtochan(fd, ORDWR, 1, 1);	/* error check and inc ref */
 	return c;
 }
 
@@ -1872,7 +1905,7 @@
 {
 	int s;
 
-if(tr->debug)pprint("tleError %s\n", msg);
+if(tr->debug)pprint("tlsError %s\n", msg);
 	lock(&tr->statelk);
 	s = tr->state;
 	tr->state = SError;
@@ -2004,10 +2037,10 @@
 static void
 freeSec(Secret *s)
 {
-	if(s != nil){
-		free(s->enckey);
-		free(s);
-	}
+	if(s == nil)
+		return;
+	secfree(s->enckey);
+	secfree(s);
 }
 
 static int
@@ -2062,7 +2095,7 @@
 		buf[n++] = pad;
 	return nn;
 }
-
+		
 static int
 des3enc(Secret *sec, uchar *buf, int n)
 {
@@ -2093,6 +2126,83 @@
 	return (*sec->unpad)(buf, n, 16);
 }
 
+static void
+ccpoly_aead_setiv(Secret *sec, uchar seq[8])
+{
+	uchar iv[ChachaIVlen];
+	Chachastate *cs;
+	int i;
+
+	cs = (Chachastate*)sec->enckey;
+	if(cs->ivwords == 2){
+		chacha_setiv(cs, seq);
+		return;
+	}
+
+	memmove(iv, sec->mackey, ChachaIVlen);
+	for(i=0; i<8; i++)
+		iv[i+(ChachaIVlen-8)] ^= seq[i];
+
+	chacha_setiv(cs, iv);
+
+	memset(iv, 0, sizeof(iv));
+}
+
+static int
+ccpoly_aead_enc(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len)
+{
+	USED(reciv);
+	ccpoly_aead_setiv(sec, aad);
+	ccpoly_encrypt(data, len, aad, aadlen, data+len, sec->enckey);
+	return len + sec->maclen;
+}
+
+static int
+ccpoly_aead_dec(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len)
+{
+	USED(reciv);
+	len -= sec->maclen;
+	if(len < 0)
+		return -1;
+	ccpoly_aead_setiv(sec, aad);
+	if(ccpoly_decrypt(data, len, aad, aadlen, data+len, sec->enckey) != 0)
+		return -1;
+	return len;
+}
+
+static int
+aesgcm_aead_enc(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len)
+{
+	uchar iv[12];
+	int i;
+
+	memmove(iv, sec->mackey, 4+8);
+	for(i=0; i<8; i++) iv[4+i] ^= aad[i];
+	memmove(reciv, iv+4, 8);
+	aesgcm_setiv(sec->enckey, iv, 12);
+	memset(iv, 0, sizeof(iv));
+	aesgcm_encrypt(data, len, aad, aadlen, data+len, sec->enckey);
+	return len + sec->maclen;
+}
+
+static int
+aesgcm_aead_dec(Secret *sec, uchar *aad, int aadlen, uchar *reciv, uchar *data, int len)
+{
+	uchar iv[12];
+
+	len -= sec->maclen;
+	if(len < 0)
+		return -1;
+	memmove(iv, sec->mackey, 4);
+	memmove(iv+4, reciv, 8);
+	aesgcm_setiv(sec->enckey, iv, 12);
+	memset(iv, 0, sizeof(iv));
+	if(aesgcm_decrypt(data, len, aad, aadlen, data+len, sec->enckey) != 0)
+		return -1;
+	return len;
+}
+
+
 static DigestState*
 nomac(uchar *, ulong, uchar *, ulong, uchar *, DigestState *)
 {
@@ -2152,32 +2262,35 @@
 	return sslmac_x(p, len, key, klen, digest, s, md5, MD5dlen, 48);
 }
 
-static void
-sslPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac)
+static int
+sslPackAAD(u64int seq, uchar *hdr, uchar *aad)
 {
-	DigestState *s;
-	uchar buf[11];
-
-	memmove(buf, seq, 8);
-	buf[8] = header[0];
-	buf[9] = header[3];
-	buf[10] = header[4];
+	put64(aad, seq);
+	aad[8] = hdr[0];
+	aad[9] = hdr[3];
+	aad[10] = hdr[4];
+	return 11;
+}
 
-	s = (*sec->mac)(buf, 11, mackey, sec->maclen, 0, 0);
-	(*sec->mac)(body, len, mackey, sec->maclen, mac, s);
+static int
+tlsPackAAD(u64int seq, uchar *hdr, uchar *aad)
+{
+	put64(aad, seq);
+	aad[8] = hdr[0];
+	aad[9] = hdr[1];
+	aad[10] = hdr[2];
+	aad[11] = hdr[3];
+	aad[12] = hdr[4];
+	return 13;
 }
 
 static void
-tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac)
+packMac(Secret *sec, uchar *aad, int aadlen, uchar *body, int bodylen, uchar *mac)
 {
 	DigestState *s;
-	uchar buf[13];
-
-	memmove(buf, seq, 8);
-	memmove(&buf[8], header, 5);
 
-	s = (*sec->mac)(buf, 13, mackey, sec->maclen, 0, 0);
-	(*sec->mac)(body, len, mackey, sec->maclen, mac, s);
+	s = (*sec->mac)(aad, aadlen, sec->mackey, sec->maclen, nil, nil);
+	(*sec->mac)(body, bodylen, sec->mackey, sec->maclen, mac, s);
 }
 
 static void
@@ -2190,10 +2303,10 @@
 }
 
 static void
-put64(uchar *p, vlong x)
+put64(uchar *p, u64int x)
 {
-	put32(p, (u32int)(x >> 32));
-	put32(p+4, (u32int)x);
+	put32(p, x >> 32);
+	put32(p+4, x);
 }
 
 static void
@@ -2209,12 +2322,6 @@
 {
 	p[0] = x>>8;
 	p[1] = x;
-}
-
-static u32int
-get32(uchar *p)
-{
-	return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
 }
 
 static int
--- sys/src/ape/lib/sec/port/mkfile	Thu Sep 19 23:24:49 2013
+++ /sys/src/ape/lib/sec/port/mkfile	Sat Jun 26 21:21:40 2021
@@ -5,20 +5,38 @@
 
 LIBSECCFILES =\
 	des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
-	aes.c blowfish.c \
+	aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
+	blowfish.c \
 	hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
 	sha2_64.c sha2_128.c sha2block64.c sha2block128.c\
-	sha1pickle.c md5pickle.c\
+	poly1305.c\
 	rc4.c\
+	chacha.c chachablock.c\
+	salsa.c\
 	genrandom.c prng.c fastrand.c nfastrand.c\
 	probably_prime.c smallprimetest.c genprime.c dsaprimes.c\
 	gensafeprime.c genstrongprime.c\
 	rsagen.c rsafill.c rsaencrypt.c rsadecrypt.c rsaalloc.c \
-	rsaprivtopub.c decodepem.c \
+	rsaprivtopub.c x509.c decodepem.c \
 	eggen.c egencrypt.c egdecrypt.c egalloc.c egprivtopub.c \
 	egsign.c egverify.c \
 	dsagen.c dsaalloc.c dsaprivtopub.c dsasign.c dsaverify.c \
 	tlshand.c thumb.c readcert.c \
+	aes_xts.c  \
+	ecc.c\
+	jacobian.c\
+	ripemd.c\
+	dh.c\
+	curve25519.c\
+	curve25519_dh.c\
+	pbkdf2.c\
+	scrypt.c\
+	hkdf.c\
+	ccpoly.c\
+	tsmemcmp.c\
+	secp256r1.c\
+	secp384r1.c\
+	secp256k1.c\
 
 CFILES=\
 	$LIBSECCFILES\
--- sys/src/ape/lib/sec/port/x509-ape.c	Thu Sep 19 23:15:01 2013
+++ /sys/src/ape/lib/sec/port/x509-ape.c	Sat Jun 26 21:25:59 2021
@@ -3,11 +3,6 @@
 #include <mp.h>
 #include <libsec.h>
 
-typedef DigestState*(*DigestFun)(uchar*,ulong,uchar*,DigestState*);
-
-/* ANSI offsetof, backwards. */
-#define	OFFSETOF(a, b)	offsetof(b, a)
-
 /*=============================================================*/
 /*  general ASN1 declarations and parsing
  *
@@ -58,18 +53,18 @@
 
 struct Bytes {
 	int	len;
-	uchar	data[1];
+	uchar	data[];
 };
 
 struct Ints {
 	int	len;
-	int	data[1];
+	int	data[];
 };
 
 struct Bits {
 	int	len;		/* number of bytes */
 	int	unusedbits;	/* unused bits in last byte */
-	uchar	data[1];	/* most-significant bit first */
+	uchar	data[];		/* most-significant bit first */
 };
 
 struct Tag {
@@ -135,14 +130,11 @@
 static int	is_string(Elem* pe, char** pstring);
 static int	is_time(Elem* pe, char** ptime);
 static int	decode(uchar* a, int alen, Elem* pelem);
-static int	decode_seq(uchar* a, int alen, Elist** pelist);
-static int	decode_value(uchar* a, int alen, int kind, int isconstr, Value* pval);
 static int	encode(Elem e, Bytes** pbytes);
 static int	oid_lookup(Ints* o, Ints** tab);
 static void	freevalfields(Value* v);
 static mpint	*asn1mpint(Elem *e);
-
-
+static void	edump(Elem);
 
 #define TAG_MASK 0x1F
 #define CONSTR_MASK 0x20
@@ -169,9 +161,8 @@
 	if(n==0)
 		n=1;
 	p = malloc(n);
-	if(p == nil){
-		exits("out of memory");
-	}
+	if(p == nil)
+		sysfatal("out of memory");
 	memset(p, 0, n);
 	setmalloctag(p, getcallerpc(&n));
 	return p;
@@ -180,14 +171,13 @@
 static char*
 estrdup(char *s)
 {
-	char *d, *d0;
+	char *d;
+	int n;
 
-	if(!s)
-		return 0;
-	d = d0 = emalloc(strlen(s)+1);
-	while(*d++ = *s++)
-		;
-	return d0;
+	n = strlen(s)+1;
+	d = emalloc(n);
+	memmove(d, s, n);
+	return d;
 }
 
 
@@ -201,37 +191,12 @@
 decode(uchar* a, int alen, Elem* pelem)
 {
 	uchar* p = a;
+	int err;
 
-	return  ber_decode(&p, &a[alen], pelem);
-}
-
-/*
- * Like decode, but continue decoding after first element
- * of array ends.
- */
-static int
-decode_seq(uchar* a, int alen, Elist** pelist)
-{
-	uchar* p = a;
-
-	return seq_decode(&p, &a[alen], -1, 1, pelist);
-}
-
-/*
- * Decode the whole array as a BER encoding of an ASN1 value,
- * (i.e., the part after the tag and length).
- * Assume the value is encoded as universal tag "kind".
- * The constr arg is 1 if the value is constructed, 0 if primitive.
- * If there's an error, the return string will contain the error.
- * Depending on the error, the returned value may or may not
- * be nil.
- */
-static int
-decode_value(uchar* a, int alen, int kind, int isconstr, Value* pval)
-{
-	uchar* p = a;
-
-	return value_decode(&p, &a[alen], alen, kind, isconstr, pval);
+	err = ber_decode(&p, &a[alen], pelem);
+	if(err == ASN_OK && p != &a[alen])
+		err = ASN_EVALLEN;
+	return err;
 }
 
 /*
@@ -257,15 +222,14 @@
 	Tag tag;
 	Value val;
 
+	memset(pelem, 0, sizeof(*pelem));
 	err = tag_decode(pp, pend, &tag, &isconstr);
 	if(err == ASN_OK) {
 		err = length_decode(pp, pend, &length);
 		if(err == ASN_OK) {
-			if(tag.class == Universal) {
+			if(tag.class == Universal)
 				err = value_decode(pp, pend, length, tag.num, isconstr, &val);
-				if(val.tag == VSeq || val.tag == VSet)
-					setmalloctag(val.u.seqval, getcallerpc(&pp));
-			}else
+			else
 				err = value_decode(pp, pend, length, OCTET_STRING, 0, &val);
 			if(err == ASN_OK) {
 				pelem->tag = tag;
@@ -400,8 +364,7 @@
 				pval->u.bitstringval = makebits(0, 0, 0);
 				p += 2;
 			}
-			else
-				/* TODO: recurse and concat results */
+			else	/* TODO: recurse and concat results */
 				err = ASN_EUNIMPL;
 		}
 		else {
@@ -502,7 +465,6 @@
 
 	case SEQUENCE:
 		err = seq_decode(&p, pend, length, isconstr, &vl);
-		setmalloctag(vl, getcallerpc(&pp));
 		if(err == ASN_OK) {
 			pval->tag = VSeq ;
 			pval->u.seqval = vl;
@@ -511,12 +473,12 @@
 
 	case SETOF:
 		err = seq_decode(&p, pend, length, isconstr, &vl);
-		setmalloctag(vl, getcallerpc(&pp));
 		if(err == ASN_OK) {
 			pval->tag = VSet;
 			pval->u.setval = vl;
 		}
 		break;
+
 	case UTF8String:
 	case NumericString:
 	case PrintableString:
@@ -530,13 +492,64 @@
 	case GeneralString:
 	case UniversalString:
 	case BMPString:
-		/* TODO: figure out when character set conversion is necessary */
 		err = octet_decode(&p, pend, length, isconstr, &va);
 		if(err == ASN_OK) {
-			pval->tag = VString;
-			pval->u.stringval = (char*)emalloc(va->len+1);
-			memmove(pval->u.stringval, va->data, va->len);
-			pval->u.stringval[va->len] = 0;
+			uchar *s;
+			char *d;
+			Rune r;
+			int n;
+
+			switch(kind){
+			case UniversalString:
+				n = va->len / 4;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<24 | s[1]<<16 | s[2]<<8 | s[3];
+					if(r == 0)
+						break;
+					n--;
+					s += 4;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			case BMPString:
+				n = va->len / 2;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<8 | s[1];
+					if(r == 0)
+						break;
+					n--;
+					s += 2;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			default:
+				n = va->len;
+				d = emalloc(n+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					if((*d = *s) == 0)
+						break;
+					n--;
+					s++;
+					d++;
+				}
+				*d = 0;
+				break;
+			}
+			if(n != 0){
+				err = ASN_EINVAL;
+				free(pval->u.stringval);
+			} else 
+				pval->tag = VString;
 			free(va);
 		}
 		break;
@@ -665,25 +678,27 @@
 			switch(elem.val.tag) {
 			case VOctets:
 				newans = catbytes(ans, elem.val.u.octetsval);
+				freevalfields(&elem.val);
 				freebytes(ans);
 				ans = newans;
 				break;
 
 			case VEOC:
-				if(length != -1) {
-					p = pold;
-					err = ASN_EINVAL;
-				}
-				goto cloop_done;
-
+				if(length == -1)
+					goto cloop_done;
+				/* no break */
 			default:
+				freevalfields(&elem.val);
 				p = pold;
 				err = ASN_EINVAL;
 				goto cloop_done;
 			}
 		}
 cloop_done:
-		;
+		if(err != ASN_OK){
+			freebytes(ans);
+			ans = nil;
+		}
 	}
 	*pp = p;
 	*pbytes = ans;
@@ -736,7 +751,9 @@
 			else
 				lve = mkel(elem, lve);
 		}
-		if(err == ASN_OK) {
+		if(err != ASN_OK)
+			freeelist(lve);
+		else {
 			/* reverse back to original order */
 			while(lve != nil) {
 				lveold = lve;
@@ -748,7 +765,6 @@
 	}
 	*pp = p;
 	*pelist = ans;
-	setmalloctag(ans, getcallerpc(&pp));
 	return err;
 }
 
@@ -977,8 +993,8 @@
 				memmove(p, bb->data, bb->len);
 			p += bb->len;
 		}
-			else
-				err = ASN_EINVAL;
+		else
+			err = ASN_EINVAL;
 		break;
 
 	case NULLTAG:
@@ -1195,21 +1211,8 @@
 static int
 is_bigint(Elem* pe, Bytes** pbigint)
 {
-	int v, n, i;
-
-	if(pe->tag.class == Universal && pe->tag.num == INTEGER) {
-		if(pe->val.tag == VBigInt)
-			*pbigint = pe->val.u.bigintval;
-		else if(pe->val.tag == VInt){
-			v = pe->val.u.intval;
-			for(n = 1; n < 4; n++)
-				if((1 << (8 * n)) > v)
-					break;
-			*pbigint = newbytes(n);
-			for(i = 0; i < n; i++)
-				(*pbigint)->data[i] = (v >> ((n - 1 - i) * 8));
-		}else
-			return 0;
+	if(pe->tag.class == Universal && pe->tag.num == INTEGER && pe->val.tag == VBigInt) {
+		*pbigint = pe->val.u.bigintval;
 		return 1;
 	}
 	return 0;
@@ -1292,7 +1295,9 @@
 {
 	Bytes* ans;
 
-	ans = (Bytes*)emalloc(OFFSETOF(data[0], Bytes) + len);
+	if(len < 0)
+		abort();
+	ans = emalloc(sizeof(Bytes) + len);
 	ans->len = len;
 	return ans;
 }
@@ -1313,8 +1318,7 @@
 static void
 freebytes(Bytes* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 /*
@@ -1352,7 +1356,9 @@
 {
 	Ints* ans;
 
-	ans = (Ints*)emalloc(OFFSETOF(data[0], Ints) + len*sizeof(int));
+	if(len < 0 || len > ((uint)-1>>1)/sizeof(int))
+		abort();
+	ans = emalloc(sizeof(Ints) + len*sizeof(int));
 	ans->len = len;
 	return ans;
 }
@@ -1363,16 +1369,14 @@
 	Ints* ans;
 
 	ans = newints(len);
-	if(len > 0)
-		memmove(ans->data, buf, len*sizeof(int));
+	memmove(ans->data, buf, len*sizeof(int));
 	return ans;
 }
 
 static void
 freeints(Ints* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 /* len is number of bytes */
@@ -1381,7 +1385,9 @@
 {
 	Bits* ans;
 
-	ans = (Bits*)emalloc(OFFSETOF(data[0], Bits) + len);
+	if(len < 0)
+		abort();
+	ans = emalloc(sizeof(Bits) + len);
 	ans->len = len;
 	ans->unusedbits = 0;
 	return ans;
@@ -1401,8 +1407,7 @@
 static void
 freebits(Bits* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 static Elist*
@@ -1469,24 +1474,35 @@
 		freeints(v->u.objidval);
 		break;
 	case VString:
-		if(v->u.stringval)
-			free(v->u.stringval);
+		free(v->u.stringval);
 		break;
 	case VSeq:
 		el = v->u.seqval;
 		for(l = el; l != nil; l = l->tl)
 			freevalfields(&l->hd.val);
-		if(el)
-			freeelist(el);
+		freeelist(el);
 		break;
 	case VSet:
 		el = v->u.setval;
 		for(l = el; l != nil; l = l->tl)
 			freevalfields(&l->hd.val);
-		if(el)
-			freeelist(el);
+		freeelist(el);
 		break;
 	}
+	memset(v, 0, sizeof(*v));
+}
+
+static mpint*
+asn1mpint(Elem *e)
+{
+	Bytes *b;
+	int v;
+
+	if(is_int(e, &v))
+		return itomp(v, nil);
+	if(is_bigint(e, &b))
+		return betomp(b->data, b->len, nil);
+	return nil;
 }
 
 /* end of general ASN1 functions */
@@ -1569,9 +1585,10 @@
 	char*	validity_end;
 	char*	subject;
 	int	publickey_alg;
-	Bytes*	publickey;
+	Bits*	publickey;
 	int	signature_alg;
-	Bytes*	signature;
+	Bits*	signature;
+	int	curve;
 } CertX509;
 
 /* Algorithm object-ids */
@@ -1580,48 +1597,144 @@
 	ALG_md2WithRSAEncryption,
 	ALG_md4WithRSAEncryption,
 	ALG_md5WithRSAEncryption,
+
 	ALG_sha1WithRSAEncryption,
 	ALG_sha1WithRSAEncryptionOiw,
+
+	ALG_sha256WithRSAEncryption,
+	ALG_sha384WithRSAEncryption,
+	ALG_sha512WithRSAEncryption,
+	ALG_sha224WithRSAEncryption,
+
+	ALG_ecPublicKey,
+	ALG_sha1WithECDSA,
+	ALG_sha256WithECDSA,
+	ALG_sha384WithECDSA,
+	ALG_sha512WithECDSA,
+
 	ALG_md5,
+	ALG_sha1,
+	ALG_sha256,
+	ALG_sha384,
+	ALG_sha512,
+	ALG_sha224,
+
 	NUMALGS
 };
-typedef struct Ints7 {
+
+typedef struct Ints15 {
 	int		len;
-	int		data[7];
-} Ints7;
-static Ints7 oid_rsaEncryption = {7, 1, 2, 840, 113549, 1, 1, 1 };
-static Ints7 oid_md2WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 2 };
-static Ints7 oid_md4WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 3 };
-static Ints7 oid_md5WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 4 };
-static Ints7 oid_sha1WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 5 };
-static Ints7 oid_sha1WithRSAEncryptionOiw ={6, 1, 3, 14, 3, 2, 29 };
-static Ints7 oid_md5 ={6, 1, 2, 840, 113549, 2, 5, 0 };
+	int		data[15];
+} Ints15;
+
+typedef struct DigestAlg {
+	int		alg;
+	DigestState*	(*fun)(uchar*,ulong,uchar*,DigestState*);
+	int		len;
+} DigestAlg;
+
+static DigestAlg alg_md5 = { ALG_md5, md5, MD5dlen};
+static DigestAlg alg_sha1 = { ALG_sha1, sha1, SHA1dlen };
+static DigestAlg alg_sha256 = { ALG_sha256, sha2_256, SHA2_256dlen };
+static DigestAlg alg_sha384 = { ALG_sha384, sha2_384, SHA2_384dlen };
+static DigestAlg alg_sha512 = { ALG_sha512, sha2_512, SHA2_512dlen };
+static DigestAlg alg_sha224 = { ALG_sha224, sha2_224, SHA2_224dlen };
+
+/* maximum length of digest output of the digest algs above */
+enum {
+	MAXdlen = SHA2_512dlen,
+};
+
+static Ints15 oid_rsaEncryption = {7, 1, 2, 840, 113549, 1, 1, 1 };
+
+static Ints15 oid_md2WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 2 };
+static Ints15 oid_md4WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 3 };
+static Ints15 oid_md5WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 4 };
+static Ints15 oid_sha1WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 5 };
+static Ints15 oid_sha1WithRSAEncryptionOiw ={6, 1, 3, 14, 3, 2, 29 };
+static Ints15 oid_sha256WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 11 };
+static Ints15 oid_sha384WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 12 };
+static Ints15 oid_sha512WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 13 };
+static Ints15 oid_sha224WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 14 };
+
+static Ints15 oid_ecPublicKey = {6, 1, 2, 840, 10045, 2, 1 };
+static Ints15 oid_sha1WithECDSA = {6, 1, 2, 840, 10045, 4, 1 };
+static Ints15 oid_sha256WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 2 };
+static Ints15 oid_sha384WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 3 };
+static Ints15 oid_sha512WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 4 };
+
+static Ints15 oid_md5 = {6, 1, 2, 840, 113549, 2, 5 };
+static Ints15 oid_sha1 = {6, 1, 3, 14, 3, 2, 26 };
+static Ints15 oid_sha256= {9, 2, 16, 840, 1, 101, 3, 4, 2, 1 };
+static Ints15 oid_sha384= {9, 2, 16, 840, 1, 101, 3, 4, 2, 2 };
+static Ints15 oid_sha512= {9, 2, 16, 840, 1, 101, 3, 4, 2, 3 };
+static Ints15 oid_sha224= {9, 2, 16, 840, 1, 101, 3, 4, 2, 4 };
+
 static Ints *alg_oid_tab[NUMALGS+1] = {
 	(Ints*)&oid_rsaEncryption,
 	(Ints*)&oid_md2WithRSAEncryption,
 	(Ints*)&oid_md4WithRSAEncryption,
 	(Ints*)&oid_md5WithRSAEncryption,
+
 	(Ints*)&oid_sha1WithRSAEncryption,
 	(Ints*)&oid_sha1WithRSAEncryptionOiw,
+
+	(Ints*)&oid_sha256WithRSAEncryption,
+	(Ints*)&oid_sha384WithRSAEncryption,
+	(Ints*)&oid_sha512WithRSAEncryption,
+	(Ints*)&oid_sha224WithRSAEncryption,
+
+	(Ints*)&oid_ecPublicKey,
+	(Ints*)&oid_sha1WithECDSA,
+	(Ints*)&oid_sha256WithECDSA,
+	(Ints*)&oid_sha384WithECDSA,
+	(Ints*)&oid_sha512WithECDSA,
+
 	(Ints*)&oid_md5,
+	(Ints*)&oid_sha1,
+	(Ints*)&oid_sha256,
+	(Ints*)&oid_sha384,
+	(Ints*)&oid_sha512,
+	(Ints*)&oid_sha224,
+	nil
+};
+
+static DigestAlg *digestalg[NUMALGS+1] = {
+	&alg_md5, &alg_md5, &alg_md5, &alg_md5,
+	&alg_sha1, &alg_sha1,
+	&alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
+	&alg_sha256, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512,
+	&alg_md5, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
 	nil
 };
-static DigestFun digestalg[NUMALGS+1] = { md5, md5, md5, md5, sha1, sha1, md5, nil };
+
+static Bytes* encode_digest(DigestAlg *da, uchar *digest);
+
+static Ints15 oid_secp256r1 = {7, 1, 2, 840, 10045, 3, 1, 7};
+static Ints15 oid_secp384r1 = {5, 1, 3, 132, 0, 34};
+
+static Ints *namedcurves_oid_tab[] = {
+	(Ints*)&oid_secp256r1,
+	(Ints*)&oid_secp384r1,
+	nil,
+};
+static void (*namedcurves[])(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h) = {
+	secp256r1,
+	secp384r1,
+	nil,
+};
 
 static void
 freecert(CertX509* c)
 {
-	if(!c) return;
-	if(c->issuer != nil)
-		free(c->issuer);
-	if(c->validity_start != nil)
-		free(c->validity_start);
-	if(c->validity_end != nil)
-		free(c->validity_end);
-	if(c->subject != nil)
-		free(c->subject);
-	freebytes(c->publickey);
-	freebytes(c->signature);
+	if(c == nil)
+		return;
+	free(c->issuer);
+	free(c->validity_start);
+	free(c->validity_end);
+	free(c->subject);
+	freebits(c->publickey);
+	freebits(c->signature);
 	free(c);
 }
 
@@ -1702,12 +1815,22 @@
 	return oid_lookup(oid, alg_oid_tab);
 }
 
+static int
+parse_curve(Elem* e)
+{
+	Elist* el;
+	Ints* oid;
+
+	if(!is_seq(e, &el) || elistlen(el)<2 || !is_oid(&el->tl->hd, &oid))
+		return -1;
+	return oid_lookup(oid, namedcurves_oid_tab);
+}
+
 static CertX509*
-decode_cert(Bytes* a)
+decode_cert(uchar *buf, int len)
 {
 	int ok = 0;
 	int n;
-	CertX509* c = nil;
 	Elem  ecert;
 	Elem* ecertinfo;
 	Elem* esigalg;
@@ -1725,8 +1848,9 @@
 	Bits* bits = nil;
 	Bytes* b;
 	Elem* e;
+	CertX509* c = nil;
 
-	if(decode(a->data, a->len, &ecert) != ASN_OK)
+	if(decode(buf, len, &ecert) != ASN_OK)
 		goto errret;
 
 	c = (CertX509*)emalloc(sizeof(CertX509));
@@ -1776,7 +1900,7 @@
  	esubj = &el->hd;
  	el = el->tl;
  	epubkey = &el->hd;
- 	if(!is_int(eserial, &c->serial)) {
+	if(!is_int(eserial, &c->serial)) {
 		if(!is_bigint(eserial, &b))
 			goto errret;
 		c->serial = -1;	/* else we have to change cert struct */
@@ -1804,7 +1928,7 @@
 		goto errret;
 
 	/* SubjectPublicKeyInfo */
- 	if(!is_seq(epubkey, &elpubkey))
+	if(!is_seq(epubkey, &elpubkey))
 		goto errret;
 	if(elistlen(elpubkey) != 2)
 		goto errret;
@@ -1812,18 +1936,25 @@
 	c->publickey_alg = parse_alg(&elpubkey->hd);
 	if(c->publickey_alg < 0)
 		goto errret;
-  	if(!is_bitstring(&elpubkey->tl->hd, &bits))
-		goto errret;
-	if(bits->unusedbits != 0)
+	c->curve = -1;
+	if(c->publickey_alg == ALG_ecPublicKey){
+		c->curve = parse_curve(&elpubkey->hd);
+		if(c->curve < 0)
+			goto errret;
+	}
+	elpubkey = elpubkey->tl;
+	if(!is_bitstring(&elpubkey->hd, &bits))
 		goto errret;
- 	c->publickey = makebytes(bits->data, bits->len);
+	elpubkey->hd.val.u.bitstringval = nil;	/* transfer ownership */
+	c->publickey = bits;
 
 	/*resume Certificate */
 	if(c->signature_alg < 0)
 		goto errret;
- 	if(!is_bitstring(esig, &bits))
+	if(!is_bitstring(esig, &bits))
 		goto errret;
- 	c->signature = makebytes(bits->data, bits->len);
+	esig->val.u.bitstringval = nil;	/* transfer ownership */
+	c->signature = bits;
 	ok = 1;
 
 errret:
@@ -1836,45 +1967,38 @@
 }
 
 /*
- *	RSAPublickKey :: SEQUENCE {
+ *	RSAPublickKey ::= SEQUENCE {
  *		modulus INTEGER,
  *		publicExponent INTEGER
  *	}
  */
-static RSApub*
-decode_rsapubkey(Bytes* a)
+RSApub*
+asn1toRSApub(uchar *buf, int len)
 {
 	Elem e;
-	Elist *el, *l;
-	mpint *mp;
+	Elist *el;
 	RSApub* key;
 
-	l = nil;
-	key = rsapuballoc();
-	if(decode(a->data, a->len, &e) != ASN_OK)
+	key = nil;
+	if(decode(buf, len, &e) != ASN_OK)
 		goto errret;
 	if(!is_seq(&e, &el) || elistlen(el) != 2)
 		goto errret;
 
-	l = el;
-
-	key->n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	key = rsapuballoc();
+	if((key->n = asn1mpint(&el->hd)) == nil)
 		goto errret;
-
 	el = el->tl;
-	key->ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->ek = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
-	if(l != nil)
-		freeelist(l);
+	freevalfields(&e.val);
 	return key;
 errret:
-	if(l != nil)
-		freeelist(l);
+	freevalfields(&e.val);
 	rsapubfree(key);
 	return nil;
+
 }
 
 /*
@@ -1889,65 +2013,72 @@
  *		exponent2 INTEGER, -- d mod (q-1)
  *		coefficient INTEGER -- (inverse of q) mod p }
  */
-static RSApriv*
-decode_rsaprivkey(Bytes* a)
+RSApriv*
+asn1toRSApriv(uchar *buf, int len)
 {
 	int version;
 	Elem e;
 	Elist *el;
-	mpint *mp;
-	RSApriv* key;
+	Bytes *b;
+	RSApriv* key = nil;
 
-	key = rsaprivalloc();
-	if(decode(a->data, a->len, &e) != ASN_OK)
+	if(decode(buf, len, &e) != ASN_OK)
 		goto errret;
-	if(!is_seq(&e, &el) || elistlen(el) != 9)
+	if(!is_seq(&e, &el))
 		goto errret;
+
 	if(!is_int(&el->hd, &version) || version != 0)
 		goto errret;
 
+	if(elistlen(el) != 9){
+		if(elistlen(el) == 3
+		&& parse_alg(&el->tl->hd) == ALG_rsaEncryption
+		&& is_octetstring(&el->tl->tl->hd, &b)){
+			key = asn1toRSApriv(b->data, b->len);
+			if(key != nil)
+				goto done;
+		}
+		goto errret;
+	}
+
+	key = rsaprivalloc();
 	el = el->tl;
-	key->pub.n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->pub.n = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->pub.ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->pub.ek = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->dk = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->dk = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->q = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->q = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->p = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->p = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->kq = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->kq = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->kp = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->kp = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->c2 = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->c2 = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
+done:
+	freevalfields(&e.val);
 	return key;
 errret:
+	freevalfields(&e.val);
 	rsaprivfree(key);
 	return nil;
 }
@@ -2014,56 +2145,6 @@
 	return nil;
 }
 
-static mpint*
-asn1mpint(Elem *e)
-{
-	Bytes *b;
-	mpint *mp;
-	int v;
-
-	if(is_int(e, &v))
-		return itomp(v, nil);
-	if(is_bigint(e, &b)) {
-		mp = betomp(b->data, b->len, nil);
-		freebytes(b);
-		return mp;
-	}
-	return nil;
-}
-
-static mpint*
-pkcs1pad(Bytes *b, mpint *modulus)
-{
-	int n = (mpsignif(modulus)+7)/8;
-	int pm1, i;
-	uchar *p;
-	mpint *mp;
-
-	pm1 = n - 1 - b->len;
-	p = (uchar*)emalloc(n);
-	p[0] = 0;
-	p[1] = 1;
-	for(i = 2; i < pm1; i++)
-		p[i] = 0xFF;
-	p[pm1] = 0;
-	memcpy(&p[pm1+1], b->data, b->len);
-	mp = betomp(p, n, nil);
-	free(p);
-	return mp;
-}
-
-RSApriv*
-asn1toRSApriv(uchar *kd, int kn)
-{
-	Bytes *b;
-	RSApriv *key;
-
-	b = makebytes(kd, kn);
-	key = decode_rsaprivkey(b);
-	freebytes(b);
-	return key;
-}
-
 DSApriv*
 asn1toDSApriv(uchar *kd, int kn)
 {
@@ -2081,219 +2162,244 @@
  * Our ASN.1 library doesn't return pointers into the original
  * data array, so we need to do a little hand decoding.
  */
-static void
-digest_certinfo(Bytes *cert, DigestFun digestfun, uchar *digest)
+static int
+digest_certinfo(uchar *cert, int ncert, DigestAlg *da, uchar *digest)
 {
 	uchar *info, *p, *pend;
-	ulong infolen;
 	int isconstr, length;
 	Tag tag;
 	Elem elem;
 
-	p = cert->data;
-	pend = cert->data + cert->len;
+	p = cert;
+	pend = cert + ncert;
 	if(tag_decode(&p, pend, &tag, &isconstr) != ASN_OK ||
 	   tag.class != Universal || tag.num != SEQUENCE ||
 	   length_decode(&p, pend, &length) != ASN_OK ||
 	   p+length > pend ||
 	   p+length < p)
-		return;
+		return -1;
 	info = p;
 	if(ber_decode(&p, pend, &elem) != ASN_OK)
-		return;
+		return -1;
 	freevalfields(&elem.val);
 	if(elem.tag.num != SEQUENCE)
-		return;
-	infolen = p - info;
-	(*digestfun)(info, infolen, digest, nil);
+		return -1;
+	(*da->fun)(info, p - info, digest, nil);
+	return da->len;
 }
 
-static char*
-verify_signature(Bytes* signature, RSApub *pk, uchar *edigest, Elem **psigalg)
+mpint*
+pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype)
 {
-	Elem e;
-	Elist *el;
+	int i, n = (mpsignif(modulus)-1)/8;
+	int pad = n - 2 - len;
+	uchar *p;
+	mpint *mp;
+
+	if(pad < 8){
+		werrstr("rsa modulus too small");
+		return nil;
+	}
+	if((p = malloc(n)) == nil)
+		return nil;
+	p[0] = blocktype;
+	switch(blocktype){
+	default:
+	case 1:
+		memset(p+1, 0xFF, pad);
+		break;
+	case 2:
+		for(i=1; i <= pad; i++)
+			p[i] = 1 + nfastrand(255);
+		break;
+	}
+	p[1+pad] = 0;
+	memmove(p+2+pad, buf, len);
+	mp = betomp(p, n, nil);
+	free(p);
+	return mp;
+}
+
+int
+pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype)
+{
+	uchar *p = buf + 1, *e = buf + len;
+
+	if(len < 1 || len != (mpsignif(modulus)-1)/8 || buf[0] != blocktype)
+		return -1;
+	switch(blocktype){
+	default:
+	case 1:
+		while(p < e && *p == 0xFF)
+			p++;
+		break;
+	case 2:
+		while(p < e && *p != 0x00)
+			p++;
+		break;
+	}
+	if(p - buf <= 8 || p >= e || *p++ != 0x00)
+		return -1;
+	memmove(buf, p, len = e - p);
+	return len;
+}
+
+static char Ebadsig[] = "bad signature";
+
+char*
+X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk)
+{
+	mpint *x, *y;
+	DigestAlg **dp;
 	Bytes *digest;
-	uchar *pkcs1buf, *buf;
-	int buflen;
-	mpint *pkcs1;
-	int nlen;
+	uchar *buf;
+	int len;
 	char *err;
 
-	err = nil;
-	pkcs1buf = nil;
+	x = betomp(sig, siglen, nil);
+	y = rsaencrypt(pk, x, nil);
+	mpfree(x);
+	len = mptobe(y, nil, 0, &buf);
+	mpfree(y);	
+
+	err = Ebadsig;
+	len = pkcs1unpadbuf(buf, len, pk->n, 1);
+	if(len == edigestlen && tsmemcmp(buf, edigest, edigestlen) == 0)
+		err = nil;
+	for(dp = digestalg; err != nil && *dp != nil; dp++){
+		if((*dp)->len != edigestlen)
+			continue;
+		digest = encode_digest(*dp, edigest);
+		if(digest->len == len && tsmemcmp(digest->data, buf, len) == 0)
+			err = nil;
+		freebytes(digest);
+	}
+	free(buf);
+	return err;
+}
 
-	/* one less than the byte length of the modulus */
-	nlen = (mpsignif(pk->n)-1)/8;
+char*
+X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub)
+{
+	Elem e;
+	Elist *el;
+	mpint *r, *s;
+	char *err;
 
-	/* see 9.2.1 of rfc2437 */
-	pkcs1 = betomp(signature->data, signature->len, nil);
-	mpexp(pkcs1, pk->ek, pk->n, pkcs1);
-	buflen = mptobe(pkcs1, nil, 0, &pkcs1buf);
-	buf = pkcs1buf;
-	if(buflen != nlen || buf[0] != 1) {
-		err = "expected 1";
+	r = s = nil;
+	err = Ebadsig;
+	if(decode(sig, siglen, &e) != ASN_OK)
 		goto end;
-	}
-	buf++;
-	while(buf[0] == 0xff)
-		buf++;
-	if(buf[0] != 0) {
-		err = "expected 0";
+	if(!is_seq(&e, &el) || elistlen(el) != 2)
 		goto end;
-	}
-	buf++;
-	buflen -= buf-pkcs1buf;
-	if(decode(buf, buflen, &e) != ASN_OK || !is_seq(&e, &el) || elistlen(el) != 2 ||
-			!is_octetstring(&el->tl->hd, &digest)) {
-		err = "signature parse error";
+	r = asn1mpint(&el->hd);
+	if(r == nil)
 		goto end;
-	}
-	*psigalg = &el->hd;
-	if(memcmp(digest->data, edigest, digest->len) == 0)
+	el = el->tl;
+	s = asn1mpint(&el->hd);
+	if(s == nil)
 		goto end;
-	err = "digests did not match";
-
+	if(ecdsaverify(dom, pub, edigest, edigestlen, r, s))
+		err = nil;
 end:
-	if(pkcs1 != nil)
-		mpfree(pkcs1);
-	if(pkcs1buf != nil)
-		free(pkcs1buf);
+	freevalfields(&e.val);
+	mpfree(s);
+	mpfree(r);
 	return err;
 }
 
-RSApub*
-X509toRSApub(uchar *cert, int ncert, char *name, int nname)
+static void
+copysubject(char *name, int nname, char *subject)
 {
 	char *e;
-	Bytes *b;
+
+	if(name == nil)
+		return;
+	memset(name, 0, nname);
+	if(subject == nil)
+		return;
+	strncpy(name, subject, nname-1);
+	e = strchr(name, ',');
+	if(e != nil)
+		*e = 0;	/* take just CN part of Distinguished Name */
+}
+
+ECpub*
+X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom)
+{
 	CertX509 *c;
-	RSApub *pk;
+	ECpub *pub;
 
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil)
 		return nil;
-	if(name != nil && c->subject != nil){
-		e = strchr(c->subject, ',');
-		if(e != nil)
-			*e = 0;	/* take just CN part of Distinguished Name */
-		strncpy(name, c->subject, nname);
+	copysubject(name, nname, c->subject);
+	pub = nil;
+	if(c->publickey_alg == ALG_ecPublicKey){
+		ecdominit(dom, namedcurves[c->curve]);
+		pub = ecdecodepub(dom, c->publickey->data, c->publickey->len);
+		if(pub == nil)
+			ecdomfree(dom);
 	}
-	pk = decode_rsapubkey(c->publickey);
 	freecert(c);
-	return pk;
+	return pub;
 }
 
-int
-getalgo(Elem *e)
+char*
+X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pk)
 {
-	Value *v;
-	Elist *el;
-	int a;
+	char *e;
+	CertX509 *c;
+	int digestlen;
+	uchar digest[MAXdlen];
 
-	if((a = parse_alg(e)) >= 0)
-		return a;
-	v = &e->val;
-	if(v->tag == VSeq){
-		print("Seq\n");
-		for(el = v->u.seqval; el!=nil; el = el->tl){
-			if((a = getalgo(&el->hd)) >= 0)
-				return a;
-		}
+	c = decode_cert(cert, ncert);
+	if(c == nil)
+		return "cannot decode cert";
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		return "cannot decode certinfo";
 	}
-	return -1;
+	e = X509ecdsaverifydigest(c->signature->data, c->signature->len, digest, digestlen, dom, pk);
+	freecert(c);
+	return e;
 }
 
-static void edump(Elem e);
-
 RSApub*
-asn1toRSApub(uchar *der, int nder)
+X509toRSApub(uchar *cert, int ncert, char *name, int nname)
 {
-	Elem e;
-	Elist *el, *l;
-	int n;
-	Bits *b;
-	RSApub *key;
-	mpint *mp;
+	CertX509 *c;
+	RSApub *pub;
 
-	if(decode(der, nder, &e) != ASN_OK){
-		print("didn't parse\n");
-		return nil;
-	}
-	if(!is_seq(&e, &el)){
-		print("no seq");
-		return nil;
-	}
-	if((n = elistlen(el)) != 2){
-		print("bad length %d\n", n);
-		return nil;
-	}
-	if((n = getalgo(&el->hd)) < 0){
-		print("no algo\n");
-		return nil;
-	}
-	if(n != 0){
-		print("cant do algorithm %d\n", n);
-		return nil;
-	}
-	if(!is_bitstring(&el->tl->hd, &b)){
-		print("no bits\n");
-		return nil;
-	}
-	if(decode(b->data, b->len, &e) != ASN_OK){
-		print("no second decode\n");
-		return nil;
-	}
-	if(!is_seq(&e, &el)){
-		print("no second seq\n");
-		return nil;
-	}
-	if(elistlen(el) != 2){
-		print("no second length\n");
+	c = decode_cert(cert, ncert);
+	if(c == nil)
 		return nil;
-	}
-	key = rsapuballoc();
-
-	l = el;
-
-	key->n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
-		goto errret;
-
-	el = el->tl;
-	key->ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
-		goto errret;
-
-	if(l != nil)
-		freeelist(l);
-	return key;
-errret:
-	if(l != nil)
-		freeelist(l);
-	rsapubfree(key);
-	return nil;
+	copysubject(name, nname, c->subject);
+	pub = nil;
+	if(c->publickey_alg == ALG_rsaEncryption)
+		pub = asn1toRSApub(c->publickey->data, c->publickey->len);
+	freecert(c);
+	return pub;
 }
 
 char*
-X509verify(uchar *cert, int ncert, RSApub *pk)
+X509rsaverify(uchar *cert, int ncert, RSApub *pk)
 {
 	char *e;
-	Bytes *b;
 	CertX509 *c;
-	uchar digest[SHA1dlen];
-	Elem *sigalg;
+	int digestlen;
+	uchar digest[MAXdlen];
 
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	if(c != nil)
-		digest_certinfo(b, digestalg[c->signature_alg], digest);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil)
 		return "cannot decode cert";
-	e = verify_signature(c->signature, pk, digest, &sigalg);
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		return "cannot decode certinfo";
+	}
+	e = X509rsaverifydigest(c->signature->data, c->signature->len, digest, digestlen, pk);
 	freecert(c);
 	return e;
 }
@@ -2326,25 +2432,50 @@
 mkbigint(mpint *p)
 {
 	Elem e;
-	uchar *buf;
-	int buflen;
 
 	e.tag.class = Universal;
 	e.tag.num = INTEGER;
 	e.val.tag = VBigInt;
-	buflen = mptobe(p, nil, 0, &buf);
-	e.val.u.bigintval = makebytes(buf, buflen);
-	free(buf);
+	e.val.u.bigintval = newbytes((mpsignif(p)+8)/8);
+	if(p->sign < 0){
+		mpint *s = mpnew(e.val.u.bigintval->len*8+1);
+		mpleft(mpone, e.val.u.bigintval->len*8, s);
+		mpadd(p, s, s);
+		mptober(s, e.val.u.bigintval->data, e.val.u.bigintval->len);
+		mpfree(s);
+	} else {
+		mptober(p, e.val.u.bigintval->data, e.val.u.bigintval->len);
+	}
 	return e;
 }
 
+static int
+printable(char *s)
+{
+	int c;
+
+	while((c = (uchar)*s++) != 0){
+		if((c >= 'a' && c <= 'z')
+		|| (c >= 'A' && c <= 'Z')
+		|| (c >= '0' && c <= '9')
+		|| strchr("'=()+,-./:? ", c) != nil)
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+#define DirectoryString 0
+
 static Elem
-mkstring(char *s)
+mkstring(char *s, int t)
 {
 	Elem e;
 
+	if(t == DirectoryString)
+		t = printable(s) ? PrintableString : UTF8String;
 	e.tag.class = Universal;
-	e.tag.num = IA5String;
+	e.tag.num = t;
 	e.val.tag = VString;
 	e.val.u.stringval = estrdup(s);
 	return e;
@@ -2384,7 +2515,7 @@
 	e.tag.class = Universal;
 	e.tag.num = UTCTime;
 	e.val.tag = VString;
-	snprint(utc, 50, "%.2d%.2d%.2d%.2d%.2d%.2dZ",
+	snprint(utc, sizeof(utc), "%.2d%.2d%.2d%.2d%.2d%.2dZ",
 		tm->year % 100, tm->mon+1, tm->mday, tm->hour, tm->min, tm->sec);
 	e.val.u.stringval = estrdup(utc);
 	return e;
@@ -2433,24 +2564,26 @@
 }
 
 typedef struct Ints7pref {
-	int		len;
-	int		data[7];
+	int	len;
+	int	data[7];
 	char	prefix[4];
+	int	stype;
 } Ints7pref;
 Ints7pref DN_oid[] = {
-	{4, 2, 5, 4, 6, 0, 0, 0,  "C="},
-	{4, 2, 5, 4, 8, 0, 0, 0,  "ST="},
-	{4, 2, 5, 4, 7, 0, 0, 0,  "L="},
-	{4, 2, 5, 4, 10, 0, 0, 0, "O="},
-	{4, 2, 5, 4, 11, 0, 0, 0, "OU="},
-	{4, 2, 5, 4, 3, 0, 0, 0,  "CN="},
- 	{7, 1,2,840,113549,1,9,1, "E="},
+	{4, 2, 5, 4, 6, 0, 0, 0,        "C=", PrintableString},
+	{4, 2, 5, 4, 8, 0, 0, 0,        "ST=",DirectoryString},
+	{4, 2, 5, 4, 7, 0, 0, 0,        "L=", DirectoryString},
+	{4, 2, 5, 4, 10, 0, 0, 0,       "O=", DirectoryString},
+	{4, 2, 5, 4, 11, 0, 0, 0,       "OU=",DirectoryString},
+	{4, 2, 5, 4, 3, 0, 0, 0,        "CN=",DirectoryString},
+	{7, 1,2,840,113549,1,9,1,       "E=", IA5String},
+	{7, 0,9,2342,19200300,100,1,25,	"DC=",IA5String},
 };
 
 static Elem
 mkname(Ints7pref *oid, char *subj)
 {
-	return mkset(mkel(mkseq(mkel(mkoid((Ints*)oid), mkel(mkstring(subj), nil))), nil));
+	return mkset(mkel(mkseq(mkel(mkoid((Ints*)oid), mkel(mkstring(subj, oid->stype), nil))), nil));
 }
 
 static Elem
@@ -2474,167 +2607,388 @@
 	return mkseq(el);
 }
 
-uchar*
-RSApubtoasn1(RSApub *pub, int *keylen)
+/*
+ * DigestInfo ::= SEQUENCE {
+ *	digestAlgorithm AlgorithmIdentifier,
+ *	digest OCTET STRING }
+ */
+static Bytes*
+encode_digest(DigestAlg *da, uchar *digest)
 {
-	Elem pubkey;
-	Bytes *pkbytes;
-	uchar *key;
+	Bytes *b = nil;
+	Elem e = mkseq(
+		mkel(mkalg(da->alg),
+		mkel(mkoctet(digest, da->len),
+		nil)));
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
 
-	key = nil;
-	pubkey = mkseq(mkel(mkbigint(pub->n),mkel(mkint(mptoi(pub->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
+int
+asn1encodedigest(DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest, uchar *buf, int len)
+{
+	Bytes *bytes;
+	DigestAlg **dp;
+
+	for(dp = digestalg; *dp != nil; dp++){
+		if((*dp)->fun != fun)
+			continue;
+		bytes = encode_digest(*dp, digest);
+		if(bytes == nil)
+			break;
+		if(bytes->len > len){
+			freebytes(bytes);
+			break;
+		}
+		len = bytes->len;
+		memmove(buf, bytes->data, len);
+		freebytes(bytes);
+		return len;
+	}
+	return -1;
+}
+
+static Elem
+mkcont(Elem e, int num)
+{
+	e = mkseq(mkel(e, nil));
+	e.tag.class = Context;
+	e.tag.num = num;
+	return e;
+}
+
+static Elem
+mkaltname(char *s)
+{
+	Elem e;
+	int i;
+
+	for(i=0; i<nelem(DN_oid); i++){
+		if(strstr(s, DN_oid[i].prefix) != nil)
+			return mkcont(mkDN(s), 4); /* DN */
+	}
+	e = mkstring(s, IA5String);
+	e.tag.class = Context;
+	e.tag.num = strchr(s, '@') != nil ? 1 : 2; /* email : DNS */
+	return e;
+}
+
+static Elist*
+mkaltnames(char *alts)
+{
+	Elist *el;
+	char *s, *p;
+
+	if(alts == nil)
+		return nil;
+
+	el = nil;
+	alts = estrdup(alts);
+	for(s = alts; s != nil; s = p){
+		while(*s == ' ')
+			s++;
+		if(*s == '\0')
+			break;
+		if((p = strchr(s, ',')) != nil)
+			*p++ = 0;
+		el = mkel(mkaltname(s), el);
+	}
+	free(alts);
+	return el;
+}
+
+static Elist*
+mkextel(Elem e, Ints *oid, Elist *el)
+{
+	Bytes *b = nil;
+
+	if(encode(e, &b) == ASN_OK){
+		el = mkel(mkseq(
+			mkel(mkoid(oid),
+			mkel(mkoctet(b->data, b->len),
+			nil))), el);
+		freebytes(b);
+	}
+	freevalfields(&e.val);
+	return el;
+}
+
+static Ints15 oid_subjectAltName = {4, 2, 5, 29, 17 };
+static Ints15 oid_extensionRequest = { 7, 1, 2, 840, 113549, 1, 9, 14};
+
+static Elist*
+mkextensions(char *alts, int req)
+{
+	Elist *sl, *xl;
+
+	xl = nil;
+	if((sl = mkaltnames(alts)) != nil)
+		xl = mkextel(mkseq(sl), (Ints*)&oid_subjectAltName, xl);
+	if(xl != nil){
+		if(req) return mkel(mkcont(mkseq(
+			mkel(mkoid((Ints*)&oid_extensionRequest),
+			mkel(mkset(mkel(mkseq(xl), nil)), nil))), 0), nil);
+		return mkel(mkcont(mkseq(xl), 3), nil);
+	}
+	return nil;
+}
+
+static char*
+splitalts(char *s)
+{
+	int q;
+
+	for(q = 0; *s != '\0'; s++){
+		if(*s == '\'')
+			q ^= 1;
+		else if(q == 0 && *s == ','){
+			*s++ = 0;
+			return s;
+		}
+	}
+	return nil;
+}
+
+static Bytes*
+encode_rsapubkey(RSApub *pk)
+{
+	Bytes *b = nil;
+	Elem e = mkseq(
+		mkel(mkbigint(pk->n),
+		mkel(mpsignif(pk->ek)<32 ? mkint(mptoi(pk->ek)) : mkbigint(pk->ek),
 		nil)));
-	freebytes(pkbytes);
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	if(keylen)
-		*keylen = pkbytes->len;
-	key = malloc(pkbytes->len);
-	memmove(key, pkbytes->data, pkbytes->len);
-	free(pkbytes);
-errret:
-	freevalfields(&pubkey.val);
-	return key;
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
+
+static Bytes*
+encode_rsaprivkey(RSApriv *k)
+{
+	Bytes *b = nil;
+	RSApub *pk = &k->pub;
+	Elem e = mkseq(
+		mkel(mkint(0),
+		mkel(mkbigint(pk->n),
+		mkel(mpsignif(pk->ek)<32 ? mkint(mptoi(pk->ek)) : mkbigint(pk->ek),
+		mkel(mkbigint(k->dk),
+		mkel(mkbigint(k->p),
+		mkel(mkbigint(k->q),
+		mkel(mkbigint(k->kp),
+		mkel(mkbigint(k->kq),
+		mkel(mkbigint(k->c2),
+		nil))))))))));
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
+
+int
+asn1encodeRSApub(RSApub *pk, uchar *buf, int len)
+{
+	Bytes *b = encode_rsapubkey(pk);
+	if(b == nil)
+		return -1;
+	if(b->len > len){
+		freebytes(b);
+		werrstr("buffer too small");
+		return -1;
+	}
+	memmove(buf, b->data, len = b->len);
+	freebytes(b);
+	return len;
+}
+
+int
+asn1encodeRSApriv(RSApriv *k, uchar *buf, int len)
+{
+	Bytes *b;
+	b = encode_rsaprivkey(k);
+	if(b == nil)
+		return -1;
+	if(b->len > len){
+		freebytes(b);
+		werrstr("buffer too small");
+		return -1;
+	}
+	memmove(buf, b->data, len = b->len);
+	freebytes(b);
+	return len;
 }
 
 uchar*
-X509gen(RSApriv *priv, char *subj, ulong valid[2], int *certlen)
+X509rsagen(RSApriv *priv, char *subj, ulong valid[2], int *certlen)
 {
-	int serial = 0;
+	int serial = 0, sigalg = ALG_sha256WithRSAEncryption;
 	uchar *cert = nil;
-	RSApub *pk = rsaprivtopub(priv);
 	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
-	Elem e, certinfo, issuer, subject, pubkey, validity, sig;
-	uchar digest[MD5dlen], *buf;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
 	int buflen;
 	mpint *pkcs1;
+	char *alts;
 
-	e.val.tag = VInt;  /* so freevalfields at errret is no-op */
-	issuer = mkDN(subj);
-	subject = mkDN(subj);
-	pubkey = mkseq(mkel(mkbigint(pk->n),mkel(mkint(mptoi(pk->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
-		nil)));
-	freebytes(pkbytes);
-	validity = mkseq(
-		mkel(mkutc(valid[0]),
-		mkel(mkutc(valid[1]),
-		nil)));
-	certinfo = mkseq(
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
+		mkel(mkcont(mkint(2), 0),
 		mkel(mkint(serial),
-		mkel(mkalg(ALG_md5WithRSAEncryption),
-		mkel(issuer,
-		mkel(validity,
-		mkel(subject,
-		mkel(pubkey,
-		nil)))))));
-	if(encode(certinfo, &certinfobytes) != ASN_OK)
+		mkel(mkalg(sigalg),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkutc(valid[0]),
+			mkel(mkutc(valid[1]),
+			nil))),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 0)))))))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
 		goto errret;
-	md5(certinfobytes->data, certinfobytes->len, digest, 0);
+
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
 	freebytes(certinfobytes);
-	sig = mkseq(
-		mkel(mkalg(ALG_md5),
-		mkel(mkoctet(digest, MD5dlen),
-		nil)));
-	if(encode(sig, &sigbytes) != ASN_OK)
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
 		goto errret;
-	pkcs1 = pkcs1pad(sigbytes, pk->n);
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
 	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
 	rsadecrypt(priv, pkcs1, pkcs1);
 	buflen = mptobe(pkcs1, nil, 0, &buf);
 	mpfree(pkcs1);
 	e = mkseq(
 		mkel(certinfo,
-		mkel(mkalg(ALG_md5WithRSAEncryption),
+		mkel(mkalg(sigalg),
 		mkel(mkbits(buf, buflen),
 		nil))));
 	free(buf);
 	if(encode(e, &certbytes) != ASN_OK)
 		goto errret;
-	if(certlen)
+	if(certlen != nil)
 		*certlen = certbytes->len;
-	cert = certbytes->data;
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
 errret:
 	freevalfields(&e.val);
+	free(subj);
 	return cert;
 }
 
 uchar*
-X509req(RSApriv *priv, char *subj, int *certlen)
+X509rsareq(RSApriv *priv, char *subj, int *certlen)
 {
 	/* RFC 2314, PKCS #10 Certification Request Syntax */
-	int version = 0;
+	int version = 0, sigalg = ALG_sha256WithRSAEncryption;
 	uchar *cert = nil;
-	RSApub *pk = rsaprivtopub(priv);
 	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
-	Elem e, certinfo, subject, pubkey, sig;
-	uchar digest[MD5dlen], *buf;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
 	int buflen;
 	mpint *pkcs1;
+	char *alts;
 
-	e.val.tag = VInt;  /* so freevalfields at errret is no-op */
-	subject = mkDN(subj);
-	pubkey = mkseq(mkel(mkbigint(pk->n),mkel(mkint(mptoi(pk->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
-		nil)));
-	freebytes(pkbytes);
-	certinfo = mkseq(
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
 		mkel(mkint(version),
-		mkel(subject,
-		mkel(pubkey,
-		nil))));
-	if(encode(certinfo, &certinfobytes) != ASN_OK)
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 1)))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
 		goto errret;
-	md5(certinfobytes->data, certinfobytes->len, digest, 0);
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
 	freebytes(certinfobytes);
-	sig = mkseq(
-		mkel(mkalg(ALG_md5),
-		mkel(mkoctet(digest, MD5dlen),
-		nil)));
-	if(encode(sig, &sigbytes) != ASN_OK)
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
 		goto errret;
-	pkcs1 = pkcs1pad(sigbytes, pk->n);
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
 	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
 	rsadecrypt(priv, pkcs1, pkcs1);
 	buflen = mptobe(pkcs1, nil, 0, &buf);
 	mpfree(pkcs1);
 	e = mkseq(
 		mkel(certinfo,
-		mkel(mkalg(ALG_md5),
+		mkel(mkalg(sigalg),
 		mkel(mkbits(buf, buflen),
 		nil))));
 	free(buf);
 	if(encode(e, &certbytes) != ASN_OK)
 		goto errret;
-	if(certlen)
+	if(certlen != nil)
 		*certlen = certbytes->len;
-	cert = certbytes->data;
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
 errret:
 	freevalfields(&e.val);
+	free(subj);
 	return cert;
 }
 
+static void
+digestSPKI(int alg, uchar *pubkey, int npubkey, DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest)
+{
+	Bytes *b = nil;
+	Elem e = mkseq(mkel(mkalg(alg), mkel(mkbits(pubkey, npubkey), nil)));
+	encode(e, &b);
+	freevalfields(&e.val);
+	(*fun)(b->data, b->len, digest, nil);
+	freebytes(b);
+}
+
+int
+X509digestSPKI(uchar *cert, int ncert, DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest)
+{
+	CertX509 *c;
+
+	c = decode_cert(cert, ncert);
+	if(c == nil){
+		werrstr("cannot decode cert");
+		return -1;
+	}
+	digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, fun, digest);
+	freecert(c);
+	return 0;
+}
+
 static char*
 tagdump(Tag tag)
 {
-	if(tag.class != Universal)
-		return smprint("class%d,num%d", tag.class, tag.num);
+	static char buf[32];
+
+	if(tag.class != Universal){
+		snprint(buf, sizeof(buf), "class%d,num%d", tag.class, tag.num);
+		return buf;
+	}
 	switch(tag.num){
 	case BOOLEAN: return "BOOLEAN";
 	case INTEGER: return "INTEGER";
@@ -2663,7 +3017,8 @@
 	case UniversalString: return "UniversalString";
 	case BMPString: return "BMPString";
 	default:
-		return smprint("Universal,num%d", tag.num);
+		snprint(buf, sizeof(buf), "Universal,num%d", tag.num);
+		return buf;
 	}
 }
 
@@ -2683,10 +3038,7 @@
 	case VBigInt: print("BigInt[%d] %.2x%.2x...",v.u.bigintval->len,v.u.bigintval->data[0],v.u.bigintval->data[1]); break;
 	case VReal: print("Real..."); break;
 	case VOther: print("Other..."); break;
-	case VBitString: print("BitString");
-		for(i = 0; i<v.u.bitstringval->len; i++)
-			print(" %02x", v.u.bitstringval->data[i]);
-		break;
+	case VBitString: print("BitString[%d]...", v.u.bitstringval->len*8 - v.u.bitstringval->unusedbits); break;
 	case VNull: print("Null"); break;
 	case VEOC: print("EOC..."); break;
 	case VObjId: print("ObjId");
@@ -2722,20 +3074,24 @@
 X509dump(uchar *cert, int ncert)
 {
 	char *e;
-	Bytes *b;
 	CertX509 *c;
-	RSApub *pk;
-	uchar digest[SHA1dlen];
-	Elem *sigalg;
+	RSApub *rsapub;
+	ECpub *ecpub;
+	ECdomain ecdom;
+	int digestlen;
+	uchar digest[MAXdlen];
 
 	print("begin X509dump\n");
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	if(c != nil)
-		digest_certinfo(b, digestalg[c->signature_alg], digest);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil){
-		print("cannot decode cert");
+		print("cannot decode cert\n");
+		return;
+	}
+
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		print("cannot decode certinfo\n");
 		return;
 	}
 
@@ -2743,20 +3099,47 @@
 	print("issuer %s\n", c->issuer);
 	print("validity %s %s\n", c->validity_start, c->validity_end);
 	print("subject %s\n", c->subject);
-	pk = decode_rsapubkey(c->publickey);
-	print("pubkey e=%B n(%d)=%B\n", pk->ek, mpsignif(pk->n), pk->n);
-
-	print("sigalg=%d digest=%.*H\n", c->signature_alg, MD5dlen, digest);
-	e = verify_signature(c->signature, pk, digest, &sigalg);
-	if(e==nil){
-		e = "nil (meaning ok)";
-		print("sigalg=\n");
-		if(sigalg)
-			edump(*sigalg);
+	print("sigalg=%d digest=%.*H\n", c->signature_alg, digestlen, digest);
+	print("publickey_alg=%d pubkey[%d] %.*H\n", c->publickey_alg, c->publickey->len,
+		c->publickey->len, c->publickey->data);
+
+	switch(c->publickey_alg){
+	case ALG_rsaEncryption:
+		rsapub = asn1toRSApub(c->publickey->data, c->publickey->len);
+		if(rsapub != nil){
+			print("rsa pubkey e=%B n(%d)=%B\n", rsapub->ek, mpsignif(rsapub->n), rsapub->n);
+			e = X509rsaverifydigest(c->signature->data, c->signature->len,
+				digest, digestlen, rsapub);
+			if(e==nil)
+				e = "nil (meaning ok)";
+			print("self-signed X509rsaverifydigest returns: %s\n", e);
+			rsapubfree(rsapub);
+		}
+		break;
+	case ALG_ecPublicKey:
+		ecdominit(&ecdom, namedcurves[c->curve]);
+		ecpub = ecdecodepub(&ecdom, c->publickey->data, c->publickey->len);
+		if(ecpub != nil){
+			e = X509ecdsaverifydigest(c->signature->data, c->signature->len,
+				digest, digestlen, &ecdom, ecpub);
+			if(e==nil)
+				e = "nil (meaning ok)";
+			print("self-signed X509ecdsaverifydigest returns: %s\n", e);
+			ecpubfree(ecpub);
+		}
+		ecdomfree(&ecdom);
+		break;
 	}
-	print("self-signed verify_signature returns: %s\n", e);
 
-	rsapubfree(pk);
+	digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, sha2_256, digest);
+	print("publickey_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha2_256(cert, ncert, digest, nil);
+	print("cert_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha1(cert, ncert, digest, nil);
+	print("cert_thumbprint sha1=%.*H\n", SHA1dlen, digest);
+
 	freecert(c);
 	print("end X509dump\n");
 }
--- sys/src/cmd/auth/rsa2csr.c	Tue Mar 25 18:56:03 2003
+++ /sys/src/cmd/auth/rsa2csr.c	Sat Jun 26 21:21:40 2021
@@ -34,7 +34,7 @@
 	if((key = getkey(argc-1, argv+1, 1, nil)) == nil)
 		sysfatal("%r");
 
-	cert = X509req(key, argv[0], &len);
+	cert = X509rsareq(key, argv[0], &len);
 	if(cert == nil)
 		sysfatal("X509req: %r");
 
--- sys/src/cmd/auth/rsa2x509.c	Wed Jun 18 17:43:00 2008
+++ /sys/src/cmd/auth/rsa2x509.c	Sat Jun 26 21:21:40 2021
@@ -41,7 +41,7 @@
 	if((key = getkey(argc-1, argv+1, 1, nil)) == nil)
 		sysfatal("%r");
 
-	cert = X509gen(key, argv[0], valid, &len);
+	cert = X509rsagen(key, argv[0], valid, &len);
 	if(cert == nil)
 		sysfatal("X509gen: %r");
 
--- sys/src/cmd/tlsclient.c	Tue Jun 18 05:39:09 2002
+++ /sys/src/cmd/tlsclient.c	Sat Jun 26 21:21:40 2021
@@ -2,11 +2,16 @@
 #include <libc.h>
 #include <mp.h>
 #include <libsec.h>
+#include <auth.h>
+
+int debug, auth, dialfile;
+char *keyspec = "";
+char *servername, *file, *filex, *ccert, *dumpcert;
 
 void
 usage(void)
 {
-	fprint(2, "usage: tlsclient [-t /sys/lib/tls/xxx] [-x /sys/lib/tls/xxx.exclude] dialstring\n");
+	fprint(2, "usage: tlsclient [-D] [-a [-k keyspec] ] [-c clientcert.pem] [-d servercert] [-t /sys/lib/tls/xxx] [-x /sys/lib/tls/xxx.exclude] [-n servername] [-o] dialstring [cmd [args...]]\n");
 	exits("usage");
 }
 
@@ -21,65 +26,140 @@
 			break;
 }
 
+static int
+reporter(char *fmt, ...)
+{
+	va_list ap;
+	
+	va_start(ap, fmt);
+	fprint(2, "%s:  tls reports ", argv0);
+	vfprint(2, fmt, ap);
+	fprint(2, "\n");
+
+	va_end(ap);
+	return 0;
+}
+
 void
 main(int argc, char **argv)
 {
-	int fd, netfd;
-	uchar digest[20];
-	TLSconn conn;
-	char *addr, *file, *filex;
+	int fd, dfd;
+	char *addr;
+	TLSconn *conn;
 	Thumbprint *thumb;
+	AuthInfo *ai = nil;
+
+	fmtinstall('[', encodefmt);
+	fmtinstall('H', encodefmt);
 
-	file = nil;
-	filex = nil;
-	thumb = nil;
 	ARGBEGIN{
+	case 'D':
+		debug++;
+		break;
+	case 'a':
+		auth++;
+		break;
+	case 'k':
+		keyspec = EARGF(usage());
+		break;
 	case 't':
 		file = EARGF(usage());
 		break;
 	case 'x':
 		filex = EARGF(usage());
 		break;
+	case 'c':
+		ccert = EARGF(usage());
+		break;
+	case 'd':
+		dumpcert = EARGF(usage());
+		break;
+	case 'n':
+		servername = EARGF(usage());
+		break;
+	case 'o':
+		dialfile = 1;
+		break;
 	default:
 		usage();
 	}ARGEND
 
-	if(argc != 1)
+	if(argc < 1)
 		usage();
 
 	if(filex && !file)	
 		sysfatal("specifying -x without -t is useless");
+
 	if(file){
-		thumb = initThumbprints(file, filex);
+		thumb = initThumbprints(file, filex, "x509");
 		if(thumb == nil)
 			sysfatal("initThumbprints: %r");
-	}
+	} else
+		thumb = nil;
 
-	addr = argv[0];
-	if((netfd = dial(addr, 0, 0, 0)) < 0)
+	addr = *argv++;
+	if((fd = dialfile? open(addr, ORDWR): dial(addr, 0, 0, 0)) < 0)
 		sysfatal("dial %s: %r", addr);
 
-	memset(&conn, 0, sizeof conn);
-	fd = tlsClient(netfd, &conn);
+	conn = (TLSconn*)mallocz(sizeof *conn, 1);
+	conn->serverName = servername;
+	if(ccert){
+		conn->cert = readcert(ccert, &conn->certlen);
+		if(conn->cert == nil)
+			sysfatal("readcert: %r");
+	}
+
+	if(auth){
+		ai = auth_proxy(fd, auth_getkey, "proto=p9any role=client %s", keyspec);
+		if(ai == nil)
+			sysfatal("auth_proxy: %r");
+
+		conn->pskID = "p9secret";
+		conn->psk = ai->secret;
+		conn->psklen = ai->nsecret;
+	}
+
+	if(debug)
+		conn->trace = reporter;
+
+	fd = tlsClient(fd, conn);
 	if(fd < 0)
 		sysfatal("tlsclient: %r");
+
+	if(dumpcert){
+		if((dfd = create(dumpcert, OWRITE, 0666)) < 0)
+			sysfatal("create: %r");
+		if(conn->cert != nil)
+			write(dfd, conn->cert, conn->certlen);
+		write(dfd, "", 0);
+		close(dfd);
+	}
+
 	if(thumb){
-		if(conn.cert==nil || conn.certlen<=0)
-			sysfatal("server did not provide TLS certificate");
-		sha1(conn.cert, conn.certlen, digest, nil);
-		if(!okThumbprint(digest, thumb)){
-			fmtinstall('H', encodefmt);
-			sysfatal("server certificate %.*H not recognized", SHA1dlen, digest);
-		}
+		if(!okCertificate(conn->cert, conn->certlen, thumb))
+			sysfatal("cert for %s not recognized: %r", servername ? servername : addr);
+		freeThumbprints(thumb);
+	}
+
+	free(conn->cert);
+	free(conn->sessionID);
+	free(conn);
+	if(ai != nil)
+		auth_freeAI(ai);
+
+	if(*argv){
+		dup(fd, 0);
+		dup(fd, 1);
+		if(fd > 1)
+			close(fd);
+		exec(*argv, argv);
+		sysfatal("exec: %r");
 	}
-	free(conn.cert);
-	close(netfd);
 
 	rfork(RFNOTEG);
 	switch(fork()){
 	case -1:
-		fprint(2, "%s: fork: %r\n", argv0);
-		exits("dial");
+		sysfatal("fork: %r");
 	case 0:
 		xfer(0, fd);
 		break;
--- sys/src/cmd/upas/fs/imap4.c	Thu Mar 29 00:14:53 2012
+++ /sys/src/cmd/upas/fs/imap4.c	Sat Jun 26 21:21:40 2021
@@ -416,7 +416,7 @@
 	 * don't do this any more.  our local it people are rotating their
 	 * certificates faster than we can keep up.
 	 */
-	if(0 && (!imap->thumb || !okThumbprint(digest, imap->thumb))){
+	if(0 && (!imap->thumb || !okThumbprint(digest, SHA1dlen, imap->thumb))){
 		close(sfd);
 		werrstr("server certificate %.*H not recognized",
 			SHA1dlen, digest);
@@ -740,7 +740,7 @@
 	if(argc==1 && strcmp(argv[0], "thumbprint")==0){
 		if(imap->thumb)
 			freeThumbprints(imap->thumb);
-		imap->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude");
+		imap->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude", "x509");
 	}
 	if(strcmp(argv[0], "refresh")==0){
 		if(argc==1){
@@ -816,7 +816,7 @@
 		imap->mbox = "Inbox";
 	else
 		imap->mbox = f[4];
-	imap->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude");
+	imap->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude", "x509");
 
 	mb->aux = imap;
 	mb->sync = imap4sync;
--- sys/src/cmd/upas/fs/pop3.c	Thu Mar 29 00:14:53 2012
+++ /sys/src/cmd/upas/fs/pop3.c	Sat Jun 26 21:21:40 2021
@@ -134,7 +134,7 @@
 	 * don't do this any more.  our local it people are rotating their
 	 * certificates faster than we can keep up.
 	 */
-	if(0 && (!pop->thumb || !okThumbprint(digest, pop->thumb))){
+	if(0 && (!pop->thumb || !okThumbprint(digest, SHA1dlen, pop->thumb))){
 		fmtinstall('H', encodefmt);
 		close(fd);
 		free(conn.cert);
@@ -603,7 +603,7 @@
 	if(argc==1 && strcmp(argv[0], "thumbprint")==0){
 		if(pop->thumb)
 			freeThumbprints(pop->thumb);
-		pop->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude");
+		pop->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude", "x509");
 	}
 	if(strcmp(argv[0], "refresh")==0){
 		if(argc==1){
@@ -678,7 +678,7 @@
 	pop->needtls = poptls || apoptls;
 	pop->refreshtime = 60;
 	pop->notls = popnotls || apopnotls;
-	pop->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude");
+	pop->thumb = initThumbprints("/sys/lib/tls/mail", "/sys/lib/tls/mail.exclude", "x509");
 
 	mb->aux = pop;
 	mb->sync = pop3sync;
--- sys/src/cmd/upas/smtp/smtp.c	Fri Nov  8 23:54:09 2013
+++ /sys/src/cmd/upas/smtp/smtp.c	Sat Jun 26 21:21:40 2021
@@ -327,7 +327,7 @@
 	uchar hash[SHA1dlen];
 
 	err = nil;
-	goodcerts = initThumbprints(smtpthumbs, smtpexclthumbs);
+	goodcerts = initThumbprints(smtpthumbs, smtpexclthumbs, "x509");
 	if (goodcerts == nil) {
 		if (!okunksecure)
 			syslog(0, "smtp", "bad thumbprints in %s", smtpthumbs);
@@ -336,7 +336,7 @@
 
 	/* compute sha1 hash of remote's certificate, see if we know it */
 	sha1(c->cert, c->certlen, hash, nil);
-	if (!okThumbprint(hash, goodcerts) && !okunksecure) {
+	if (!okThumbprint(hash, SHA1dlen, goodcerts) && !okunksecure) {
 		h = malloc(2*sizeof hash + 1);
 		if (h != nil) {
 			enc16(h, 2*sizeof hash + 1, hash, sizeof hash);
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/cnfield.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,114 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/*
+ * fast reduction for crandall numbers of the form: 2^n - c
+ */
+
+enum {
+	MAXDIG = 1024 / Dbits,
+};
+
+typedef struct CNfield CNfield;
+struct CNfield
+{
+	Mfield;	
+
+	mpint	m[1];
+
+	int	s;
+	mpdigit	c;
+};
+
+static int
+cnreduce(Mfield *m, mpint *a, mpint *r)
+{
+	mpdigit q[MAXDIG-1], t[MAXDIG], d;
+	CNfield *f = (CNfield*)m;
+	int qn, tn, k;
+
+	k = f->top;
+	if((a->top - k) >= MAXDIG)
+		return -1;
+
+	mpleft(a, f->s, r);
+	if(r->top <= k)
+		mpbits(r, (k+1)*Dbits);
+
+	/* q = hi(r) */
+	qn = r->top - k;
+	memmove(q, r->p+k, qn*Dbytes);
+
+	/* r = lo(r) */
+	r->top = k;
+	r->sign = 1;
+
+	do {
+		/* t = q*c */
+		tn = qn+1;
+		memset(t, 0, tn*Dbytes);
+		mpvecdigmuladd(q, qn, f->c, t);
+
+		/* q = hi(t) */
+		qn = tn - k;
+		if(qn <= 0) qn = 0;
+		else memmove(q, t+k, qn*Dbytes);
+
+		/* r += lo(t) */
+		if(tn > k)
+			tn = k;
+		mpvecadd(r->p, k, t, tn, r->p);
+
+		/* if(r >= m) r -= m */
+		mpvecsub(r->p, k+1, f->m->p, k, t);
+		d = t[k];
+		for(tn = 0; tn < k; tn++)
+			r->p[tn] = (r->p[tn] & d) | (t[tn] & ~d);
+	} while(qn > 0);
+
+	if(f->s != 0)
+		mpright(r, f->s, r);
+	mpnorm(r);
+
+	return 0;
+}
+
+Mfield*
+cnfield(mpint *N)
+{
+	mpint *M, *C;
+	CNfield *f;
+	mpdigit d;
+	int s;
+
+	if(N->top <= 2 || N->top >= MAXDIG)
+		return nil;
+	f = nil;
+	d = N->p[N->top-1];
+	for(s = 0; (d & (mpdigit)1<<Dbits-1) == 0; s++)
+		d <<= 1;
+	C = mpnew(0);
+	M = mpcopy(N);
+	mpleft(N, s, M);
+	mpleft(mpone, M->top*Dbits, C);
+	mpsub(C, M, C);
+	if(C->top != 1)
+		goto out;
+	f = mallocz(sizeof(CNfield) + M->top*sizeof(mpdigit), 1);
+	if(f == nil)
+		goto out;
+	f->s = s;
+	f->c = C->p[0];
+	f->m->size = M->top;
+	f->m->p = (mpdigit*)&f[1];
+	mpassign(M, f->m);
+	mpassign(N, f);
+	f->reduce = cnreduce;
+	f->flags |= MPfield;
+out:
+	mpfree(M);
+	mpfree(C);
+
+	return f;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/gmfield.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,173 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/*
+ * fast reduction for generalized mersenne numbers (GM)
+ * using a series of additions and subtractions.
+ */
+
+enum {
+	MAXDIG = 1024/Dbits,
+};
+
+typedef struct GMfield GMfield;
+struct GMfield
+{
+	Mfield;	
+
+	mpint	m2[1];
+
+	int	nadd;
+	int	nsub;
+	int	indx[256];
+};
+
+static int
+gmreduce(Mfield *m, mpint *a, mpint *r)
+{
+	GMfield *g = (GMfield*)m;
+	mpdigit d0, t[MAXDIG];
+	int i, j, d, *x;
+
+	if(mpmagcmp(a, g->m2) >= 0)
+		return -1;
+
+	if(a != r)
+		mpassign(a, r);
+
+	d = g->top;
+	mpbits(r, (d+1)*Dbits*2);
+	memmove(t+d, r->p+d, d*Dbytes);
+
+	r->sign = 1;
+	r->top = d;
+	r->p[d] = 0;
+
+	if(g->nsub > 0)
+		mpvecdigmuladd(g->p, d, g->nsub, r->p);
+
+	x = g->indx;
+	for(i=0; i<g->nadd; i++){
+		t[0] = 0;
+		d0 = t[*x++];
+		for(j=1; j<d; j++)
+			t[j] = t[*x++];
+		t[0] = d0;
+
+		mpvecadd(r->p, d+1, t, d, r->p);
+	}
+
+	for(i=0; i<g->nsub; i++){
+		t[0] = 0;
+		d0 = t[*x++];
+		for(j=1; j<d; j++)
+			t[j] = t[*x++];
+		t[0] = d0;
+
+		mpvecsub(r->p, d+1, t, d, r->p);
+	}
+
+	mpvecdigmulsub(g->p, d, r->p[d], r->p);
+	r->p[d] = 0;
+
+	mpvecsub(r->p, d+1, g->p, d, r->p+d+1);
+	d0 = r->p[2*d+1];
+	for(j=0; j<d; j++)
+		r->p[j] = (r->p[j] & d0) | (r->p[j+d+1] & ~d0);
+
+	mpnorm(r);
+
+	return 0;
+}
+
+Mfield*
+gmfield(mpint *N)
+{
+	int i,j,d, s, *C, *X, *x, *e;
+	mpint *M, *T;
+	GMfield *g;
+
+	d = N->top;
+	if(d <= 2 || d > MAXDIG/2 || (mpsignif(N) % Dbits) != 0)
+		return nil;
+	g = nil;
+	T = mpnew(0);
+	M = mpcopy(N);
+	C = malloc(sizeof(int)*(d+1));
+	X = malloc(sizeof(int)*(d*d));
+	if(C == nil || X == nil)
+		goto out;
+
+	for(i=0; i<=d; i++){
+		if((M->p[i]>>8) != 0 && (~M->p[i]>>8) != 0)
+			goto out;
+		j = M->p[i];
+		C[d - i] = -j;
+		itomp(j, T);
+		mpleft(T, i*Dbits, T);
+		mpsub(M, T, M);
+	}
+	for(j=0; j<d; j++)
+		X[j] = C[d-j];
+	for(i=1; i<d; i++){
+		X[d*i] = X[d*(i-1) + d-1]*C[d];
+		for(j=1; j<d; j++)
+			X[d*i + j] = X[d*(i-1) + j-1] + X[d*(i-1) + d-1]*C[d-j];
+	}
+	g = mallocz(sizeof(GMfield) + (d+1)*sizeof(mpdigit)*2, 1);
+	if(g == nil)
+		goto out;
+
+	g->m2->p = (mpdigit*)&g[1];
+	g->m2->size = d*2+1;
+	mpmul(N, N, g->m2);
+	mpassign(N, g);
+	g->reduce = gmreduce;
+	g->flags |= MPfield;
+
+	s = 0;
+	x = g->indx;
+	e = x + nelem(g->indx) - d;
+	for(g->nadd=0; x <= e; x += d, g->nadd++){
+		s = 0;
+		for(i=0; i<d; i++){
+			for(j=0; j<d; j++){
+				if(X[d*i+j] > 0 && x[j] == 0){
+					X[d*i+j]--;
+					x[j] = d+i;
+					s = 1;
+					break;
+				}
+			}
+		}
+		if(s == 0)
+			break;
+	}
+	for(g->nsub=0; x <= e; x += d, g->nsub++){
+		s = 0;
+		for(i=0; i<d; i++){
+			for(j=0; j<d; j++){
+				if(X[d*i+j] < 0 && x[j] == 0){
+					X[d*i+j]++;
+					x[j] = d+i;
+					s = 1;
+					break;
+				}
+			}
+		}
+		if(s == 0)
+			break;
+	}
+	if(s != 0){
+		mpfree(g);
+		g = nil;
+	}
+out:
+	free(C);
+	free(X);
+	mpfree(M);
+	mpfree(T);
+	return g;
+}
+
--- sys/src/libmp/port/mkfile	Thu May 27 15:20:04 2004
+++ /sys/src/libmp/port/mkfile	Sat Jun 26 21:21:40 2021
@@ -3,9 +3,13 @@
 LIB=/$objtype/lib/libmp.a
 FILES=\
 	mpaux\
+	cnfield\
+	gmfield\
+	mpfield\
 	mpfmt\
 	strtomp\
 	mptobe\
+	mptober\
 	mptole\
 	betomp\
 	letomp\
@@ -14,6 +18,8 @@
 	mpcmp\
 	mpfactorial\
 	mpmul\
+	mpnrand\
+	mprand\
 	mpleft\
 	mpright\
 	mpvecadd\
@@ -24,6 +30,7 @@
 	mpdiv\
 	mpexp\
 	mpmod\
+	mpmodop\
 	mpextendedgcd\
 	mpinvert\
 	mprand\
--- sys/src/libmp/port/mpaux.c	Mon Sep 19 12:19:11 2005
+++ /sys/src/libmp/port/mpaux.c	Sat Jun 26 21:21:40 2021
@@ -107,17 +107,24 @@
 	free(b);
 }
 
-void
+mpint*
 mpnorm(mpint *b)
 {
 	int i;
 
+	if(b->flags & MPtimesafe){
+		assert(b->sign == 1);
+		b->flags &= ~MPnorm;
+		return b;
+	}
 	for(i = b->top-1; i >= 0; i--)
 		if(b->p[i] != 0)
 			break;
 	b->top = i+1;
 	if(b->top == 0)
 		b->sign = 1;
+	b->flags |= MPnorm;
+	return b;
 }
 
 mpint*
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/mpfield.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,21 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+mpint*
+mpfield(mpint *N)
+{
+	Mfield *f;
+
+	if(N == nil || N->flags & (MPfield|MPstatic))
+		return N;
+	if((f = cnfield(N)) != nil)
+		goto Exchange;
+	if((f = gmfield(N)) != nil)
+		goto Exchange;
+	return N;
+Exchange:
+	setmalloctag(f, getcallerpc(&N));
+	mpfree(N);
+	return f;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/mpmodop.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,95 @@
+#include "os.h"
+#include <mp.h>
+
+/* operands need to have m->top+1 digits of space and satisfy 0 ≤ a ≤ m-1 */
+static mpint*
+modarg(mpint *a, mpint *m)
+{
+	if(a->size <= m->top || a->sign < 0 || mpmagcmp(a, m) >= 0){
+		a = mpcopy(a);
+		mpmod(a, m, a);
+		mpbits(a, Dbits*(m->top+1));
+		a->top = m->top;
+	} else if(a->top < m->top){
+		memset(&a->p[a->top], 0, (m->top - a->top)*Dbytes);
+	}
+	return a;
+}
+
+void
+mpmodadd(mpint *b1, mpint *b2, mpint *m, mpint *sum)
+{
+	mpint *a, *b;
+	mpdigit d;
+	int i, j;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	sum->flags |= (a->flags | b->flags) & MPtimesafe;
+	mpbits(sum, Dbits*2*(m->top+1));
+
+	mpvecadd(a->p, m->top, b->p, m->top, sum->p);
+	mpvecsub(sum->p, m->top+1, m->p, m->top, sum->p+m->top+1);
+
+	d = sum->p[2*m->top+1];
+	for(i = 0, j = m->top+1; i < m->top; i++, j++)
+		sum->p[i] = (sum->p[i] & d) | (sum->p[j] & ~d);
+
+	sum->top = m->top;
+	sum->sign = 1;
+	mpnorm(sum);
+
+	if(a != b1)
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
+
+void
+mpmodsub(mpint *b1, mpint *b2, mpint *m, mpint *diff)
+{
+	mpint *a, *b;
+	mpdigit d;
+	int i, j;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	diff->flags |= (a->flags | b->flags) & MPtimesafe;
+	mpbits(diff, Dbits*2*(m->top+1));
+
+	a->p[m->top] = 0;
+	mpvecsub(a->p, m->top+1, b->p, m->top, diff->p);
+	mpvecadd(diff->p, m->top, m->p, m->top, diff->p+m->top+1);
+
+	d = ~diff->p[m->top];
+	for(i = 0, j = m->top+1; i < m->top; i++, j++)
+		diff->p[i] = (diff->p[i] & d) | (diff->p[j] & ~d);
+
+	diff->top = m->top;
+	diff->sign = 1;
+	mpnorm(diff);
+
+	if(a != b1)
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
+
+void
+mpmodmul(mpint *b1, mpint *b2, mpint *m, mpint *prod)
+{
+	mpint *a, *b;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	mpmul(a, b, prod);
+	mpmod(prod, m, prod);
+
+	if(a != b1)
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/mpnrand.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,23 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/* return uniform random [0..n-1] */
+mpint*
+mpnrand(mpint *n, void (*gen)(uchar*, int), mpint *b)
+{
+	int bits;
+
+	bits = mpsignif(n);
+	if(bits == 0)
+		abort();
+	if(b == nil){
+		b = mpnew(bits);
+		setmalloctag(b, getcallerpc(&n));
+	}
+	do {
+		mprand(bits, gen, b);
+	} while(mpmagcmp(b, n) >= 0);
+
+	return b;
+}
--- sys/src/libmp/port/mprand.c	Sun Apr 23 22:44:05 2000
+++ /sys/src/libmp/port/mprand.c	Sat Jun 26 21:21:40 2021
@@ -1,42 +1,25 @@
 #include "os.h"
 #include <mp.h>
-#include <libsec.h>
 #include "dat.h"
 
 mpint*
 mprand(int bits, void (*gen)(uchar*, int), mpint *b)
 {
-	int n, m;
 	mpdigit mask;
-	uchar *p;
 
-	n = DIGITS(bits);
-	if(b == nil)
+	if(b == nil){
 		b = mpnew(bits);
-	else
+		setmalloctag(b, getcallerpc(&bits));
+	}else
 		mpbits(b, bits);
 
-	p = malloc(n*Dbytes);
-	if(p == nil)
-		return nil;
-	(*gen)(p, n*Dbytes);
-	betomp(p, n*Dbytes, b);
-	free(p);
+	b->sign = 1;
+	b->top = DIGITS(bits);
+	(*gen)((uchar*)b->p, b->top*Dbytes);
 
-	// make sure we don't give too many bits
-	m = bits%Dbits;
-	n--;
-	if(m > 0){
-		mask = 1;
-		mask <<= m;
-		mask--;
-		b->p[n] &= mask;
-	}
+	mask = ((mpdigit)1 << (bits%Dbits))-1;
+	if(mask != 0)
+		b->p[b->top-1] &= mask;
 
-	for(; n >= 0; n--)
-		if(b->p[n] != 0)
-			break;
-	b->top = n+1;
-	b->sign = 1;
-	return b;
+	return mpnorm(b);
 }
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libmp/port/mptober.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,34 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+void
+mptober(mpint *b, uchar *p, int n)
+{
+	int i, j, m;
+	mpdigit x;
+
+	memset(p, 0, n);
+
+	p += n;
+	m = b->top*Dbytes;
+	if(m < n)
+		n = m;
+
+	i = 0;
+	while(n >= Dbytes){
+		n -= Dbytes;
+		x = b->p[i++];
+		for(j = 0; j < Dbytes; j++){
+			*--p = x;
+			x >>= 8;
+		}
+	}
+	if(n > 0){
+		x = b->p[i];
+		for(j = 0; j < n; j++){
+			*--p = x;
+			x >>= 8;
+		}
+	}
+}
--- sys/src/libsec/port/aes.c	Thu Jul  1 17:29:01 2021
+++ /sys/src/libsec/port/aes.c	Mon Jun 28 16:58:00 2021
@@ -36,446 +36,12 @@
 typedef uchar	u8;
 typedef ulong	u32;
 
-#define FULL_UNROLL
-#define const
-
-static const u32 Td0[256];
-static const u32 Td1[256];
-static const u32 Td2[256];
-static const u32 Td3[256];
-static const u8  Te4[256];
-static uchar basekey[3][16] = {
-	{
-	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-	},
-	{
-	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-	},
-	{
-	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-	},
-};
-
-static int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
-		int keyBits);
-static int aes_setupDec(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
-		int keyBits);
-static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
-		const uchar cipherKey[], int keyBits);
-
-void	aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
-void	aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
-
-void
-setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
-{
-	memset(s, 0, sizeof(*s));
-	if(keybytes > AESmaxkey)
-		keybytes = AESmaxkey;
-	memmove(s->key, key, keybytes);
-	s->keybytes = keybytes;
-	s->ctrsz = 4;	/* default counter size from rfc3686 */
-	s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
-	if(ivec != nil)
-		memmove(s->ivec, ivec, AESbsize);
-	if(keybytes==16 || keybytes==24 || keybytes==32)
-		s->setup = 0xcafebabe;
-	/* else aes_setup was invalid */
-}
-
-/*
- * AES-XCBC-MAC-96 message authentication, per rfc3566.
- */
-
-void
-setupAESXCBCstate(AESstate *s)		/* was setupmac96 */
-{
-	int i, j;
-	uint q[16 / sizeof(uint)];
-	uchar *p;
-
-	assert(s->keybytes == 16);
-	for(i = 0; i < 3; i++)
-		aes_encrypt(s->ekey, s->rounds, basekey[i],
-			s->mackey + AESbsize*i);
-
-	p = s->mackey;
-	memset(q, 0, AESbsize);
-
-	/*
-	 * put the in the right endian.  once figured, probably better
-	 * to use some fcall macros.
-	 * keys for encryption in local endianness for the algorithm...
-	 * only key1 is used for encryption;
-	 * BUG!!: I think this is what I got wrong.
-	 */
-	for(i = 0; i < 16 / sizeof(uint); i ++){
-		for(j = 0; j < sizeof(uint); j++)
-			q[i] |= p[sizeof(uint)-j-1] << 8*j;
-		p += sizeof(uint);
-	}
-	memmove(s->mackey, q, 16);
-}
-
-/*
- * Not dealing with > 128-bit keys, not dealing with strange corner cases like
- * empty message.  Should be fine for AES-XCBC-MAC-96.
- */
-uchar*
-aesXCBCmac(uchar *p, int len, AESstate *s)
-{
-	uchar *p2, *ip, *eip, *mackey;
-	uchar q[AESbsize];
-
-	assert(s->keybytes == 16);	/* more complicated for bigger */
-	memset(s->ivec, 0, AESbsize);	/* E[0] is 0+ */
-
-	for(; len > AESbsize; len -= AESbsize){
-		memmove(q, p, AESbsize);
-		p2 = q;
-		ip = s->ivec;
-		for(eip = ip + AESbsize; ip < eip; )
-			*p2++ ^= *ip++;
-		aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
-		p += AESbsize;
-	}
-	/* the last one */
-
-	memmove(q, p, len);
-	p2 = q+len;
-	if(len == AESbsize)
-		mackey = s->mackey + AESbsize;	/* k2 */
-	else{
-		mackey = s->mackey+2*AESbsize;	/* k3 */
-		*p2++ = 1 << 7;			/* padding */
-		len = AESbsize - len - 1;
-		memset(p2, 0, len);
-	}
-
-	ip = s->ivec;
-	p2 = q;
-	for(eip = ip + AESbsize; ip < eip; )
-		*p2++ ^= *ip++ ^ *mackey++;
-	aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
-	return s->ivec;			/* only the 12 bytes leftmost */
-}
-
-/*
- * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
- * Because of the way that non-multiple-of-16 buffers are handled,
- * the decryptor must be fed buffers of the same size as the encryptor.
- */
-void
-aesCBCencrypt(uchar *p, int len, AESstate *s)
-{
-	uchar *p2, *ip, *eip;
-	uchar q[AESbsize];
-
-	for(; len >= AESbsize; len -= AESbsize){
-		p2 = p;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; )
-			*p2++ ^= *ip++;
-		aes_encrypt(s->ekey, s->rounds, p, q);
-		memmove(s->ivec, q, AESbsize);
-		memmove(p, q, AESbsize);
-		p += AESbsize;
-	}
-
-	if(len > 0){
-		ip = s->ivec;
-		aes_encrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
-		for(eip = ip+len; ip < eip; )
-			*p++ ^= *ip++;
-	}
-}
-
-void
-aesCBCdecrypt(uchar *p, int len, AESstate *s)
-{
-	uchar *ip, *eip, *tp;
-	uchar tmp[AESbsize], q[AESbsize];
-
-	for(; len >= AESbsize; len -= AESbsize){
-		memmove(tmp, p, AESbsize);
-		aes_decrypt(s->dkey, s->rounds, p, q);
-		memmove(p, q, AESbsize);
-		tp = tmp;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; ){
-			*p++ ^= *ip;
-			*ip++ = *tp++;
-		}
-	}
-
-	if(len > 0){
-		ip = s->ivec;
-		aes_encrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
-		for(eip = ip+len; ip < eip; )
-			*p++ ^= *ip++;
-	}
-}
-
-/*
- * AES-CTR mode, per rfc3686.
- * CTRs could be precalculated for efficiency
- * and there would also be less back and forth mp
- */
-
-static void
-incrementCTR(uchar *p, uint ctrsz)
-{
-	int len;
-	ulong c;
-	uchar *ctr;
-	mpint *mpctr, *mpctrsz;
-
-	ctr = p + AESbsize - ctrsz;
-	if(ctrsz == 4){
-		/*
-		 * If counter is 32 bits (as in rfc3686 and ssh2) there's
-		 * no need to use extended precision.
-		 */
-		c = 1 + (ctr[0]<<24 | ctr[1]<<16 | ctr[2]<<8 | ctr[3]);
-		ctr[0] = c>>24; ctr[1] = c>>16; ctr[2] = c>>8; ctr[3] = c;
-		return;
-	}
-	mpctr = betomp(ctr, ctrsz, nil);
-	mpctrsz = mpnew(ctrsz*8 + 1);
-	mpleft(mpone, ctrsz*8, mpctrsz);
-	mpadd(mpctr, mpone, mpctr);
-	mpmod(mpctr, mpctrsz, mpctr);
-	len = mptobe(mpctr, ctr, ctrsz, nil);
-	assert(len == ctrsz);
-	mpfree(mpctrsz);
-	mpfree(mpctr);
-}
-
-void
-aesCTRencrypt(uchar *p, int len, AESstate *s)
-{
-	uchar q[AESbsize];
-	uchar *ip, *eip, *ctr;
-
-	ctr = s->ivec;
-	for(; len >= AESbsize; len -= AESbsize){
-		ip = q;
-		aes_encrypt(s->ekey, s->rounds, ctr, q);
-		for(eip = p + AESbsize; p < eip; )
-			*p++ ^= *ip++;
-		incrementCTR(ctr, s->ctrsz);
-	}
-
-	if(len > 0){
-		ip = q;
-		aes_encrypt(s->ekey, s->rounds, ctr, q);
-		for(eip = p + len; p < eip; )
-			*p++ ^= *ip++;
-		incrementCTR(ctr, s->ctrsz);
-	}
-}
-
-void
-aesCTRdecrypt(uchar *p, int len, AESstate *s)
-{
-	aesCTRencrypt(p, len, s);
-}
-
-
-/* taken from sha1; TODO: verify suitability (esp. byte order) for aes */
-/*
- *	encodes input (ulong) into output (uchar). Assumes len is
- *	a multiple of 4.
- */
-static void
-encode(uchar *output, ulong *input, ulong len)
-{
-	ulong x;
-	uchar *e;
-
-	for(e = output + len; output < e;) {
-		x = *input++;
-		*output++ = x >> 24;
-		*output++ = x >> 16;
-		*output++ = x >> 8;
-		*output++ = x;
-	}
-}
-
-/* TODO: verify use of aes_encrypt here */
-AEShstate*
-aes(uchar *p, ulong len, uchar *digest, AEShstate *s)
-{
-	uchar buf[128];
-	ulong x[16];
-	int i;
-	uchar *e;
-
-	if(s == nil){
-		s = malloc(sizeof(*s));
-		if(s == nil)
-			return nil;
-		memset(s, 0, sizeof(*s));
-		s->malloced = 1;
-	}
-
-	if(s->seeded == 0){
-		/* seed the state, these constants would look nicer big-endian */
-		s->state[0] = 0x67452301;
-		s->state[1] = 0xefcdab89;
-		s->state[2] = 0x98badcfe;
-		s->state[3] = 0x10325476;
-		/* in sha1 (20-byte digest), but not md5 (16 bytes)*/
-		s->state[4] = 0xc3d2e1f0;
-		s->seeded = 1;
-	}
-
-	/* fill out the partial 64 byte block from previous calls */
-	if(s->blen){
-		i = 64 - s->blen;
-		if(len < i)
-			i = len;
-		memmove(s->buf + s->blen, p, i);
-		len -= i;
-		s->blen += i;
-		p += i;
-		if(s->blen == 64){
-			/* encrypt s->buf into s->state */
-			// _sha1block(s->buf, s->blen, s->state);
-			aes_encrypt((ulong *)s->buf, 1, s->buf, (uchar *)s->state);
-			s->len += s->blen;
-			s->blen = 0;
-		}
-	}
-
-	/* do 64 byte blocks */
-	i = len & ~0x3f;
-	if(i){
-		/* encrypt p into s->state */
-		// _sha1block(p, i, s->state);
-		aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
-		s->len += i;
-		len -= i;
-		p += i;
-	}
-
-	/* save the left overs if not last call */
-	if(digest == 0){
-		if(len){
-			memmove(s->buf, p, len);
-			s->blen += len;
-		}
-		return s;
-	}
-
-	/*
-	 *  this is the last time through, pad what's left with 0x80,
-	 *  0's, and the input count to create a multiple of 64 bytes
-	 */
-	if(s->blen){
-		p = s->buf;
-		len = s->blen;
-	} else {
-		memmove(buf, p, len);
-		p = buf;
-	}
-	s->len += len;
-	e = p + len;
-	if(len < 56)
-		i = 56 - len;
-	else
-		i = 120 - len;
-	memset(e, 0, i);
-	*e = 0x80;
-	len += i;
-
-	/* append the count */
-	x[0] = s->len>>29;		/* byte-order dependent */
-	x[1] = s->len<<3;
-	encode(p+len, x, 8);
-
-	/* digest the last part */
-	/* encrypt p into s->state */
-	// _sha1block(p, len+8, s->state);
-	aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
-	s->len += len+8;		/* sha1: +8 */
-
-	/* return result and free state */
-	encode((uchar *)digest, (ulong *)s->state, AESdlen);
-	if(s->malloced == 1)
-		free(s);
-	return nil;
-}
-
-DigestState*
-hmac_aes(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest,
-	DigestState *s)
-{
-	return hmac_x(p, len, key, klen, digest, s, aes, AESdlen);
-}
-
-
-
-/*
- * this function has been changed for plan 9.
- * Expand the cipher key into the encryption and decryption key schedules.
- *
- * @return	the number of rounds for the given cipher key size.
- */
-static int
-aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
-	const uchar cipherKey[], int keyBits)
-{
-	int Nr, i;
-
-	/* expand the cipher key: */
-	Nr = aes_setupEnc(erk, cipherKey, keyBits);
-
-	/*
-	 * invert the order of the round keys and apply the inverse MixColumn
-	 * transform to all round keys but the first and the last
-	 */
-	drk[0       ] = erk[4*Nr    ];
-	drk[1       ] = erk[4*Nr + 1];
-	drk[2       ] = erk[4*Nr + 2];
-	drk[3       ] = erk[4*Nr + 3];
-	drk[4*Nr    ] = erk[0       ];
-	drk[4*Nr + 1] = erk[1       ];
-	drk[4*Nr + 2] = erk[2       ];
-	drk[4*Nr + 3] = erk[3       ];
-	erk += 4 * Nr;
-	for (i = 1; i < Nr; i++) {
-		drk += 4;
-		erk -= 4;
-		drk[0] =
-		    Td0[Te4[(erk[0] >> 24)       ]] ^
-		    Td1[Te4[(erk[0] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[0] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[0]      ) & 0xff]];
-		drk[1] =
-		    Td0[Te4[(erk[1] >> 24)       ]] ^
-		    Td1[Te4[(erk[1] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[1] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[1]      ) & 0xff]];
-		drk[2] =
-		    Td0[Te4[(erk[2] >> 24)       ]] ^
-		    Td1[Te4[(erk[2] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[2] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[2]      ) & 0xff]];
-		drk[3] =
-		    Td0[Te4[(erk[3] >> 24)       ]] ^
-		    Td1[Te4[(erk[3] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[3] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[3]      ) & 0xff]];
-	}
-	return Nr;
-}
+#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
+		    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
+			 (ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); }
 
+#define FULL_UNROLL
 
 /*
 Te0[x] = S [x].[02, 01, 01, 03];
@@ -491,696 +57,691 @@
 Td4[x] = Si[x]
 */
 
-static const u32 Te0[256] = {
-    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
-    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
-    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
-    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
-    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
-    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
-    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
-    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
-    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
-    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
-    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
-    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
-    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
-    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
-    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
-    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
-    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
-    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
-    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
-    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
-    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
-    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
-    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
-    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
-    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
-    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
-    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
-    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
-    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
-    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
-    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
-    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
-    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
-    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
-    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
-    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
-    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
-    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
-    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
-    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
-    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
-    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
-    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
-    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
-    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
-    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
-    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
-    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
-    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
-    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
-    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
-    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
-    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
-    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
-    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
-    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
-    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
-    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
-    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
-    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
-    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
-    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
-    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
-    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+static u32 Te0[256] = {
+	 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+	 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+	 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+	 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+	 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+	 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+	 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+	 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+	 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+	 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+	 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+	 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+	 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+	 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+	 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+	 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+	 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+	 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+	 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+	 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+	 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+	 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+	 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+	 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+	 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+	 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+	 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+	 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+	 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+	 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+	 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+	 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+	 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+	 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+	 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+	 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+	 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+	 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+	 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+	 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+	 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+	 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+	 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+	 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+	 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+	 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+	 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+	 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+	 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+	 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+	 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+	 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+	 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+	 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+	 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+	 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+	 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+	 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+	 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+	 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+	 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+	 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+	 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+	 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
 };
-static const u32 Te1[256] = {
-    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
-    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
-    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
-    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
-    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
-    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
-    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
-    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
-    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
-    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
-    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
-    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
-    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
-    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
-    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
-    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
-    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
-    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
-    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
-    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
-    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
-    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
-    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
-    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
-    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
-    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
-    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
-    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
-    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
-    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
-    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
-    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
-    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
-    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
-    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
-    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
-    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
-    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
-    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
-    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
-    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
-    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
-    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
-    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
-    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
-    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
-    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
-    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
-    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
-    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
-    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
-    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
-    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
-    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
-    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
-    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
-    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
-    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
-    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
-    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
-    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
-    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
-    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
-    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+static u32 Te1[256] = {
+	 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+	 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+	 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+	 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+	 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+	 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+	 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+	 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+	 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+	 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+	 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+	 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+	 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+	 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+	 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+	 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+	 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+	 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+	 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+	 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+	 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+	 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+	 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+	 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+	 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+	 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+	 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+	 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+	 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+	 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+	 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+	 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+	 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+	 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+	 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+	 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+	 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+	 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+	 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+	 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+	 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+	 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+	 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+	 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+	 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+	 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+	 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+	 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+	 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+	 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+	 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+	 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+	 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+	 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+	 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+	 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+	 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+	 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+	 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+	 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+	 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+	 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+	 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+	 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
 };
-static const u32 Te2[256] = {
-    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
-    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
-    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
-    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
-    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
-    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
-    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
-    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
-    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
-    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
-    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
-    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
-    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
-    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
-    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
-    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
-    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
-    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
-    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
-    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
-    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
-    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
-    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
-    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
-    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
-    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
-    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
-    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
-    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
-    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
-    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
-    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
-    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
-    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
-    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
-    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
-    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
-    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
-    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
-    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
-    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
-    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
-    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
-    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
-    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
-    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
-    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
-    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
-    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
-    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
-    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
-    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
-    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
-    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
-    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
-    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
-    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
-    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
-    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
-    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
-    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
-    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
-    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
-    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+static u32 Te2[256] = {
+	 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+	 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+	 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+	 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+	 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+	 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+	 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+	 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+	 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+	 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+	 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+	 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+	 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+	 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+	 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+	 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+	 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+	 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+	 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+	 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+	 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+	 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+	 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+	 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+	 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+	 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+	 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+	 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+	 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+	 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+	 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+	 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+	 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+	 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+	 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+	 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+	 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+	 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+	 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+	 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+	 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+	 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+	 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+	 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+	 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+	 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+	 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+	 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+	 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+	 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+	 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+	 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+	 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+	 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+	 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+	 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+	 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+	 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+	 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+	 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+	 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+	 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+	 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+	 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
 };
-static const u32 Te3[256] = {
+static u32 Te3[256] = {
 
-    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
-    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
-    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
-    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
-    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
-    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
-    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
-    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
-    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
-    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
-    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
-    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
-    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
-    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
-    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
-    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
-    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
-    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
-    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
-    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
-    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
-    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
-    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
-    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
-    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
-    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
-    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
-    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
-    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
-    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
-    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
-    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
-    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
-    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
-    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
-    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
-    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
-    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
-    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
-    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
-    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
-    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
-    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
-    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
-    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
-    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
-    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
-    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
-    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
-    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
-    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
-    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
-    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
-    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
-    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
-    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
-    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
-    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
-    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
-    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
-    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
-    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
-    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
-    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+	 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+	 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+	 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+	 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+	 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+	 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+	 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+	 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+	 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+	 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+	 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+	 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+	 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+	 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+	 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+	 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+	 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+	 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+	 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+	 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+	 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+	 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+	 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+	 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+	 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+	 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+	 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+	 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+	 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+	 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+	 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+	 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+	 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+	 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+	 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+	 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+	 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+	 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+	 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+	 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+	 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+	 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+	 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+	 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+	 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+	 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+	 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+	 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+	 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+	 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+	 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+	 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+	 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+	 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+	 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+	 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+	 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+	 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+	 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+	 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+	 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+	 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+	 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+	 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
 };
-static const u8 Te4[256] = {
-    0x63U, 0x7cU, 0x77U, 0x7bU,
-    0xf2U, 0x6bU, 0x6fU, 0xc5U,
-    0x30U, 0x01U, 0x67U, 0x2bU,
-    0xfeU, 0xd7U, 0xabU, 0x76U,
-    0xcaU, 0x82U, 0xc9U, 0x7dU,
-    0xfaU, 0x59U, 0x47U, 0xf0U,
-    0xadU, 0xd4U, 0xa2U, 0xafU,
-    0x9cU, 0xa4U, 0x72U, 0xc0U,
-    0xb7U, 0xfdU, 0x93U, 0x26U,
-    0x36U, 0x3fU, 0xf7U, 0xccU,
-    0x34U, 0xa5U, 0xe5U, 0xf1U,
-    0x71U, 0xd8U, 0x31U, 0x15U,
-    0x04U, 0xc7U, 0x23U, 0xc3U,
-    0x18U, 0x96U, 0x05U, 0x9aU,
-    0x07U, 0x12U, 0x80U, 0xe2U,
-    0xebU, 0x27U, 0xb2U, 0x75U,
-    0x09U, 0x83U, 0x2cU, 0x1aU,
-    0x1bU, 0x6eU, 0x5aU, 0xa0U,
-    0x52U, 0x3bU, 0xd6U, 0xb3U,
-    0x29U, 0xe3U, 0x2fU, 0x84U,
-    0x53U, 0xd1U, 0x00U, 0xedU,
-    0x20U, 0xfcU, 0xb1U, 0x5bU,
-    0x6aU, 0xcbU, 0xbeU, 0x39U,
-    0x4aU, 0x4cU, 0x58U, 0xcfU,
-    0xd0U, 0xefU, 0xaaU, 0xfbU,
-    0x43U, 0x4dU, 0x33U, 0x85U,
-    0x45U, 0xf9U, 0x02U, 0x7fU,
-    0x50U, 0x3cU, 0x9fU, 0xa8U,
-    0x51U, 0xa3U, 0x40U, 0x8fU,
-    0x92U, 0x9dU, 0x38U, 0xf5U,
-    0xbcU, 0xb6U, 0xdaU, 0x21U,
-    0x10U, 0xffU, 0xf3U, 0xd2U,
-    0xcdU, 0x0cU, 0x13U, 0xecU,
-    0x5fU, 0x97U, 0x44U, 0x17U,
-    0xc4U, 0xa7U, 0x7eU, 0x3dU,
-    0x64U, 0x5dU, 0x19U, 0x73U,
-    0x60U, 0x81U, 0x4fU, 0xdcU,
-    0x22U, 0x2aU, 0x90U, 0x88U,
-    0x46U, 0xeeU, 0xb8U, 0x14U,
-    0xdeU, 0x5eU, 0x0bU, 0xdbU,
-    0xe0U, 0x32U, 0x3aU, 0x0aU,
-    0x49U, 0x06U, 0x24U, 0x5cU,
-    0xc2U, 0xd3U, 0xacU, 0x62U,
-    0x91U, 0x95U, 0xe4U, 0x79U,
-    0xe7U, 0xc8U, 0x37U, 0x6dU,
-    0x8dU, 0xd5U, 0x4eU, 0xa9U,
-    0x6cU, 0x56U, 0xf4U, 0xeaU,
-    0x65U, 0x7aU, 0xaeU, 0x08U,
-    0xbaU, 0x78U, 0x25U, 0x2eU,
-    0x1cU, 0xa6U, 0xb4U, 0xc6U,
-    0xe8U, 0xddU, 0x74U, 0x1fU,
-    0x4bU, 0xbdU, 0x8bU, 0x8aU,
-    0x70U, 0x3eU, 0xb5U, 0x66U,
-    0x48U, 0x03U, 0xf6U, 0x0eU,
-    0x61U, 0x35U, 0x57U, 0xb9U,
-    0x86U, 0xc1U, 0x1dU, 0x9eU,
-    0xe1U, 0xf8U, 0x98U, 0x11U,
-    0x69U, 0xd9U, 0x8eU, 0x94U,
-    0x9bU, 0x1eU, 0x87U, 0xe9U,
-    0xceU, 0x55U, 0x28U, 0xdfU,
-    0x8cU, 0xa1U, 0x89U, 0x0dU,
-    0xbfU, 0xe6U, 0x42U, 0x68U,
-    0x41U, 0x99U, 0x2dU, 0x0fU,
-    0xb0U, 0x54U, 0xbbU, 0x16U,
+static u8 Te4[256] = {
+	 0x63U, 0x7cU, 0x77U, 0x7bU,
+	 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+	 0x30U, 0x01U, 0x67U, 0x2bU,
+	 0xfeU, 0xd7U, 0xabU, 0x76U,
+	 0xcaU, 0x82U, 0xc9U, 0x7dU,
+	 0xfaU, 0x59U, 0x47U, 0xf0U,
+	 0xadU, 0xd4U, 0xa2U, 0xafU,
+	 0x9cU, 0xa4U, 0x72U, 0xc0U,
+	 0xb7U, 0xfdU, 0x93U, 0x26U,
+	 0x36U, 0x3fU, 0xf7U, 0xccU,
+	 0x34U, 0xa5U, 0xe5U, 0xf1U,
+	 0x71U, 0xd8U, 0x31U, 0x15U,
+	 0x04U, 0xc7U, 0x23U, 0xc3U,
+	 0x18U, 0x96U, 0x05U, 0x9aU,
+	 0x07U, 0x12U, 0x80U, 0xe2U,
+	 0xebU, 0x27U, 0xb2U, 0x75U,
+	 0x09U, 0x83U, 0x2cU, 0x1aU,
+	 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+	 0x52U, 0x3bU, 0xd6U, 0xb3U,
+	 0x29U, 0xe3U, 0x2fU, 0x84U,
+	 0x53U, 0xd1U, 0x00U, 0xedU,
+	 0x20U, 0xfcU, 0xb1U, 0x5bU,
+	 0x6aU, 0xcbU, 0xbeU, 0x39U,
+	 0x4aU, 0x4cU, 0x58U, 0xcfU,
+	 0xd0U, 0xefU, 0xaaU, 0xfbU,
+	 0x43U, 0x4dU, 0x33U, 0x85U,
+	 0x45U, 0xf9U, 0x02U, 0x7fU,
+	 0x50U, 0x3cU, 0x9fU, 0xa8U,
+	 0x51U, 0xa3U, 0x40U, 0x8fU,
+	 0x92U, 0x9dU, 0x38U, 0xf5U,
+	 0xbcU, 0xb6U, 0xdaU, 0x21U,
+	 0x10U, 0xffU, 0xf3U, 0xd2U,
+	 0xcdU, 0x0cU, 0x13U, 0xecU,
+	 0x5fU, 0x97U, 0x44U, 0x17U,
+	 0xc4U, 0xa7U, 0x7eU, 0x3dU,
+	 0x64U, 0x5dU, 0x19U, 0x73U,
+	 0x60U, 0x81U, 0x4fU, 0xdcU,
+	 0x22U, 0x2aU, 0x90U, 0x88U,
+	 0x46U, 0xeeU, 0xb8U, 0x14U,
+	 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+	 0xe0U, 0x32U, 0x3aU, 0x0aU,
+	 0x49U, 0x06U, 0x24U, 0x5cU,
+	 0xc2U, 0xd3U, 0xacU, 0x62U,
+	 0x91U, 0x95U, 0xe4U, 0x79U,
+	 0xe7U, 0xc8U, 0x37U, 0x6dU,
+	 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+	 0x6cU, 0x56U, 0xf4U, 0xeaU,
+	 0x65U, 0x7aU, 0xaeU, 0x08U,
+	 0xbaU, 0x78U, 0x25U, 0x2eU,
+	 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+	 0xe8U, 0xddU, 0x74U, 0x1fU,
+	 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+	 0x70U, 0x3eU, 0xb5U, 0x66U,
+	 0x48U, 0x03U, 0xf6U, 0x0eU,
+	 0x61U, 0x35U, 0x57U, 0xb9U,
+	 0x86U, 0xc1U, 0x1dU, 0x9eU,
+	 0xe1U, 0xf8U, 0x98U, 0x11U,
+	 0x69U, 0xd9U, 0x8eU, 0x94U,
+	 0x9bU, 0x1eU, 0x87U, 0xe9U,
+	 0xceU, 0x55U, 0x28U, 0xdfU,
+	 0x8cU, 0xa1U, 0x89U, 0x0dU,
+	 0xbfU, 0xe6U, 0x42U, 0x68U,
+	 0x41U, 0x99U, 0x2dU, 0x0fU,
+	 0xb0U, 0x54U, 0xbbU, 0x16U,
 };
-static const u32 Td0[256] = {
-    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
-    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
-    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
-    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
-    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
-    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
-    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
-    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
-    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
-    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
-    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
-    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
-    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
-    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
-    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
-    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
-    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
-    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
-    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
-    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
-    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
-    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
-    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
-    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
-    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
-    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
-    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
-    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
-    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
-    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
-    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
-    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
-    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
-    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
-    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
-    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
-    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
-    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
-    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
-    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
-    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
-    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
-    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
-    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
-    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
-    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
-    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
-    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
-    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
-    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
-    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
-    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
-    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
-    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
-    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
-    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
-    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
-    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
-    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
-    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
-    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
-    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
-    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
-    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+static u32 Td0[256] = {
+	 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+	 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+	 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+	 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+	 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+	 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+	 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+	 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+	 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+	 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+	 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+	 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+	 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+	 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+	 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+	 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+	 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+	 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+	 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+	 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+	 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+	 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+	 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+	 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+	 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+	 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+	 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+	 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+	 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+	 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+	 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+	 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+	 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+	 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+	 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+	 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+	 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+	 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+	 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+	 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+	 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+	 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+	 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+	 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+	 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+	 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+	 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+	 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+	 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+	 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+	 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+	 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+	 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+	 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+	 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+	 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+	 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+	 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+	 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+	 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+	 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+	 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+	 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+	 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
 };
-static const u32 Td1[256] = {
-    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
-    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
-    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
-    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
-    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
-    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
-    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
-    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
-    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
-    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
-    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
-    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
-    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
-    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
-    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
-    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
-    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
-    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
-    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
-    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
-    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
-    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
-    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
-    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
-    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
-    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
-    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
-    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
-    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
-    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
-    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
-    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
-    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
-    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
-    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
-    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
-    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
-    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
-    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
-    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
-    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
-    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
-    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
-    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
-    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
-    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
-    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
-    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
-    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
-    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
-    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
-    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
-    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
-    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
-    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
-    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
-    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
-    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
-    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
-    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
-    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
-    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
-    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
-    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+static u32 Td1[256] = {
+	 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+	 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+	 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+	 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+	 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+	 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+	 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+	 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+	 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+	 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+	 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+	 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+	 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+	 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+	 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+	 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+	 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+	 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+	 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+	 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+	 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+	 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+	 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+	 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+	 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+	 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+	 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+	 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+	 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+	 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+	 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+	 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+	 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+	 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+	 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+	 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+	 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+	 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+	 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+	 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+	 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+	 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+	 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+	 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+	 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+	 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+	 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+	 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+	 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+	 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+	 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+	 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+	 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+	 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+	 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+	 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+	 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+	 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+	 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+	 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+	 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+	 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+	 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+	 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
 };
-static const u32 Td2[256] = {
-    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
-    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
-    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
-    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
-    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
-    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
-    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
-    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
-    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
-    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
-    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
-    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
-    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
-    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
-    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
-    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
-    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
-    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
-    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
-    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-
-    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
-    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
-    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
-    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
-    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
-    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
-    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
-    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
-    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
-    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
-    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
-    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
-    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
-    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
-    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
-    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
-    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
-    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
-    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
-    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
-    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
-    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
-    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
-    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
-    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
-    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
-    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
-    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
-    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
-    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
-    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
-    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
-    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
-    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
-    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
-    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
-    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
-    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
-    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
-    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
-    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
-    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
-    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
-    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+static u32 Td2[256] = {
+	 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+	 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+	 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+	 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+	 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+	 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+	 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+	 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+	 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+	 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+	 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+	 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+	 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+	 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+	 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+	 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+	 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+	 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+	 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+	 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+
+	 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+	 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+	 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+	 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+	 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+	 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+	 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+	 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+	 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+	 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+	 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+	 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+	 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+	 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+	 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+	 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+	 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+	 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+	 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+	 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+	 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+	 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+	 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+	 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+	 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+	 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+	 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+	 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+	 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+	 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+	 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+	 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+	 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+	 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+	 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+	 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+	 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+	 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+	 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+	 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+	 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+	 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+	 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+	 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
 };
-static const u32 Td3[256] = {
-    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
-    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
-    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
-    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
-    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
-    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
-    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
-    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
-    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
-    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
-    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
-    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
-    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
-    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
-    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
-    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
-    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
-    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
-    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
-    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
-    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
-    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
-    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
-    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
-    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
-    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
-    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
-    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
-    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
-    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
-    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
-    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
-    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
-    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
-    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
-    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
-    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
-    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
-    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
-    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
-    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
-    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
-    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
-    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
-    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
-    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
-    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
-    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
-    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
-    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
-    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
-    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
-    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
-    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
-    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
-    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
-    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
-    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
-    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
-    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
-    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
-    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
-    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
-    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+static u32 Td3[256] = {
+	 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+	 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+	 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+	 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+	 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+	 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+	 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+	 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+	 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+	 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+	 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+	 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+	 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+	 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+	 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+	 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+	 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+	 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+	 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+	 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+	 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+	 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+	 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+	 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+	 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+	 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+	 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+	 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+	 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+	 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+	 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+	 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+	 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+	 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+	 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+	 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+	 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+	 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+	 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+	 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+	 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+	 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+	 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+	 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+	 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+	 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+	 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+	 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+	 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+	 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+	 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+	 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+	 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+	 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+	 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+	 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+	 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+	 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+	 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+	 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+	 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+	 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+	 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+	 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
 };
-static const u8 Td4[256] = {
-    0x52U, 0x09U, 0x6aU, 0xd5U,
-    0x30U, 0x36U, 0xa5U, 0x38U,
-    0xbfU, 0x40U, 0xa3U, 0x9eU,
-    0x81U, 0xf3U, 0xd7U, 0xfbU,
-    0x7cU, 0xe3U, 0x39U, 0x82U,
-    0x9bU, 0x2fU, 0xffU, 0x87U,
-    0x34U, 0x8eU, 0x43U, 0x44U,
-    0xc4U, 0xdeU, 0xe9U, 0xcbU,
-    0x54U, 0x7bU, 0x94U, 0x32U,
-    0xa6U, 0xc2U, 0x23U, 0x3dU,
-    0xeeU, 0x4cU, 0x95U, 0x0bU,
-    0x42U, 0xfaU, 0xc3U, 0x4eU,
-    0x08U, 0x2eU, 0xa1U, 0x66U,
-    0x28U, 0xd9U, 0x24U, 0xb2U,
-    0x76U, 0x5bU, 0xa2U, 0x49U,
-    0x6dU, 0x8bU, 0xd1U, 0x25U,
-    0x72U, 0xf8U, 0xf6U, 0x64U,
-    0x86U, 0x68U, 0x98U, 0x16U,
-    0xd4U, 0xa4U, 0x5cU, 0xccU,
-    0x5dU, 0x65U, 0xb6U, 0x92U,
-    0x6cU, 0x70U, 0x48U, 0x50U,
-    0xfdU, 0xedU, 0xb9U, 0xdaU,
-    0x5eU, 0x15U, 0x46U, 0x57U,
-    0xa7U, 0x8dU, 0x9dU, 0x84U,
-    0x90U, 0xd8U, 0xabU, 0x00U,
-    0x8cU, 0xbcU, 0xd3U, 0x0aU,
-    0xf7U, 0xe4U, 0x58U, 0x05U,
-    0xb8U, 0xb3U, 0x45U, 0x06U,
-    0xd0U, 0x2cU, 0x1eU, 0x8fU,
-    0xcaU, 0x3fU, 0x0fU, 0x02U,
-    0xc1U, 0xafU, 0xbdU, 0x03U,
-    0x01U, 0x13U, 0x8aU, 0x6bU,
-    0x3aU, 0x91U, 0x11U, 0x41U,
-    0x4fU, 0x67U, 0xdcU, 0xeaU,
-    0x97U, 0xf2U, 0xcfU, 0xceU,
-    0xf0U, 0xb4U, 0xe6U, 0x73U,
-    0x96U, 0xacU, 0x74U, 0x22U,
-    0xe7U, 0xadU, 0x35U, 0x85U,
-    0xe2U, 0xf9U, 0x37U, 0xe8U,
-    0x1cU, 0x75U, 0xdfU, 0x6eU,
-    0x47U, 0xf1U, 0x1aU, 0x71U,
-    0x1dU, 0x29U, 0xc5U, 0x89U,
-    0x6fU, 0xb7U, 0x62U, 0x0eU,
-    0xaaU, 0x18U, 0xbeU, 0x1bU,
-    0xfcU, 0x56U, 0x3eU, 0x4bU,
-    0xc6U, 0xd2U, 0x79U, 0x20U,
-    0x9aU, 0xdbU, 0xc0U, 0xfeU,
-    0x78U, 0xcdU, 0x5aU, 0xf4U,
-    0x1fU, 0xddU, 0xa8U, 0x33U,
-    0x88U, 0x07U, 0xc7U, 0x31U,
-    0xb1U, 0x12U, 0x10U, 0x59U,
-    0x27U, 0x80U, 0xecU, 0x5fU,
-    0x60U, 0x51U, 0x7fU, 0xa9U,
-    0x19U, 0xb5U, 0x4aU, 0x0dU,
-    0x2dU, 0xe5U, 0x7aU, 0x9fU,
-    0x93U, 0xc9U, 0x9cU, 0xefU,
-    0xa0U, 0xe0U, 0x3bU, 0x4dU,
-    0xaeU, 0x2aU, 0xf5U, 0xb0U,
-    0xc8U, 0xebU, 0xbbU, 0x3cU,
-    0x83U, 0x53U, 0x99U, 0x61U,
-    0x17U, 0x2bU, 0x04U, 0x7eU,
-    0xbaU, 0x77U, 0xd6U, 0x26U,
-    0xe1U, 0x69U, 0x14U, 0x63U,
-    0x55U, 0x21U, 0x0cU, 0x7dU,
+static u8 Td4[256] = {
+	 0x52U, 0x09U, 0x6aU, 0xd5U,
+	 0x30U, 0x36U, 0xa5U, 0x38U,
+	 0xbfU, 0x40U, 0xa3U, 0x9eU,
+	 0x81U, 0xf3U, 0xd7U, 0xfbU,
+	 0x7cU, 0xe3U, 0x39U, 0x82U,
+	 0x9bU, 0x2fU, 0xffU, 0x87U,
+	 0x34U, 0x8eU, 0x43U, 0x44U,
+	 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+	 0x54U, 0x7bU, 0x94U, 0x32U,
+	 0xa6U, 0xc2U, 0x23U, 0x3dU,
+	 0xeeU, 0x4cU, 0x95U, 0x0bU,
+	 0x42U, 0xfaU, 0xc3U, 0x4eU,
+	 0x08U, 0x2eU, 0xa1U, 0x66U,
+	 0x28U, 0xd9U, 0x24U, 0xb2U,
+	 0x76U, 0x5bU, 0xa2U, 0x49U,
+	 0x6dU, 0x8bU, 0xd1U, 0x25U,
+	 0x72U, 0xf8U, 0xf6U, 0x64U,
+	 0x86U, 0x68U, 0x98U, 0x16U,
+	 0xd4U, 0xa4U, 0x5cU, 0xccU,
+	 0x5dU, 0x65U, 0xb6U, 0x92U,
+	 0x6cU, 0x70U, 0x48U, 0x50U,
+	 0xfdU, 0xedU, 0xb9U, 0xdaU,
+	 0x5eU, 0x15U, 0x46U, 0x57U,
+	 0xa7U, 0x8dU, 0x9dU, 0x84U,
+	 0x90U, 0xd8U, 0xabU, 0x00U,
+	 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+	 0xf7U, 0xe4U, 0x58U, 0x05U,
+	 0xb8U, 0xb3U, 0x45U, 0x06U,
+	 0xd0U, 0x2cU, 0x1eU, 0x8fU,
+	 0xcaU, 0x3fU, 0x0fU, 0x02U,
+	 0xc1U, 0xafU, 0xbdU, 0x03U,
+	 0x01U, 0x13U, 0x8aU, 0x6bU,
+	 0x3aU, 0x91U, 0x11U, 0x41U,
+	 0x4fU, 0x67U, 0xdcU, 0xeaU,
+	 0x97U, 0xf2U, 0xcfU, 0xceU,
+	 0xf0U, 0xb4U, 0xe6U, 0x73U,
+	 0x96U, 0xacU, 0x74U, 0x22U,
+	 0xe7U, 0xadU, 0x35U, 0x85U,
+	 0xe2U, 0xf9U, 0x37U, 0xe8U,
+	 0x1cU, 0x75U, 0xdfU, 0x6eU,
+	 0x47U, 0xf1U, 0x1aU, 0x71U,
+	 0x1dU, 0x29U, 0xc5U, 0x89U,
+	 0x6fU, 0xb7U, 0x62U, 0x0eU,
+	 0xaaU, 0x18U, 0xbeU, 0x1bU,
+	 0xfcU, 0x56U, 0x3eU, 0x4bU,
+	 0xc6U, 0xd2U, 0x79U, 0x20U,
+	 0x9aU, 0xdbU, 0xc0U, 0xfeU,
+	 0x78U, 0xcdU, 0x5aU, 0xf4U,
+	 0x1fU, 0xddU, 0xa8U, 0x33U,
+	 0x88U, 0x07U, 0xc7U, 0x31U,
+	 0xb1U, 0x12U, 0x10U, 0x59U,
+	 0x27U, 0x80U, 0xecU, 0x5fU,
+	 0x60U, 0x51U, 0x7fU, 0xa9U,
+	 0x19U, 0xb5U, 0x4aU, 0x0dU,
+	 0x2dU, 0xe5U, 0x7aU, 0x9fU,
+	 0x93U, 0xc9U, 0x9cU, 0xefU,
+	 0xa0U, 0xe0U, 0x3bU, 0x4dU,
+	 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+	 0xc8U, 0xebU, 0xbbU, 0x3cU,
+	 0x83U, 0x53U, 0x99U, 0x61U,
+	 0x17U, 0x2bU, 0x04U, 0x7eU,
+	 0xbaU, 0x77U, 0xd6U, 0x26U,
+	 0xe1U, 0x69U, 0x14U, 0x63U,
+	 0x55U, 0x21U, 0x0cU, 0x7dU,
 };
-static const u32 rcon[] = {
+static u32 rcon[] = {
 	0x01000000, 0x02000000, 0x04000000, 0x08000000,
 	0x10000000, 0x20000000, 0x40000000, 0x80000000,
 	0x1B000000, 0x36000000,
 	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
 };
 
-#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
-		    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
-#define PUTU32(ct, st) { (ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
-			 (ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); }
-
 /*
  * Expand the cipher key into the encryption key schedule.
  *
  * @return	the number of rounds for the given cipher key size.
  */
 static int
-aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
+setupEnc(ulong rk[/*4*(Nr + 1)*/], uchar key[], int nkey)
 {
 	int i = 0;
 	u32 temp;
 
-	rk[0] = GETU32(cipherKey     );
-	rk[1] = GETU32(cipherKey +  4);
-	rk[2] = GETU32(cipherKey +  8);
-	rk[3] = GETU32(cipherKey + 12);
-	if (keyBits == 128) {
+	rk[0] = GETU32(key     );
+	rk[1] = GETU32(key +  4);
+	rk[2] = GETU32(key +  8);
+	rk[3] = GETU32(key + 12);
+	if (nkey == 16) {
 		for (;;) {
 			temp  = rk[3];
 			rk[4] = rk[0] ^
@@ -1198,9 +759,9 @@
 			rk += 4;
 		}
 	}
-	rk[4] = GETU32(cipherKey + 16);
-	rk[5] = GETU32(cipherKey + 20);
-	if (keyBits == 192) {
+	rk[4] = GETU32(key + 16);
+	rk[5] = GETU32(key + 20);
+	if (nkey == 24) {
 		for (;;) {
 			temp = rk[ 5];
 			rk[ 6] = rk[ 0] ^
@@ -1220,92 +781,94 @@
 			rk += 6;
 		}
 	}
-	rk[6] = GETU32(cipherKey + 24);
-	rk[7] = GETU32(cipherKey + 28);
-	if (keyBits == 256) {
-	        for (;;) {
-	        	temp = rk[ 7];
-	        	rk[ 8] = rk[ 0] ^
-	        		(Te4[(temp >> 16) & 0xff] << 24) ^
-	        		(Te4[(temp >>  8) & 0xff] << 16) ^
-	        		(Te4[(temp      ) & 0xff] <<  8) ^
-	        		(Te4[(temp >> 24)       ]      ) ^
-	        		rcon[i];
-	        	rk[ 9] = rk[ 1] ^ rk[ 8];
-	        	rk[10] = rk[ 2] ^ rk[ 9];
-	        	rk[11] = rk[ 3] ^ rk[10];
+	rk[6] = GETU32(key + 24);
+	rk[7] = GETU32(key + 28);
+	if (nkey == 32) {
+		for (;;) {
+			temp = rk[ 7];
+			rk[ 8] = rk[ 0] ^
+				(Te4[(temp >> 16) & 0xff] << 24) ^
+				(Te4[(temp >>  8) & 0xff] << 16) ^
+				(Te4[(temp      ) & 0xff] <<  8) ^
+				(Te4[(temp >> 24)       ]      ) ^
+				rcon[i];
+			rk[ 9] = rk[ 1] ^ rk[ 8];
+			rk[10] = rk[ 2] ^ rk[ 9];
+			rk[11] = rk[ 3] ^ rk[10];
 			if (++i == 7) {
 				return 14;
 			}
-	        	temp = rk[11];
-	        	rk[12] = rk[ 4] ^
-	        		(Te4[(temp >> 24)       ] << 24) ^
-	        		(Te4[(temp >> 16) & 0xff] << 16) ^
-	        		(Te4[(temp >>  8) & 0xff] <<  8) ^
-	        		(Te4[(temp      ) & 0xff]      );
-	        	rk[13] = rk[ 5] ^ rk[12];
-	        	rk[14] = rk[ 6] ^ rk[13];
-	        	rk[15] = rk[ 7] ^ rk[14];
+			temp = rk[11];
+			rk[12] = rk[ 4] ^
+				(Te4[(temp >> 24)       ] << 24) ^
+				(Te4[(temp >> 16) & 0xff] << 16) ^
+				(Te4[(temp >>  8) & 0xff] <<  8) ^
+				(Te4[(temp      ) & 0xff]      );
+			rk[13] = rk[ 5] ^ rk[12];
+			rk[14] = rk[ 6] ^ rk[13];
+			rk[15] = rk[ 7] ^ rk[14];
 			rk += 8;
-	        }
+		}
 	}
 	return 0;
 }
 
-/**
- * Expand the cipher key into the decryption key schedule.
+/*
+ * Expand the cipher key into the encryption and decryption key schedules.
  *
  * @return	the number of rounds for the given cipher key size.
  */
 static int
-aes_setupDec(ulong rk[/* 4*(Nr + 1) */], const uchar cipherKey[], int keyBits)
+AESsetup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */], uchar key[], int nkey)
 {
-	int Nr, i, j;
-	ulong temp;
+	int Nr, i;
 
 	/* expand the cipher key: */
-	Nr = aes_setupEnc(rk, cipherKey, keyBits);
-	/* invert the order of the round keys: */
-	for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
-		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
-		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
-		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
-		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
-	}
+	Nr = setupEnc(erk, key, nkey);
+
 	/*
-	 * apply the inverse MixColumn transform to all round keys
-	 * but the first and the last:
+	 * invert the order of the round keys and apply the inverse MixColumn
+	 * transform to all round keys but the first and the last
 	 */
+	drk[0       ] = erk[4*Nr    ];
+	drk[1       ] = erk[4*Nr + 1];
+	drk[2       ] = erk[4*Nr + 2];
+	drk[3       ] = erk[4*Nr + 3];
+	drk[4*Nr    ] = erk[0       ];
+	drk[4*Nr + 1] = erk[1       ];
+	drk[4*Nr + 2] = erk[2       ];
+	drk[4*Nr + 3] = erk[3       ];
+	erk += 4 * Nr;
 	for (i = 1; i < Nr; i++) {
-		rk += 4;
-		rk[0] =
-			Td0[Te4[(rk[0] >> 24)       ]] ^
-			Td1[Te4[(rk[0] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[0] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[0]      ) & 0xff]];
-		rk[1] =
-			Td0[Te4[(rk[1] >> 24)       ]] ^
-			Td1[Te4[(rk[1] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[1] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[1]      ) & 0xff]];
-		rk[2] =
-			Td0[Te4[(rk[2] >> 24)       ]] ^
-			Td1[Te4[(rk[2] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[2] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[2]      ) & 0xff]];
-		rk[3] =
-			Td0[Te4[(rk[3] >> 24)       ]] ^
-			Td1[Te4[(rk[3] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[3] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[3]      ) & 0xff]];
+		drk += 4;
+		erk -= 4;
+		drk[0] =
+		 	Td0[Te4[(erk[0] >> 24)       ]] ^
+		 	Td1[Te4[(erk[0] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[0] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[0]      ) & 0xff]];
+		drk[1] =
+		 	Td0[Te4[(erk[1] >> 24)       ]] ^
+		 	Td1[Te4[(erk[1] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[1] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[1]      ) & 0xff]];
+		drk[2] =
+		 	Td0[Te4[(erk[2] >> 24)       ]] ^
+		 	Td1[Te4[(erk[2] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[2] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[2]      ) & 0xff]];
+		drk[3] =
+		 	Td0[Te4[(erk[3] >> 24)       ]] ^
+		 	Td1[Te4[(erk[3] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[3] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[3]      ) & 0xff]];
 	}
 	return Nr;
 }
 
 /* using round keys in rk, perform Nr rounds of encrypting pt into ct */
-void
-aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
-	uchar ct[16])
+static void
+AESencrypt(ulong rk[/* 4*(Nr + 1) */], int Nr, uchar pt[16], uchar ct[16])
 {
 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
 #ifndef FULL_UNROLL
@@ -1322,50 +885,50 @@
 	s3 = GETU32(pt + 12) ^ rk[3];
 #ifdef FULL_UNROLL
 	/* round 1: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
-   	/* round 2: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+	/* round 2: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
 	/* round 3: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
-   	/* round 4: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+	/* round 4: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
 	/* round 5: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
-   	/* round 6: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+	/* round 6: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
 	/* round 7: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
-   	/* round 8: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+	/* round 8: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
 	/* round 9: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
 	if (Nr > 10) {
 		/* round 10: */
 		s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
@@ -1397,59 +960,59 @@
 	 */
 	r = Nr >> 1;
 	for (;;) {
-	        t0 =
-	            Te0[(s0 >> 24)       ] ^
-	            Te1[(s1 >> 16) & 0xff] ^
-	            Te2[(s2 >>  8) & 0xff] ^
-	            Te3[(s3      ) & 0xff] ^
-	            rk[4];
-	        t1 =
-	            Te0[(s1 >> 24)       ] ^
-	            Te1[(s2 >> 16) & 0xff] ^
-	            Te2[(s3 >>  8) & 0xff] ^
-	            Te3[(s0      ) & 0xff] ^
-	            rk[5];
-	        t2 =
-	            Te0[(s2 >> 24)       ] ^
-	            Te1[(s3 >> 16) & 0xff] ^
-	            Te2[(s0 >>  8) & 0xff] ^
-	            Te3[(s1      ) & 0xff] ^
-	            rk[6];
-	        t3 =
-	            Te0[(s3 >> 24)       ] ^
-	            Te1[(s0 >> 16) & 0xff] ^
-	            Te2[(s1 >>  8) & 0xff] ^
-	            Te3[(s2      ) & 0xff] ^
-	            rk[7];
-
-	        rk += 8;
-	        if (--r == 0)
-	            break;
-
-	        s0 =
-	            Te0[(t0 >> 24)       ] ^
-	            Te1[(t1 >> 16) & 0xff] ^
-	            Te2[(t2 >>  8) & 0xff] ^
-	            Te3[(t3      ) & 0xff] ^
-	            rk[0];
-	        s1 =
-	            Te0[(t1 >> 24)       ] ^
-	            Te1[(t2 >> 16) & 0xff] ^
-	            Te2[(t3 >>  8) & 0xff] ^
-	            Te3[(t0      ) & 0xff] ^
-	            rk[1];
-	        s2 =
-	            Te0[(t2 >> 24)       ] ^
-	            Te1[(t3 >> 16) & 0xff] ^
-	            Te2[(t0 >>  8) & 0xff] ^
-	            Te3[(t1      ) & 0xff] ^
-	            rk[2];
-	        s3 =
-	            Te0[(t3 >> 24)       ] ^
-	            Te1[(t0 >> 16) & 0xff] ^
-	            Te2[(t1 >>  8) & 0xff] ^
-	            Te3[(t2      ) & 0xff] ^
-	            rk[3];
+		t0 =
+		 	Te0[(s0 >> 24)       ] ^
+		 	Te1[(s1 >> 16) & 0xff] ^
+		 	Te2[(s2 >>  8) & 0xff] ^
+		 	Te3[(s3      ) & 0xff] ^
+		 	rk[4];
+		t1 =
+		 	Te0[(s1 >> 24)       ] ^
+		 	Te1[(s2 >> 16) & 0xff] ^
+		 	Te2[(s3 >>  8) & 0xff] ^
+		 	Te3[(s0      ) & 0xff] ^
+		 	rk[5];
+		t2 =
+		 	Te0[(s2 >> 24)       ] ^
+		 	Te1[(s3 >> 16) & 0xff] ^
+		 	Te2[(s0 >>  8) & 0xff] ^
+		 	Te3[(s1      ) & 0xff] ^
+		 	rk[6];
+		t3 =
+		 	Te0[(s3 >> 24)       ] ^
+		 	Te1[(s0 >> 16) & 0xff] ^
+		 	Te2[(s1 >>  8) & 0xff] ^
+		 	Te3[(s2      ) & 0xff] ^
+		 	rk[7];
+
+		rk += 8;
+		if (--r == 0)
+			break;
+
+		s0 =
+		 	Te0[(t0 >> 24)       ] ^
+		 	Te1[(t1 >> 16) & 0xff] ^
+		 	Te2[(t2 >>  8) & 0xff] ^
+		 	Te3[(t3      ) & 0xff] ^
+		 	rk[0];
+		s1 =
+		 	Te0[(t1 >> 24)       ] ^
+		 	Te1[(t2 >> 16) & 0xff] ^
+		 	Te2[(t3 >>  8) & 0xff] ^
+		 	Te3[(t0      ) & 0xff] ^
+		 	rk[1];
+		s2 =
+		 	Te0[(t2 >> 24)       ] ^
+		 	Te1[(t3 >> 16) & 0xff] ^
+		 	Te2[(t0 >>  8) & 0xff] ^
+		 	Te3[(t1      ) & 0xff] ^
+		 	rk[2];
+		s3 =
+		 	Te0[(t3 >> 24)       ] ^
+		 	Te1[(t0 >> 16) & 0xff] ^
+		 	Te2[(t1 >>  8) & 0xff] ^
+		 	Te3[(t2      ) & 0xff] ^
+		 	rk[3];
 	}
 #endif					/* ?FULL_UNROLL */
 	/*
@@ -1486,9 +1049,8 @@
 	PUTU32(ct + 12, s3);
 }
 
-void
-aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
-	uchar pt[16])
+static void
+AESdecrypt(ulong rk[/* 4*(Nr + 1) */], int Nr, uchar ct[16], uchar pt[16])
 {
 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
 #ifndef FULL_UNROLL
@@ -1499,360 +1061,200 @@
 	 * map byte array block to cipher state
 	 * and add initial round key:
 	 */
-    s0 = GETU32(ct     ) ^ rk[0];
-    s1 = GETU32(ct +  4) ^ rk[1];
-    s2 = GETU32(ct +  8) ^ rk[2];
-    s3 = GETU32(ct + 12) ^ rk[3];
+	s0 = GETU32(ct     ) ^ rk[0];
+	s1 = GETU32(ct +  4) ^ rk[1];
+	s2 = GETU32(ct +  8) ^ rk[2];
+	s3 = GETU32(ct + 12) ^ rk[3];
 #ifdef FULL_UNROLL
-    /* round 1: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
-    /* round 2: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
-    /* round 3: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
-    /* round 4: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
-    /* round 5: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
-    /* round 6: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
-    /* round 7: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
-    /* round 8: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
-    /* round 9: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
-    if (Nr > 10) {
-        /* round 10: */
-        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
-        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
-        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
-        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
-        /* round 11: */
-        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
-        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
-        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
-        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
-        if (Nr > 12) {
-            /* round 12: */
-            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
-            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
-            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
-            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
-            /* round 13: */
-            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
-            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
-            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
-            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
-        }
-    }
-    rk += Nr << 2;
-#else					/* !FULL_UNROLL */
-    /*
-     * Nr - 1 full rounds:
-     */
-    r = Nr >> 1;
-    for (;;) {
-        t0 =
-            Td0[(s0 >> 24)       ] ^
-            Td1[(s3 >> 16) & 0xff] ^
-            Td2[(s2 >>  8) & 0xff] ^
-            Td3[(s1      ) & 0xff] ^
-            rk[4];
-        t1 =
-            Td0[(s1 >> 24)       ] ^
-            Td1[(s0 >> 16) & 0xff] ^
-            Td2[(s3 >>  8) & 0xff] ^
-            Td3[(s2      ) & 0xff] ^
-            rk[5];
-        t2 =
-            Td0[(s2 >> 24)       ] ^
-            Td1[(s1 >> 16) & 0xff] ^
-            Td2[(s0 >>  8) & 0xff] ^
-            Td3[(s3      ) & 0xff] ^
-            rk[6];
-        t3 =
-            Td0[(s3 >> 24)       ] ^
-            Td1[(s2 >> 16) & 0xff] ^
-            Td2[(s1 >>  8) & 0xff] ^
-            Td3[(s0      ) & 0xff] ^
-            rk[7];
-
-        rk += 8;
-        if (--r == 0)
-            break;
-
-        s0 =
-            Td0[(t0 >> 24)       ] ^
-            Td1[(t3 >> 16) & 0xff] ^
-            Td2[(t2 >>  8) & 0xff] ^
-            Td3[(t1      ) & 0xff] ^
-            rk[0];
-        s1 =
-            Td0[(t1 >> 24)       ] ^
-            Td1[(t0 >> 16) & 0xff] ^
-            Td2[(t3 >>  8) & 0xff] ^
-            Td3[(t2      ) & 0xff] ^
-            rk[1];
-        s2 =
-            Td0[(t2 >> 24)       ] ^
-            Td1[(t1 >> 16) & 0xff] ^
-            Td2[(t0 >>  8) & 0xff] ^
-            Td3[(t3      ) & 0xff] ^
-            rk[2];
-        s3 =
-            Td0[(t3 >> 24)       ] ^
-            Td1[(t2 >> 16) & 0xff] ^
-            Td2[(t1 >>  8) & 0xff] ^
-            Td3[(t0      ) & 0xff] ^
-            rk[3];
-    }
-#endif					/* ?FULL_UNROLL */
-	/*
-	 * apply last round and
-	 * map cipher state to byte array block:
-	 */
-   	s0 =
-   		(Td4[(t0 >> 24)       ] << 24) ^
-   		(Td4[(t3 >> 16) & 0xff] << 16) ^
-   		(Td4[(t2 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t1      ) & 0xff]      ) ^
-   		rk[0];
-	PUTU32(pt     , s0);
-   	s1 =
-   		(Td4[(t1 >> 24)       ] << 24) ^
-   		(Td4[(t0 >> 16) & 0xff] << 16) ^
-   		(Td4[(t3 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t2      ) & 0xff]      ) ^
-   		rk[1];
-	PUTU32(pt +  4, s1);
-   	s2 =
-   		(Td4[(t2 >> 24)       ] << 24) ^
-   		(Td4[(t1 >> 16) & 0xff] << 16) ^
-   		(Td4[(t0 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t3      ) & 0xff]      ) ^
-   		rk[2];
-	PUTU32(pt +  8, s2);
-   	s3 =
-   		(Td4[(t3 >> 24)       ] << 24) ^
-   		(Td4[(t2 >> 16) & 0xff] << 16) ^
-   		(Td4[(t1 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t0      ) & 0xff]      ) ^
-   		rk[3];
-	PUTU32(pt + 12, s3);
-}
-
-#ifdef INTERMEDIATE_VALUE_KAT
-
-static void
-aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
-	int rounds)
-{
-	int r;
-	u32 s0, s1, s2, s3, t0, t1, t2, t3;
-
-	/*
-	 * map byte array block to cipher state
-	 * and add initial round key:
-	 */
-	s0 = GETU32(block     ) ^ rk[0];
-	s1 = GETU32(block +  4) ^ rk[1];
-	s2 = GETU32(block +  8) ^ rk[2];
-	s3 = GETU32(block + 12) ^ rk[3];
-	rk += 4;
-
-	/*
-	 * Nr - 1 full rounds:
-	 */
-	for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
-		t0 =
-			Te0[(s0 >> 24)       ] ^
-			Te1[(s1 >> 16) & 0xff] ^
-			Te2[(s2 >>  8) & 0xff] ^
-			Te3[(s3      ) & 0xff] ^
-			rk[0];
-		t1 =
-			Te0[(s1 >> 24)       ] ^
-			Te1[(s2 >> 16) & 0xff] ^
-			Te2[(s3 >>  8) & 0xff] ^
-			Te3[(s0      ) & 0xff] ^
-			rk[1];
-		t2 =
-			Te0[(s2 >> 24)       ] ^
-			Te1[(s3 >> 16) & 0xff] ^
-			Te2[(s0 >>  8) & 0xff] ^
-			Te3[(s1      ) & 0xff] ^
-			rk[2];
-		t3 =
-			Te0[(s3 >> 24)       ] ^
-			Te1[(s0 >> 16) & 0xff] ^
-			Te2[(s1 >>  8) & 0xff] ^
-			Te3[(s2      ) & 0xff] ^
-			rk[3];
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
-		rk += 4;
-	}
-
-	/*
-	 * apply last round and
-	 * map cipher state to byte array block:
-	 */
-	if (rounds == Nr) {
-	    	t0 =
-	    		(Te4[(s0 >> 24)       ] << 24) ^
-	    		(Te4[(s1 >> 16) & 0xff] << 16) ^
-	    		(Te4[(s2 >>  8) & 0xff] <<  8) ^
-	    		(Te4[(s3      ) & 0xff]      ) ^
-	    		rk[0];
-	    	t1 =
-	    		(Te4[(s1 >> 24)       ] << 24) ^
-	    		(Te4[(s2 >> 16) & 0xff] << 16) ^
-	    		(Te4[(s3 >>  8) & 0xff] <<  8) ^
-	    		(Te4[(s0      ) & 0xff]      ) ^
-	    		rk[1];
-	    	t2 =
-	    		(Te4[(s2 >> 24)       ] << 24) ^
-	    		(Te4[(s3 >> 16) & 0xff] << 16) ^
-	    		(Te4[(s0 >>  8) & 0xff] <<  8) ^
-	    		(Te4[(s1      ) & 0xff]      ) ^
-	    		rk[2];
-	    	t3 =
-	    		(Te4[(s3 >> 24)       ] << 24) ^
-	    		(Te4[(s0 >> 16) & 0xff] << 16) ^
-	    		(Te4[(s1 >>  8) & 0xff] <<  8) ^
-	    		(Te4[(s2      ) & 0xff]      ) ^
-	    		rk[3];
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
+	/* round 1: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+	/* round 2: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+	/* round 3: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+	/* round 4: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+	/* round 5: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+	/* round 6: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+	/* round 7: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+	/* round 8: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+	/* round 9: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+	if (Nr > 10) {
+		/* round 10: */
+		s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+		s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+		s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+		s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+		/* round 11: */
+		t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+		t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+		t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+		t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+		if (Nr > 12) {
+			/* round 12: */
+			s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+			s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+			s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+			s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+			/* round 13: */
+			t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+			t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+			t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+			t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+		}
 	}
-
-	PUTU32(block     , s0);
-	PUTU32(block +  4, s1);
-	PUTU32(block +  8, s2);
-	PUTU32(block + 12, s3);
-}
-
-static void
-aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
-	int rounds)
-{
-	int r;
-	u32 s0, s1, s2, s3, t0, t1, t2, t3;
-
-	/*
-	 * map byte array block to cipher state
-	 * and add initial round key:
-	 */
-	s0 = GETU32(block     ) ^ rk[0];
-	s1 = GETU32(block +  4) ^ rk[1];
-	s2 = GETU32(block +  8) ^ rk[2];
-	s3 = GETU32(block + 12) ^ rk[3];
-	rk += 4;
-
+	rk += Nr << 2;
+#else					/* !FULL_UNROLL */
 	/*
 	 * Nr - 1 full rounds:
 	 */
-	for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
+	r = Nr >> 1;
+	for (;;) {
 		t0 =
 			Td0[(s0 >> 24)       ] ^
 			Td1[(s3 >> 16) & 0xff] ^
 			Td2[(s2 >>  8) & 0xff] ^
 			Td3[(s1      ) & 0xff] ^
-			rk[0];
+			rk[4];
 		t1 =
 			Td0[(s1 >> 24)       ] ^
 			Td1[(s0 >> 16) & 0xff] ^
 			Td2[(s3 >>  8) & 0xff] ^
 			Td3[(s2      ) & 0xff] ^
-			rk[1];
+			rk[5];
 		t2 =
 			Td0[(s2 >> 24)       ] ^
 			Td1[(s1 >> 16) & 0xff] ^
 			Td2[(s0 >>  8) & 0xff] ^
 			Td3[(s3      ) & 0xff] ^
-			rk[2];
+			rk[6];
 		t3 =
 			Td0[(s3 >> 24)       ] ^
 			Td1[(s2 >> 16) & 0xff] ^
 			Td2[(s1 >>  8) & 0xff] ^
 			Td3[(s0      ) & 0xff] ^
-			rk[3];
+			rk[7];
 
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
-		rk += 4;
+		rk += 8;
+		if (--r == 0)
+			break;
+
+		s0 =
+			Td0[(t0 >> 24)       ] ^
+			Td1[(t3 >> 16) & 0xff] ^
+			Td2[(t2 >>  8) & 0xff] ^
+			Td3[(t1      ) & 0xff] ^
+			rk[0];
+		s1 =
+			Td0[(t1 >> 24)       ] ^
+			Td1[(t0 >> 16) & 0xff] ^
+			Td2[(t3 >>  8) & 0xff] ^
+			Td3[(t2      ) & 0xff] ^
+			rk[1];
+		s2 =
+			Td0[(t2 >> 24)       ] ^
+			Td1[(t1 >> 16) & 0xff] ^
+			Td2[(t0 >>  8) & 0xff] ^
+			Td3[(t3      ) & 0xff] ^
+			rk[2];
+		s3 =
+			Td0[(t3 >> 24)       ] ^
+			Td1[(t2 >> 16) & 0xff] ^
+			Td2[(t1 >>  8) & 0xff] ^
+			Td3[(t0      ) & 0xff] ^
+			rk[3];
 	}
-
+#endif					/* ?FULL_UNROLL */
 	/*
-	 * complete the last round and
+	 * apply last round and
 	 * map cipher state to byte array block:
 	 */
-	t0 =
-		(Td4[(s0 >> 24)       ] << 24) ^
-		(Td4[(s3 >> 16) & 0xff] << 16) ^
-		(Td4[(s2 >>  8) & 0xff] <<  8) ^
-		(Td4[(s1      ) & 0xff]      );
-	t1 =
-		(Td4[(s1 >> 24)       ] << 24) ^
-		(Td4[(s0 >> 16) & 0xff] << 16) ^
-		(Td4[(s3 >>  8) & 0xff] <<  8) ^
-		(Td4[(s2      ) & 0xff]      );
-	t2 =
-		(Td4[(s2 >> 24)       ] << 24) ^
-		(Td4[(s1 >> 16) & 0xff] << 16) ^
-		(Td4[(s0 >>  8) & 0xff] <<  8) ^
-		(Td4[(s3      ) & 0xff]      );
-	t3 =
-		(Td4[(s3 >> 24)       ] << 24) ^
-		(Td4[(s2 >> 16) & 0xff] << 16) ^
-		(Td4[(s1 >>  8) & 0xff] <<  8) ^
-		(Td4[(s0      ) & 0xff]      );
-
-	if (rounds == Nr) {
-		t0 ^= rk[0];
-		t1 ^= rk[1];
-		t2 ^= rk[2];
-		t3 ^= rk[3];
-	}
-
-	PUTU32(block     , t0);
-	PUTU32(block +  4, t1);
-	PUTU32(block +  8, t2);
-	PUTU32(block + 12, t3);
+	s0 =
+		(Td4[(t0 >> 24)       ] << 24) ^
+		(Td4[(t3 >> 16) & 0xff] << 16) ^
+		(Td4[(t2 >>  8) & 0xff] <<  8) ^
+		(Td4[(t1      ) & 0xff]      ) ^
+		rk[0];
+	PUTU32(pt     , s0);
+	s1 =
+		(Td4[(t1 >> 24)       ] << 24) ^
+		(Td4[(t0 >> 16) & 0xff] << 16) ^
+		(Td4[(t3 >>  8) & 0xff] <<  8) ^
+		(Td4[(t2      ) & 0xff]      ) ^
+		rk[1];
+	PUTU32(pt +  4, s1);
+	s2 =
+		(Td4[(t2 >> 24)       ] << 24) ^
+		(Td4[(t1 >> 16) & 0xff] << 16) ^
+		(Td4[(t0 >>  8) & 0xff] <<  8) ^
+		(Td4[(t3      ) & 0xff]      ) ^
+		rk[2];
+	PUTU32(pt +  8, s2);
+	s3 =
+		(Td4[(t3 >> 24)       ] << 24) ^
+		(Td4[(t2 >> 16) & 0xff] << 16) ^
+		(Td4[(t1 >>  8) & 0xff] <<  8) ^
+		(Td4[(t0      ) & 0xff]      ) ^
+		rk[3];
+	PUTU32(pt + 12, s3);
 }
 
-#endif			/* INTERMEDIATE_VALUE_KAT */
+void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]) = AESencrypt;
+void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]) = AESdecrypt;
+
+void
+setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec)
+{
+	static int (*aes_setup)(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */], uchar key[], int nkey);
+
+	if(aes_setup == nil){
+		extern void *aesni_init(void);
+		if((aes_setup = aesni_init()) == nil)
+			aes_setup = AESsetup;
+	}
+	memset(s, 0, sizeof(*s));
+	if(nkey > AESmaxkey)
+		nkey = AESmaxkey;
+	memmove(s->key, key, nkey);
+	s->keybytes = nkey;
+	s->ctrsz = 4;	/* default counter size from rfc3686 */
+	s->ekey = s->storage+16 - (s->storage - (uchar*)0 & 15);
+	s->dkey = (uchar*)s->ekey + 16*(AESmaxrounds+1);
+	s->rounds = (*aes_setup)(s->ekey, s->dkey, s->key, nkey);
+	if(ivec != nil)
+		memmove(s->ivec, ivec, AESbsize);
+	if(s->rounds != 0)
+		s->setup = 0xcafebabe;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesCTR.c	Mon Jun 28 17:01:07 2021
@@ -0,0 +1,69 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*
+ * AES-CTR mode, per rfc3686.
+ * CTRs could be precalculated for efficiency
+ * and there would also be less back and forth mp
+ */
+
+static void
+incrementCTR(uchar *p, uint ctrsz)
+{
+	int len;
+	ulong c;
+	uchar *ctr;
+	mpint *mpctr, *mpctrsz;
+
+	ctr = p + AESbsize - ctrsz;
+	if(ctrsz == 4){
+		/*
+		 * If counter is 32 bits (as in rfc3686 and ssh2) there's
+		 * no need to use extended precision.
+		 */
+		c = 1 + (ctr[0]<<24 | ctr[1]<<16 | ctr[2]<<8 | ctr[3]);
+		ctr[0] = c>>24; ctr[1] = c>>16; ctr[2] = c>>8; ctr[3] = c;
+		return;
+	}
+	mpctr = betomp(ctr, ctrsz, nil);
+	mpctrsz = mpnew(ctrsz*8 + 1);
+	mpleft(mpone, ctrsz*8, mpctrsz);
+	mpadd(mpctr, mpone, mpctr);
+	mpmod(mpctr, mpctrsz, mpctr);
+	len = mptobe(mpctr, ctr, ctrsz, nil);
+	assert(len == ctrsz);
+	mpfree(mpctrsz);
+	mpfree(mpctr);
+}
+
+void
+aesCTRencrypt(uchar *p, int len, AESstate *s)
+{
+	uchar q[AESbsize];
+	uchar *ip, *eip, *ctr;
+
+	ctr = s->ivec;
+	for(; len >= AESbsize; len -= AESbsize){
+		ip = q;
+		aes_encrypt(s->ekey, s->rounds, ctr, q);
+		for(eip = p + AESbsize; p < eip; )
+			*p++ ^= *ip++;
+		incrementCTR(ctr, s->ctrsz);
+	}
+
+	if(len > 0){
+		ip = q;
+		aes_encrypt(s->ekey, s->rounds, ctr, q);
+		for(eip = p + len; p < eip; )
+			*p++ ^= *ip++;
+		incrementCTR(ctr, s->ctrsz);
+	}
+}
+
+void
+aesCTRdecrypt(uchar *p, int len, AESstate *s)
+{
+	aesCTRencrypt(p, len, s);
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesCBC.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,96 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*
+ * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
+ * Because of the way that non-multiple-of-16 buffers are handled,
+ * the decryptor must be fed buffers of the same size as the encryptor.
+ */
+void
+aesCBCencrypt(uchar *p, int len, AESstate *s)
+{
+	uchar *ip, *eip;
+
+	if(((p-(uchar*)0) & 3) == 0){
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			((u32int*)ip)[0] ^= ((u32int*)p)[0];
+			((u32int*)ip)[1] ^= ((u32int*)p)[1];
+			((u32int*)ip)[2] ^= ((u32int*)p)[2];
+			((u32int*)ip)[3] ^= ((u32int*)p)[3];
+
+			aes_encrypt(s->ekey, s->rounds, ip, ip);
+
+			((u32int*)p)[0] = ((u32int*)ip)[0];
+			((u32int*)p)[1] = ((u32int*)ip)[1];
+			((u32int*)p)[2] = ((u32int*)ip)[2];
+			((u32int*)p)[3] = ((u32int*)ip)[3];
+			p += AESbsize;
+		}
+	} else {
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; )
+				*ip++ ^= *p++;
+			aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+			memmove(p - AESbsize, s->ivec, AESbsize);
+		}
+	}
+
+	if(len > 0){
+		ip = s->ivec;
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
+		for(eip = ip+len; ip < eip; )
+			*p++ ^= *ip++;
+	}
+}
+
+void
+aesCBCdecrypt(uchar *p, int len, AESstate *s)
+{
+	uchar *ip, *eip, *tp;
+	u32int t[4];
+
+	if(((p-(uchar*)0) & 3) == 0){
+		for(; len >= AESbsize; len -= AESbsize){
+			t[0] = ((u32int*)p)[0];
+			t[1] = ((u32int*)p)[1];
+			t[2] = ((u32int*)p)[2];
+			t[3] = ((u32int*)p)[3];
+
+			aes_decrypt(s->dkey, s->rounds, p, p);
+
+			ip = s->ivec;
+			((u32int*)p)[0] ^= ((u32int*)ip)[0];
+			((u32int*)p)[1] ^= ((u32int*)ip)[1];
+			((u32int*)p)[2] ^= ((u32int*)ip)[2];
+			((u32int*)p)[3] ^= ((u32int*)ip)[3];
+			p += AESbsize;
+
+			((u32int*)ip)[0] = t[0];
+			((u32int*)ip)[1] = t[1];
+			((u32int*)ip)[2] = t[2];
+			((u32int*)ip)[3] = t[3];
+		}
+	} else {
+		for(; len >= AESbsize; len -= AESbsize){
+			tp = (uchar*)t;
+			memmove(tp, p, AESbsize);
+			aes_decrypt(s->dkey, s->rounds, p, p);
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	}
+
+	if(len > 0){
+		ip = s->ivec;
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
+		for(eip = ip+len; ip < eip; )
+			*p++ ^= *ip++;
+	}
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesCFB.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,52 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+typedef ulong u32;
+
+void
+aesCFBencrypt(uchar *p, int len, AESstate *s)
+{
+	u32 a, o = s->offset;
+
+	while(len > 0){
+		if(o % 16){
+		Odd:
+			a = (s->ivec[o++ % 16] ^= *p), *p++ = a, len--;
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;
+		((u32*)p)[0] = (((u32*)s->ivec)[0] ^= ((u32*)p)[0]);
+		((u32*)p)[1] = (((u32*)s->ivec)[1] ^= ((u32*)p)[1]);
+		((u32*)p)[2] = (((u32*)s->ivec)[2] ^= ((u32*)p)[2]);
+		((u32*)p)[3] = (((u32*)s->ivec)[3] ^= ((u32*)p)[3]);
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
+
+void
+aesCFBdecrypt(uchar *p, int len, AESstate *s)
+{
+	u32 a, o = s->offset;
+
+	while(len > 0){
+		if(o % 16){
+		Odd:
+			a = *p, *p++ ^= s->ivec[o % 16], s->ivec[o++ % 16] = a, len--;
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;
+		a = ((u32*)p)[0], ((u32*)p)[0] ^= ((u32*)s->ivec)[0], ((u32*)s->ivec)[0] = a;
+		a = ((u32*)p)[1], ((u32*)p)[1] ^= ((u32*)s->ivec)[1], ((u32*)s->ivec)[1] = a;
+		a = ((u32*)p)[2], ((u32*)p)[2] ^= ((u32*)s->ivec)[2], ((u32*)s->ivec)[2] = a;
+		a = ((u32*)p)[3], ((u32*)p)[3] ^= ((u32*)s->ivec)[3], ((u32*)s->ivec)[3] = a;
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesOFB.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,30 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+typedef ulong u32;
+
+void
+aesOFBencrypt(uchar *p, int len, AESstate *s)
+{
+	u32 o = s->offset;
+
+	while(len > 0){
+		if(o % 16){
+		Odd:
+			*p++ ^= s->ivec[o++ % 16], len--;
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;
+		((u32*)p)[0] ^= ((u32*)s->ivec)[0];
+		((u32*)p)[1] ^= ((u32*)s->ivec)[1];
+		((u32*)p)[2] ^= ((u32*)s->ivec)[2];
+		((u32*)p)[3] ^= ((u32*)s->ivec)[3];
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
+
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesXCBmac.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,95 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*
+ * AES-XCBC-MAC-96 message authentication, per rfc3566.
+ */
+static uchar basekey[3][16] = {
+	{
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	},
+	{
+	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+	},
+	{
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+	},
+};
+
+void
+setupAESXCBCstate(AESstate *s)		/* was setupmac96 */
+{
+	int i, j;
+	uint q[16 / sizeof(uint)];
+	uchar *p;
+
+	assert(s->keybytes == 16);
+	for(i = 0; i < 3; i++)
+		aes_encrypt(s->ekey, s->rounds, basekey[i],
+			s->mackey + AESbsize*i);
+
+	p = s->mackey;
+	memset(q, 0, AESbsize);
+
+	/*
+	 * put the in the right endian.  once figured, probably better
+	 * to use some fcall macros.
+	 * keys for encryption in local endianness for the algorithm...
+	 * only key1 is used for encryption;
+	 * BUG!!: I think this is what I got wrong.
+	 */
+	for(i = 0; i < 16 / sizeof(uint); i ++){
+		for(j = 0; j < sizeof(uint); j++)
+			q[i] |= p[sizeof(uint)-j-1] << 8*j;
+		p += sizeof(uint);
+	}
+	memmove(s->mackey, q, 16);
+}
+
+/*
+ * Not dealing with > 128-bit keys, not dealing with strange corner cases like
+ * empty message.  Should be fine for AES-XCBC-MAC-96.
+ */
+uchar*
+aesXCBCmac(uchar *p, int len, AESstate *s)
+{
+	uchar *p2, *ip, *eip, *mackey;
+	uchar q[AESbsize];
+
+	assert(s->keybytes == 16);	/* more complicated for bigger */
+	memset(s->ivec, 0, AESbsize);	/* E[0] is 0+ */
+
+	for(; len > AESbsize; len -= AESbsize){
+		memmove(q, p, AESbsize);
+		p2 = q;
+		ip = s->ivec;
+		for(eip = ip + AESbsize; ip < eip; )
+			*p2++ ^= *ip++;
+		aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
+		p += AESbsize;
+	}
+	/* the last one */
+
+	memmove(q, p, len);
+	p2 = q+len;
+	if(len == AESbsize)
+		mackey = s->mackey + AESbsize;	/* k2 */
+	else{
+		mackey = s->mackey+2*AESbsize;	/* k3 */
+		*p2++ = 1 << 7;			/* padding */
+		len = AESbsize - len - 1;
+		memset(p2, 0, len);
+	}
+
+	ip = s->ivec;
+	p2 = q;
+	for(eip = ip + AESbsize; ip < eip; )
+		*p2++ ^= *ip++ ^ *mackey++;
+	aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
+	return s->ivec;			/* only the 12 bytes leftmost */
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aes_gcm.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,201 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+static void
+load128(uchar b[16], ulong W[4])
+{
+	W[0] = (ulong)b[15] | (ulong)b[14]<<8 | (ulong)b[13]<<16 | (ulong)b[12]<<24;
+	W[1] = (ulong)b[11] | (ulong)b[10]<<8 | (ulong)b[ 9]<<16 | (ulong)b[ 8]<<24;
+	W[2] = (ulong)b[ 7] | (ulong)b[ 6]<<8 | (ulong)b[ 5]<<16 | (ulong)b[ 4]<<24;
+	W[3] = (ulong)b[ 3] | (ulong)b[ 2]<<8 | (ulong)b[ 1]<<16 | (ulong)b[ 0]<<24;
+}
+
+static void
+store128(ulong W[4], uchar b[16])
+{
+	b[15] = W[0], b[14] = W[0]>>8, b[13] = W[0]>>16, b[12] = W[0]>>24;
+	b[11] = W[1], b[10] = W[1]>>8, b[ 9] = W[1]>>16, b[ 8] = W[1]>>24;
+	b[ 7] = W[2], b[ 6] = W[2]>>8, b[ 5] = W[2]>>16, b[ 4] = W[2]>>24;
+	b[ 3] = W[3], b[ 2] = W[3]>>8, b[ 1] = W[3]>>16, b[ 0] = W[3]>>24;
+}
+
+static void
+gfmul(ulong X[4], ulong Y[4], ulong Z[4])
+{
+	long m, i;
+
+	Z[0] = Z[1] = Z[2] = Z[3] = 0;
+	for(i=127; i>=0; i--){
+		m = ((long)Y[i>>5] << 31-(i&31)) >> 31;
+		Z[0] ^= X[0] & m;
+		Z[1] ^= X[1] & m;
+		Z[2] ^= X[2] & m;
+		Z[3] ^= X[3] & m;
+		m = ((long)X[0]<<31) >> 31;
+		X[0] = X[0]>>1 | X[1]<<31;
+		X[1] = X[1]>>1 | X[2]<<31;
+		X[2] = X[2]>>1 | X[3]<<31;
+		X[3] = X[3]>>1 ^ (0xE1000000 & m);
+	}
+}
+
+static void
+prepareM(ulong H[4], ulong M[16][256][4])
+{
+	ulong X[4], i, j;
+
+	for(i=0; i<16; i++){
+		for(j=0; j<256; j++){
+			X[0] = X[1] = X[2] = X[3] = 0;
+			X[i>>2] = j<<((i&3)<<3);
+			gfmul(X, H, M[i][j]);
+		}
+	}
+}
+
+static void
+ghash1(AESGCMstate *s, ulong X[4], ulong Y[4])
+{
+	ulong *Xi, i;
+
+	X[0] ^= Y[0], X[1] ^= Y[1], X[2] ^= Y[2], X[3] ^= Y[3];
+	if(0){
+		gfmul(X, s->H, Y);
+		return;
+	}
+
+	Y[0] = Y[1] = Y[2] = Y[3] = 0;
+	for(i=0; i<16; i++){
+		Xi = s->M[i][(X[i>>2]>>((i&3)<<3))&0xFF];
+		Y[0] ^= Xi[0];
+		Y[1] ^= Xi[1];
+		Y[2] ^= Xi[2];
+		Y[3] ^= Xi[3];
+	}
+}
+
+static void
+ghashn(AESGCMstate *s, uchar *dat, ulong len, ulong Y[4])
+{
+	uchar tmp[16];
+	ulong X[4];
+
+	while(len >= 16){
+		load128(dat, X);
+		ghash1(s, X, Y);
+		dat += 16, len -= 16;
+	}
+	if(len > 0){
+		memmove(tmp, dat, len);
+		memset(tmp+len, 0, 16-len);
+		load128(tmp, X);
+		ghash1(s, X, Y);
+	}
+}
+
+static ulong
+aesxctr1(AESstate *s, uchar ctr[AESbsize], uchar *dat, ulong len)
+{
+	uchar tmp[AESbsize];
+	ulong i;
+
+	aes_encrypt(s->ekey, s->rounds, ctr, tmp);
+	if(len > AESbsize)
+		len = AESbsize;
+	for(i=0; i<len; i++)
+		dat[i] ^= tmp[i];
+	return len;
+}
+
+static void
+aesxctrn(AESstate *s, uchar *dat, ulong len)
+{
+	uchar ctr[AESbsize];
+	ulong i;
+
+	memmove(ctr, s->ivec, AESbsize);
+	while(len > 0){
+		for(i=AESbsize-1; i>=AESbsize-4; i--)
+			if(++ctr[i] != 0)
+				break;
+
+		if(aesxctr1(s, ctr, dat, len) < AESbsize)
+			break;
+		dat += AESbsize;
+		len -= AESbsize;
+	}
+}
+
+void
+aesgcm_setiv(AESGCMstate *s, uchar *iv, int ivlen)
+{
+	if(ivlen == 96/8){
+		memmove(s->ivec, iv, ivlen);
+		memset(s->ivec+ivlen, 0, AESbsize-ivlen);
+		s->ivec[AESbsize-1] = 1;
+	} else {
+		ulong L[4], Y[4] = {0};
+
+		ghashn(s, iv, ivlen, Y);
+		L[0] = ivlen << 3;
+		L[1] = ivlen >> 29;
+		L[2] = L[3] = 0;
+		ghash1(s, L, Y);
+		store128(Y, s->ivec);
+	}
+}
+
+void
+setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)
+{
+	setupAESstate(s, key, keylen, nil);
+
+	memset(s->ivec, 0, AESbsize);
+	aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+	load128(s->ivec, s->H);
+	memset(s->ivec, 0, AESbsize);
+	prepareM(s->H, s->M);
+
+	if(iv != nil && ivlen > 0)
+		aesgcm_setiv(s, iv, ivlen);
+}
+
+void
+aesgcm_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s)
+{
+	ulong L[4], Y[4] = {0};
+
+	ghashn(s, aad, naad, Y);
+	aesxctrn(s, dat, ndat);
+	ghashn(s, dat, ndat, Y);
+	L[0] = ndat << 3;
+	L[1] = ndat >> 29;
+	L[2] = naad << 3;
+	L[3] = naad >> 29;
+	ghash1(s, L, Y);
+	store128(Y, tag);
+	aesxctr1(s, s->ivec, tag, 16);
+}
+
+int
+aesgcm_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], AESGCMstate *s)
+{
+	ulong L[4], Y[4] = {0};
+	uchar tmp[16];
+
+	ghashn(s, aad, naad, Y);
+	ghashn(s, dat, ndat, Y);
+	L[0] = ndat << 3;
+	L[1] = ndat >> 29;
+	L[2] = naad << 3;
+	L[3] = naad >> 29;
+	ghash1(s, L, Y);
+	store128(Y, tmp);
+	aesxctr1(s, s->ivec, tmp, 16);
+	if(tsmemcmp(tag, tmp, 16) != 0)
+		return -1;
+	aesxctrn(s, dat, ndat);
+	return 0;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aes_xts.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,85 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/* little-endian data order */
+#define	GET4(p)		((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24))
+#define	PUT4(p,v)	(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24
+
+static void
+gf_mulx(uchar *x)
+{
+	ulong t0, t1, t2, t3, t4;
+
+	t0 = GET4(x);
+	t1 = GET4(x+4);
+	t2 = GET4(x+8);
+	t3 = GET4(x+12);
+
+	t4 =             (t3 >> 31);
+	t3 = (t3 << 1) | (t2 >> 31);
+	t2 = (t2 << 1) | (t1 >> 31);
+	t1 = (t1 << 1) | (t0 >> 31);
+	t0 = (t0 << 1) ^ (t4*135);
+
+	PUT4(x, t0);
+	PUT4(x+4, t1);
+	PUT4(x+8, t2);
+	PUT4(x+12, t3);
+}
+
+static void
+xor128(uchar *o, uchar *i1, uchar *i2)
+{
+	int i;
+
+	for(i=0; i<16; i++)
+		o[i] = i1[i] ^ i2[i];
+}
+
+static void
+setupT(AESstate *tweak, uvlong sectorNumber, uchar T[AESbsize])
+{
+	PUT4(T+0, (ulong)sectorNumber), sectorNumber >>= 32;
+	PUT4(T+4, (ulong)sectorNumber);
+	PUT4(T+8, 0);
+	PUT4(T+12, 0);
+	aes_encrypt(tweak->ekey, tweak->rounds, T, T);
+}
+
+void
+aes_xts_encrypt(AESstate *tweak, AESstate *ecb,
+	uvlong sectorNumber, uchar *input, uchar *output, ulong len)
+{
+	uchar T[AESbsize], x[AESbsize];
+	
+	if(len % AESbsize)
+		abort();
+
+	setupT(tweak, sectorNumber, T);
+	for (; len > 0; len -= AESbsize, input += AESbsize, output += AESbsize) {
+		xor128(x, input, T);
+		aes_encrypt(ecb->ekey, ecb->rounds, x, x);
+		xor128(output, x, T);
+		gf_mulx(T);
+	}
+}
+
+void
+aes_xts_decrypt(AESstate *tweak, AESstate *ecb,
+	uvlong sectorNumber, uchar *input, uchar *output, ulong len)
+{
+	uchar T[AESbsize], x[AESbsize];
+	
+	if(len % AESbsize)
+		abort();
+
+	setupT(tweak, sectorNumber, T);
+	for (; len > 0; len -= AESbsize, input += AESbsize, output += AESbsize) {
+		xor128(x, input, T);
+		aes_decrypt(ecb->dkey, ecb->rounds, x, x);
+		xor128(output, x, T);
+		gf_mulx(T);
+	}
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesgcmtest.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,314 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+typedef struct Test Test;
+struct Test
+{
+	char *K;
+	char *P;
+	char *A;
+	char *IV;
+	char *T;
+};
+
+Test tests[] = {
+	{	/* Test Case 1 */
+		"00000000000000000000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"58E2FCCEFA7E3061367F1D57A4E7455A"
+	},
+	{	/* Test Case 2 */
+		"00000000000000000000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"AB6E47D42CEC13BDF53A67B21257BDDF",
+	},
+	{	/* Test Case 3 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"4D5C2AF327CD64A62CF35ABD2BA6FAB4"
+	},
+	{	/* Test Case 4 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"5BC94FBC3221A5DB94FAE95AE7121A47"
+	},
+	{	/* Test Case 5 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"3612D2E79E3B0785561BE14AACA2FCCB"
+	},
+	{	/* Test Case 6 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"619CC5AEFFFE0BFA462AF43C1699D050"
+	},
+	{	/* Test Case 7 */
+		"00000000000000000000000000000000"
+		"0000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"CD33B28AC773F74BA00ED1F312572435"
+	},
+	{	/* Test Case 8 */
+		"00000000000000000000000000000000"
+		"0000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"2FF58D80033927AB8EF4D4587514F0FB"
+	},
+	{	/* Test Case 9 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"9924A7C8587336BFB118024DB8674A14"
+	},
+	{	/* Test Case 10 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"2519498E80F1478F37BA55BD6D27618C"
+	},
+	{	/* Test Case 11 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"65DCC57FCF623A24094FCCA40D3533F8"
+	},
+	{	/* Test Case 12 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"DCF566FF291C25BBB8568FC3D376A6D9"
+	},
+	{	/* Test Case 13 */
+		"00000000000000000000000000000000"
+		"00000000000000000000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"530F8AFBC74536B9A963B4F1C4CB738B"
+	},
+	{	/* Test Case 14 */
+		"00000000000000000000000000000000"
+		"00000000000000000000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"D0D1C8A799996BF0265B98B5D48AB919"
+	},
+	{	/* Test Case 15 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"B094DAC5D93471BDEC1A502270E3CC6C"
+	},
+	{	/* Test Case 16 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"76FC6ECE0F4E1768CDDF8853BB2D551B"
+	},
+	{	/* Test Case 17 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"3A337DBF46A792C45E454913FE2EA8F2"
+	},
+	{	/* Test Case 18 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"A44A8266EE1C8EB0C8B5D4CF5AE9F19A"
+	},
+};
+
+int
+parsehex(char *s, uchar *h, char *l)
+{
+	char *e;
+	mpint *m;
+	int n;
+
+	n = strlen(s);
+	if(n == 0)
+		return 0;
+	assert((n & 1) == 0);
+	n >>= 1;
+	e = nil;
+	m = strtomp(s, &e, 16, nil);
+	if(m == nil || *e != '\0')
+		abort();
+	mptober(m, h, n);
+	if(l != nil)
+		print("%s = %.*H\n", l, n, h);
+	return n;
+}
+
+void
+runtest(Test *t)
+{
+	AESGCMstate s;
+	uchar key[1024], plain[1024], aad[1024], iv[1024], tag[16], tmp[16];
+	int nkey, nplain, naad, niv;
+
+	nkey = parsehex(t->K, key, "K");
+	nplain = parsehex(t->P, plain, "P");
+	naad = parsehex(t->A, aad, "A");
+	niv = parsehex(t->IV, iv, "IV");
+
+	setupAESGCMstate(&s, key, nkey, iv, niv);
+	aesgcm_encrypt(plain, nplain, aad, naad, tag, &s);
+	print("C = %.*H\n", nplain, plain);
+	print("T = %.*H\n", 16, tag);
+
+	parsehex(t->T, tmp, nil);
+	assert(memcmp(tmp, tag, 16) == 0);
+}
+
+void
+perftest(void)
+{
+	AESGCMstate s;
+	static uchar zeros[16];
+	uchar buf[1024*1024], tag[16];
+	vlong now;
+	int i, delta;
+
+	now = nsec();
+	for(i=0; i<100; i++){
+		memset(buf, 0, sizeof(buf));
+		if(1){
+			setupAESGCMstate(&s, zeros, 16, zeros, 12);
+			aesgcm_encrypt(buf, sizeof(buf), nil, 0, tag, &s);
+		} else {
+			setupAESstate(&s, zeros, 16, zeros);
+			aesCBCencrypt(buf, sizeof(buf), &s);
+		}
+	}
+	delta = (nsec() - now) / 1000000000LL;
+	fprint(2, "%ds = %d/s\n", delta, i*sizeof(buf) / delta);
+}
+
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	fmtinstall('H', encodefmt);
+
+	ARGBEGIN {
+	case 'p':
+		perftest();
+		exits(nil);
+	} ARGEND;
+
+	for(i=0; i<nelem(tests); i++){
+		print("Test Case %d\n", i+1);
+		runtest(&tests[i]);
+		print("\n");
+	}
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/aesni.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,5 @@
+void*
+aesni_init(void)
+{
+	return 0;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/bftest.c	Sat Jun 26 21:21:40 2021
@@ -0,0 +1,279 @@
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+
+enum{
+	Bsz = 8,
+};
+
+typedef struct Testvector Testvector;
+
+struct Testvector{
+	uchar key[Bsz];
+	uchar plain[Bsz];
+	uchar cipher[Bsz];
+};
+
+/*
+ * Blowfish test vectors from https://www.schneier.com/code/vectors.txt
+ */
+Testvector vector [] = {
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
+
+	{{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x51, 0x86, 0x6F, 0xD5, 0xB8, 0x5E, 0xCB, 0x8A}},
+
+	{{0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+	 {0x7D, 0x85, 0x6F, 0x9A, 0x61, 0x30, 0x63, 0xF2}},
+
+	{{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x24, 0x66, 0xDD, 0x87, 0x8B, 0x96, 0x3C, 0x9D}},
+
+	{{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x61, 0xF9, 0xC3, 0x80, 0x22, 0x81, 0xB0, 0x96}},
+
+	{{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x7D, 0x0C, 0xC6, 0x30, 0xAF, 0xDA, 0x1E, 0xC7}},
+
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
+
+	{{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x0A, 0xCE, 0xAB, 0x0F, 0xC6, 0xA0, 0xA2, 0x8D}},
+
+	{{0x7C, 0xA1, 0x10, 0x45, 0x4A, 0x1A, 0x6E, 0x57},
+	 {0x01, 0xA1, 0xD6, 0xD0, 0x39, 0x77, 0x67, 0x42},
+	 {0x59, 0xC6, 0x82, 0x45, 0xEB, 0x05, 0x28, 0x2B}},
+
+	{{0x01, 0x31, 0xD9, 0x61, 0x9D, 0xC1, 0x37, 0x6E},
+	 {0x5C, 0xD5, 0x4C, 0xA8, 0x3D, 0xEF, 0x57, 0xDA},
+	 {0xB1, 0xB8, 0xCC, 0x0B, 0x25, 0x0F, 0x09, 0xA0}},
+
+	{{0x07, 0xA1, 0x13, 0x3E, 0x4A, 0x0B, 0x26, 0x86},
+	 {0x02, 0x48, 0xD4, 0x38, 0x06, 0xF6, 0x71, 0x72},
+	 {0x17, 0x30, 0xE5, 0x77, 0x8B, 0xEA, 0x1D, 0xA4}},
+
+	{{0x38, 0x49, 0x67, 0x4C, 0x26, 0x02, 0x31, 0x9E},
+	 {0x51, 0x45, 0x4B, 0x58, 0x2D, 0xDF, 0x44, 0x0A},
+	 {0xA2, 0x5E, 0x78, 0x56, 0xCF, 0x26, 0x51, 0xEB}},
+
+	{{0x04, 0xB9, 0x15, 0xBA, 0x43, 0xFE, 0xB5, 0xB6},
+	 {0x42, 0xFD, 0x44, 0x30, 0x59, 0x57, 0x7F, 0xA2},
+	 {0x35, 0x38, 0x82, 0xB1, 0x09, 0xCE, 0x8F, 0x1A}},
+
+	{{0x01, 0x13, 0xB9, 0x70, 0xFD, 0x34, 0xF2, 0xCE},
+	 {0x05, 0x9B, 0x5E, 0x08, 0x51, 0xCF, 0x14, 0x3A},
+	 {0x48, 0xF4, 0xD0, 0x88, 0x4C, 0x37, 0x99, 0x18}},
+
+	{{0x01, 0x70, 0xF1, 0x75, 0x46, 0x8F, 0xB5, 0xE6},
+	 {0x07, 0x56, 0xD8, 0xE0, 0x77, 0x47, 0x61, 0xD2},
+	 {0x43, 0x21, 0x93, 0xB7, 0x89, 0x51, 0xFC, 0x98}},
+
+	{{0x43, 0x29, 0x7F, 0xAD, 0x38, 0xE3, 0x73, 0xFE},
+	 {0x76, 0x25, 0x14, 0xB8, 0x29, 0xBF, 0x48, 0x6A},
+	 {0x13, 0xF0, 0x41, 0x54, 0xD6, 0x9D, 0x1A, 0xE5}},
+
+	{{0x07, 0xA7, 0x13, 0x70, 0x45, 0xDA, 0x2A, 0x16},
+	 {0x3B, 0xDD, 0x11, 0x90, 0x49, 0x37, 0x28, 0x02},
+	 {0x2E, 0xED, 0xDA, 0x93, 0xFF, 0xD3, 0x9C, 0x79}},
+
+	{{0x04, 0x68, 0x91, 0x04, 0xC2, 0xFD, 0x3B, 0x2F},
+	 {0x26, 0x95, 0x5F, 0x68, 0x35, 0xAF, 0x60, 0x9A},
+	 {0xD8, 0x87, 0xE0, 0x39, 0x3C, 0x2D, 0xA6, 0xE3}},
+
+	{{0x37, 0xD0, 0x6B, 0xB5, 0x16, 0xCB, 0x75, 0x46},
+	 {0x16, 0x4D, 0x5E, 0x40, 0x4F, 0x27, 0x52, 0x32},
+	 {0x5F, 0x99, 0xD0, 0x4F, 0x5B, 0x16, 0x39, 0x69}},
+
+	{{0x1F, 0x08, 0x26, 0x0D, 0x1A, 0xC2, 0x46, 0x5E},
+	 {0x6B, 0x05, 0x6E, 0x18, 0x75, 0x9F, 0x5C, 0xCA},
+	 {0x4A, 0x05, 0x7A, 0x3B, 0x24, 0xD3, 0x97, 0x7B}},
+
+	{{0x58, 0x40, 0x23, 0x64, 0x1A, 0xBA, 0x61, 0x76},
+	 {0x00, 0x4B, 0xD6, 0xEF, 0x09, 0x17, 0x60, 0x62},
+	 {0x45, 0x20, 0x31, 0xC1, 0xE4, 0xFA, 0xDA, 0x8E}},
+
+	{{0x02, 0x58, 0x16, 0x16, 0x46, 0x29, 0xB0, 0x07},
+	 {0x48, 0x0D, 0x39, 0x00, 0x6E, 0xE7, 0x62, 0xF2},
+	 {0x75, 0x55, 0xAE, 0x39, 0xF5, 0x9B, 0x87, 0xBD}},
+
+	{{0x49, 0x79, 0x3E, 0xBC, 0x79, 0xB3, 0x25, 0x8F},
+	 {0x43, 0x75, 0x40, 0xC8, 0x69, 0x8F, 0x3C, 0xFA},
+	 {0x53, 0xC5, 0x5F, 0x9C, 0xB4, 0x9F, 0xC0, 0x19}},
+
+	{{0x4F, 0xB0, 0x5E, 0x15, 0x15, 0xAB, 0x73, 0xA7},
+	 {0x07, 0x2D, 0x43, 0xA0, 0x77, 0x07, 0x52, 0x92},
+	 {0x7A, 0x8E, 0x7B, 0xFA, 0x93, 0x7E, 0x89, 0xA3}},
+
+	{{0x49, 0xE9, 0x5D, 0x6D, 0x4C, 0xA2, 0x29, 0xBF},
+	 {0x02, 0xFE, 0x55, 0x77, 0x81, 0x17, 0xF1, 0x2A},
+	 {0xCF, 0x9C, 0x5D, 0x7A, 0x49, 0x86, 0xAD, 0xB5}},
+
+	{{0x01, 0x83, 0x10, 0xDC, 0x40, 0x9B, 0x26, 0xD6},
+	 {0x1D, 0x9D, 0x5C, 0x50, 0x18, 0xF7, 0x28, 0xC2},
+	 {0xD1, 0xAB, 0xB2, 0x90, 0x65, 0x8B, 0xC7, 0x78}},
+
+	{{0x1C, 0x58, 0x7F, 0x1C, 0x13, 0x92, 0x4F, 0xEF},
+	 {0x30, 0x55, 0x32, 0x28, 0x6D, 0x6F, 0x29, 0x5A},
+	 {0x55, 0xCB, 0x37, 0x74, 0xD1, 0x3E, 0xF2, 0x01}},
+
+	{{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xFA, 0x34, 0xEC, 0x48, 0x47, 0xB2, 0x68, 0xB2}},
+
+	{{0x1F, 0x1F, 0x1F, 0x1F, 0x0E, 0x0E, 0x0E, 0x0E},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xA7, 0x90, 0x79, 0x51, 0x08, 0xEA, 0x3C, 0xAE}},
+
+	{{0xE0, 0xFE, 0xE0, 0xFE, 0xF1, 0xFE, 0xF1, 0xFE},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xC3, 0x9E, 0x07, 0x2D, 0x9F, 0xAC, 0x63, 0x1D}},
+
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x01, 0x49, 0x33, 0xE0, 0xCD, 0xAF, 0xF6, 0xE4}},
+
+	{{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0xF2, 0x1E, 0x9A, 0x77, 0xB7, 0x1C, 0x49, 0xBC}},
+
+	{{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x24, 0x59, 0x46, 0x88, 0x57, 0x54, 0x36, 0x9A}},
+
+	{{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x6B, 0x5C, 0x5A, 0x9C, 0x5D, 0x9E, 0x0A, 0x5A}}
+};
+
+uchar CBCkey[16] = { 
+	 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 
+	 0xF0, 0xE1, 0xD2, 0xC3, 0xB4, 0xA5, 0x96, 0x87
+};
+
+uchar CBCiv[8] = {
+	 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
+};
+
+uchar CBCdata[29] = {
+	 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, 
+	 0x4E, 0x6F, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 
+	 0x68, 0x65, 0x20, 0x74, 0x69, 0x6D, 0x65, 0x20, 
+	 0x66, 0x6F, 0x72, 0x20, 0x00
+};
+
+uchar CBCcipher[32] = {
+	 0x6B, 0x77, 0xB4, 0xD6, 0x30, 0x06, 0xDE, 0xE6,
+	 0x05, 0xB1, 0x56, 0xE2, 0x74, 0x03, 0x97, 0x93,
+	 0x58, 0xDE, 0xB9, 0xE7, 0x15, 0x46, 0x16, 0xD9,
+	 0x59, 0xF1, 0x65, 0x2B, 0xD5, 0xFF, 0x92, 0xCC
+};
+
+int
+testECB(Testvector *t)
+{
+	BFstate s;
+	int i;
+	uchar aux[Bsz];
+
+	memcpy(aux, t->plain, Bsz);
+
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, t->key, Bsz, nil);
+	bfECBencrypt(aux, Bsz, &s);
+
+	if(memcmp(aux, t->cipher, Bsz) != 0){
+		fprint(2, "ECB encrypt failed, ciphertext is:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", t->cipher[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, t->key, Bsz, nil);
+	bfECBdecrypt(aux, Bsz, &s);
+
+	if(memcmp(aux, t->plain, Bsz) != 0){
+		fprint(2, "ECB decrypt failed, plaintext is:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", t->plain[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+	return 0;
+}
+
+int
+testCBC(void)
+{
+	BFstate s;
+	uchar aux[32];
+	int i;
+
+	memset(aux, 0 , sizeof(aux));
+	memcpy(aux, CBCdata, sizeof(CBCdata));
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, CBCkey, sizeof(CBCkey), CBCiv);
+	bfCBCencrypt(aux, 32, &s);
+	
+	if(memcmp(aux, CBCcipher, sizeof(CBCcipher)) != 0){
+		fprint(2, "CBC encrypt failed, ciphertext is:\n");
+		for(i = 0; i < sizeof(aux); i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < sizeof(CBCcipher); i++)
+			fprint(2, "%02X", CBCcipher[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, CBCkey, sizeof(CBCkey), CBCiv);
+	bfCBCdecrypt(aux, 32, &s);
+
+	if(memcmp(aux, CBCdata, sizeof(CBCdata)) != 0){
+		fprint(2, "CBC decrypt failed, plaintext is:\n");
+		for(i = 0; i < sizeof(aux); i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < sizeof(CBCdata); i++)
+			fprint(2, "%02X", CBCdata[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	if(argc != 1)
+		sysfatal("usage: %s", argv[0]);
+
+	for(i=0; i < nelem(vector); i++)
+		if(testECB(&vector[i]) < 0)
+			sysfatal("TestECB %d failed", i);
+
+	if(testCBC() < 0)
+		sysfatal("TestCBC failed");
+	exits(nil);
+}
--- sys/src/libsec/port/blowfish.c	Mon Mar 18 16:47:47 2002
+++ /sys/src/libsec/port/blowfish.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -13,6 +14,37 @@
 static void bfencrypt(u32int *, BFstate *);
 static void bfdecrypt(u32int *, BFstate *);
 
+/*
+ * Endianess agnostic functions to convert a 
+ * block (8-byte buffer) to a u32int array and 
+ * viceversa.
+ */
+
+static void
+buf2ints(uchar *p, u32int *b)
+{
+	b[0] =  p[0]<<24 | p[1]<<16  | p[2]<<8 | p[3];
+	b[1] =  p[4]<<24 | p[5]<<16  | p[6]<<8 | p[7];
+}
+
+static void
+ints2buf(u32int *b, uchar *p)
+{
+	u32int u;
+
+	u = b[0];
+	p[0] = u>>24;
+	p[1] = u>>16;
+	p[2] = u>>8;
+	p[3] = u;
+
+	u = b[1];
+	p[4] = u>>24;
+	p[5] = u>>16;
+	p[6] = u>>8;
+	p[7] = u;
+}
+
 void
 setupBFstate(BFstate *s, uchar key[], int keybytes, uchar *ivec)
 {
@@ -31,7 +63,7 @@
 		memmove(s->ivec, ivec, sizeof(s->ivec));
 	else
 		memset(s->ivec, 0, sizeof(s->ivec));
-		
+
 	memmove(s->pbox, pbox, sizeof(pbox));
 	memmove(s->sbox, sbox, sizeof(sbox));
 
@@ -76,17 +108,13 @@
 bfCBCencrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	uchar *p;
-	u32int bo[2], bi[2], b;
+	u32int bo[2], bi[2];
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32int) s->ivec[1]<<8) | ((u32int)s->ivec[2]<<16) | ((u32int)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32int) s->ivec[5]<<8) | ((u32int)s->ivec[6]<<16) | ((u32int)s->ivec[7]<<24);
-
+	buf2ints(s->ivec, bo);
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		buf2ints(buf, bi);
 
 		bi[0] ^= bo[0];
 		bi[1] ^= bo[1];
@@ -96,36 +124,9 @@
 		bo[0] = bi[0];
 		bo[1] = bi[1];
 
-		p = buf;
-		b = bo[0];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-
-		b = bo[1];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p = b;
+		ints2buf(bi, buf);
 	}
-
-	s->ivec[7] = bo[1] >> 24;
-	s->ivec[6] = bo[1] >> 16;
-	s->ivec[5] = bo[1] >> 8;
-	s->ivec[4] = bo[1];
-
-	s->ivec[3] = bo[0] >> 24;
-	s->ivec[2] = bo[0] >> 16;
-	s->ivec[1] = bo[0] >> 8;
-	s->ivec[0] = bo[0];
-
+	ints2buf(bo, s->ivec);
 	return;
 }
 
@@ -133,17 +134,13 @@
 bfCBCdecrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	uchar *p;
-	u32int b, bo[2], bi[2], xr[2];
+	u32int  bo[2], bi[2], xr[2];
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32int) s->ivec[1]<<8) | ((u32int)s->ivec[2]<<16) | ((u32int)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32int) s->ivec[5]<<8) | ((u32int)s->ivec[6]<<16) | ((u32int)s->ivec[7]<<24);
-
+	buf2ints(s->ivec, bo);
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		buf2ints(buf, bi);
 
 		xr[0] = bi[0];
 		xr[1] = bi[1];
@@ -153,39 +150,12 @@
 		bo[0] ^= bi[0];
 		bo[1] ^= bi[1];
 
-		p = buf;
-		b = bo[0];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-
-		b = bo[1];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p = b;
+		ints2buf(bo, buf);
 
 		bo[0] = xr[0];
 		bo[1] = xr[1];
 	}
-
-	s->ivec[7] = bo[1] >> 24;
-	s->ivec[6] = bo[1] >> 16;
-	s->ivec[5] = bo[1] >> 8;
-	s->ivec[4] = bo[1];
-
-	s->ivec[3] = bo[0] >> 24;
-	s->ivec[2] = bo[0] >> 16;
-	s->ivec[1] = bo[0] >> 8;
-	s->ivec[0] = bo[0];
-
+	ints2buf(bo, s->ivec);
 	return;
 }
 
@@ -196,20 +166,9 @@
 	u32int b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		b[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
-
+		buf2ints(buf, b);
 		bfencrypt(b, s);
-
-		buf[7] = b[1] >> 24;
-		buf[6] = b[1] >> 16;
-		buf[5] = b[1] >> 8;
-		buf[4] = b[1];
-
-		buf[3] = b[0] >> 24;
-		buf[2] = b[0] >> 16;
-		buf[1] = b[0] >> 8;
-		buf[0] = b[0];
+		ints2buf(b, buf);
 	}
 
 	return;
@@ -222,20 +181,9 @@
 	u32int b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		b[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
-
+		buf2ints(buf, b);
 		bfdecrypt(b, s);
-
-		buf[7] = b[1] >> 24;
-		buf[6] = b[1] >> 16;
-		buf[5] = b[1] >> 8;
-		buf[4] = b[1];
-
-		buf[3] = b[0] >> 24;
-		buf[2] = b[0] >> 16;
-		buf[1] = b[0] >> 8;
-		buf[0] = b[0];
+		ints2buf(b, buf);
 	}
 
 	return;		
@@ -575,5 +523,4 @@
 	0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL, 
 	0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L, 
 };
-
 
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/ccpoly.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,92 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+static void
+ccpolyotk(Chachastate *cs, DigestState *ds)
+{
+	uchar otk[ChachaBsize];
+
+	memset(ds, 0, sizeof(*ds));
+	memset(otk, 0, 32);
+	chacha_setblock(cs, 0);
+	chacha_encrypt(otk, ChachaBsize, cs);
+	poly1305(nil, 0, otk, 32, nil, ds);
+}
+
+static void
+ccpolypad(uchar *buf, ulong nbuf, DigestState *ds)
+{
+	static uchar zeros[16] = {0};
+	ulong npad;
+
+	if(nbuf == 0)
+		return;
+	poly1305(buf, nbuf, nil, 0, nil, ds);
+	npad = nbuf % 16;
+	if(npad == 0)
+		return;
+	poly1305(zeros, 16 - npad, nil, 0, nil, ds);
+}
+
+static void
+ccpolylen(ulong n, uchar tag[16], DigestState *ds)
+{
+	uchar info[8];
+
+	info[0] = n;
+	info[1] = n>>8;
+	info[2] = n>>16;
+	info[3] = n>>24;
+	info[4] = 0;
+	info[5] = 0;
+	info[6] = 0;
+	info[7] = 0;
+	poly1305(info, 8, nil, 0, tag, ds);
+}
+
+void
+ccpoly_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs)
+{
+	DigestState ds;
+
+	ccpolyotk(cs, &ds);
+	if(cs->ivwords == 2){
+		poly1305(aad, naad, nil, 0, nil, &ds);
+		ccpolylen(naad, nil, &ds);
+		chacha_encrypt(dat, ndat, cs);
+		poly1305(dat, ndat, nil, 0, nil, &ds);
+		ccpolylen(ndat, tag, &ds);
+	} else {
+		ccpolypad(aad, naad, &ds);
+		chacha_encrypt(dat, ndat, cs);
+		ccpolypad(dat, ndat, &ds);
+		ccpolylen(naad, nil, &ds);
+		ccpolylen(ndat, tag, &ds);
+	}
+}
+
+int
+ccpoly_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs)
+{
+	DigestState ds;
+	uchar tmp[16];
+
+	ccpolyotk(cs, &ds);
+	if(cs->ivwords == 2){
+		poly1305(aad, naad, nil, 0, nil, &ds);
+		ccpolylen(naad, nil, &ds);
+		poly1305(dat, ndat, nil, 0, nil, &ds);
+		ccpolylen(ndat, tmp, &ds);
+	} else {
+		ccpolypad(aad, naad, &ds);
+		ccpolypad(dat, ndat, &ds);
+		ccpolylen(naad, nil, &ds);
+		ccpolylen(ndat, tmp, &ds);
+	}
+	if(tsmemcmp(tag, tmp, 16) != 0)
+		return -1;
+	chacha_encrypt(dat, ndat, cs);
+	return 0;
+}
--- sys/src/libsec/port/chacha.c	Thu Jul  1 17:28:57 2021
+++ /sys/src/libsec/port/chacha.c	Sat Jun 26 21:21:41 2021
@@ -9,36 +9,21 @@
 
 #include <u.h>
 #include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
-enum{
-	Blockwords=	ChachaBsize/sizeof(u32int)
-};
+/* from chachablock.$O */
+extern void _chachablock(u32int x[16], int rounds);
 
 /* little-endian data order */
-#define GET4(p)	((((((p)[3]<<8) | (p)[2])<<8) | (p)[1])<<8 | (p)[0])
-#define PUT4(p, v)	(((p)[0]=v), (v>>=8), ((p)[1]=v), (v>>=8), ((p)[2]=v), (v>>=8), ((p)[3]=v))
-
-#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c))))
-
-#define QUARTERROUND(ia,ib,ic,id) { \
-	u32int a, b, c, d, t;\
-	a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \
-	a += b; t = d^a; d = ROTATE(t,16); \
-	c += d; t = b^c; b = ROTATE(t,12); \
-	a += b; t = d^a; d = ROTATE(t, 8); \
-	c += d; t = b^c; b = ROTATE(t, 7); \
-	x[ia] = a; x[ib] = b; x[ic] = c; x[id] = d; \
-}
+#define	GET4(p)		((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24))
+#define	PUT4(p,v)	(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24
 
 #define ENCRYPT(s, x, y, d) {\
 	u32int v; \
-	uchar *sp, *dp; \
-	sp = (s); \
-	v = GET4(sp); \
+	v = GET4(s); \
 	v ^= (x)+(y); \
-	dp = (d); \
-	PUT4(dp, v); \
+	PUT4(d, v); \
 }
 
 static uchar sigma[16] = "expand 32-byte k";
@@ -54,10 +39,13 @@
 }
 
 void
-setupChachastate(Chachastate *s, uchar *key, usize keylen, uchar *iv, int rounds)
+setupChachastate(Chachastate *s, uchar *key, ulong keylen, uchar *iv, ulong ivlen, int rounds)
 {
 	if(keylen != 256/8 && keylen != 128/8)
 		sysfatal("invalid chacha key length");
+	if(ivlen != 64/8 && ivlen != 96/8
+	&& ivlen != 128/8 && ivlen != 192/8)	/* hchacha, xchacha */
+		sysfatal("invalid chacha iv length");
 	if(rounds == 0)
 		rounds = 20;
 	s->rounds = rounds;
@@ -69,28 +57,108 @@
 		load(&s->input[4], key, 4);
 		load(&s->input[8], key, 4);
 	}
+	s->xkey[0] = s->input[4];
+	s->xkey[1] = s->input[5];
+	s->xkey[2] = s->input[6];
+	s->xkey[3] = s->input[7];
+	s->xkey[4] = s->input[8];
+	s->xkey[5] = s->input[9];
+	s->xkey[6] = s->input[10];
+	s->xkey[7] = s->input[11];
+
+	s->ivwords = ivlen/4;
 	s->input[12] = 0;
+	s->input[13] = 0;
 	if(iv == nil){
-		s->input[13] = 0;
 		s->input[14] = 0;
 		s->input[15] = 0;
 	}else
-		load(&s->input[13], iv, 3);
+		chacha_setiv(s, iv);
+}
+
+static void
+hchachablock(uchar h[32], Chachastate *s)
+{
+	u32int x[16];
+
+	x[0] = s->input[0];
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	_chachablock(x, s->rounds);
+
+	PUT4(h+0*4, x[0]);
+	PUT4(h+1*4, x[1]);
+	PUT4(h+2*4, x[2]);
+	PUT4(h+3*4, x[3]);
+	PUT4(h+4*4, x[12]);
+	PUT4(h+5*4, x[13]);
+	PUT4(h+6*4, x[14]);
+	PUT4(h+7*4, x[15]);
 }
 
 void
-chacha_setblock(Chachastate *s, u32int blockno)
+chacha_setiv(Chachastate *s, uchar *iv)
+{
+	if(s->ivwords == 192/32){
+		/* xchacha with 192-bit iv */
+		u32int counter[2];
+		uchar h[32];
+
+		s->input[4] = s->xkey[0];
+		s->input[5] = s->xkey[1];
+		s->input[6] = s->xkey[2];
+		s->input[7] = s->xkey[3];
+		s->input[8] = s->xkey[4];
+		s->input[9] = s->xkey[5];
+		s->input[10] = s->xkey[6];
+		s->input[11] = s->xkey[7];
+
+		counter[0] = s->input[12];
+		counter[1] = s->input[13];
+
+		load(&s->input[12], iv, 4);
+
+		hchachablock(h, s);
+		load(&s->input[4], h, 8);
+		memset(h, 0, 32);
+
+		s->input[12] = counter[0];
+		s->input[13] = counter[1];
+
+		load(&s->input[14], iv+16, 2);
+		return;
+	}
+	load(&s->input[16 - s->ivwords], iv, s->ivwords);
+}
+
+void
+chacha_setblock(Chachastate *s, u64int blockno)
 {
 	s->input[12] = blockno;
+	if(s->ivwords != 3)
+		s->input[13] = blockno>>32;
 }
 
 static void
 encryptblock(Chachastate *s, uchar *src, uchar *dst)
 {
-	u32int x[Blockwords];
-	int i, rounds;
+	u32int x[16];
+	int i;
 
-	rounds = s->rounds;
 	x[0] = s->input[0];
 	x[1] = s->input[1];
 	x[2] = s->input[2];
@@ -107,37 +175,8 @@
 	x[13] = s->input[13];
 	x[14] = s->input[14];
 	x[15] = s->input[15];
+	_chachablock(x, s->rounds);
 
-	for(i = rounds; i > 0; i -= 2) {
-		QUARTERROUND(0, 4, 8,12)
-		QUARTERROUND(1, 5, 9,13)
-		QUARTERROUND(2, 6,10,14)
-		QUARTERROUND(3, 7,11,15)
-
-		QUARTERROUND(0, 5,10,15)
-		QUARTERROUND(1, 6,11,12)
-		QUARTERROUND(2, 7, 8,13)
-		QUARTERROUND(3, 4, 9,14)
-	}
-
-#ifdef FULL_UNROLL
-	ENCRYPT(src+0*4, x[0], s->input[0], dst+0*4);
-	ENCRYPT(src+1*4, x[1], s->input[1], dst+1*4);
-	ENCRYPT(src+2*4, x[2], s->input[2], dst+2*4);
-	ENCRYPT(src+3*4, x[3], s->input[3], dst+3*4);
-	ENCRYPT(src+4*4, x[4], s->input[4], dst+4*4);
-	ENCRYPT(src+5*4, x[5], s->input[5], dst+5*4);
-	ENCRYPT(src+6*4, x[6], s->input[6], dst+6*4);
-	ENCRYPT(src+7*4, x[7], s->input[7], dst+7*4);
-	ENCRYPT(src+8*4, x[8], s->input[8], dst+8*4);
-	ENCRYPT(src+9*4, x[9], s->input[9], dst+9*4);
-	ENCRYPT(src+10*4, x[10], s->input[10], dst+10*4);
-	ENCRYPT(src+11*4, x[11], s->input[11], dst+11*4);
-	ENCRYPT(src+12*4, x[12], s->input[12], dst+12*4);
-	ENCRYPT(src+13*4, x[13], s->input[13], dst+13*4);
-	ENCRYPT(src+14*4, x[14], s->input[14], dst+14*4);
-	ENCRYPT(src+15*4, x[15], s->input[15], dst+15*4);
-#else
 	for(i=0; i<nelem(x); i+=4){
 		ENCRYPT(src, x[i], s->input[i], dst);
 		ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
@@ -146,13 +185,13 @@
 		src += 16;
 		dst += 16;
 	}
-#endif
 
-	s->input[12]++;
+	if(++s->input[12] == 0 && s->ivwords != 3)
+		s->input[13]++;
 }
 
 void
-chacha_encrypt2(uchar *src, uchar *dst, usize bytes, Chachastate *s)
+chacha_encrypt2(uchar *src, uchar *dst, ulong bytes, Chachastate *s)
 {
 	uchar tmp[ChachaBsize];
 
@@ -169,7 +208,17 @@
 }
 
 void
-chacha_encrypt(uchar *buf, usize bytes, Chachastate *s)
+chacha_encrypt(uchar *buf, ulong bytes, Chachastate *s)
 {
 	chacha_encrypt2(buf, buf, bytes, s);
+}
+
+void
+hchacha(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds)
+{
+	Chachastate s[1];
+
+	setupChachastate(s, key, keylen, nonce, 16, rounds);
+	hchachablock(h, s);
+	memset(s, 0, sizeof(s));
 }
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/chachablock.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,31 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+
+#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c))))
+
+#define QUARTERROUND(ia,ib,ic,id) { \
+	u32int a, b, c, d, t; \
+	a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \
+	a += b; t = d^a; d = ROTATE(t,16); \
+	c += d; t = b^c; b = ROTATE(t,12); \
+	a += b; t = d^a; d = ROTATE(t, 8); \
+	c += d; t = b^c; b = ROTATE(t, 7); \
+	x[ia] = a; x[ib] = b; x[ic] = c; x[id] = d; \
+}
+
+void
+_chachablock(u32int x[16], int rounds)
+{
+	for(; rounds > 0; rounds -= 2) {
+		QUARTERROUND(0, 4, 8,12)
+		QUARTERROUND(1, 5, 9,13)
+		QUARTERROUND(2, 6,10,14)
+		QUARTERROUND(3, 7,11,15)
+
+		QUARTERROUND(0, 5,10,15)
+		QUARTERROUND(1, 6,11,12)
+		QUARTERROUND(2, 7, 8,13)
+		QUARTERROUND(3, 4, 9,14)
+	}
+}
--- sys/src/libsec/port/chachatest.c	Thu Jul  1 17:28:57 2021
+++ /sys/src/libsec/port/chachatest.c	Sat Jun 26 21:21:41 2021
@@ -31,7 +31,7 @@
 char	rfctext[] = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, "
 	"sunscreen would be it.";
 uchar	rfcout[3*ChachaBsize];
-uchar	rfcref[3*ChachaBsize] = {
+uchar	rfcref[] = {
 	0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80, 0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81,
 	0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2, 0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b,
 	0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab, 0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57,
@@ -42,10 +42,66 @@
 	0x87, 0x4d
 };
 
+uchar	xcckey[] = {
+	0x1b, 0x27, 0x55, 0x64, 0x73, 0xe9, 0x85, 0xd4, 0x62, 0xcd, 0x51, 0x19, 0x7a, 0x9a, 0x46, 0xc7,
+	0x60, 0x09, 0x54, 0x9e, 0xac, 0x64, 0x74, 0xf2, 0x06, 0xc4, 0xee, 0x08, 0x44, 0xf6, 0x83, 0x89,
+};
+uchar	xcciv[] = {
+	0x69, 0x69, 0x6e, 0xe9, 0x55, 0xb6, 0x2b, 0x73, 0xcd, 0x62, 0xbd, 0xa8, 0x75, 0xfc, 0x73, 0xd6,
+	0x82, 0x19, 0xe0, 0x03, 0x6b, 0x7a, 0x0b, 0x37,
+};
+uchar	xccref[] = {
+	0x4f, 0xeb, 0xf2, 0xfe, 0x4b, 0x35, 0x9c, 0x50, 0x8d, 0xc5, 0xe8, 0xb5, 0x98, 0x0c, 0x88, 0xe3,
+	0x89, 0x46, 0xd8, 0xf1, 0x8f, 0x31, 0x34, 0x65, 0xc8, 0x62, 0xa0, 0x87, 0x82, 0x64, 0x82, 0x48,
+	0x01, 0x8d, 0xac, 0xdc, 0xb9, 0x04, 0x17, 0x88, 0x53, 0xa4, 0x6d, 0xca, 0x3a, 0x0e, 0xaa, 0xee,
+	0x74, 0x7c, 0xba, 0x97, 0x43, 0x4e, 0xaf, 0xfa, 0xd5, 0x8f, 0xea, 0x82, 0x22, 0x04, 0x7e, 0x0d,
+	0xe6, 0xc3, 0xa6, 0x77, 0x51, 0x06, 0xe0, 0x33, 0x1a, 0xd7, 0x14, 0xd2, 0xf2, 0x7a, 0x55, 0x64,
+	0x13, 0x40, 0xa1, 0xf1, 0xdd, 0x9f, 0x94, 0x53, 0x2e, 0x68, 0xcb, 0x24, 0x1c, 0xbd, 0xd1, 0x50,
+	0x97, 0x0d, 0x14, 0xe0, 0x5c, 0x5b, 0x17, 0x31, 0x93, 0xfb, 0x14, 0xf5, 0x1c, 0x41, 0xf3, 0x93,
+	0x83, 0x5b, 0xf7, 0xf4, 0x16, 0xa7, 0xe0, 0xbb, 0xa8, 0x1f, 0xfb, 0x8b, 0x13, 0xaf, 0x0e, 0x21,
+	0x69, 0x1d, 0x7e, 0xce, 0xc9, 0x3b, 0x75, 0xe6, 0xe4, 0x18, 0x3a,
+};
+
+uchar	ccpaad[] = {
+	0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+};
+uchar	ccpkey[] = {
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+};
+uchar	ccpiv[] = {
+	0x07, 0x00, 0x00, 0x00,  
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+};
+uchar	ccptag[] = {
+	0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91,
+};
+
+uchar	ccp64aad[] = {
+	0x87, 0xe2, 0x29, 0xd4, 0x50, 0x08, 0x45, 0xa0, 0x79, 0xc0,
+};
+uchar	ccp64key[] = {
+	0x42, 0x90, 0xbc, 0xb1, 0x54, 0x17, 0x35, 0x31, 0xf3, 0x14, 0xaf, 0x57, 0xf3, 0xbe, 0x3b, 0x50,
+	0x06, 0xda, 0x37, 0x1e, 0xce, 0x27, 0x2a, 0xfa, 0x1b, 0x5d, 0xbd, 0xd1, 0x10, 0x0a, 0x10, 0x07,
+};
+uchar	ccp64iv[] = {
+	0xcd, 0x7c, 0xf6, 0x7b, 0xe3, 0x9c, 0x79, 0x4a,
+};
+uchar	ccp64inp[] = {
+	0x86, 0xd0, 0x99, 0x74, 0x84, 0x0b, 0xde, 0xd2, 0xa5, 0xca, 
+};
+uchar	ccp64out[] = {
+	0xe3, 0xe4, 0x46, 0xf7, 0xed, 0xe9, 0xa1, 0x9b, 0x62, 0xa4,
+};
+uchar	ccp64tag[] = {
+	0x67, 0x7d, 0xab, 0xf4, 0xe3, 0xd2, 0x4b, 0x87, 0x6b, 0xb2, 0x84, 0x75, 0x38, 0x96, 0xe1, 0xd6,
+};
+
 void
 main(int argc, char **argv)
 {
 	Chachastate s;
+	uchar tag[16];
 	int n;
 
 	ARGBEGIN{
@@ -54,17 +110,108 @@
 	print("key:\n");
 	printblock(rfckey, sizeof(rfckey));
 	n = strlen(rfctext);
-	setupChachastate(&s, rfckey, sizeof(rfckey), rfcnonce, 0);
+	setupChachastate(&s, rfckey, sizeof(rfckey), rfcnonce, sizeof(rfcnonce), 0);
 	chacha_setblock(&s, rfccount);
 	print("rfc in:\n");
 	printblock((uchar*)rfctext, n);
 	chacha_encrypt2((uchar*)rfctext, rfcout, n, &s);
 	print("rfc out:\n");
 	printblock(rfcout, n);
-	if(memcmp(rfcout, rfcref, sizeof(rfcout)) != 0){
+	if(memcmp(rfcout, rfcref, sizeof(rfcref)) != 0){
+		print("failure of vision\n");
+		exits("wrong");
+	}
+	print("\n");
+
+
+	print("xchacha key:\n");
+	printblock(xcckey, sizeof(xcckey));
+
+	print("xchacha iv:\n");
+	printblock(xcciv, sizeof(xcciv));
+
+	setupChachastate(&s, xcckey, sizeof(xcckey), xcciv, sizeof(xcciv), 20);
+	memset(rfcout, 0, sizeof(xccref));
+	chacha_encrypt(rfcout, sizeof(xccref), &s);
+
+	print("xchacha out:\n");
+	printblock(rfcout, sizeof(xccref));
+	if(memcmp(rfcout, xccref, sizeof(xccref)) != 0){
 		print("failure of vision\n");
 		exits("wrong");
 	}
+	print("\n");
+
+
+	print("ccpoly key:\n");
+	printblock(ccpkey, sizeof(ccpkey));
+
+	print("ccpoly iv:\n");
+	printblock(ccpiv, sizeof(ccpiv));
+
+	setupChachastate(&s, ccpkey, sizeof(ccpkey), ccpiv, sizeof(ccpiv), 20);
+
+	memmove(rfcout, rfctext, sizeof(rfctext)-1);
+	ccpoly_encrypt(rfcout, sizeof(rfctext)-1, ccpaad, sizeof(ccpaad), tag, &s);
+
+	print("ccpoly cipher:\n");
+	printblock(rfcout, sizeof(rfctext)-1);
+
+	print("ccpoly tag:\n");
+	printblock(tag, sizeof(tag));
+
+	if(memcmp(tag, ccptag, sizeof(tag)) != 0){
+		print("bad ccpoly tag\n");
+		exits("wrong");
+	}
+
+	if(ccpoly_decrypt(rfcout, sizeof(rfctext)-1, ccpaad, sizeof(ccpaad), tag, &s) != 0){
+		print("ccpoly decryption failed\n");
+		exits("wrong");
+	}
+
+	if(memcmp(rfcout, rfctext, sizeof(rfctext)-1) != 0){
+		print("ccpoly bad decryption\n");
+		exits("wrong");
+	}
+	print("\n");
+
+
+	print("ccpoly64 key:\n");
+	printblock(ccp64key, sizeof(ccp64key));
+
+	print("ccpoly64 iv:\n");
+	printblock(ccp64iv, sizeof(ccp64iv));
+
+	setupChachastate(&s, ccp64key, sizeof(ccp64key), ccp64iv, sizeof(ccp64iv), 20);
+
+	memmove(rfcout, ccp64inp, sizeof(ccp64inp));
+	ccpoly_encrypt(rfcout, sizeof(ccp64inp), ccp64aad, sizeof(ccp64aad), tag, &s);
+
+	print("ccpoly64 cipher:\n");
+	printblock(rfcout, sizeof(ccp64inp));
+
+	print("ccpoly64 tag:\n");
+	printblock(tag, sizeof(tag));
+
+	if(memcmp(rfcout, ccp64out, sizeof(ccp64out)) != 0){
+		print("ccpoly64 bad ciphertext\n");
+		exits("wrong");
+	}
+	if(memcmp(tag, ccp64tag, sizeof(ccp64tag)) != 0){
+		print("ccpoly64 bad encryption tag\n");
+		exits("wrong");
+	}
+
+	if(ccpoly_decrypt(rfcout, sizeof(ccp64inp), ccp64aad, sizeof(ccp64aad), tag, &s) != 0){
+		print("ccpoly64 decryption failed\n");
+		exits("wrong");
+	}
+	if(memcmp(rfcout, ccp64inp, sizeof(ccp64inp)) != 0){
+		print("ccpoly64 bad decryption\n");
+		exits("wrong");
+	}
+
 	print("passed\n");
 	exits(nil);
 }
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/curve25519.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,572 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * curve25519: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+typedef vlong felem;
+
+/* Sum two numbers: output += in */
+static void fsum(felem *output, felem *in) {
+  unsigned i;
+  for (i = 0; i < 10; i += 2) {
+    output[0+i] = (output[0+i] + in[0+i]);
+    output[1+i] = (output[1+i] + in[1+i]);
+  }
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ */
+static void fdifference(felem *output, felem *in) {
+  unsigned i;
+  for (i = 0; i < 10; ++i) {
+    output[i] = (in[i] - output[i]);
+  }
+}
+
+/* Multiply a number my a scalar: output = in * scalar */
+static void fscalar_product(felem *output, felem *in, felem scalar) {
+  unsigned i;
+  for (i = 0; i < 10; ++i) {
+    output[i] = in[i] * scalar;
+  }
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ */
+static void fproduct(felem *output, felem *in2, felem *in) {
+  output[0] =      in2[0] * in[0];
+  output[1] =      in2[0] * in[1] +
+                   in2[1] * in[0];
+  output[2] =  2 * in2[1] * in[1] +
+                   in2[0] * in[2] +
+                   in2[2] * in[0];
+  output[3] =      in2[1] * in[2] +
+                   in2[2] * in[1] +
+                   in2[0] * in[3] +
+                   in2[3] * in[0];
+  output[4] =      in2[2] * in[2] +
+               2 * (in2[1] * in[3] +
+                    in2[3] * in[1]) +
+                   in2[0] * in[4] +
+                   in2[4] * in[0];
+  output[5] =      in2[2] * in[3] +
+                   in2[3] * in[2] +
+                   in2[1] * in[4] +
+                   in2[4] * in[1] +
+                   in2[0] * in[5] +
+                   in2[5] * in[0];
+  output[6] =  2 * (in2[3] * in[3] +
+                    in2[1] * in[5] +
+                    in2[5] * in[1]) +
+                   in2[2] * in[4] +
+                   in2[4] * in[2] +
+                   in2[0] * in[6] +
+                   in2[6] * in[0];
+  output[7] =      in2[3] * in[4] +
+                   in2[4] * in[3] +
+                   in2[2] * in[5] +
+                   in2[5] * in[2] +
+                   in2[1] * in[6] +
+                   in2[6] * in[1] +
+                   in2[0] * in[7] +
+                   in2[7] * in[0];
+  output[8] =      in2[4] * in[4] +
+               2 * (in2[3] * in[5] +
+                    in2[5] * in[3] +
+                    in2[1] * in[7] +
+                    in2[7] * in[1]) +
+                   in2[2] * in[6] +
+                   in2[6] * in[2] +
+                   in2[0] * in[8] +
+                   in2[8] * in[0];
+  output[9] =      in2[4] * in[5] +
+                   in2[5] * in[4] +
+                   in2[3] * in[6] +
+                   in2[6] * in[3] +
+                   in2[2] * in[7] +
+                   in2[7] * in[2] +
+                   in2[1] * in[8] +
+                   in2[8] * in[1] +
+                   in2[0] * in[9] +
+                   in2[9] * in[0];
+  output[10] = 2 * (in2[5] * in[5] +
+                    in2[3] * in[7] +
+                    in2[7] * in[3] +
+                    in2[1] * in[9] +
+                    in2[9] * in[1]) +
+                   in2[4] * in[6] +
+                   in2[6] * in[4] +
+                   in2[2] * in[8] +
+                   in2[8] * in[2];
+  output[11] =     in2[5] * in[6] +
+                   in2[6] * in[5] +
+                   in2[4] * in[7] +
+                   in2[7] * in[4] +
+                   in2[3] * in[8] +
+                   in2[8] * in[3] +
+                   in2[2] * in[9] +
+                   in2[9] * in[2];
+  output[12] =     in2[6] * in[6] +
+               2 * (in2[5] * in[7] +
+                    in2[7] * in[5] +
+                    in2[3] * in[9] +
+                    in2[9] * in[3]) +
+                   in2[4] * in[8] +
+                   in2[8] * in[4];
+  output[13] =     in2[6] * in[7] +
+                   in2[7] * in[6] +
+                   in2[5] * in[8] +
+                   in2[8] * in[5] +
+                   in2[4] * in[9] +
+                   in2[9] * in[4];
+  output[14] = 2 * (in2[7] * in[7] +
+                    in2[5] * in[9] +
+                    in2[9] * in[5]) +
+                   in2[6] * in[8] +
+                   in2[8] * in[6];
+  output[15] =     in2[7] * in[8] +
+                   in2[8] * in[7] +
+                   in2[6] * in[9] +
+                   in2[9] * in[6];
+  output[16] =     in2[8] * in[8] +
+               2 * (in2[7] * in[9] +
+                    in2[9] * in[7]);
+  output[17] =     in2[8] * in[9] +
+                   in2[9] * in[8];
+  output[18] = 2 * in2[9] * in[9];
+}
+
+/* Reduce a long form to a short form by taking the input mod 2^255 - 19. */
+static void freduce_degree(felem *output) {
+  output[8] += 19 * output[18];
+  output[7] += 19 * output[17];
+  output[6] += 19 * output[16];
+  output[5] += 19 * output[15];
+  output[4] += 19 * output[14];
+  output[3] += 19 * output[13];
+  output[2] += 19 * output[12];
+  output[1] += 19 * output[11];
+  output[0] += 19 * output[10];
+}
+
+/* Reduce all coefficients of the short form input to be -2**25 <= x <= 2**25
+ */
+static void freduce_coefficients(felem *output) {
+  unsigned i;
+  do {
+    output[10] = 0;
+
+    for (i = 0; i < 10; i += 2) {
+      felem over = output[i] / 0x2000000l;
+      felem over2 = (over + ((over >> 63) * 2) + 1) / 2;
+      output[i+1] += over2;
+      output[i] -= over2 * 0x4000000l;
+
+      over = output[i+1] / 0x2000000;
+      output[i+2] += over;
+      output[i+1] -= over * 0x2000000;
+    }
+    output[0] += 19 * output[10];
+  } while (output[10]);
+}
+
+/* A helpful wrapper around fproduct: output = in * in2.
+ *
+ * output must be distinct to both inputs. The output is reduced degree and
+ * reduced coefficient.
+ */
+static void
+fmul(felem *output, felem *in, felem *in2) {
+  felem t[19];
+  fproduct(t, in, in2);
+  freduce_degree(t);
+  freduce_coefficients(t);
+  memcpy(output, t, sizeof(felem) * 10);
+}
+
+static void fsquare_inner(felem *output, felem *in) {
+  felem tmp;
+  output[0] =      in[0] * in[0];
+  output[1] =  2 * in[0] * in[1];
+  output[2] =  2 * (in[1] * in[1] +
+                    in[0] * in[2]);
+  output[3] =  2 * (in[1] * in[2] +
+                    in[0] * in[3]);
+  output[4] =      in[2] * in[2] +
+               4 * in[1] * in[3] +
+               2 * in[0] * in[4];
+  output[5] =  2 * (in[2] * in[3] +
+                    in[1] * in[4] +
+                    in[0] * in[5]);
+  output[6] =  2 * (in[3] * in[3] +
+                    in[2] * in[4] +
+                    in[0] * in[6] +
+                2 * in[1] * in[5]);
+  output[7] =  2 * (in[3] * in[4] +
+                    in[2] * in[5] +
+                    in[1] * in[6] +
+                    in[0] * in[7]);
+  tmp = in[1] * in[7] + in[3] * in[5];
+  output[8] =      in[4] * in[4] +
+               2 * (in[2] * in[6] +
+                    in[0] * in[8] +
+                        2 * tmp);
+  output[9] =  2 * (in[4] * in[5] +
+                    in[3] * in[6] +
+                    in[2] * in[7] +
+                    in[1] * in[8] +
+                    in[0] * in[9]);
+  tmp = in[3] * in[7] + in[1] * in[9];
+  output[10] = 2 * (in[5] * in[5] +
+                   in[4] * in[6] +
+                   in[2] * in[8] +
+                       2 * tmp);
+  output[11] = 2 * (in[5] * in[6] +
+                    in[4] * in[7] +
+                    in[3] * in[8] +
+                    in[2] * in[9]);
+  output[12] =     in[6] * in[6] +
+               2 * (in[4] * in[8] +
+                2 * (in[5] * in[7] +
+                     in[3] * in[9]));
+  output[13] = 2 * (in[6] * in[7] +
+                    in[5] * in[8] +
+                    in[4] * in[9]);
+  output[14] = 2 * (in[7] * in[7] +
+                    in[6] * in[8] +
+                2 * in[5] * in[9]);
+  output[15] = 2 * (in[7] * in[8] +
+                    in[6] * in[9]);
+  output[16] =     in[8] * in[8] +
+               4 * in[7] * in[9];
+  output[17] = 2 * in[8] * in[9];
+  output[18] = 2 * in[9] * in[9];
+}
+
+static void
+fsquare(felem *output, felem *in) {
+  felem t[19];
+  fsquare_inner(t, in);
+  freduce_degree(t);
+  freduce_coefficients(t);
+  memcpy(output, t, sizeof(felem) * 10);
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+static void
+fexpand(felem *output, uchar *input) {
+#define F(n,start,shift,mask) \
+  output[n] = ((((felem) input[start + 0]) | \
+                ((felem) input[start + 1]) << 8 | \
+                ((felem) input[start + 2]) << 16 | \
+                ((felem) input[start + 3]) << 24) >> shift) & mask;
+  F(0, 0, 0, 0x3ffffff);
+  F(1, 3, 2, 0x1ffffff);
+  F(2, 6, 3, 0x3ffffff);
+  F(3, 9, 5, 0x1ffffff);
+  F(4, 12, 6, 0x3ffffff);
+  F(5, 16, 0, 0x1ffffff);
+  F(6, 19, 1, 0x3ffffff);
+  F(7, 22, 3, 0x1ffffff);
+  F(8, 25, 4, 0x3ffffff);
+  F(9, 28, 6, 0x1ffffff);
+#undef F
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ */
+static void
+fcontract(uchar *output, felem *input) {
+  int i;
+
+  do {
+    for (i = 0; i < 9; ++i) {
+      if ((i & 1) == 1) {
+        while (input[i] < 0) {
+          input[i] += 0x2000000;
+          input[i + 1]--;
+        }
+      } else {
+        while (input[i] < 0) {
+          input[i] += 0x4000000;
+          input[i + 1]--;
+        }
+      }
+    }
+    while (input[9] < 0) {
+      input[9] += 0x2000000;
+      input[0] -= 19;
+    }
+  } while (input[0] < 0);
+
+  input[1] <<= 2;
+  input[2] <<= 3;
+  input[3] <<= 5;
+  input[4] <<= 6;
+  input[6] <<= 1;
+  input[7] <<= 3;
+  input[8] <<= 4;
+  input[9] <<= 6;
+#define F(i, s) \
+  output[s+0] |=  input[i] & 0xff; \
+  output[s+1]  = (input[i] >> 8) & 0xff; \
+  output[s+2]  = (input[i] >> 16) & 0xff; \
+  output[s+3]  = (input[i] >> 24) & 0xff;
+  output[0] = 0;
+  output[16] = 0;
+  F(0,0);
+  F(1,3);
+  F(2,6);
+  F(3,9);
+  F(4,12);
+  F(5,16);
+  F(6,19);
+  F(7,22);
+  F(8,25);
+  F(9,28);
+#undef F
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z3: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ */
+static void fmonty(felem *x2, felem *z2,  /* output 2Q */
+                   felem *x3, felem *z3,  /* output Q + Q' */
+                   felem *x, felem *z,    /* input Q */
+                   felem *xprime, felem *zprime,  /* input Q' */
+                   felem *qmqp /* input Q - Q' */) {
+  felem origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
+        zzprime[19], zzzprime[19], xxxprime[19];
+
+  memcpy(origx, x, 10 * sizeof(felem));
+  fsum(x, z);
+  fdifference(z, origx);  // does x - z
+
+  memcpy(origxprime, xprime, sizeof(felem) * 10);
+  fsum(xprime, zprime);
+  fdifference(zprime, origxprime);
+  fproduct(xxprime, xprime, z);
+  fproduct(zzprime, x, zprime);
+  freduce_degree(xxprime);
+  freduce_coefficients(xxprime);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(origxprime, xxprime, sizeof(felem) * 10);
+  fsum(xxprime, zzprime);
+  fdifference(zzprime, origxprime);
+  fsquare(xxxprime, xxprime);
+  fsquare(zzzprime, zzprime);
+  fproduct(zzprime, zzzprime, qmqp);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(x3, xxxprime, sizeof(felem) * 10);
+  memcpy(z3, zzprime, sizeof(felem) * 10);
+
+  fsquare(xx, x);
+  fsquare(zz, z);
+  fproduct(x2, xx, zz);
+  freduce_degree(x2);
+  freduce_coefficients(x2);
+  fdifference(zz, xx);  // does zz = xx - zz
+  memset(zzz + 10, 0, sizeof(felem) * 9);
+  fscalar_product(zzz, zz, 121665);
+  freduce_degree(zzz);
+  freduce_coefficients(zzz);
+  fsum(zzz, xx);
+  fproduct(z2, zz, zzz);
+  freduce_degree(z2);
+  freduce_coefficients(z2);
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void
+cmult(felem *resultx, felem *resultz, uchar *n, felem *q) {
+  felem a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
+  felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  felem e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
+  felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+  unsigned i, j;
+
+  memcpy(nqpqx, q, sizeof(felem) * 10);
+
+  for (i = 0; i < 32; ++i) {
+    uchar byte = n[31 - i];
+    for (j = 0; j < 8; ++j) {
+      if (byte & 0x80) {
+        fmonty(nqpqx2, nqpqz2,
+               nqx2, nqz2,
+               nqpqx, nqpqz,
+               nqx, nqz,
+               q);
+      } else {
+        fmonty(nqx2, nqz2,
+               nqpqx2, nqpqz2,
+               nqx, nqz,
+               nqpqx, nqpqz,
+               q);
+      }
+
+      t = nqx;
+      nqx = nqx2;
+      nqx2 = t;
+      t = nqz;
+      nqz = nqz2;
+      nqz2 = t;
+      t = nqpqx;
+      nqpqx = nqpqx2;
+      nqpqx2 = t;
+      t = nqpqz;
+      nqpqz = nqpqz2;
+      nqpqz2 = t;
+
+      byte <<= 1;
+    }
+  }
+
+  memcpy(resultx, nqx, sizeof(felem) * 10);
+  memcpy(resultz, nqz, sizeof(felem) * 10);
+}
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, felem *z) {
+  felem z2[10];
+  felem z9[10];
+  felem z11[10];
+  felem z2_5_0[10];
+  felem z2_10_0[10];
+  felem z2_20_0[10];
+  felem z2_50_0[10];
+  felem z2_100_0[10];
+  felem t0[10];
+  felem t1[10];
+  int i;
+
+  /* 2 */ fsquare(z2,z);
+  /* 4 */ fsquare(t1,z2);
+  /* 8 */ fsquare(t0,t1);
+  /* 9 */ fmul(z9,t0,z);
+  /* 11 */ fmul(z11,z9,z2);
+  /* 22 */ fsquare(t0,z11);
+  /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
+  /* 2^7 - 2^2 */ fsquare(t1,t0);
+  /* 2^8 - 2^3 */ fsquare(t0,t1);
+  /* 2^9 - 2^4 */ fsquare(t1,t0);
+  /* 2^10 - 2^5 */ fsquare(t0,t1);
+  /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
+  /* 2^12 - 2^2 */ fsquare(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
+  /* 2^22 - 2^2 */ fsquare(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ fsquare(t1,t0);
+  /* 2^42 - 2^2 */ fsquare(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
+  /* 2^52 - 2^2 */ fsquare(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
+  /* 2^102 - 2^2 */ fsquare(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ fsquare(t0,t1);
+  /* 2^202 - 2^2 */ fsquare(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ fsquare(t1,t0);
+  /* 2^252 - 2^2 */ fsquare(t0,t1);
+  /* 2^253 - 2^3 */ fsquare(t1,t0);
+  /* 2^254 - 2^4 */ fsquare(t0,t1);
+  /* 2^255 - 2^5 */ fsquare(t1,t0);
+  /* 2^255 - 21 */ fmul(out,t1,z11);
+}
+
+void
+curve25519(uchar mypublic[32], uchar secret[32], uchar basepoint[32]) {
+  felem bp[10], x[10], z[10], zmone[10];
+  fexpand(bp, basepoint);
+  cmult(x, z, secret, bp);
+  crecip(zmone, z);
+  fmul(z, x, zmone);
+  fcontract(mypublic, z);
+}
--- /dev/null	Wed Jul 14 01:55:45 2021
+++ /sys/src/libsec/port/curve25519_dh.c	Mon Jul 12 11:09:54 2021
@@ -0,0 +1,38 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+static uchar nine[32] = {9};
+static uchar zero[32] = {0};
+
+void
+curve25519_dh_new(uchar x[32], uchar y[32])
+{
+	uchar b;
+
+	/* new public/private key pair */
+	genrandom(x, 32);
+	b = x[31];
+	x[0] &= ~7;			/* clear bit 0,1,2 */
+	x[31] = 0x40 | (b & 0x7f);	/* set bit 254, clear bit 255 */
+	curve25519(y, x, nine);
+
+	/* bit 255 is always 0, so make it random */
+	y[31] |= b & 0x80;
+}
+
+int
+curve25519_dh_finish(uchar x[32], uchar y[32], uchar z[32])
+{
+	/* remove the random bit */
+	y[31] &= 0x7f;
+
+	/* calculate dhx key */
+	curve25519(z, x, y);
+
+	memset(x, 0, 32);
+	memset(y, 0, 32);
+
+	return tsmemcmp(z, zero, 32) != 0;
+}
--- sys/src/libsec/port/decodepem.c	Wed May 12 00:41:09 2004
+++ /sys/src/libsec/port/decodepem.c	Sat Jun 26 21:21:41 2021
@@ -27,13 +27,15 @@
 			t++;
 		if(strncmp(tt, "-----BEGIN ", STRLEN("-----BEGIN ")) == 0
 		&& strncmp(&tt[STRLEN("-----BEGIN ")], type, n) == 0
-		&& strncmp(&tt[STRLEN("-----BEGIN ")+n], "-----\n", STRLEN("-----\n")) == 0)
+		&& strncmp(&tt[STRLEN("-----BEGIN ")+n], "-----", STRLEN("-----")) == 0
+		&& strchr("\r\n", tt[STRLEN("-----BEGIN ")+n+STRLEN("-----")]) != nil)
 			break;
 	}
 	for(tt = t; tt != nil && tt < e; tt++){
 		if(strncmp(tt, "-----END ", STRLEN("-----END ")) == 0
 		&& strncmp(&tt[STRLEN("-----END ")], type, n) == 0
-		&& strncmp(&tt[STRLEN("-----END ")+n], "-----\n", STRLEN("-----\n")) == 0)
+		&& strncmp(&tt[STRLEN("-----END ")+n], "-----", STRLEN("-----")) == 0
+		&& strchr("\r\n", tt[STRLEN("-----END ")+n+STRLEN("-----")]) != nil)
 			break;
 		tt = strchr(tt, '\n');
 		if(tt == nil)
--- sys/src/libsec/port/des.c	Fri Mar  1 21:12:59 2002
+++ /sys/src/libsec/port/des.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /*
--- sys/src/libsec/port/des3CBC.c	Wed Apr 25 20:01:32 2001
+++ /sys/src/libsec/port/des3CBC.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/des3ECB.c	Wed Apr 25 20:01:32 2001
+++ /sys/src/libsec/port/des3ECB.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/desCBC.c	Fri Mar 16 03:41:59 2001
+++ /sys/src/libsec/port/desCBC.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/desECB.c	Fri Mar 16 03:41:59 2001
+++ /sys/src/libsec/port/desECB.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/desmodes.c	Fri Mar  1 21:12:59 2002
+++ /sys/src/libsec/port/desmodes.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /*
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/dh.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,75 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+mpint*
+dh_new(DHstate *dh, mpint *p, mpint *q, mpint *g)
+{
+	mpint *pm1;
+	int n;
+
+	memset(dh, 0, sizeof(*dh));
+	if(mpcmp(g, mpone) <= 0)
+		return nil;
+
+	n = mpsignif(p);
+	pm1 = mpnew(n);
+	mpsub(p, mpone, pm1);
+	dh->p = mpcopy(p);
+	dh->g = mpcopy(g);
+	dh->q = mpcopy(q != nil ? q : pm1);
+	dh->x = mpnew(mpsignif(dh->q));
+	dh->y = mpnew(n);
+	for(;;){
+		mpnrand(dh->q, genrandom, dh->x);
+		mpexp(dh->g, dh->x, dh->p, dh->y);
+		if(mpcmp(dh->y, mpone) > 0 && mpcmp(dh->y, pm1) < 0)
+			break;
+	}
+	mpfree(pm1);
+
+	return dh->y;
+}
+
+mpint*
+dh_finish(DHstate *dh, mpint *y)
+{
+	mpint *k = nil;
+
+	if(y == nil || dh->x == nil || dh->p == nil || dh->q == nil)
+		goto Out;
+
+	/* y > 1 */
+	if(mpcmp(y, mpone) <= 0)
+		goto Out;
+
+	k = mpnew(mpsignif(dh->p));
+
+	/* y < p-1 */
+	mpsub(dh->p, mpone, k);
+	if(mpcmp(y, k) >= 0){
+Bad:
+		mpfree(k);
+		k = nil;
+		goto Out;
+	}
+
+	/* y**q % p == 1 if q < p-1 */
+	if(mpcmp(dh->q, k) < 0){
+		mpexp(y, dh->q, dh->p, k);
+		if(mpcmp(k, mpone) != 0)
+			goto Bad;
+	}
+
+	mpexp(y, dh->x, dh->p, k);
+
+Out:
+	mpfree(dh->p);
+	mpfree(dh->q);
+	mpfree(dh->g);
+	mpfree(dh->x);
+	mpfree(dh->y);
+	memset(dh, 0, sizeof(*dh));
+	return k;
+}
--- sys/src/libsec/port/dsaalloc.c	Thu Sep 18 03:53:20 2003
+++ /sys/src/libsec/port/dsaalloc.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/dsagen.c	Thu Sep 18 03:53:19 2003
+++ /sys/src/libsec/port/dsagen.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/dsaprimes.c	Fri Mar 16 03:42:00 2001
+++ /sys/src/libsec/port/dsaprimes.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -12,7 +13,7 @@
 static void
 Hrand(uchar *s)
 {
-	ulong *u = (ulong*)s;
+	u32int *u = (u32int*)s;
 	*u++ = fastrand();
 	*u++ = fastrand();
 	*u++ = fastrand();
--- sys/src/libsec/port/dsaprivtopub.c	Thu Jul 25 21:32:06 2002
+++ /sys/src/libsec/port/dsaprivtopub.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/dsasign.c	Thu Sep 18 03:53:20 2003
+++ /sys/src/libsec/port/dsasign.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/dsaverify.c	Thu Sep 18 03:53:21 2003
+++ /sys/src/libsec/port/dsaverify.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/ecc.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,613 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+#include <ctype.h>
+
+extern void jacobian_affine(mpint *p,
+	mpint *X, mpint *Y, mpint *Z);
+extern void jacobian_dbl(mpint *p, mpint *a,
+	mpint *X1, mpint *Y1, mpint *Z1,
+	mpint *X3, mpint *Y3, mpint *Z3);
+extern void jacobian_add(mpint *p, mpint *a,
+	mpint *X1, mpint *Y1, mpint *Z1,
+	mpint *X2, mpint *Y2, mpint *Z2,
+	mpint *X3, mpint *Y3, mpint *Z3);
+
+void
+ecassign(ECdomain *dom, ECpoint *a, ECpoint *b)
+{
+	if((b->inf = a->inf) != 0)
+		return;
+	mpassign(a->x, b->x);
+	mpassign(a->y, b->y);
+	if(b->z != nil){
+		mpassign(a->z != nil ? a->z : mpone, b->z);
+		return;
+	}
+	if(a->z != nil){
+		b->z = mpcopy(a->z);
+		jacobian_affine(dom->p, b->x, b->y, b->z);
+		mpfree(b->z);
+		b->z = nil;
+	}
+}
+
+void
+ecadd(ECdomain *dom, ECpoint *a, ECpoint *b, ECpoint *s)
+{
+	if(a->inf && b->inf){
+		s->inf = 1;
+		return;
+	}
+	if(a->inf){
+		ecassign(dom, b, s);
+		return;
+	}
+	if(b->inf){
+		ecassign(dom, a, s);
+		return;
+	}
+
+	if(s->z == nil){
+		s->z = mpcopy(mpone);
+		ecadd(dom, a, b, s);
+		if(!s->inf)
+			jacobian_affine(dom->p, s->x, s->y, s->z);
+		mpfree(s->z);
+		s->z = nil;
+		return;
+	}
+
+	if(a == b)
+		jacobian_dbl(dom->p, dom->a,
+			a->x, a->y, a->z != nil ? a->z : mpone,
+			s->x, s->y, s->z);
+	else
+		jacobian_add(dom->p, dom->a,
+			a->x, a->y, a->z != nil ? a->z : mpone,
+			b->x, b->y, b->z != nil ? b->z : mpone,
+			s->x, s->y, s->z);
+	s->inf = mpcmp(s->z, mpzero) == 0;
+}
+
+void
+ecmul(ECdomain *dom, ECpoint *a, mpint *k, ECpoint *s)
+{
+	ECpoint ns, na;
+	mpint *l;
+
+	if(a->inf || mpcmp(k, mpzero) == 0){
+		s->inf = 1;
+		return;
+	}
+	ns.inf = 1;
+	ns.x = mpnew(0);
+	ns.y = mpnew(0);
+	ns.z = mpnew(0);
+	na.x = mpnew(0);
+	na.y = mpnew(0);
+	na.z = mpnew(0);
+	ecassign(dom, a, &na);
+	l = mpcopy(k);
+	l->sign = 1;
+	while(mpcmp(l, mpzero) != 0){
+		if(l->p[0] & 1)
+			ecadd(dom, &na, &ns, &ns);
+		ecadd(dom, &na, &na, &na);
+		mpright(l, 1, l);
+	}
+	if(k->sign < 0 && !ns.inf){
+		ns.y->sign = -1;
+		mpmod(ns.y, dom->p, ns.y);
+	}
+	ecassign(dom, &ns, s);
+	mpfree(ns.x);
+	mpfree(ns.y);
+	mpfree(ns.z);
+	mpfree(na.x);
+	mpfree(na.y);
+	mpfree(na.z);
+	mpfree(l);
+}
+
+int
+ecverify(ECdomain *dom, ECpoint *a)
+{
+	mpint *p, *q;
+	int r;
+
+	if(a->inf)
+		return 1;
+
+	assert(a->z == nil);	/* need affine coordinates */
+	p = mpnew(0);
+	q = mpnew(0);
+	mpmodmul(a->y, a->y, dom->p, p);
+	mpmodmul(a->x, a->x, dom->p, q);
+	mpmodadd(q, dom->a, dom->p, q);
+	mpmodmul(q, a->x, dom->p, q);
+	mpmodadd(q, dom->b, dom->p, q);
+	r = mpcmp(p, q);
+	mpfree(p);
+	mpfree(q);
+	return r == 0;
+}
+
+int
+ecpubverify(ECdomain *dom, ECpub *a)
+{
+	ECpoint p;
+	int r;
+
+	if(a->inf)
+		return 0;
+	if(!ecverify(dom, a))
+		return 0;
+	p.x = mpnew(0);
+	p.y = mpnew(0);
+	p.z = mpnew(0);
+	ecmul(dom, a, dom->n, &p);
+	r = p.inf;
+	mpfree(p.x);
+	mpfree(p.y);
+	mpfree(p.z);
+	return r;
+}
+
+static void
+fixnibble(uchar *a)
+{
+	if(*a >= 'a')
+		*a -= 'a'-10;
+	else if(*a >= 'A')
+		*a -= 'A'-10;
+	else
+		*a -= '0';
+}
+
+static int
+octet(char **s)
+{
+	uchar c, d;
+	
+	c = *(*s)++;
+	if(!isxdigit(c))
+		return -1;
+	d = *(*s)++;
+	if(!isxdigit(d))
+		return -1;
+	fixnibble(&c);
+	fixnibble(&d);
+	return (c << 4) | d;
+}
+
+static mpint*
+halfpt(ECdomain *dom, char *s, char **rptr, mpint *out)
+{
+	char *buf, *r;
+	int n;
+	mpint *ret;
+	
+	n = ((mpsignif(dom->p)+7)/8)*2;
+	if(strlen(s) < n)
+		return 0;
+	buf = malloc(n+1);
+	buf[n] = 0;
+	memcpy(buf, s, n);
+	ret = strtomp(buf, &r, 16, out);
+	*rptr = s + (r - buf);
+	free(buf);
+	return ret;
+}
+
+static int
+mpleg(mpint *a, mpint *b)
+{
+	int r, k;
+	mpint *m, *n, *t;
+	
+	r = 1;
+	m = mpcopy(a);
+	n = mpcopy(b);
+	for(;;){
+		if(mpcmp(m, n) > 0)
+			mpmod(m, n, m);
+		if(mpcmp(m, mpzero) == 0){
+			r = 0;
+			break;
+		}
+		if(mpcmp(m, mpone) == 0)
+			break;
+		k = mplowbits0(m);
+		if(k > 0){
+			if(k & 1)
+				switch(n->p[0] & 15){
+				case 3: case 5: case 11: case 13:
+					r = -r;
+				}
+			mpright(m, k, m);
+		}
+		if((n->p[0] & 3) == 3 && (m->p[0] & 3) == 3)
+			r = -r;
+		t = m;
+		m = n;
+		n = t;
+	}
+	mpfree(m);
+	mpfree(n);
+	return r;
+}
+
+static int
+mpsqrt(mpint *n, mpint *p, mpint *r)
+{
+	mpint *a, *t, *s, *xp, *xq, *yp, *yq, *zp, *zq, *N;
+
+	if(mpleg(n, p) == -1)
+		return 0;
+	a = mpnew(0);
+	t = mpnew(0);
+	s = mpnew(0);
+	N = mpnew(0);
+	xp = mpnew(0);
+	xq = mpnew(0);
+	yp = mpnew(0);
+	yq = mpnew(0);
+	zp = mpnew(0);
+	zq = mpnew(0);
+	for(;;){
+		for(;;){
+			mpnrand(p, genrandom, a);
+			if(mpcmp(a, mpzero) > 0)
+				break;
+		}
+		mpmul(a, a, t);
+		mpsub(t, n, t);
+		mpmod(t, p, t);
+		if(mpleg(t, p) == -1)
+			break;
+	}
+	mpadd(p, mpone, N);
+	mpright(N, 1, N);
+	mpmul(a, a, t);
+	mpsub(t, n, t);
+	mpassign(a, xp);
+	uitomp(1, xq);
+	uitomp(1, yp);
+	uitomp(0, yq);
+	while(mpcmp(N, mpzero) != 0){
+		if(N->p[0] & 1){
+			mpmul(xp, yp, zp);
+			mpmul(xq, yq, zq);
+			mpmul(zq, t, zq);
+			mpadd(zp, zq, zp);
+			mpmod(zp, p, zp);
+			mpmul(xp, yq, zq);
+			mpmul(xq, yp, s);
+			mpadd(zq, s, zq);
+			mpmod(zq, p, yq);
+			mpassign(zp, yp);
+		}
+		mpmul(xp, xp, zp);
+		mpmul(xq, xq, zq);
+		mpmul(zq, t, zq);
+		mpadd(zp, zq, zp);
+		mpmod(zp, p, zp);
+		mpmul(xp, xq, zq);
+		mpadd(zq, zq, zq);
+		mpmod(zq, p, xq);
+		mpassign(zp, xp);
+		mpright(N, 1, N);
+	}
+	if(mpcmp(yq, mpzero) != 0)
+		abort();
+	mpassign(yp, r);
+	mpfree(a);
+	mpfree(t);
+	mpfree(s);
+	mpfree(N);
+	mpfree(xp);
+	mpfree(xq);
+	mpfree(yp);
+	mpfree(yq);
+	mpfree(zp);
+	mpfree(zq);
+	return 1;
+}
+
+ECpoint*
+strtoec(ECdomain *dom, char *s, char **rptr, ECpoint *ret)
+{
+	int allocd, o;
+	mpint *r;
+
+	allocd = 0;
+	if(ret == nil){
+		allocd = 1;
+		ret = mallocz(sizeof(*ret), 1);
+		if(ret == nil)
+			return nil;
+		ret->x = mpnew(0);
+		ret->y = mpnew(0);
+	}
+	ret->inf = 0;
+	o = 0;
+	switch(octet(&s)){
+	case 0:
+		ret->inf = 1;
+		break;
+	case 3:
+		o = 1;
+	case 2:
+		if(halfpt(dom, s, &s, ret->x) == nil)
+			goto err;
+		r = mpnew(0);
+		mpmul(ret->x, ret->x, r);
+		mpadd(r, dom->a, r);
+		mpmul(r, ret->x, r);
+		mpadd(r, dom->b, r);
+		if(!mpsqrt(r, dom->p, r)){
+			mpfree(r);
+			goto err;
+		}
+		if((r->p[0] & 1) != o)
+			mpsub(dom->p, r, r);
+		mpassign(r, ret->y);
+		mpfree(r);
+		if(!ecverify(dom, ret))
+			goto err;
+		break;
+	case 4:
+		if(halfpt(dom, s, &s, ret->x) == nil)
+			goto err;
+		if(halfpt(dom, s, &s, ret->y) == nil)
+			goto err;
+		if(!ecverify(dom, ret))
+			goto err;
+		break;
+	}
+	if(ret->z != nil && !ret->inf)
+		mpassign(mpone, ret->z);
+	return ret;
+
+err:
+	if(rptr)
+		*rptr = s;
+	if(allocd){
+		mpfree(ret->x);
+		mpfree(ret->y);
+		free(ret);
+	}
+	return nil;
+}
+
+ECpriv*
+ecgen(ECdomain *dom, ECpriv *p)
+{
+	if(p == nil){
+		p = mallocz(sizeof(*p), 1);
+		if(p == nil)
+			return nil;
+		p->x = mpnew(0);
+		p->y = mpnew(0);
+		p->d = mpnew(0);
+	}
+	for(;;){
+		mpnrand(dom->n, genrandom, p->d);
+		if(mpcmp(p->d, mpzero) > 0)
+			break;
+	}
+	ecmul(dom, &dom->G, p->d, p);
+	return p;
+}
+
+void
+ecdsasign(ECdomain *dom, ECpriv *priv, uchar *dig, int len, mpint *r, mpint *s)
+{
+	ECpriv tmp;
+	mpint *E, *t;
+
+	tmp.x = mpnew(0);
+	tmp.y = mpnew(0);
+	tmp.z = nil;
+	tmp.d = mpnew(0);
+	E = betomp(dig, len, nil);
+	t = mpnew(0);
+	if(mpsignif(dom->n) < 8*len)
+		mpright(E, 8*len - mpsignif(dom->n), E);
+	for(;;){
+		ecgen(dom, &tmp);
+		mpmod(tmp.x, dom->n, r);
+		if(mpcmp(r, mpzero) == 0)
+			continue;
+		mpmul(r, priv->d, s);
+		mpadd(E, s, s);
+		mpinvert(tmp.d, dom->n, t);
+		mpmodmul(s, t, dom->n, s);
+		if(mpcmp(s, mpzero) != 0)
+			break;
+	}
+	mpfree(t);
+	mpfree(E);
+	mpfree(tmp.x);
+	mpfree(tmp.y);
+	mpfree(tmp.d);
+}
+
+int
+ecdsaverify(ECdomain *dom, ECpub *pub, uchar *dig, int len, mpint *r, mpint *s)
+{
+	mpint *E, *t, *u1, *u2;
+	ECpoint R, S;
+	int ret;
+
+	if(mpcmp(r, mpone) < 0 || mpcmp(s, mpone) < 0 || mpcmp(r, dom->n) >= 0 || mpcmp(r, dom->n) >= 0)
+		return 0;
+	E = betomp(dig, len, nil);
+	if(mpsignif(dom->n) < 8*len)
+		mpright(E, 8*len - mpsignif(dom->n), E);
+	t = mpnew(0);
+	u1 = mpnew(0);
+	u2 = mpnew(0);
+	R.x = mpnew(0);
+	R.y = mpnew(0);
+	R.z = mpnew(0);
+	S.x = mpnew(0);
+	S.y = mpnew(0);
+	S.z = mpnew(0);
+	mpinvert(s, dom->n, t);
+	mpmodmul(E, t, dom->n, u1);
+	mpmodmul(r, t, dom->n, u2);
+	ecmul(dom, &dom->G, u1, &R);
+	ecmul(dom, pub, u2, &S);
+	ecadd(dom, &R, &S, &R);
+	ret = 0;
+	if(!R.inf){
+		jacobian_affine(dom->p, R.x, R.y, R.z);
+		mpmod(R.x, dom->n, t);
+		ret = mpcmp(r, t) == 0;
+	}
+	mpfree(E);
+	mpfree(t);
+	mpfree(u1);
+	mpfree(u2);
+	mpfree(R.x);
+	mpfree(R.y);
+	mpfree(R.z);
+	mpfree(S.x);
+	mpfree(S.y);
+	mpfree(S.z);
+	return ret;
+}
+
+static char *code = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+
+void
+base58enc(uchar *src, char *dst, int len)
+{
+	mpint *n, *r, *b;
+	char *sdst, t;
+	
+	sdst = dst;
+	n = betomp(src, len, nil);
+	b = uitomp(58, nil);
+	r = mpnew(0);
+	while(mpcmp(n, mpzero) != 0){
+		mpdiv(n, b, n, r);
+		*dst++ = code[mptoui(r)];
+	}
+	for(; *src == 0; src++)
+		*dst++ = code[0];
+	*dst-- = 0;
+	while(dst > sdst){
+		t = *sdst;
+		*sdst++ = *dst;
+		*dst-- = t;
+	}
+}
+
+int
+base58dec(char *src, uchar *dst, int len)
+{
+	mpint *n, *b, *r;
+	char *t;
+	
+	n = mpnew(0);
+	r = mpnew(0);
+	b = uitomp(58, nil);
+	for(; *src; src++){
+		t = strchr(code, *src);
+		if(t == nil){
+			mpfree(n);
+			mpfree(r);
+			mpfree(b);
+			werrstr("invalid base58 char");
+			return -1;
+		}
+		uitomp(t - code, r);
+		mpmul(n, b, n);
+		mpadd(n, r, n);
+	}
+	mptober(n, dst, len);
+	mpfree(n);
+	mpfree(r);
+	mpfree(b);
+	return 0;
+}
+
+void
+ecdominit(ECdomain *dom, void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h))
+{
+	memset(dom, 0, sizeof(*dom));
+	dom->p = mpnew(0);
+	dom->a = mpnew(0);
+	dom->b = mpnew(0);
+	dom->G.x = mpnew(0);
+	dom->G.y = mpnew(0);
+	dom->n = mpnew(0);
+	dom->h = mpnew(0);
+	if(init){
+		(*init)(dom->p, dom->a, dom->b, dom->G.x, dom->G.y, dom->n, dom->h);
+		dom->p = mpfield(dom->p);
+	}
+}
+
+void
+ecdomfree(ECdomain *dom)
+{
+	mpfree(dom->p);
+	mpfree(dom->a);
+	mpfree(dom->b);
+	mpfree(dom->G.x);
+	mpfree(dom->G.y);
+	mpfree(dom->n);
+	mpfree(dom->h);
+	memset(dom, 0, sizeof(*dom));
+}
+
+int
+ecencodepub(ECdomain *dom, ECpub *pub, uchar *data, int len)
+{
+	int n;
+
+	n = (mpsignif(dom->p)+7)/8;
+	if(len < 1 + 2*n)
+		return 0;
+	len = 1 + 2*n;
+	data[0] = 0x04;
+	mptober(pub->x, data+1, n);
+	mptober(pub->y, data+1+n, n);
+	return len;
+}
+
+ECpub*
+ecdecodepub(ECdomain *dom, uchar *data, int len)
+{
+	ECpub *pub;
+	int n;
+
+	n = (mpsignif(dom->p)+7)/8;
+	if(len != 1 + 2*n || data[0] != 0x04)
+		return nil;
+	pub = mallocz(sizeof(*pub), 1);
+	if(pub == nil)
+		return nil;
+	pub->x = betomp(data+1, n, nil);
+	pub->y = betomp(data+1+n, n, nil);
+	if(!ecpubverify(dom, pub)){
+		ecpubfree(pub);
+		pub = nil;
+	}
+	return pub;
+}
+
+void
+ecpubfree(ECpub *p)
+{
+	if(p == nil)
+		return;
+	mpfree(p->x);
+	mpfree(p->y);
+	free(p);
+}
--- sys/src/libsec/port/egalloc.c	Thu Sep 18 03:53:18 2003
+++ /sys/src/libsec/port/egalloc.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/egdecrypt.c	Fri Mar 16 03:42:00 2001
+++ /sys/src/libsec/port/egdecrypt.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/egencrypt.c	Fri Mar  1 21:12:59 2002
+++ /sys/src/libsec/port/egencrypt.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/eggen.c	Thu Jul 25 21:32:05 2002
+++ /sys/src/libsec/port/eggen.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/egprivtopub.c	Fri Mar 16 03:42:00 2001
+++ /sys/src/libsec/port/egprivtopub.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/egsign.c	Thu Jul 25 21:32:05 2002
+++ /sys/src/libsec/port/egsign.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/egtest.c	Fri Mar 16 03:42:01 2001
+++ /sys/src/libsec/port/egtest.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -9,7 +10,7 @@
 	mpint *m, *gamma, *delta, *in, *out;
 	int plen, shift;
 
-	fmtinstall('B', mpconv);
+	fmtinstall('B', mpfmt);
 
 	sk = egprivalloc();
 	sk->pub.p = uitomp(2357, nil);
--- sys/src/libsec/port/egverify.c	Fri Mar 16 03:42:01 2001
+++ /sys/src/libsec/port/egverify.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/fastrand.c	Mon Oct 28 20:21:41 2002
+++ /sys/src/libsec/port/fastrand.c	Sat Jun 26 21:21:41 2021
@@ -1,6 +1,7 @@
-#include	<u.h>
-#include	<libc.h>
-#include	<libsec.h>
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
 
 /* 
  *  use the X917 random number generator to create random
--- sys/src/libsec/port/genprime.c	Fri Mar 16 03:42:01 2001
+++ /sys/src/libsec/port/genprime.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -17,6 +18,7 @@
 	p->p[p->top-1] &= (x-1);
 	p->p[p->top-1] |= x;
 	p->p[0] |= 1;
+	mpnorm(p);
 
 	// keep icrementing till it looks prime
 	for(;;){
--- sys/src/libsec/port/genrandom.c	Fri Mar 16 03:42:01 2001
+++ /sys/src/libsec/port/genrandom.c	Sat Jun 26 21:21:41 2021
@@ -1,62 +1,46 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
-typedef struct State{
-	QLock		lock;
-	int		seeded;
-	uvlong		seed;
-	DES3state	des3;
-} State;
-static State x917state;
-
 static void
-X917(uchar *rand, int nrand)
+init(Chachastate *cs)
 {
-	int i, m, n8;
-	uvlong I, x;
+	ulong seed[11];
+	int i;
+
+	for(i=0; i<nelem(seed); i++)
+		seed[i] = truerand();
 
-	/* 1. Compute intermediate value I = Ek(time). */
-	I = nsec();
-	triple_block_cipher(x917state.des3.expanded, (uchar*)&I, 0); /* two-key EDE */
-
-	/* 2. x[i] = Ek(I^seed);  seed = Ek(x[i]^I); */
-	m = (nrand+7)/8;
-	for(i=0; i<m; i++){
-		x = I ^ x917state.seed;
-		triple_block_cipher(x917state.des3.expanded, (uchar*)&x, 0);
-		n8 = (nrand>8) ? 8 : nrand;
-		memcpy(rand, (uchar*)&x, n8);
-		rand += 8;
-		nrand -= 8;
-		x ^= I;
-		triple_block_cipher(x917state.des3.expanded, (uchar*)&x, 0);
-		x917state.seed = x;
-	}
+	setupChachastate(cs, (uchar*)&seed[0], 32, (uchar*)&seed[8], 12, 20);
+	memset(seed, 0, sizeof(seed));
 }
 
 static void
-X917init(void)
+fill(Chachastate *cs, uchar *p, int n)
 {
-	int n;
-	uchar mix[128];
-	uchar key3[3][8];
-	ulong *ulp;
-
-	ulp = (ulong*)key3;
-	for(n = 0; n < sizeof(key3)/sizeof(ulong); n++)
-		ulp[n] = truerand();
-	setupDES3state(&x917state.des3, key3, nil);
-	X917(mix, sizeof mix);
-	x917state.seeded = 1;
+	Chachastate c;
+
+	c = *cs;
+	chacha_encrypt((uchar*)&cs->input[4], 32, &c);
+	if(++cs->input[13] == 0)
+		if(++cs->input[14] == 0)
+			++cs->input[15];
+
+	chacha_encrypt(p, n, &c);
+	memset(&c, 0, sizeof(c));
 }
 
 void
 genrandom(uchar *p, int n)
 {
-	qlock(&x917state.lock);
-	if(x917state.seeded == 0)
-		X917init();
-	X917(p, n);
-	qunlock(&x917state.lock);
+	static QLock lk;
+	static Chachastate cs;
+
+	qlock(&lk);
+	if(cs.rounds == 0)
+		init(&cs);
+	cs.input[4] ^= getpid();	/* fork protection */
+	fill(&cs, p, n);
+	qunlock(&lk);
 }
--- sys/src/libsec/port/gensafeprime.c	Thu Jul 25 21:32:04 2002
+++ /sys/src/libsec/port/gensafeprime.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/genstrongprime.c	Fri Mar 16 03:42:02 2001
+++ /sys/src/libsec/port/genstrongprime.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/hkdf.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,40 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/* rfc5869 */
+void
+hkdf_x(salt, nsalt, info, ninfo, key, nkey, d, dlen, x, xlen)
+	uchar *salt, *info, *key, *d;
+	ulong nsalt, ninfo, nkey, dlen;
+	DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+	int xlen;
+{
+	uchar prk[256], tmp[256], cnt;
+	DigestState *ds;
+
+	assert(xlen <= sizeof(tmp));
+
+	memset(tmp, 0, xlen);
+	if(nsalt == 0){
+		salt = tmp;
+		nsalt = xlen;
+	}
+	/* note that salt and key are swapped in this case */
+	(*x)(key, nkey, salt, nsalt, prk, nil);
+	ds = nil;
+	for(cnt=1;; cnt++) {
+		if(ninfo > 0)
+			ds = (*x)(info, ninfo, prk, xlen, nil, ds);
+		(*x)(&cnt, 1, prk, xlen, tmp, ds);
+		if(dlen <= xlen){
+			memmove(d, tmp, dlen);
+			break;
+		}
+		memmove(d, tmp, xlen);
+		dlen -= xlen;
+		d += xlen;
+		ds = (*x)(tmp, xlen, prk, xlen, nil, nil);
+	}
+}
--- sys/src/libsec/port/hmac.c	Tue Aug 28 20:13:48 2007
+++ /sys/src/libsec/port/hmac.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /* rfc2104 */
@@ -11,8 +13,13 @@
 
 	if(xlen > sizeof(innerdigest))
 		return nil;
-	if(klen > Hmacblksz)
-		return nil;
+	if(klen > Hmacblksz){
+		if(xlen > Hmacblksz)
+			return nil;
+		(*x)(key, klen, innerdigest, nil);
+		key = innerdigest;
+		klen = xlen;
+	}
 
 	/* first time through */
 	if(s == nil || s->seeded == 0){
--- sys/src/libsec/port/hmactest.c	Fri Mar 16 03:42:02 2001
+++ /sys/src/libsec/port/hmactest.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/jacobian.c	Wed Jun 30 17:38:31 2021
@@ -0,0 +1,220 @@
+#include "os.h"
+#include <mp.h>
+
+void
+jacobian_new(mpint *x, mpint *y, mpint *z, mpint *X, mpint *Y, mpint *Z)
+{
+	mpassign(x, X);
+	mpassign(y, Y);
+	mpassign(z, Z);
+}
+
+void
+jacobian_inf(mpint *X, mpint *Y, mpint *Z)
+{
+	jacobian_new(mpzero, mpone, mpzero, X, Y, Z);
+}
+
+void
+jacobian_affine(mpint *p, mpint *X, mpint *Y, mpint *Z)
+{
+	mpint *ZZZ = mpnew(0);
+	mpint *ZZ = mpnew(0);
+
+	if(mpcmp(Z, mpzero) != 0){
+		mpmodmul(Z, Z, p, ZZ);
+		mpmodmul(ZZ, Z, p, ZZZ);
+		mpint *inv = mpnew(0);
+		mpinvert(ZZ, p, inv);
+		mpmodmul(X, inv, p, X);
+		mpfree(inv);
+		inv = mpnew(0);
+		mpinvert(ZZZ, p, inv);
+		mpmodmul(Y, inv, p, Y);
+		mpfree(inv);
+		mpassign(mpone, Z);
+	}
+	mpfree(ZZZ);
+	mpfree(ZZ);
+}
+
+void
+jacobian_dbl(mpint *p, mpint *a, mpint *X1, mpint *Y1, mpint *Z1, mpint *X3, mpint *Y3, mpint *Z3)
+{
+	/*
+	*	if(Y1 == 0) {
+	*		X3,Y3,Z3 = jacobian_inf();
+	*	} else {
+	*		XX = X1^2;
+	*		YY = Y1^2;
+	*		YYYY = YY^2;
+	*		ZZ = Z1^2;
+	*		S = 2*((X1+YY)^2-XX-YYYY);
+	*		M = 3*XX+a*ZZ^2;
+	*		Z3 = (Y1+Z1)^2-YY-ZZ;	
+	*		X3 = M^2-2*S;
+	*		Y3 = M*(S-X3)-8*YYYY;
+	*	}
+	*/
+	mpint *M = mpnew(0);
+	mpint *S = mpnew(0);
+	mpint *ZZ = mpnew(0);
+	mpint *YYYY = mpnew(0);
+	mpint *YY = mpnew(0);
+	mpint *XX = mpnew(0);
+	if(mpcmp(Y1, mpzero) == 0)
+		jacobian_inf(X3, Y3, Z3);
+	else{
+		mpmodmul(X1, X1, p, XX);
+		mpmodmul(Y1, Y1, p, YY);
+		mpmodmul(YY, YY, p, YYYY);
+		mpmodmul(Z1, Z1, p, ZZ);
+		mpint *tmp1 = mpnew(0);
+		mpmodadd(X1, YY, p, tmp1);
+		mpmodmul(tmp1, tmp1, p, tmp1);
+		mpmodsub(tmp1, XX, p, tmp1);
+		mpmodsub(tmp1, YYYY, p, tmp1);
+		mpmodadd(tmp1, tmp1, p, S); // 2*tmp1
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		uitomp(3UL, tmp1);
+		mpmodmul(tmp1, XX, p, M);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		mpint *tmp2 = mpnew(0);
+		mpmodmul(ZZ, ZZ, p, tmp2);
+		mpmodmul(a, tmp2, p, tmp1);
+		mpfree(tmp2);
+		mpmodadd(M, tmp1, p, M);
+		mpfree(tmp1);
+		mpmodadd(Y1, Z1, p, Z3);
+		mpmodmul(Z3, Z3, p, Z3);
+		mpmodsub(Z3, YY, p, Z3);
+		mpmodsub(Z3, ZZ, p, Z3);
+		mpmodmul(M, M, p, X3);
+		tmp1 = mpnew(0);
+		mpmodadd(S, S, p, tmp1); // 2*S
+		mpmodsub(X3, tmp1, p, X3);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		mpmodsub(S, X3, p, tmp1);
+		mpmodmul(M, tmp1, p, Y3);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		tmp2 = mpnew(0);
+		uitomp(8UL, tmp2);
+		mpmodmul(tmp2, YYYY, p, tmp1);
+		mpfree(tmp2);
+		mpmodsub(Y3, tmp1, p, Y3);
+		mpfree(tmp1);
+	}
+	mpfree(M);
+	mpfree(S);
+	mpfree(ZZ);
+	mpfree(YYYY);
+	mpfree(YY);
+	mpfree(XX);
+}
+
+void
+jacobian_add(mpint *p, mpint *a, mpint *X1, mpint *Y1, mpint *Z1, mpint *X2, mpint *Y2, mpint *Z2, mpint *X3, mpint *Y3, mpint *Z3)
+{
+	/*
+	*	Z1Z1 = Z1^2;
+	*	Z2Z2 = Z2^2;
+	*	U1 = X1*Z2Z2;
+	*	U2 = X2*Z1Z1;
+	*	S1 = Y1*Z2*Z2Z2;
+	*	S2 = Y2*Z1*Z1Z1;
+	*	if(U1 == U2) {
+	*		if(S1 != S2) {
+	*			X3,Y3,Z3 = jacobian_inf();
+	*		} else {
+	*			X3,Y3,Z3 = jacobian_dbl(p,a, X1,Y1,Z1);
+	*		}
+	*	} else {
+	*		H = U2-U1;
+	*		I = (2*H)^2;
+	*		J = H*I;
+	*		r = 2*(S2-S1);
+	*		V = U1*I;
+	*		X3 = r^2-J-2*V;
+	*		Y3 = r*(V-X3)-2*S1*J;
+	*		Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H;
+	*	}
+	*/
+	mpint *V = mpnew(0);
+	mpint *r = mpnew(0);
+	mpint *J = mpnew(0);
+	mpint *I = mpnew(0);
+	mpint *H = mpnew(0);
+	mpint *S2 = mpnew(0);
+	mpint *S1 = mpnew(0);
+	mpint *U2 = mpnew(0);
+	mpint *U1 = mpnew(0);
+	mpint *Z2Z2 = mpnew(0);
+	mpint *Z1Z1 = mpnew(0);
+	mpmodmul(Z1, Z1, p, Z1Z1);
+	mpmodmul(Z2, Z2, p, Z2Z2);
+	mpmodmul(X1, Z2Z2, p, U1);
+	mpmodmul(X2, Z1Z1, p, U2);
+	mpint *tmp1 = mpnew(0);
+	mpmodmul(Y1, Z2, p, tmp1);
+	mpmodmul(tmp1, Z2Z2, p, S1);
+	mpfree(tmp1);
+	tmp1 = mpnew(0);
+	mpmodmul(Y2, Z1, p, tmp1);
+	mpmodmul(tmp1, Z1Z1, p, S2);
+	mpfree(tmp1);
+	if(mpcmp(U1, U2) == 0){
+		if(mpcmp(S1, S2) != 0)
+			jacobian_inf(X3, Y3, Z3);
+		else
+			jacobian_dbl(p, a, X1, Y1, Z1, X3, Y3, Z3);
+	}else{
+		mpmodsub(U2, U1, p, H);
+		mpmodadd(H, H, p, I); // 2*H
+		mpmodmul(I, I, p, I);
+		mpmodmul(H, I, p, J);
+		mpint *tmp2 = mpnew(0);
+		mpmodsub(S2, S1, p, tmp2);
+		mpmodadd(tmp2, tmp2, p, r); // 2*tmp2
+		mpfree(tmp2);
+		mpmodmul(U1, I, p, V);
+		mpmodmul(r, r, p, X3);
+		mpmodsub(X3, J, p, X3);
+		tmp2 = mpnew(0);
+		mpmodadd(V, V, p, tmp2); // 2*V
+		mpmodsub(X3, tmp2, p, X3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpmodsub(V, X3, p, tmp2);
+		mpmodmul(r, tmp2, p, Y3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpint *tmp3 = mpnew(0);
+		mpmodadd(S1, S1, p, tmp3); // 2*S1
+		mpmodmul(tmp3, J, p, tmp2);
+		mpfree(tmp3);
+		mpmodsub(Y3, tmp2, p, Y3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpmodadd(Z1, Z2, p, tmp2);
+		mpmodmul(tmp2, tmp2, p, tmp2);
+		mpmodsub(tmp2, Z1Z1, p, tmp2);
+		mpmodsub(tmp2, Z2Z2, p, tmp2);
+		mpmodmul(tmp2, H, p, Z3);
+		mpfree(tmp2);
+	}
+	mpfree(V);
+	mpfree(r);
+	mpfree(J);
+	mpfree(I);
+	mpfree(H);
+	mpfree(S2);
+	mpfree(S1);
+	mpfree(U2);
+	mpfree(U1);
+	mpfree(Z2Z2);
+	mpfree(Z1Z1);
+}
--- sys/src/libsec/port/md4.c	Fri Mar  1 21:12:59 2002
+++ /sys/src/libsec/port/md4.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <libsec.h>
 
 /*
--- sys/src/libsec/port/md4test.c	Fri Mar 16 03:42:02 2001
+++ /sys/src/libsec/port/md4test.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/md5.c	Tue Aug 28 20:13:48 2007
+++ /sys/src/libsec/port/md5.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /*
--- sys/src/libsec/port/md5block.c	Tue Mar 20 22:48:05 2001
+++ /sys/src/libsec/port/md5block.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /*
--- sys/src/libsec/port/mkfile	Thu Jul  1 17:28:57 2021
+++ /sys/src/libsec/port/mkfile	Sat Jun 26 21:21:41 2021
@@ -3,11 +3,15 @@
 LIB=/$objtype/lib/libsec.a
 
 CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
-	aes.c blowfish.c chacha.c \
+	aes.c aesni.c aesCBC.c aesCFB.c aesCTR.c aesOFB.c aes_gcm.c \
+	blowfish.c \
 	hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
 	sha2_64.c sha2_128.c sha2block64.c sha2block128.c\
 	sha1pickle.c md5pickle.c\
+	poly1305.c\
 	rc4.c\
+	chacha.c chachablock.c\
+	salsa.c\
 	genrandom.c prng.c fastrand.c nfastrand.c\
 	probably_prime.c smallprimetest.c genprime.c dsaprimes.c\
 	gensafeprime.c genstrongprime.c\
@@ -17,7 +21,23 @@
 	egsign.c egverify.c \
 	dsagen.c dsaalloc.c dsaprivtopub.c dsasign.c dsaverify.c \
 	tlshand.c thumb.c readcert.c \
+	aes_xts.c  \
+	ecc.c\
+	jacobian.c\
+	ripemd.c\
+	dh.c\
+	curve25519.c\
+	curve25519_dh.c\
 	pbkdf2.c\
+	scrypt.c\
+	hkdf.c\
+	ccpoly.c\
+	tsmemcmp.c\
+	secp256r1.c\
+	secp384r1.c\
+	secp256k1.c\
+
+# CLEANFILES=secp256r1.c secp384r1.c secp256k1.c jacobian.c
 
 ALLOFILES=${CFILES:%.c=%.$O}
 
@@ -35,4 +55,10 @@
 </sys/src/cmd/mksyslib
 
 $O.rsatest: rsatest.$O
+	$LD -o $target $prereq
+
+$O.chachatest: chachatest.$O
+	$LD -o $target $prereq
+
+$O.aesgcmtest: aesgcmtest.$O
 	$LD -o $target $prereq
--- sys/src/libsec/port/nfastrand.c	Thu Sep 18 03:53:15 2003
+++ /sys/src/libsec/port/nfastrand.c	Sat Jun 26 21:21:41 2021
@@ -1,5 +1,6 @@
 #include <u.h>
 #include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 #define Maxrand	((1UL<<31)-1)
--- sys/src/libsec/port/pbkdf2.c	Thu Jul  1 17:28:55 2021
+++ /sys/src/libsec/port/pbkdf2.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 /* rfc2898 */
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/poly1305.c	Sat Jun 26 21:21:41 2021
@@ -0,0 +1,197 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*
+	poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
+
+	derived from http://github.com/floodberry/poly1305-donna
+*/
+
+#define U8TO32(p)	((u32int)(p)[0] | (u32int)(p)[1]<<8 | (u32int)(p)[2]<<16 | (u32int)(p)[3]<<24)
+#define U32TO8(p, v)	(p)[0]=(v), (p)[1]=(v)>>8, (p)[2]=(v)>>16, (p)[3]=(v)>>24
+
+/* (r,s) = (key[0:15],key[16:31]), the one time key */
+DigestState*
+poly1305(uchar *m, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s)
+{
+	u32int r0,r1,r2,r3,r4, s1,s2,s3,s4, h0,h1,h2,h3,h4, g0,g1,g2,g3,g4;
+	u64int d0,d1,d2,d3,d4, f;
+	u32int hibit, mask, c;
+
+	if(s == nil){
+		s = malloc(sizeof(*s));
+		if(s == nil)
+			return nil;
+		memset(s, 0, sizeof(*s));
+		s->malloced = 1;
+	}
+
+	if(s->seeded == 0){
+		assert(klen == 32);
+
+		/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+		s->state[0] = (U8TO32(&key[ 0])     ) & 0x3ffffff;
+		s->state[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
+		s->state[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
+		s->state[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
+		s->state[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
+
+		/* h = 0 */
+		s->state[5] = 0;
+		s->state[6] = 0;
+		s->state[7] = 0;
+		s->state[8] = 0;
+		s->state[9] = 0;
+
+		/* save pad for later */
+		s->state[10] = U8TO32(&key[16]);
+		s->state[11] = U8TO32(&key[20]);
+		s->state[12] = U8TO32(&key[24]);
+		s->state[13] = U8TO32(&key[28]);
+
+		s->seeded = 1;
+	}
+
+	if(s->blen){
+		c = 16 - s->blen;
+		if(c > len)
+			c = len;
+		memmove(s->buf + s->blen, m, c);
+		len -= c, m += c;
+		s->blen += c;
+		if(s->blen == 16){
+			s->blen = 0;
+			poly1305(s->buf, 16, key, klen, nil, s);
+		} else if(len == 0){
+			m = s->buf;
+			len = s->blen;
+			s->blen = 0;
+		}
+	}
+
+	r0 = s->state[0];
+	r1 = s->state[1];
+	r2 = s->state[2];
+	r3 = s->state[3];
+	r4 = s->state[4];
+
+	h0 = s->state[5];
+	h1 = s->state[6];
+	h2 = s->state[7];
+	h3 = s->state[8];
+	h4 = s->state[9];
+
+	s1 = r1 * 5;
+	s2 = r2 * 5;
+	s3 = r3 * 5;
+	s4 = r4 * 5;
+
+	hibit = 1<<24;	/* 1<<128 */
+
+	while(len >= 16){
+Block:
+		/* h += m[i] */
+		h0 += (U8TO32(&m[0])     ) & 0x3ffffff;
+		h1 += (U8TO32(&m[3]) >> 2) & 0x3ffffff;
+		h2 += (U8TO32(&m[6]) >> 4) & 0x3ffffff;
+		h3 += (U8TO32(&m[9]) >> 6) & 0x3ffffff;
+		h4 += (U8TO32(&m[12])>> 8) | hibit;
+
+		/* h *= r */
+		d0 = ((u64int)h0 * r0) + ((u64int)h1 * s4) + ((u64int)h2 * s3) + ((u64int)h3 * s2) + ((u64int)h4 * s1);
+		d1 = ((u64int)h0 * r1) + ((u64int)h1 * r0) + ((u64int)h2 * s4) + ((u64int)h3 * s3) + ((u64int)h4 * s2);
+		d2 = ((u64int)h0 * r2) + ((u64int)h1 * r1) + ((u64int)h2 * r0) + ((u64int)h3 * s4) + ((u64int)h4 * s3);
+		d3 = ((u64int)h0 * r3) + ((u64int)h1 * r2) + ((u64int)h2 * r1) + ((u64int)h3 * r0) + ((u64int)h4 * s4);
+		d4 = ((u64int)h0 * r4) + ((u64int)h1 * r3) + ((u64int)h2 * r2) + ((u64int)h3 * r1) + ((u64int)h4 * r0);
+
+		/* (partial) h %= p */
+		              c = (u32int)(d0 >> 26); h0 = (u32int)d0 & 0x3ffffff;
+		d1 += c;      c = (u32int)(d1 >> 26); h1 = (u32int)d1 & 0x3ffffff;
+		d2 += c;      c = (u32int)(d2 >> 26); h2 = (u32int)d2 & 0x3ffffff;
+		d3 += c;      c = (u32int)(d3 >> 26); h3 = (u32int)d3 & 0x3ffffff;
+		d4 += c;      c = (u32int)(d4 >> 26); h4 = (u32int)d4 & 0x3ffffff;
+		h0 += c * 5;  c = (h0 >> 26); h0 = h0 & 0x3ffffff;
+		h1 += c;
+
+		len -= 16, m += 16;
+	}
+
+	if(len){
+		s->blen = len;
+		memmove(s->buf, m, len);
+	}
+
+	if(digest == nil){
+		s->state[5] = h0;
+		s->state[6] = h1;
+		s->state[7] = h2;
+		s->state[8] = h3;
+		s->state[9] = h4;
+		return s;
+	}
+
+	if(len){
+		m = s->buf;
+		m[len++] = 1;
+		while(len < 16)
+			m[len++] = 0;
+		hibit = 0;
+		goto Block;
+	}
+
+	             c = h1 >> 26; h1 = h1 & 0x3ffffff;
+	h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
+	h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
+	h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
+	h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
+	h1 +=     c;
+
+	/* compute h + -p */
+	g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
+	g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
+	g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
+	g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
+	g4 = h4 + c - (1 << 26);
+
+	/* select h if h < p, or h + -p if h >= p */
+	mask = (g4 >> 31) - 1;
+	g0 &= mask;
+	g1 &= mask;
+	g2 &= mask;
+	g3 &= mask;
+	g4 &= mask;
+	mask = ~mask;
+	h0 = (h0 & mask) | g0;
+	h1 = (h1 & mask) | g1;
+	h2 = (h2 & mask) | g2;
+	h3 = (h3 & mask) | g3;
+	h4 = (h4 & mask) | g4;
+
+	/* h = h % (2^128) */
+	h0 = (h0      ) | (h1 << 26);
+	h1 = (h1 >>  6) | (h2 << 20);
+	h2 = (h2 >> 12) | (h3 << 14);
+	h3 = (h3 >> 18) | (h4 <<  8);
+	
+	/* digest = (h + pad) % (2^128) */
+	f = (u64int)h0 + s->state[10]            ; h0 = (u32int)f;
+	f = (u64int)h1 + s->state[11] + (f >> 32); h1 = (u32int)f;
+	f = (u64int)h2 + s->state[12] + (f >> 32); h2 = (u32int)f;
+	f = (u64int)h3 + s->state[13] + (f >> 32); h3 = (u32int)f;
+
+	U32TO8(&digest[0], h0);
+	U32TO8(&digest[4], h1);
+	U32TO8(&digest[8], h2);
+	U32TO8(&digest[12], h3);
+
+	if(s->malloced){
+		memset(s, 0, sizeof(*s));
+		free(s);
+		return nil;
+	}
+
+	memset(s, 0, sizeof(*s));
+	return nil;
+}
--- sys/src/libsec/port/primetest.c	Fri Mar 16 03:42:03 2001
+++ /sys/src/libsec/port/primetest.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -10,7 +11,7 @@
 	mpint *q = mpnew(0);
 	mpint *nine = mpnew(0);
 
-	fmtinstall('B', mpconv);
+	fmtinstall('B', mpfmt);
 	strtomp("2492491", nil, 16, z);	// 38347921 = x*y = (2**28-9)/7, 
 				//    an example of 3**(n-1)=1 mod n
 	strtomp("15662C00E811", nil, 16, p);// 23528569104401, a prime
--- sys/src/libsec/port/prng.c	Fri Mar 16 03:42:03 2001
+++ /sys/src/libsec/port/prng.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/probably_prime.c	Sun Mar  7 21:36:11 2010
+++ /sys/src/libsec/port/probably_prime.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -21,10 +22,10 @@
 		nrep = 18;
 
 	k = mptoi(n);
-	if(k == 2)		/* 2 is prime */
-		return 1;
 	if(k < 2)		/* 1 is not prime */
 		return 0;
+	if(k == 2 || k == 3)	/* 2, 3 is prime */
+		return 1;
 	if((n->p[0] & 1) == 0)	/* even is not prime */
 		return 0;
 
--- sys/src/libsec/port/rc4.c	Fri Mar  1 21:13:00 2002
+++ /sys/src/libsec/port/rc4.c	Sat Jun 26 21:21:41 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 void
--- sys/src/libsec/port/readcert.c	Wed Aug 15 21:43:32 2007
+++ /sys/src/libsec/port/readcert.c	Sat Jun 26 21:21:41 2021
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <auth.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -11,7 +10,7 @@
 	char *s;
 	Dir *d;
 
-	fd = open(name, OREAD);
+	fd = open(name, OREAD|OCEXEC);
 	if(fd < 0)
 		return nil;
 	if((d = dirfstat(fd)) == nil) {
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/ripemd.c	Sat Jun 26 21:21:42 2021
@@ -0,0 +1,385 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+
+#include <libsec.h>
+
+#define BYTES_TO_DWORD(strptr)                    \
+            (((u32int) *((strptr)+3) << 24) | \
+             ((u32int) *((strptr)+2) << 16) | \
+             ((u32int) *((strptr)+1) <<  8) | \
+             ((u32int) *(strptr)))
+
+#define ROL(x, n)        (((x) << (n)) | ((x) >> (32-(n))))
+
+/* the five basic functions F(), G() and H() */
+#define F(x, y, z)        ((x) ^ (y) ^ (z)) 
+#define G(x, y, z)        (((x) & (y)) | (~(x) & (z))) 
+#define H(x, y, z)        (((x) | ~(y)) ^ (z))
+#define I(x, y, z)        (((x) & (z)) | ((y) & ~(z))) 
+#define J(x, y, z)        ((x) ^ ((y) | ~(z)))
+  
+/* the ten basic operations FF() through III() */
+#define FF(a, b, c, d, e, x, s)        {\
+      (a) += F((b), (c), (d)) + (x);\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define GG(a, b, c, d, e, x, s)        {\
+      (a) += G((b), (c), (d)) + (x) + 0x5a827999UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define HH(a, b, c, d, e, x, s)        {\
+      (a) += H((b), (c), (d)) + (x) + 0x6ed9eba1UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define II(a, b, c, d, e, x, s)        {\
+      (a) += I((b), (c), (d)) + (x) + 0x8f1bbcdcUL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define JJ(a, b, c, d, e, x, s)        {\
+      (a) += J((b), (c), (d)) + (x) + 0xa953fd4eUL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define FFF(a, b, c, d, e, x, s)        {\
+      (a) += F((b), (c), (d)) + (x);\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define GGG(a, b, c, d, e, x, s)        {\
+      (a) += G((b), (c), (d)) + (x) + 0x7a6d76e9UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define HHH(a, b, c, d, e, x, s)        {\
+      (a) += H((b), (c), (d)) + (x) + 0x6d703ef3UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define III(a, b, c, d, e, x, s)        {\
+      (a) += I((b), (c), (d)) + (x) + 0x5c4dd124UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define JJJ(a, b, c, d, e, x, s)        {\
+      (a) += J((b), (c), (d)) + (x) + 0x50a28be6UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+
+
+static void MDinit(u32int *MDbuf)
+{
+   MDbuf[0] = 0x67452301UL;
+   MDbuf[1] = 0xefcdab89UL;
+   MDbuf[2] = 0x98badcfeUL;
+   MDbuf[3] = 0x10325476UL;
+   MDbuf[4] = 0xc3d2e1f0UL;
+
+   return;
+}
+
+static void compress(u32int *MDbuf, u32int *X)
+{
+   u32int aa = MDbuf[0],  bb = MDbuf[1],  cc = MDbuf[2],
+         dd = MDbuf[3],  ee = MDbuf[4];
+   u32int aaa = MDbuf[0], bbb = MDbuf[1], ccc = MDbuf[2],
+         ddd = MDbuf[3], eee = MDbuf[4];
+
+   /* round 1 */
+   FF(aa, bb, cc, dd, ee, X[ 0], 11);
+   FF(ee, aa, bb, cc, dd, X[ 1], 14);
+   FF(dd, ee, aa, bb, cc, X[ 2], 15);
+   FF(cc, dd, ee, aa, bb, X[ 3], 12);
+   FF(bb, cc, dd, ee, aa, X[ 4],  5);
+   FF(aa, bb, cc, dd, ee, X[ 5],  8);
+   FF(ee, aa, bb, cc, dd, X[ 6],  7);
+   FF(dd, ee, aa, bb, cc, X[ 7],  9);
+   FF(cc, dd, ee, aa, bb, X[ 8], 11);
+   FF(bb, cc, dd, ee, aa, X[ 9], 13);
+   FF(aa, bb, cc, dd, ee, X[10], 14);
+   FF(ee, aa, bb, cc, dd, X[11], 15);
+   FF(dd, ee, aa, bb, cc, X[12],  6);
+   FF(cc, dd, ee, aa, bb, X[13],  7);
+   FF(bb, cc, dd, ee, aa, X[14],  9);
+   FF(aa, bb, cc, dd, ee, X[15],  8);
+                             
+   /* round 2 */
+   GG(ee, aa, bb, cc, dd, X[ 7],  7);
+   GG(dd, ee, aa, bb, cc, X[ 4],  6);
+   GG(cc, dd, ee, aa, bb, X[13],  8);
+   GG(bb, cc, dd, ee, aa, X[ 1], 13);
+   GG(aa, bb, cc, dd, ee, X[10], 11);
+   GG(ee, aa, bb, cc, dd, X[ 6],  9);
+   GG(dd, ee, aa, bb, cc, X[15],  7);
+   GG(cc, dd, ee, aa, bb, X[ 3], 15);
+   GG(bb, cc, dd, ee, aa, X[12],  7);
+   GG(aa, bb, cc, dd, ee, X[ 0], 12);
+   GG(ee, aa, bb, cc, dd, X[ 9], 15);
+   GG(dd, ee, aa, bb, cc, X[ 5],  9);
+   GG(cc, dd, ee, aa, bb, X[ 2], 11);
+   GG(bb, cc, dd, ee, aa, X[14],  7);
+   GG(aa, bb, cc, dd, ee, X[11], 13);
+   GG(ee, aa, bb, cc, dd, X[ 8], 12);
+
+   /* round 3 */
+   HH(dd, ee, aa, bb, cc, X[ 3], 11);
+   HH(cc, dd, ee, aa, bb, X[10], 13);
+   HH(bb, cc, dd, ee, aa, X[14],  6);
+   HH(aa, bb, cc, dd, ee, X[ 4],  7);
+   HH(ee, aa, bb, cc, dd, X[ 9], 14);
+   HH(dd, ee, aa, bb, cc, X[15],  9);
+   HH(cc, dd, ee, aa, bb, X[ 8], 13);
+   HH(bb, cc, dd, ee, aa, X[ 1], 15);
+   HH(aa, bb, cc, dd, ee, X[ 2], 14);
+   HH(ee, aa, bb, cc, dd, X[ 7],  8);
+   HH(dd, ee, aa, bb, cc, X[ 0], 13);
+   HH(cc, dd, ee, aa, bb, X[ 6],  6);
+   HH(bb, cc, dd, ee, aa, X[13],  5);
+   HH(aa, bb, cc, dd, ee, X[11], 12);
+   HH(ee, aa, bb, cc, dd, X[ 5],  7);
+   HH(dd, ee, aa, bb, cc, X[12],  5);
+
+   /* round 4 */
+   II(cc, dd, ee, aa, bb, X[ 1], 11);
+   II(bb, cc, dd, ee, aa, X[ 9], 12);
+   II(aa, bb, cc, dd, ee, X[11], 14);
+   II(ee, aa, bb, cc, dd, X[10], 15);
+   II(dd, ee, aa, bb, cc, X[ 0], 14);
+   II(cc, dd, ee, aa, bb, X[ 8], 15);
+   II(bb, cc, dd, ee, aa, X[12],  9);
+   II(aa, bb, cc, dd, ee, X[ 4],  8);
+   II(ee, aa, bb, cc, dd, X[13],  9);
+   II(dd, ee, aa, bb, cc, X[ 3], 14);
+   II(cc, dd, ee, aa, bb, X[ 7],  5);
+   II(bb, cc, dd, ee, aa, X[15],  6);
+   II(aa, bb, cc, dd, ee, X[14],  8);
+   II(ee, aa, bb, cc, dd, X[ 5],  6);
+   II(dd, ee, aa, bb, cc, X[ 6],  5);
+   II(cc, dd, ee, aa, bb, X[ 2], 12);
+
+   /* round 5 */
+   JJ(bb, cc, dd, ee, aa, X[ 4],  9);
+   JJ(aa, bb, cc, dd, ee, X[ 0], 15);
+   JJ(ee, aa, bb, cc, dd, X[ 5],  5);
+   JJ(dd, ee, aa, bb, cc, X[ 9], 11);
+   JJ(cc, dd, ee, aa, bb, X[ 7],  6);
+   JJ(bb, cc, dd, ee, aa, X[12],  8);
+   JJ(aa, bb, cc, dd, ee, X[ 2], 13);
+   JJ(ee, aa, bb, cc, dd, X[10], 12);
+   JJ(dd, ee, aa, bb, cc, X[14],  5);
+   JJ(cc, dd, ee, aa, bb, X[ 1], 12);
+   JJ(bb, cc, dd, ee, aa, X[ 3], 13);
+   JJ(aa, bb, cc, dd, ee, X[ 8], 14);
+   JJ(ee, aa, bb, cc, dd, X[11], 11);
+   JJ(dd, ee, aa, bb, cc, X[ 6],  8);
+   JJ(cc, dd, ee, aa, bb, X[15],  5);
+   JJ(bb, cc, dd, ee, aa, X[13],  6);
+
+   /* parallel round 1 */
+   JJJ(aaa, bbb, ccc, ddd, eee, X[ 5],  8);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[14],  9);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 7],  9);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[ 0], 11);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 9], 13);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[ 2], 15);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[11], 15);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 4],  5);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[13],  7);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 6],  7);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[15],  8);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[ 8], 11);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 1], 14);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[10], 14);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 3], 12);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[12],  6);
+
+   /* parallel round 2 */
+   III(eee, aaa, bbb, ccc, ddd, X[ 6],  9); 
+   III(ddd, eee, aaa, bbb, ccc, X[11], 13);
+   III(ccc, ddd, eee, aaa, bbb, X[ 3], 15);
+   III(bbb, ccc, ddd, eee, aaa, X[ 7],  7);
+   III(aaa, bbb, ccc, ddd, eee, X[ 0], 12);
+   III(eee, aaa, bbb, ccc, ddd, X[13],  8);
+   III(ddd, eee, aaa, bbb, ccc, X[ 5],  9);
+   III(ccc, ddd, eee, aaa, bbb, X[10], 11);
+   III(bbb, ccc, ddd, eee, aaa, X[14],  7);
+   III(aaa, bbb, ccc, ddd, eee, X[15],  7);
+   III(eee, aaa, bbb, ccc, ddd, X[ 8], 12);
+   III(ddd, eee, aaa, bbb, ccc, X[12],  7);
+   III(ccc, ddd, eee, aaa, bbb, X[ 4],  6);
+   III(bbb, ccc, ddd, eee, aaa, X[ 9], 15);
+   III(aaa, bbb, ccc, ddd, eee, X[ 1], 13);
+   III(eee, aaa, bbb, ccc, ddd, X[ 2], 11);
+
+   /* parallel round 3 */
+   HHH(ddd, eee, aaa, bbb, ccc, X[15],  9);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 5],  7);
+   HHH(bbb, ccc, ddd, eee, aaa, X[ 1], 15);
+   HHH(aaa, bbb, ccc, ddd, eee, X[ 3], 11);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 7],  8);
+   HHH(ddd, eee, aaa, bbb, ccc, X[14],  6);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 6],  6);
+   HHH(bbb, ccc, ddd, eee, aaa, X[ 9], 14);
+   HHH(aaa, bbb, ccc, ddd, eee, X[11], 12);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 8], 13);
+   HHH(ddd, eee, aaa, bbb, ccc, X[12],  5);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 2], 14);
+   HHH(bbb, ccc, ddd, eee, aaa, X[10], 13);
+   HHH(aaa, bbb, ccc, ddd, eee, X[ 0], 13);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 4],  7);
+   HHH(ddd, eee, aaa, bbb, ccc, X[13],  5);
+
+   /* parallel round 4 */   
+   GGG(ccc, ddd, eee, aaa, bbb, X[ 8], 15);
+   GGG(bbb, ccc, ddd, eee, aaa, X[ 6],  5);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 4],  8);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 1], 11);
+   GGG(ddd, eee, aaa, bbb, ccc, X[ 3], 14);
+   GGG(ccc, ddd, eee, aaa, bbb, X[11], 14);
+   GGG(bbb, ccc, ddd, eee, aaa, X[15],  6);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 0], 14);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 5],  6);
+   GGG(ddd, eee, aaa, bbb, ccc, X[12],  9);
+   GGG(ccc, ddd, eee, aaa, bbb, X[ 2], 12);
+   GGG(bbb, ccc, ddd, eee, aaa, X[13],  9);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 9], 12);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 7],  5);
+   GGG(ddd, eee, aaa, bbb, ccc, X[10], 15);
+   GGG(ccc, ddd, eee, aaa, bbb, X[14],  8);
+
+   /* parallel round 5 */
+   FFF(bbb, ccc, ddd, eee, aaa, X[12] ,  8);
+   FFF(aaa, bbb, ccc, ddd, eee, X[15] ,  5);
+   FFF(eee, aaa, bbb, ccc, ddd, X[10] , 12);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 4] ,  9);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 1] , 12);
+   FFF(bbb, ccc, ddd, eee, aaa, X[ 5] ,  5);
+   FFF(aaa, bbb, ccc, ddd, eee, X[ 8] , 14);
+   FFF(eee, aaa, bbb, ccc, ddd, X[ 7] ,  6);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 6] ,  8);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 2] , 13);
+   FFF(bbb, ccc, ddd, eee, aaa, X[13] ,  6);
+   FFF(aaa, bbb, ccc, ddd, eee, X[14] ,  5);
+   FFF(eee, aaa, bbb, ccc, ddd, X[ 0] , 15);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 3] , 13);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 9] , 11);
+   FFF(bbb, ccc, ddd, eee, aaa, X[11] , 11);
+
+   /* combine results */
+   ddd += cc + MDbuf[1];               /* final result for MDbuf[0] */
+   MDbuf[1] = MDbuf[2] + dd + eee;
+   MDbuf[2] = MDbuf[3] + ee + aaa;
+   MDbuf[3] = MDbuf[4] + aa + bbb;
+   MDbuf[4] = MDbuf[0] + bb + ccc;
+   MDbuf[0] = ddd;
+
+   return;
+}
+
+static void MDfinish(u32int *MDbuf, uchar *strptr, u32int lswlen, u32int mswlen)
+{
+   unsigned int i;                                 /* counter       */
+   u32int        X[16];                             /* message words */
+
+   memset(X, 0, 16*sizeof(u32int));
+
+   /* put bytes from strptr into X */
+   for (i=0; i<(lswlen&63); i++) {
+      /* byte i goes into word X[i div 4] at pos.  8*(i mod 4)  */
+      X[i>>2] ^= (u32int) *strptr++ << (8 * (i&3));
+   }
+
+   /* append the bit m_n == 1 */
+   X[(lswlen>>2)&15] ^= (u32int)1 << (8*(lswlen&3) + 7);
+
+   if ((lswlen & 63) > 55) {
+      /* length goes to next block */
+      compress(MDbuf, X);
+      memset(X, 0, 16*sizeof(u32int));
+   }
+
+   /* append length in bits*/
+   X[14] = lswlen << 3;
+   X[15] = (lswlen >> 29) | (mswlen << 3);
+   compress(MDbuf, X);
+
+   return;
+}
+
+DigestState*
+ripemd160(uchar *p, ulong len, uchar *digest, DigestState *s)
+{
+	u32int x[16];
+	int i, j, k;
+
+	if(s == nil){
+		s = malloc(sizeof(*s));
+		if(s == nil)
+			return nil;
+		memset(s, 0, sizeof(*s));
+		s->malloced = 1;
+	}
+
+	if(s->seeded == 0){
+		MDinit(s->state);
+		s->seeded = 1;
+	}
+
+	/* fill out the partial 64 byte block from previous calls */
+	if(s->blen){
+		i = 64 - s->blen;
+		if(len < i)
+			i = len;
+		memmove(s->buf + s->blen, p, i);
+		len -= i;
+		s->blen += i;
+		p += i;
+		if(s->blen == 64){
+			for(i = 0; i < 16; i++)
+				x[i] = BYTES_TO_DWORD(s->buf + i * 4);
+			compress(s->state, x);
+			s->len += s->blen;
+			s->blen = 0;
+		}
+	}
+
+	/* do 64 byte blocks */
+	i = len & ~0x3f;
+	if(i){
+		for(j = 0; j < i; j += 64){
+			for(k = 0; k < 16; k++)
+				x[k] = BYTES_TO_DWORD(p + j + k * 4);
+			compress(s->state, x);
+		}
+		s->len += i;
+		len -= i;
+		p += i;
+	}
+
+	/* save the left overs if not last call */
+	if(digest == 0){
+		if(len){
+			memmove(s->buf, p, len);
+			s->blen += len;
+		}
+		return s;
+	}
+
+	MDfinish(s->state, p, s->len + len, 0);
+	for(i = 0; i < 5; i++){
+		digest[4 * i] = s->state[i];
+		digest[4 * i + 1] = s->state[i] >> 8;
+		digest[4 * i + 2] = s->state[i] >> 16;
+		digest[4 * i + 3] = s->state[i] >> 24;
+
+	}
+	if(s->malloced == 1)
+		free(s);
+	return nil;
+
+}
--- sys/src/libsec/port/rsaalloc.c	Fri Mar 16 03:42:04 2001
+++ /sys/src/libsec/port/rsaalloc.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/rsadecrypt.c	Fri Mar 16 03:42:04 2001
+++ /sys/src/libsec/port/rsadecrypt.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/rsaencrypt.c	Fri Mar 16 03:42:04 2001
+++ /sys/src/libsec/port/rsaencrypt.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/rsafill.c	Mon Feb 17 18:16:11 2003
+++ /sys/src/libsec/port/rsafill.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/rsagen.c	Wed May 12 00:41:09 2004
+++ /sys/src/libsec/port/rsagen.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
@@ -26,9 +27,13 @@
 	// find an e relatively prime to phi
 	t1 = mpnew(0);
 	t2 = mpnew(0);
-	mprand(elen, genrandom, e);
-	if(mpcmp(e,mptwo) <= 0)
-		itomp(3, e);
+	if(elen == 0)
+		itomp(65537, e);
+	else {
+		mprand(elen, genrandom, e);
+		if(mpcmp(e,mptwo) <= 0)
+			itomp(3, e);
+	}
 	// See Menezes et al. p.291 "8.8 Note (selecting primes)" for discussion
 	// of the merits of various choices of primes and exponents.  e=3 is a
 	// common and recommended exponent, but doesn't necessarily work here
@@ -39,6 +44,8 @@
 			break;
 		mpadd(mpone, e, e);
 	}
+	if(d->sign < 0)
+		mpadd(phi, d, d);
 	mpfree(t1);
 	mpfree(t2);
 
--- sys/src/libsec/port/rsaprivtopub.c	Fri Mar 16 03:42:05 2001
+++ /sys/src/libsec/port/rsaprivtopub.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/rsatest.c	Sat May 31 22:53:08 2008
+++ /sys/src/libsec/port/rsatest.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 #include <bio.h>
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/salsa.c	Sat Jun 26 21:21:42 2021
@@ -0,0 +1,310 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/* little-endian data order */
+#define	GET4(p)		((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24))
+#define	PUT4(p,v)	(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24
+
+#define ROTATE(v,c) (t = v, (u32int)(t << (c)) | (t >> (32 - (c))))
+
+#define ENCRYPT(s, x, y, d) {\
+	u32int v; \
+	v = GET4(s); \
+	v ^= (x)+(y); \
+	PUT4(d, v); \
+}
+
+static uchar sigma[16] = "expand 32-byte k";
+static uchar tau[16] = "expand 16-byte k";
+
+static void
+load(u32int *d, uchar *s, int nw)
+{
+	int i;
+
+	for(i = 0; i < nw; i++, s+=4)
+		d[i] = GET4(s);
+}
+
+void
+setupSalsastate(Salsastate *s, uchar *key, ulong keylen, uchar *iv, ulong ivlen, int rounds)
+{
+	if(keylen != 256/8 && keylen != 128/8)
+		sysfatal("invalid salsa key length");
+	if(ivlen != 64/8
+	&& ivlen != 128/8 && ivlen != 192/8)	/* hsalsa, xsalsa */
+		sysfatal("invalid salsa iv length");
+	if(rounds == 0)
+		rounds = 20;
+	s->rounds = rounds;
+	if(keylen == 256/8) { /* recommended */
+		load(&s->input[0],  sigma+4*0, 1);
+		load(&s->input[1],  key +16*0, 4);
+		load(&s->input[5],  sigma+4*1, 1);
+		load(&s->input[10], sigma+4*2, 1);
+		load(&s->input[11], key +16*1, 4);
+		load(&s->input[15], sigma+4*3, 1);
+	}else{
+		load(&s->input[0],  tau +4*0, 1);
+		load(&s->input[1],  key, 4);
+		load(&s->input[5],  tau +4*1, 1);
+		load(&s->input[10], tau +4*2, 1);
+		load(&s->input[11], key, 4);
+		load(&s->input[15], tau +4*3, 1);
+	}
+	s->xkey[0] = s->input[1];
+	s->xkey[1] = s->input[2];
+	s->xkey[2] = s->input[3];
+	s->xkey[3] = s->input[4];
+	s->xkey[4] = s->input[11];
+	s->xkey[5] = s->input[12];
+	s->xkey[6] = s->input[13];
+	s->xkey[7] = s->input[14];
+
+	s->ivwords = ivlen/4;
+	s->input[8] = 0;
+	s->input[9] = 0;
+	if(iv == nil){
+		s->input[6] = 0;
+		s->input[7] = 0;
+	}else
+		salsa_setiv(s, iv);
+}
+
+static void
+dorounds(u32int x[16], int rounds)
+{
+	u32int t;
+
+	for(; rounds > 0; rounds -= 2) {
+	     x[4] ^= ROTATE( x[0]+x[12], 7);
+	     x[8] ^= ROTATE( x[4]+ x[0], 9);
+	    x[12] ^= ROTATE( x[8]+ x[4],13);
+	     x[0] ^= ROTATE(x[12]+ x[8],18);
+	     x[9] ^= ROTATE( x[5]+ x[1], 7);
+	    x[13] ^= ROTATE( x[9]+ x[5], 9);
+	     x[1] ^= ROTATE(x[13]+ x[9],13);
+	     x[5] ^= ROTATE( x[1]+x[13],18);
+	    x[14] ^= ROTATE(x[10]+ x[6], 7);
+	     x[2] ^= ROTATE(x[14]+x[10], 9);
+	     x[6] ^= ROTATE( x[2]+x[14],13);
+	    x[10] ^= ROTATE( x[6]+ x[2],18);
+	     x[3] ^= ROTATE(x[15]+x[11], 7);
+	     x[7] ^= ROTATE( x[3]+x[15], 9);
+	    x[11] ^= ROTATE( x[7]+ x[3],13);
+	    x[15] ^= ROTATE(x[11]+ x[7],18);
+	     x[1] ^= ROTATE( x[0]+ x[3], 7);
+	     x[2] ^= ROTATE( x[1]+ x[0], 9);
+	     x[3] ^= ROTATE( x[2]+ x[1],13);
+	     x[0] ^= ROTATE( x[3]+ x[2],18);
+	     x[6] ^= ROTATE( x[5]+ x[4], 7);
+	     x[7] ^= ROTATE( x[6]+ x[5], 9);
+	     x[4] ^= ROTATE( x[7]+ x[6],13);
+	     x[5] ^= ROTATE( x[4]+ x[7],18);
+	    x[11] ^= ROTATE(x[10]+ x[9], 7);
+	     x[8] ^= ROTATE(x[11]+x[10], 9);
+	     x[9] ^= ROTATE( x[8]+x[11],13);
+	    x[10] ^= ROTATE( x[9]+ x[8],18);
+	    x[12] ^= ROTATE(x[15]+x[14], 7);
+	    x[13] ^= ROTATE(x[12]+x[15], 9);
+	    x[14] ^= ROTATE(x[13]+x[12],13);
+	    x[15] ^= ROTATE(x[14]+x[13],18);
+	}
+}
+
+static void
+hsalsablock(uchar h[32], Salsastate *s)
+{
+	u32int x[16];
+
+	x[0] = s->input[0];
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	dorounds(x, s->rounds);
+
+	PUT4(h+0*4, x[0]);
+	PUT4(h+1*4, x[5]);
+	PUT4(h+2*4, x[10]);
+	PUT4(h+3*4, x[15]);
+	PUT4(h+4*4, x[6]);
+	PUT4(h+5*4, x[7]);
+	PUT4(h+6*4, x[8]);
+	PUT4(h+7*4, x[9]);
+}
+
+void
+salsa_setiv(Salsastate *s, uchar *iv)
+{
+	if(s->ivwords == 128/32){
+		/* hsalsa with 128-bit iv */
+		load(&s->input[6], iv, 4);
+		return;
+	}
+	if(s->ivwords == 192/32){
+		/* xsalsa with 192-bit iv */
+		u32int counter[2];
+		uchar h[32];
+
+		counter[0] = s->input[8];
+		counter[1] = s->input[9];
+
+		s->input[1] = s->xkey[0];
+		s->input[2] = s->xkey[1];
+		s->input[3] = s->xkey[2];
+		s->input[4] = s->xkey[3];
+		s->input[11] = s->xkey[4];
+		s->input[12] = s->xkey[5];
+		s->input[13] = s->xkey[6];
+		s->input[14] = s->xkey[7];
+
+		load(&s->input[6], iv, 4);
+
+		hsalsablock(h, s);
+		load(&s->input[1],  h+16*0, 4);
+		load(&s->input[11], h+16*1, 4);
+		memset(h, 0, 32);
+
+		s->input[8] = counter[0];
+		s->input[9] = counter[1];
+
+		iv += 16;
+	}
+	/* 64-bit iv */
+	load(&s->input[6], iv, 2);
+}
+
+void
+salsa_setblock(Salsastate *s, u64int blockno)
+{
+	s->input[8] = blockno;
+	s->input[9] = blockno>>32;
+}
+
+static void
+encryptblock(Salsastate *s, uchar *src, uchar *dst)
+{
+	u32int x[16];
+	int i;
+
+	x[0] = s->input[0];
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	dorounds(x, s->rounds);
+
+	for(i=0; i<nelem(x); i+=4){
+		ENCRYPT(src, x[i], s->input[i], dst);
+		ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
+		ENCRYPT(src+8, x[i+2], s->input[i+2], dst+8);
+		ENCRYPT(src+12, x[i+3], s->input[i+3], dst+12);
+		src += 16;
+		dst += 16;
+	}
+
+	if(++s->input[8] == 0)
+		s->input[9]++;
+}
+
+void
+salsa_encrypt2(uchar *src, uchar *dst, ulong bytes, Salsastate *s)
+{
+	uchar tmp[SalsaBsize];
+
+	for(; bytes >= SalsaBsize; bytes -= SalsaBsize){
+		encryptblock(s, src, dst);
+		src += SalsaBsize;
+		dst += SalsaBsize;
+	}
+	if(bytes > 0){
+		memmove(tmp, src, bytes);
+		encryptblock(s, tmp, tmp);
+		memmove(dst, tmp, bytes);
+	}
+}
+
+void
+salsa_encrypt(uchar *buf, ulong bytes, Salsastate *s)
+{
+	salsa_encrypt2(buf, buf, bytes, s);
+}
+
+void
+salsa_core(u32int in[16], u32int out[16], int rounds)
+{
+	u32int x[16];
+
+	x[0] = in[0];
+	x[1] = in[1];
+	x[2] = in[2];
+	x[3] = in[3];
+	x[4] = in[4];
+	x[5] = in[5];
+	x[6] = in[6];
+	x[7] = in[7];
+	x[8] = in[8];
+	x[9] = in[9];
+	x[10] = in[10];
+	x[11] = in[11];
+	x[12] = in[12];
+	x[13] = in[13];
+	x[14] = in[14];
+	x[15] = in[15];
+
+	dorounds(x, rounds);
+
+	out[0] = x[0] + in[0];
+	out[1] = x[1] + in[1];
+	out[2] = x[2] + in[2];
+	out[3] = x[3] + in[3];
+	out[4] = x[4] + in[4];
+	out[5] = x[5] + in[5];
+	out[6] = x[6] + in[6];
+	out[7] = x[7] + in[7];
+	out[8] = x[8] + in[8];
+	out[9] = x[9] + in[9];
+	out[10] = x[10] + in[10];
+	out[11] = x[11] + in[11];
+	out[12] = x[12] + in[12];
+	out[13] = x[13] + in[13];
+	out[14] = x[14] + in[14];
+	out[15] = x[15] + in[15];
+}
+
+void
+hsalsa(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds)
+{
+	Salsastate s[1];
+
+	setupSalsastate(s, key, keylen, nonce, 16, rounds);
+	hsalsablock(h, s);
+	memset(s, 0, sizeof(s));
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/scrypt.c	Sat Jun 26 21:21:42 2021
@@ -0,0 +1,121 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+#define movw(w, S, D)	memmove(D, S, (w)*4)
+
+static void
+xorw(ulong w, u32int *S, u32int *D)
+{
+	for(w /= 8; w; w--, D += 8, S += 8){
+		D[0] ^= S[0];
+		D[1] ^= S[1];
+		D[2] ^= S[2];
+		D[3] ^= S[3];
+		D[4] ^= S[4];
+		D[5] ^= S[5];
+		D[6] ^= S[6];
+		D[7] ^= S[7];
+	}
+}
+
+static void
+scryptBlockMix(ulong R, u32int *B, u32int *Y)
+{
+	u32int X[16];
+	ulong i;
+
+	R *= 2;
+	movw(16, &B[(R-1)*16], X);
+	for(i = 0; i < R; i += 2){
+		xorw(16, &B[i*16], X);
+		salsa_core(X, X, 8);
+		movw(16, X, &Y[i*8]);
+
+		xorw(16, &B[(i+1)*16], X);
+		salsa_core(X, X, 8);
+		movw(16, X, &Y[i*8 + R*8]);
+	}
+}
+
+static void
+scryptROMix(ulong R, ulong N, u32int *V, u32int *X, uchar *B)
+{
+	ulong w, i, d;
+	u32int *Y;
+
+	w = R*32;
+	for(i=0; i<w; i++, B+=4)
+		X[i] = B[0] | (B[1]<<8) | (B[2]<<16) | (B[3]<<24);
+
+	Y = &X[w];
+	for(i=0; i<N; i += 2){
+		movw(w, X, &V[i*w]);
+		scryptBlockMix(R, X, Y);
+
+		movw(w, Y, &V[(i+1)*w]);
+		scryptBlockMix(R, Y, X);
+	}
+	for(i=0; i<N; i += 2){
+		xorw(w, &V[(X[w-16] & (N-1))*w], X);
+		scryptBlockMix(R, X, Y);
+
+		xorw(w, &V[(Y[w-16] & (N-1))*w], Y);
+		scryptBlockMix(R, Y, X);
+	}
+
+	B -= w*4;
+	for(i=0; i<w; i++, B+=4)
+		d = X[i], B[0]=d, B[1]=d>>8, B[2]=d>>16, B[3]=d>>24;
+}
+
+char*
+scrypt(p, plen, s, slen, N, R, P, d, dlen)
+	ulong plen, slen, dlen, N, R, P;
+	uchar *p, *s, *d;
+{
+	static char oom[] = "out of memory";
+
+	ulong rb, i;
+	u32int *V, *X;
+	uchar *B;
+
+	if(P < 1)
+		return "invalid parallelization parameter P";
+	if(R < 1 || R >= (1UL<<(31-7))/P)
+		return "invalid block size parameter R";
+	if(N < 2 || (N & (N-1)) != 0 || N >= (1UL<<(31-7))/R)
+		return "invalid cpu/memory cost parameter N";
+
+	rb = R<<7;
+	if((B = malloc(P*rb)) == nil)
+		return oom;
+	if((V = malloc(N*rb)) == nil){
+		free(B);
+		return oom;
+	}
+	if((X = malloc(2*rb)) == nil){
+		free(V);
+		free(B);
+		return oom;
+	}
+
+	pbkdf2_x(p, plen, s, slen, 1, B, P*rb, hmac_sha2_256, SHA2_256dlen);
+
+	for(i=0; i<P; i++)
+		scryptROMix(R, N, V, X, &B[i*rb]);
+
+	memset(X, 0, 2*rb);
+	free(X);
+
+	memset(V, 0, N*rb);
+	free(V);
+
+	pbkdf2_x(p, plen, B, P*rb, 1, d, dlen, hmac_sha2_256, SHA2_256dlen);
+
+	memset(B, 0, P*rb);
+	free(B);
+
+	return nil;
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/secp256k1.c	Wed Jun 30 17:06:13 2021
@@ -0,0 +1,17 @@
+#include "os.h"
+#include <mp.h>
+
+/* E: y² = x³ + ax + b */
+
+void
+secp256k1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h)
+{
+	/* p = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1 */
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", nil, 16, p);
+	mpassign(mpzero, a);
+	uitomp(7UL, b);
+	strtomp("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", nil, 16, x);
+	strtomp("483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", nil, 16, y);
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", nil, 16, n);
+	mpassign(mpone, h);
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/secp256r1.c	Wed Jun 30 17:03:27 2021
@@ -0,0 +1,18 @@
+#include "os.h"
+#include <mp.h>
+
+/* E: y² = x³ + ax + b */
+
+void
+secp256r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h)
+{
+	/* p = 2^256 - 2^224 + 2^192 + 2^96 - 1 */
+	strtomp("FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", nil, 16, p);
+	uitomp(3UL, a);
+	mpsub(p, a, a);
+	strtomp("5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", nil, 16, b);
+	strtomp("6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", nil, 16, x);
+	strtomp("4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", nil, 16, y);
+	strtomp("FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", nil, 16, n);
+	mpassign(mpone, h);
+}
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/secp384r1.c	Wed Jun 30 17:08:30 2021
@@ -0,0 +1,17 @@
+#include "os.h"
+#include <mp.h>
+
+/* E: y² = x³ + ax + b */
+
+void
+secp384r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h){
+	/* p = 2^384 - 2^128 - 2^96 + 2^32 - 1 */
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", nil, 16, p);
+	uitomp(3UL, a);
+	mpsub(p, a, a);
+	strtomp("B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", nil, 16, b);
+	strtomp("AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", nil, 16, x);
+	strtomp("3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", nil, 16, y);
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", nil, 16, n);
+	mpassign(mpone, h);
+}
--- sys/src/libsec/port/sha1.c	Tue Aug 28 20:13:46 2007
+++ /sys/src/libsec/port/sha1.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 #include <libsec.h>
 
 static void encode(uchar*, u32int*, ulong);
--- sys/src/libsec/port/sha1block.c	Fri Mar  1 21:13:00 2002
+++ /sys/src/libsec/port/sha1block.c	Sat Jun 26 21:21:42 2021
@@ -1,12 +1,18 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+
+#define ROTL(x,n)	(((x)<<n)|((x)>>32-n))
+
+#define F0(x,y,z)	(0x5a827999 + ((z) ^ ((x) & ((y) ^ (z)))))
+#define F1(x,y,z)	(0x6ed9eba1 + ((x) ^ (y) ^ (z)))
+#define F2(x,y,z)	(0x8f1bbcdc + (((x) & (y)) | (((x) | (y)) & (z))))
+#define F3(x,y,z)	(0xca62c1d6 + ((x) ^ (y) ^ (z)))
 
 void
 _sha1block(uchar *p, ulong len, u32int *s)
 {
-	u32int a, b, c, d, e, x;
+	u32int w[16], a, b, c, d, e;
 	uchar *end;
-	u32int *wp, *wend;
-	u32int w[80];
 
 	/* at this point, we have a multiple of 64 bytes */
 	for(end = p+len; p < end;){
@@ -16,168 +22,113 @@
 		d = s[3];
 		e = s[4];
 
-		wend = w + 15;
-		for(wp = w; wp < wend; wp += 5){
-			wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x5a827999 + (((c^d)&b)^d);
-			b = (b<<30)|(b>>2);
-
-			wp[1] = (p[4]<<24) | (p[5]<<16) | (p[6]<<8) | p[7];
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x5a827999 + (((b^c)&a)^c);
-			a = (a<<30)|(a>>2);
-
-			wp[2] = (p[8]<<24) | (p[9]<<16) | (p[10]<<8) | p[11];
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x5a827999 + (((a^b)&e)^b);
-			e = (e<<30)|(e>>2);
-
-			wp[3] = (p[12]<<24) | (p[13]<<16) | (p[14]<<8) | p[15];
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x5a827999 + (((e^a)&d)^a);
-			d = (d<<30)|(d>>2);
-
-			wp[4] = (p[16]<<24) | (p[17]<<16) | (p[18]<<8) | p[19];
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x5a827999 + (((d^e)&c)^e);
-			c = (c<<30)|(c>>2);
-			
-			p += 20;
-		}
-
-		wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
-		e += ((a<<5) | (a>>27)) + wp[0];
-		e += 0x5a827999 + (((c^d)&b)^d);
-		b = (b<<30)|(b>>2);
-
-		x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-		wp[1] = (x<<1) | (x>>31);
-		d += ((e<<5) | (e>>27)) + wp[1];
-		d += 0x5a827999 + (((b^c)&a)^c);
-		a = (a<<30)|(a>>2);
-
-		x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-		wp[2] = (x<<1) | (x>>31);
-		c += ((d<<5) | (d>>27)) + wp[2];
-		c += 0x5a827999 + (((a^b)&e)^b);
-		e = (e<<30)|(e>>2);
-
-		x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-		wp[3] = (x<<1) | (x>>31);
-		b += ((c<<5) | (c>>27)) + wp[3];
-		b += 0x5a827999 + (((e^a)&d)^a);
-		d = (d<<30)|(d>>2);
-
-		x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-		wp[4] = (x<<1) | (x>>31);
-		a += ((b<<5) | (b>>27)) + wp[4];
-		a += 0x5a827999 + (((d^e)&c)^e);
-		c = (c<<30)|(c>>2);
-
-		wp += 5;
-		p += 4;
-
-		wend = w + 40;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x6ed9eba1 + (b^c^d);
-			b = (b<<30)|(b>>2);
-
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x6ed9eba1 + (a^b^c);
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x6ed9eba1 + (e^a^b);
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x6ed9eba1 + (d^e^a);
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x6ed9eba1 + (c^d^e);
-			c = (c<<30)|(c>>2);
-		}
-
-		wend = w + 60;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x8f1bbcdc + ((b&c)|((b|c)&d));
-			b = (b<<30)|(b>>2);
-
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x8f1bbcdc + ((a&b)|((a|b)&c));
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x8f1bbcdc + ((e&a)|((e|a)&b));
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x8f1bbcdc + ((d&e)|((d|e)&a));
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x8f1bbcdc + ((c&d)|((c|d)&e));
-			c = (c<<30)|(c>>2);
-		}
-
-		wend = w + 80;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0xca62c1d6 + (b^c^d);
-			b = (b<<30)|(b>>2);
-
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0xca62c1d6 + (a^b^c);
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0xca62c1d6 + (e^a^b);
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0xca62c1d6 + (d^e^a);
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0xca62c1d6 + (c^d^e);
-			c = (c<<30)|(c>>2);
-		}
+#define STEP(a,b,c,d,e,f,i) \
+	if(i < 16) {\
+		w[i] = p[0]<<24 | p[1]<<16 | p[2]<<8 | p[3]; \
+		p += 4; \
+	} else { \
+		u32int x = w[i-3&15] ^ w[i-8&15] ^ w[i-14&15] ^ w[i-16&15]; \
+		w[i&15] = ROTL(x, 1); \
+	} \
+	e += ROTL(a, 5) + w[i&15] + f(b,c,d); \
+	b = ROTL(b, 30);
+
+		STEP(a,b,c,d,e,F0,0);
+		STEP(e,a,b,c,d,F0,1);
+		STEP(d,e,a,b,c,F0,2);
+		STEP(c,d,e,a,b,F0,3);
+		STEP(b,c,d,e,a,F0,4);
+	
+		STEP(a,b,c,d,e,F0,5);
+		STEP(e,a,b,c,d,F0,6);
+		STEP(d,e,a,b,c,F0,7);
+		STEP(c,d,e,a,b,F0,8);
+		STEP(b,c,d,e,a,F0,9);
+	
+		STEP(a,b,c,d,e,F0,10);
+		STEP(e,a,b,c,d,F0,11);
+		STEP(d,e,a,b,c,F0,12);
+		STEP(c,d,e,a,b,F0,13);
+		STEP(b,c,d,e,a,F0,14);
+	
+		STEP(a,b,c,d,e,F0,15);
+		STEP(e,a,b,c,d,F0,16);
+		STEP(d,e,a,b,c,F0,17);
+		STEP(c,d,e,a,b,F0,18);
+		STEP(b,c,d,e,a,F0,19);
+	
+		STEP(a,b,c,d,e,F1,20);
+		STEP(e,a,b,c,d,F1,21);
+		STEP(d,e,a,b,c,F1,22);
+		STEP(c,d,e,a,b,F1,23);
+		STEP(b,c,d,e,a,F1,24);
+	
+		STEP(a,b,c,d,e,F1,25);
+		STEP(e,a,b,c,d,F1,26);
+		STEP(d,e,a,b,c,F1,27);
+		STEP(c,d,e,a,b,F1,28);
+		STEP(b,c,d,e,a,F1,29);
+	
+		STEP(a,b,c,d,e,F1,30);
+		STEP(e,a,b,c,d,F1,31);
+		STEP(d,e,a,b,c,F1,32);
+		STEP(c,d,e,a,b,F1,33);
+		STEP(b,c,d,e,a,F1,34);
+	
+		STEP(a,b,c,d,e,F1,35);
+		STEP(e,a,b,c,d,F1,36);
+		STEP(d,e,a,b,c,F1,37);
+		STEP(c,d,e,a,b,F1,38);
+		STEP(b,c,d,e,a,F1,39);
+	
+		STEP(a,b,c,d,e,F2,40);
+		STEP(e,a,b,c,d,F2,41);
+		STEP(d,e,a,b,c,F2,42);
+		STEP(c,d,e,a,b,F2,43);
+		STEP(b,c,d,e,a,F2,44);
+	
+		STEP(a,b,c,d,e,F2,45);
+		STEP(e,a,b,c,d,F2,46);
+		STEP(d,e,a,b,c,F2,47);
+		STEP(c,d,e,a,b,F2,48);
+		STEP(b,c,d,e,a,F2,49);
+	
+		STEP(a,b,c,d,e,F2,50);
+		STEP(e,a,b,c,d,F2,51);
+		STEP(d,e,a,b,c,F2,52);
+		STEP(c,d,e,a,b,F2,53);
+		STEP(b,c,d,e,a,F2,54);
+	
+		STEP(a,b,c,d,e,F2,55);
+		STEP(e,a,b,c,d,F2,56);
+		STEP(d,e,a,b,c,F2,57);
+		STEP(c,d,e,a,b,F2,58);
+		STEP(b,c,d,e,a,F2,59);
+	
+		STEP(a,b,c,d,e,F3,60);
+		STEP(e,a,b,c,d,F3,61);
+		STEP(d,e,a,b,c,F3,62);
+		STEP(c,d,e,a,b,F3,63);
+		STEP(b,c,d,e,a,F3,64);
+	
+		STEP(a,b,c,d,e,F3,65);
+		STEP(e,a,b,c,d,F3,66);
+		STEP(d,e,a,b,c,F3,67);
+		STEP(c,d,e,a,b,F3,68);
+		STEP(b,c,d,e,a,F3,69);
+	
+		STEP(a,b,c,d,e,F3,70);
+		STEP(e,a,b,c,d,F3,71);
+		STEP(d,e,a,b,c,F3,72);
+		STEP(c,d,e,a,b,F3,73);
+		STEP(b,c,d,e,a,F3,74);
+	
+		STEP(a,b,c,d,e,F3,75);
+		STEP(e,a,b,c,d,F3,76);
+		STEP(d,e,a,b,c,F3,77);
+		STEP(c,d,e,a,b,F3,78);
+		STEP(b,c,d,e,a,F3,79);
 
-		/* save state */
 		s[0] += a;
 		s[1] += b;
 		s[2] += c;
--- sys/src/libsec/port/sha2block128.c	Fri Mar 26 21:15:52 2010
+++ /sys/src/libsec/port/sha2block128.c	Sat Jun 26 21:21:42 2021
@@ -1,21 +1,21 @@
 /*
- * sha2_512 block cipher
+ * sha2_512 block cipher - unrolled version
  *
- * Implementation straight from Federal Information Processing Standards
- * publication 180-2 (+Change Notice to include SHA-224) August 1, 2002
  *   note: the following upper and lower case macro names are distinct
  *	   and reflect the functions defined in FIPS pub. 180-2.
  */
+
 #include <u.h>
 #include <libc.h>
+#include <mp.h>
 
 #define ROTR(x,n)	(((x) >> (n)) | ((x) << (64-(n))))
 #define sigma0(x)	(ROTR((x),1) ^ ROTR((x),8) ^ ((x) >> 7))
 #define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x) >> 6))
 #define SIGMA0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
 #define SIGMA1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
-#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
-#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Ch(x,y,z)	((z) ^ ((x) & ((y) ^ (z))))
+#define Maj(x,y,z)	(((x) | (y)) & ((z) | ((x) & (y))))
 
 /*
  * first 64 bits of the fractional parts of cube roots of
@@ -41,14 +41,13 @@
 	0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL, 0xf57d4f7fee6ed178LL,
 	0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL, 0x1b710b35131c471bLL,
 	0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL, 0x431d67c49c100d4cLL,
-	0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL, 0x6c44198c4a475817LL };
+	0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL, 0x6c44198c4a475817LL
+};
 
 void
 _sha2block128(uchar *p, ulong len, u64int *s)
 {
-	u64int a, b, c, d, e, f, g, h, t1, t2;
-	u64int *kp, *wp;
-	u64int w[80];
+	u64int w[16], a, b, c, d, e, f, g, h;
 	uchar *end;
 
 	/* at this point, we have a multiple of 64 bytes */
@@ -62,33 +61,111 @@
 		g = s[6];
 		h = s[7];
 
-		for(wp = w; wp < &w[16]; wp++, p += 8)
-			wp[0] = ((vlong)p[0])<<56 | ((vlong)p[1])<<48 |
-				((vlong)p[2])<<40 | ((vlong)p[3])<<32 |
-				p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
-		for(; wp < &w[80]; wp++) {
-			u64int s0, s1;
-
-			s0 = sigma0(wp[-15]);
-			s1 = sigma1(wp[-2]);
-//			wp[0] = sigma1(wp[-2]) + wp[-7] + sigma0(wp[-15]) + wp[-16];
-			wp[0] = s1 + wp[-7] + s0 + wp[-16];
-		}
-
-		for(kp = K512, wp = w; wp < &w[80]; ) {
-			t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
-			t2 = SIGMA0(a) + Maj(a,b,c);
-			h = g;
-			g = f;
-			f = e;
-			e = d + t1;
-			d = c;
-			c = b;
-			b = a;
-			a = t1 + t2;
-		}
+#define STEP(a,b,c,d,e,f,g,h,i) \
+	if(i < 16) { \
+		w[i] = 	(u64int)(p[0]<<24 | p[1]<<16 | p[2]<<8 | p[3])<<32 | \
+			(p[4]<<24 | p[5]<<16 | p[6]<<8 | p[7]); \
+		p += 8; \
+	} else { \
+		u64int s0, s1; \
+		s1 = sigma1(w[i-2&15]); \
+		s0 = sigma0(w[i-15&15]); \
+		w[i&15] += s1 + w[i-7&15] + s0; \
+	} \
+	h += SIGMA1(e) + Ch(e,f,g) + K512[i] + w[i&15]; \
+	d += h; \
+	h += SIGMA0(a) + Maj(a,b,c);
+
+		STEP(a,b,c,d,e,f,g,h,0);
+		STEP(h,a,b,c,d,e,f,g,1);
+		STEP(g,h,a,b,c,d,e,f,2);
+		STEP(f,g,h,a,b,c,d,e,3);
+		STEP(e,f,g,h,a,b,c,d,4);
+		STEP(d,e,f,g,h,a,b,c,5);
+		STEP(c,d,e,f,g,h,a,b,6);
+		STEP(b,c,d,e,f,g,h,a,7);
+
+		STEP(a,b,c,d,e,f,g,h,8);
+		STEP(h,a,b,c,d,e,f,g,9);
+		STEP(g,h,a,b,c,d,e,f,10);
+		STEP(f,g,h,a,b,c,d,e,11);
+		STEP(e,f,g,h,a,b,c,d,12);
+		STEP(d,e,f,g,h,a,b,c,13);
+		STEP(c,d,e,f,g,h,a,b,14);
+		STEP(b,c,d,e,f,g,h,a,15);
+
+		STEP(a,b,c,d,e,f,g,h,16);
+		STEP(h,a,b,c,d,e,f,g,17);
+		STEP(g,h,a,b,c,d,e,f,18);
+		STEP(f,g,h,a,b,c,d,e,19);
+		STEP(e,f,g,h,a,b,c,d,20);
+		STEP(d,e,f,g,h,a,b,c,21);
+		STEP(c,d,e,f,g,h,a,b,22);
+		STEP(b,c,d,e,f,g,h,a,23);
+
+		STEP(a,b,c,d,e,f,g,h,24);
+		STEP(h,a,b,c,d,e,f,g,25);
+		STEP(g,h,a,b,c,d,e,f,26);
+		STEP(f,g,h,a,b,c,d,e,27);
+		STEP(e,f,g,h,a,b,c,d,28);
+		STEP(d,e,f,g,h,a,b,c,29);
+		STEP(c,d,e,f,g,h,a,b,30);
+		STEP(b,c,d,e,f,g,h,a,31);
+
+		STEP(a,b,c,d,e,f,g,h,32);
+		STEP(h,a,b,c,d,e,f,g,33);
+		STEP(g,h,a,b,c,d,e,f,34);
+		STEP(f,g,h,a,b,c,d,e,35);
+		STEP(e,f,g,h,a,b,c,d,36);
+		STEP(d,e,f,g,h,a,b,c,37);
+		STEP(c,d,e,f,g,h,a,b,38);
+		STEP(b,c,d,e,f,g,h,a,39);
+
+		STEP(a,b,c,d,e,f,g,h,40);
+		STEP(h,a,b,c,d,e,f,g,41);
+		STEP(g,h,a,b,c,d,e,f,42);
+		STEP(f,g,h,a,b,c,d,e,43);
+		STEP(e,f,g,h,a,b,c,d,44);
+		STEP(d,e,f,g,h,a,b,c,45);
+		STEP(c,d,e,f,g,h,a,b,46);
+		STEP(b,c,d,e,f,g,h,a,47);
+
+		STEP(a,b,c,d,e,f,g,h,48);
+		STEP(h,a,b,c,d,e,f,g,49);
+		STEP(g,h,a,b,c,d,e,f,50);
+		STEP(f,g,h,a,b,c,d,e,51);
+		STEP(e,f,g,h,a,b,c,d,52);
+		STEP(d,e,f,g,h,a,b,c,53);
+		STEP(c,d,e,f,g,h,a,b,54);
+		STEP(b,c,d,e,f,g,h,a,55);
+
+		STEP(a,b,c,d,e,f,g,h,56);
+		STEP(h,a,b,c,d,e,f,g,57);
+		STEP(g,h,a,b,c,d,e,f,58);
+		STEP(f,g,h,a,b,c,d,e,59);
+		STEP(e,f,g,h,a,b,c,d,60);
+		STEP(d,e,f,g,h,a,b,c,61);
+		STEP(c,d,e,f,g,h,a,b,62);
+		STEP(b,c,d,e,f,g,h,a,63);
+
+		STEP(a,b,c,d,e,f,g,h,64);
+		STEP(h,a,b,c,d,e,f,g,65);
+		STEP(g,h,a,b,c,d,e,f,66);
+		STEP(f,g,h,a,b,c,d,e,67);
+		STEP(e,f,g,h,a,b,c,d,68);
+		STEP(d,e,f,g,h,a,b,c,69);
+		STEP(c,d,e,f,g,h,a,b,70);
+		STEP(b,c,d,e,f,g,h,a,71);
+
+		STEP(a,b,c,d,e,f,g,h,72);
+		STEP(h,a,b,c,d,e,f,g,73);
+		STEP(g,h,a,b,c,d,e,f,74);
+		STEP(f,g,h,a,b,c,d,e,75);
+		STEP(e,f,g,h,a,b,c,d,76);
+		STEP(d,e,f,g,h,a,b,c,77);
+		STEP(c,d,e,f,g,h,a,b,78);
+		STEP(b,c,d,e,f,g,h,a,79);
 
-		/* save state */
 		s[0] += a;
 		s[1] += b;
 		s[2] += c;
--- sys/src/libsec/port/sha2block64.c	Fri Mar 26 21:15:52 2010
+++ /sys/src/libsec/port/sha2block64.c	Sat Jun 26 21:21:42 2021
@@ -1,22 +1,21 @@
 /*
- * sha2_256 block cipher
+ * sha2_256 block cipher - unrolled version
  *
- * Implementation straight from Federal Information Processing Standards
- * publication 180-2 (+Change Notice to include SHA-224) August 1, 2002
  *   note: the following upper and lower case macro names are distinct
  *	   and reflect the functions defined in FIPS pub. 180-2.
  */
 
 #include <u.h>
 #include <libc.h>
+#include <mp.h>
 
 #define ROTR(x,n)	(((x) >> (n)) | ((x) << (32-(n))))
 #define sigma0(x)	(ROTR((x),7) ^ ROTR((x),18) ^ ((x) >> 3))
 #define sigma1(x)	(ROTR((x),17) ^ ROTR((x),19) ^ ((x) >> 10))
 #define SIGMA0(x)	(ROTR((x),2) ^ ROTR((x),13) ^ ROTR((x),22))
 #define SIGMA1(x)	(ROTR((x),6) ^ ROTR((x),11) ^ ROTR((x),25))
-#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
-#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Ch(x,y,z)	((z) ^ ((x) & ((y) ^ (z))))
+#define Maj(x,y,z)	(((x) | (y)) & ((z) | ((x) & (y))))
 
 /*
  * first 32 bits of the fractional parts of cube roots of
@@ -44,9 +43,7 @@
 void
 _sha2block64(uchar *p, ulong len, u32int *s)
 {
-	u32int a, b, c, d, e, f, g, h, t1, t2;
-	u32int *kp, *wp;
-	u32int w[64];
+	u32int w[16], a, b, c, d, e, f, g, h;
 	uchar *end;
 
 	/* at this point, we have a multiple of 64 bytes */
@@ -60,26 +57,89 @@
 		g = s[6];
 		h = s[7];
 
-		for(wp = w; wp < &w[16]; wp++, p += 4)
-			wp[0] = p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
-		for(; wp < &w[64]; wp++)
-			wp[0] = sigma1(wp[-2]) + wp[-7] +
-				sigma0(wp[-15]) + wp[-16];
-
-		for(kp = K256, wp = w; wp < &w[64]; ) {
-			t1 = h + SIGMA1(e) + Ch(e,f,g) + *kp++ + *wp++;
-			t2 = SIGMA0(a) + Maj(a,b,c);
-			h = g;
-			g = f;
-			f = e;
-			e = d + t1;
-			d = c;
-			c = b;
-			b = a;
-			a = t1 + t2;
-		}
+#define STEP(a,b,c,d,e,f,g,h,i) \
+	if(i < 16) {\
+		w[i] = p[0]<<24 | p[1]<<16 | p[2]<<8 | p[3]; \
+		p += 4; \
+	} else { \
+		w[i&15] += sigma1(w[i-2&15]) + w[i-7&15] + sigma0(w[i-15&15]); \
+	} \
+	h += SIGMA1(e) + Ch(e,f,g) + K256[i] + w[i&15]; \
+	d += h; \
+	h += SIGMA0(a) + Maj(a,b,c);
+
+		STEP(a,b,c,d,e,f,g,h,0);
+		STEP(h,a,b,c,d,e,f,g,1);
+		STEP(g,h,a,b,c,d,e,f,2);
+		STEP(f,g,h,a,b,c,d,e,3);
+		STEP(e,f,g,h,a,b,c,d,4);
+		STEP(d,e,f,g,h,a,b,c,5);
+		STEP(c,d,e,f,g,h,a,b,6);
+		STEP(b,c,d,e,f,g,h,a,7);
+
+		STEP(a,b,c,d,e,f,g,h,8);
+		STEP(h,a,b,c,d,e,f,g,9);
+		STEP(g,h,a,b,c,d,e,f,10);
+		STEP(f,g,h,a,b,c,d,e,11);
+		STEP(e,f,g,h,a,b,c,d,12);
+		STEP(d,e,f,g,h,a,b,c,13);
+		STEP(c,d,e,f,g,h,a,b,14);
+		STEP(b,c,d,e,f,g,h,a,15);
+
+		STEP(a,b,c,d,e,f,g,h,16);
+		STEP(h,a,b,c,d,e,f,g,17);
+		STEP(g,h,a,b,c,d,e,f,18);
+		STEP(f,g,h,a,b,c,d,e,19);
+		STEP(e,f,g,h,a,b,c,d,20);
+		STEP(d,e,f,g,h,a,b,c,21);
+		STEP(c,d,e,f,g,h,a,b,22);
+		STEP(b,c,d,e,f,g,h,a,23);
+
+		STEP(a,b,c,d,e,f,g,h,24);
+		STEP(h,a,b,c,d,e,f,g,25);
+		STEP(g,h,a,b,c,d,e,f,26);
+		STEP(f,g,h,a,b,c,d,e,27);
+		STEP(e,f,g,h,a,b,c,d,28);
+		STEP(d,e,f,g,h,a,b,c,29);
+		STEP(c,d,e,f,g,h,a,b,30);
+		STEP(b,c,d,e,f,g,h,a,31);
+
+		STEP(a,b,c,d,e,f,g,h,32);
+		STEP(h,a,b,c,d,e,f,g,33);
+		STEP(g,h,a,b,c,d,e,f,34);
+		STEP(f,g,h,a,b,c,d,e,35);
+		STEP(e,f,g,h,a,b,c,d,36);
+		STEP(d,e,f,g,h,a,b,c,37);
+		STEP(c,d,e,f,g,h,a,b,38);
+		STEP(b,c,d,e,f,g,h,a,39);
+
+		STEP(a,b,c,d,e,f,g,h,40);
+		STEP(h,a,b,c,d,e,f,g,41);
+		STEP(g,h,a,b,c,d,e,f,42);
+		STEP(f,g,h,a,b,c,d,e,43);
+		STEP(e,f,g,h,a,b,c,d,44);
+		STEP(d,e,f,g,h,a,b,c,45);
+		STEP(c,d,e,f,g,h,a,b,46);
+		STEP(b,c,d,e,f,g,h,a,47);
+
+		STEP(a,b,c,d,e,f,g,h,48);
+		STEP(h,a,b,c,d,e,f,g,49);
+		STEP(g,h,a,b,c,d,e,f,50);
+		STEP(f,g,h,a,b,c,d,e,51);
+		STEP(e,f,g,h,a,b,c,d,52);
+		STEP(d,e,f,g,h,a,b,c,53);
+		STEP(c,d,e,f,g,h,a,b,54);
+		STEP(b,c,d,e,f,g,h,a,55);
+
+		STEP(a,b,c,d,e,f,g,h,56);
+		STEP(h,a,b,c,d,e,f,g,57);
+		STEP(g,h,a,b,c,d,e,f,58);
+		STEP(f,g,h,a,b,c,d,e,59);
+		STEP(e,f,g,h,a,b,c,d,60);
+		STEP(d,e,f,g,h,a,b,c,61);
+		STEP(c,d,e,f,g,h,a,b,62);
+		STEP(b,c,d,e,f,g,h,a,63);
 
-		/* save state */
 		s[0] += a;
 		s[1] += b;
 		s[2] += c;
--- sys/src/libsec/port/smallprimes.c	Fri Mar 16 03:42:05 2001
+++ /sys/src/libsec/port/smallprimes.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,6 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
 
 ulong smallprimes[1000] = {
 	2,
--- sys/src/libsec/port/smallprimetest.c	Fri Mar 16 03:42:05 2001
+++ /sys/src/libsec/port/smallprimetest.c	Sat Jun 26 21:21:42 2021
@@ -1,4 +1,5 @@
-#include "os.h"
+#include <u.h>
+#include <libc.h>
 #include <mp.h>
 #include <libsec.h>
 
--- sys/src/libsec/port/thumb.c	Fri Apr 26 15:40:52 2002
+++ /sys/src/libsec/port/thumb.c	Sat Jun 26 21:21:42 2021
@@ -1,32 +1,26 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
-#include <auth.h>
 #include <mp.h>
+#include <bio.h>
 #include <libsec.h>
 
 enum{ ThumbTab = 1<<10 };
 
-static void *
-emalloc(int n)
+static Thumbprint*
+tablehead(uchar *hash, Thumbprint *table)
 {
-	void *p;
-	if(n==0)
-		n=1;
-	p = malloc(n);
-	if(p == nil){
-		exits("out of memory");
-	}
-	memset(p, 0, n);
-	return p;
+	return &table[((hash[0]<<8) + hash[1]) & (ThumbTab-1)];
 }
 
 void
 freeThumbprints(Thumbprint *table)
 {
 	Thumbprint *hd, *p, *q;
+
+	if(table == nil)
+		return;
 	for(hd = table; hd < table+ThumbTab; hd++){
-		for(p = hd->next; p; p = q){
+		for(p = hd->next; p && p != hd; p = q){
 			q = p->next;
 			free(p);
 		}
@@ -35,63 +29,144 @@
 }
 
 int
-okThumbprint(uchar *sum, Thumbprint *table)
+okThumbprint(uchar *hash, int len, Thumbprint *table)
 {
-	Thumbprint *p;
-	int i = ((sum[0]<<8) + sum[1]) & (ThumbTab-1);
+	Thumbprint *hd, *p;
 
-	for(p = table[i].next; p; p = p->next)
-		if(memcmp(sum, p->sha1, SHA1dlen) == 0)
+	if(table == nil)
+		return 0;
+	hd = tablehead(hash, table);
+	for(p = hd->next; p; p = p->next){
+		if(p->len == len && memcmp(hash, p->hash, len) == 0)
 			return 1;
+		if(p == hd)
+			break;
+	}
 	return 0;
 }
 
-static void
-loadThumbprints(char *file, Thumbprint *table, Thumbprint *crltab)
+int
+okCertificate(uchar *cert, int len, Thumbprint *table)
 {
-	Thumbprint *entry;
-	Biobuf *bin;
+	uchar hash[SHA2_256dlen];
+	char thumb[2*SHA2_256dlen+1];
+
+	if(table == nil){
+		werrstr("no thumbprints provided");
+		return 0;
+	}
+	if(cert == nil || len <= 0){
+		werrstr("no certificate provided");
+		return 0;
+	}
+
+	sha1(cert, len, hash, nil);
+	if(okThumbprint(hash, SHA1dlen, table))
+		return 1;
+
+	sha2_256(cert, len, hash, nil);
+	if(okThumbprint(hash, SHA2_256dlen, table))
+		return 1;
+
+	if(X509digestSPKI(cert, len, sha2_256, hash) < 0)
+		return 0;
+	if(okThumbprint(hash, SHA2_256dlen, table))
+		return 1;
+
+	len = enc64(thumb, sizeof(thumb), hash, SHA2_256dlen);
+	while(len > 0 && thumb[len-1] == '=')
+		len--;
+	thumb[len] = '\0';
+	werrstr("sha256=%s", thumb);
+
+	return 0;
+}
+
+static int
+loadThumbprints(char *file, char *tag, Thumbprint *table, Thumbprint *crltab, int depth)
+{
+	Thumbprint *hd, *entry;
 	char *line, *field[50];
-	uchar sum[SHA1dlen];
-	int i;
+	uchar hash[SHA2_256dlen];
+	Biobuf *bin;
+	int len, n;
 
-	bin = Bopen(file, OREAD);
-	if(bin == nil)
-		return;
-	for(; (line = Brdstr(bin, '\n', 1)) != 0; free(line)){
+	if(depth > 8){
+		werrstr("too many includes, last file %s", file);
+		return -1;
+	}
+	if(access(file, AEXIST) < 0)
+		return 0;	/* not an error */
+	if((bin = Bopen(file, OREAD|OCEXEC)) == nil)
+		return -1;
+	for(; (line = Brdstr(bin, '\n', 1)) != nil; free(line)){
 		if(tokenize(line, field, nelem(field)) < 2)
 			continue;
 		if(strcmp(field[0], "#include") == 0){
-			loadThumbprints(field[1], table, crltab);
+			if(loadThumbprints(field[1], tag, table, crltab, depth+1) < 0)
+				goto err;
 			continue;
 		}
-		if(strcmp(field[0], "x509") != 0 || strncmp(field[1], "sha1=", strlen("sha1=")) != 0)
+		if(strcmp(field[0], tag) != 0)
+			continue;
+		if(strncmp(field[1], "sha1=", 5) == 0){
+			field[1] += 5;
+			len = SHA1dlen;
+		} else if(strncmp(field[1], "sha256=", 7) == 0){
+			field[1] += 7;
+			len = SHA2_256dlen;
+		} else {
 			continue;
-		field[1] += strlen("sha1=");
-		dec16(sum, sizeof(sum), field[1], strlen(field[1]));
-		if(crltab && okThumbprint(sum, crltab))
+		}
+		n = strlen(field[1]);
+		if((n != len*2 || dec16(hash, len, field[1], n) != len)
+		&& dec64(hash, len, field[1], n) != len){
+			werrstr("malformed %s entry in %s: %s", tag, file, field[1]);
+			goto err;
+		}
+		if(crltab && okThumbprint(hash, len, crltab))
 			continue;
-		entry = (Thumbprint*)emalloc(sizeof(*entry));
-		memcpy(entry->sha1, sum, SHA1dlen);
-		i = ((sum[0]<<8) + sum[1]) & (ThumbTab-1);
-		entry->next = table[i].next;
-		table[i].next = entry;
+		hd = tablehead(hash, table);
+		if(hd->next == nil)
+			entry = hd;
+		else {
+			if((entry = malloc(sizeof(*entry))) == nil)
+				goto err;
+			entry->next = hd->next;
+		}
+		hd->next = entry;
+		entry->len = len;
+		memcpy(entry->hash, hash, len);
 	}
 	Bterm(bin);
+	return 0;
+err:
+	free(line);
+	Bterm(bin);
+	return -1;
 }
 
 Thumbprint *
-initThumbprints(char *ok, char *crl)
+initThumbprints(char *ok, char *crl, char *tag)
 {
-	Thumbprint *table, *crltab = nil;
+	Thumbprint *table, *crltab;
 
+	table = crltab = nil;
 	if(crl){
-		crltab = emalloc(ThumbTab * sizeof(*table));
-		loadThumbprints(crl, crltab, nil);
+		if((crltab = malloc(ThumbTab * sizeof(*crltab))) == nil)
+			goto err;
+		memset(crltab, 0, ThumbTab * sizeof(*crltab));
+		if(loadThumbprints(crl, tag, crltab, nil, 0) < 0)
+			goto err;
+	}
+	if((table = malloc(ThumbTab * sizeof(*table))) == nil)
+		goto err;
+	memset(table, 0, ThumbTab * sizeof(*table));
+	if(loadThumbprints(ok, tag, table, crltab, 0) < 0){
+		freeThumbprints(table);
+		table = nil;
 	}
-	table = emalloc(ThumbTab * sizeof(*table));
-	loadThumbprints(ok, table, crltab);
-	free(crltab);
+err:
+	freeThumbprints(crltab);
 	return table;
 }
-
--- sys/src/libsec/port/tlshand.c	Thu Jul  1 17:28:58 2021
+++ /sys/src/libsec/port/tlshand.c	Sat Jun 26 21:21:42 2021
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include <auth.h>
 #include <mp.h>
 #include <libsec.h>
@@ -17,25 +16,22 @@
 enum {
 	TLSFinishedLen = 12,
 	SSL3FinishedLen = MD5dlen+SHA1dlen,
-	MaxKeyData = 136,	// amount of secret we may need
-	MaxChunk = 1<<14,
+	MaxKeyData = 160,	// amount of secret we may need
+	MAXdlen = SHA2_512dlen,
 	RandomSize = 32,
-	SidSize = 32,
 	MasterSecretSize = 48,
 	AQueue = 0,
 	AFlush = 1,
 };
 
-typedef struct TlsSec TlsSec;
-
 typedef struct Bytes{
 	int len;
-	uchar data[1];  // [len]
+	uchar data[];
 } Bytes;
 
 typedef struct Ints{
 	int len;
-	int data[1];  // [len]
+	int data[];
 } Ints;
 
 typedef struct Algs{
@@ -46,46 +42,65 @@
 	int ok;
 } Algs;
 
+typedef struct Namedcurve{
+	int tlsid;
+	void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+} Namedcurve;
+
 typedef struct Finished{
 	uchar verify[SSL3FinishedLen];
 	int n;
 } Finished;
 
-typedef struct HandHash{
+typedef struct HandshakeHash {
 	MD5state	md5;
 	SHAstate	sha1;
 	SHA2_256state	sha2_256;
-} HandHash;
+} HandshakeHash;
+
+typedef struct TlsSec TlsSec;
+struct TlsSec {
+	RSApub *rsapub;
+	AuthRpc *rpc;	// factotum for rsa private key
+	uchar *psk;	// pre-shared key
+	int psklen;
+	int clientVers;			// version in ClientHello
+	uchar sec[MasterSecretSize];	// master secret
+	uchar crandom[RandomSize];	// client random
+	uchar srandom[RandomSize];	// server random
+
+	// diffie hellman state
+	DHstate dh;
+	struct {
+		ECdomain dom;
+		ECpriv Q;
+	} ec;
+
+	// byte generation and handshake checksum
+	void (*prf)(uchar*, int, uchar*, int, char*, uchar*, int, uchar*, int);
+	void (*setFinished)(TlsSec*, HandshakeHash, uchar*, int);
+	int nfin;
+};
 
 typedef struct TlsConnection{
-	TlsSec *sec;	// security management goo
+	TlsSec sec[1];	// security management goo
 	int hand, ctl;	// record layer file descriptors
 	int erred;		// set when tlsError called
 	int (*trace)(char*fmt, ...); // for debugging
 	int version;	// protocol we are speaking
-	int verset;		// version has been set
-	int ver2hi;		// server got a version 2 hello
-	int isClient;	// is this the client or server?
-	Bytes *sid;		// SessionID
-	Bytes *cert;	// only last - no chain
-
-	Lock statelk;
-	int state;		// must be set using setstate
-
-	// input buffer for handshake messages
-	uchar buf[MaxChunk+8*1024];
-	uchar *rp, *ep;
+	Bytes *cert;	// server certificate; only last - no chain
 
-	uchar crandom[RandomSize];	// client random
-	uchar srandom[RandomSize];	// server random
-	int clientVersion;	// version in ClientHello
-	char *digest;	// name of digest algorithm to use
-	char *enc;		// name of encryption algorithm to use
+	int cipher;
 	int nsecret;	// amount of secret data to init keys
+	char *digest;	// name of digest algorithm to use
+	char *enc;	// name of encryption algorithm to use
 
 	// for finished messages
-	HandHash	hs;	// handshake hash
+	HandshakeHash	handhash;
 	Finished	finished;
+
+	uchar *sendp;
+	uchar buf[1<<16];
 } TlsConnection;
 
 typedef struct Msg{
@@ -97,14 +112,15 @@
 			Bytes*	sid;
 			Ints*	ciphers;
 			Bytes*	compressors;
-			Ints*	sigAlgs;
+			Bytes*	extensions;
 		} clientHello;
 		struct {
 			int version;
-			uchar 	random[RandomSize];
+			uchar	random[RandomSize];
 			Bytes*	sid;
-			int cipher;
-			int compressor;
+			int	cipher;
+			int	compressor;
+			Bytes*	extensions;
 		} serverHello;
 		struct {
 			int ncert;
@@ -112,40 +128,40 @@
 		} certificate;
 		struct {
 			Bytes *types;
+			Ints *sigalgs;
 			int nca;
 			Bytes **cas;
 		} certificateRequest;
 		struct {
+			Bytes *pskid;
 			Bytes *key;
 		} clientKeyExchange;
+		struct {
+			Bytes *pskid;
+			Bytes *dh_p;
+			Bytes *dh_g;
+			Bytes *dh_Ys;
+			Bytes *dh_parameters;
+			Bytes *dh_signature;
+			int sigalg;
+			int curve;
+		} serverKeyExchange;
+		struct {
+			int sigalg;
+			Bytes *signature;
+		} certificateVerify;		
 		Finished finished;
 	} u;
 } Msg;
 
-typedef struct TlsSec{
-	char *server;	// name of remote; nil for server
-	int ok;	// <0 killed; == 0 in progress; >0 reusable
-	RSApub *rsapub;
-	AuthRpc *rpc;	// factotum for rsa private key
-	uchar sec[MasterSecretSize];	// master secret
-	uchar crandom[RandomSize];	// client random
-	uchar srandom[RandomSize];	// server random
-	int clientVers;		// version in ClientHello
-	int vers;			// final version
-	// byte generation and handshake checksum
-	void (*prf)(uchar*, int, uchar*, int, char*, uchar*, int, uchar*, int);
-	void (*setFinished)(TlsSec*, HandHash, uchar*, int);
-	int nfin;
-} TlsSec;
-
 
 enum {
-	SSL3Version  = 0x0300,
-	TLS10Version = 0x0301,
-	TLS11Version = 0x0302,
-	TLS12Version = 0x0303,
-	ProtocolVersion = TLS12Version,	// maximum version we speak
-	MinProtoVersion = 0x0300,	// limits on version we accept
+	SSL3Version	= 0x0300,
+	TLS10Version	= 0x0301,
+	TLS11Version	= 0x0302,
+	TLS12Version	= 0x0303,
+	ProtocolVersion	= TLS12Version,	// maximum version we speak
+	MinProtoVersion	= 0x0300,	// limits on version we accept
 	MaxProtoVersion	= 0x03ff,
 };
 
@@ -189,21 +205,23 @@
 	EProtocolVersion = 70,
 	EInsufficientSecurity = 71,
 	EInternalError = 80,
+	EInappropriateFallback = 86,
 	EUserCanceled = 90,
 	ENoRenegotiation = 100,
+	EUnknownPSKidentity = 115,
 	EMax = 256
 };
 
 // cipher suites
 enum {
-	TLS_NULL_WITH_NULL_NULL	 		= 0x0000,
-	TLS_RSA_WITH_NULL_MD5 			= 0x0001,
-	TLS_RSA_WITH_NULL_SHA 			= 0x0002,
-	TLS_RSA_EXPORT_WITH_RC4_40_MD5 		= 0x0003,
-	TLS_RSA_WITH_RC4_128_MD5 		= 0x0004,
-	TLS_RSA_WITH_RC4_128_SHA 		= 0x0005,
+	TLS_NULL_WITH_NULL_NULL			= 0x0000,
+	TLS_RSA_WITH_NULL_MD5			= 0x0001,
+	TLS_RSA_WITH_NULL_SHA			= 0x0002,
+	TLS_RSA_EXPORT_WITH_RC4_40_MD5		= 0x0003,
+	TLS_RSA_WITH_RC4_128_MD5		= 0x0004,
+	TLS_RSA_WITH_RC4_128_SHA		= 0x0005,
 	TLS_RSA_EXPORT_WITH_RC2_CBC_40_MD5	= 0X0006,
-	TLS_RSA_WITH_IDEA_CBC_SHA 		= 0X0007,
+	TLS_RSA_WITH_IDEA_CBC_SHA		= 0X0007,
 	TLS_RSA_EXPORT_WITH_DES40_CBC_SHA	= 0X0008,
 	TLS_RSA_WITH_DES_CBC_SHA		= 0X0009,
 	TLS_RSA_WITH_3DES_EDE_CBC_SHA		= 0X000A,
@@ -220,12 +238,11 @@
 	TLS_DHE_RSA_WITH_DES_CBC_SHA		= 0X0015,
 	TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA	= 0X0016,
 	TLS_DH_anon_EXPORT_WITH_RC4_40_MD5	= 0x0017,
-	TLS_DH_anon_WITH_RC4_128_MD5 		= 0x0018,
+	TLS_DH_anon_WITH_RC4_128_MD5		= 0x0018,
 	TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA	= 0X0019,
 	TLS_DH_anon_WITH_DES_CBC_SHA		= 0X001A,
 	TLS_DH_anon_WITH_3DES_EDE_CBC_SHA	= 0X001B,
-
-	TLS_RSA_WITH_AES_128_CBC_SHA		= 0X002f,	// aes, aka rijndael with 128 bit blocks
+	TLS_RSA_WITH_AES_128_CBC_SHA		= 0X002F,	// aes, aka rijndael with 128 bit blocks
 	TLS_DH_DSS_WITH_AES_128_CBC_SHA		= 0X0030,
 	TLS_DH_RSA_WITH_AES_128_CBC_SHA		= 0X0031,
 	TLS_DHE_DSS_WITH_AES_128_CBC_SHA	= 0X0032,
@@ -237,7 +254,44 @@
 	TLS_DHE_DSS_WITH_AES_256_CBC_SHA	= 0X0038,
 	TLS_DHE_RSA_WITH_AES_256_CBC_SHA	= 0X0039,
 	TLS_DH_anon_WITH_AES_256_CBC_SHA	= 0X003A,
-	CipherMax
+	TLS_RSA_WITH_AES_128_CBC_SHA256		= 0X003C,
+	TLS_RSA_WITH_AES_256_CBC_SHA256		= 0X003D,
+	TLS_DHE_RSA_WITH_AES_128_CBC_SHA256	= 0X0067,
+
+	TLS_RSA_WITH_AES_128_GCM_SHA256		= 0x009C,
+	TLS_RSA_WITH_AES_256_GCM_SHA384		= 0x009D,
+	TLS_DHE_RSA_WITH_AES_128_GCM_SHA256	= 0x009E,
+	TLS_DHE_RSA_WITH_AES_256_GCM_SHA384	= 0x009F,
+	TLS_DH_RSA_WITH_AES_128_GCM_SHA256	= 0x00A0,
+	TLS_DH_RSA_WITH_AES_256_GCM_SHA384	= 0x00A1,
+	TLS_DHE_DSS_WITH_AES_128_GCM_SHA256	= 0x00A2,
+	TLS_DHE_DSS_WITH_AES_256_GCM_SHA384	= 0x00A3,
+	TLS_DH_DSS_WITH_AES_128_GCM_SHA256	= 0x00A4,
+	TLS_DH_DSS_WITH_AES_256_GCM_SHA384	= 0x00A5,
+	TLS_DH_anon_WITH_AES_128_GCM_SHA256	= 0x00A6,
+	TLS_DH_anon_WITH_AES_256_GCM_SHA384	= 0x00A7,
+
+	TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B,
+	TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256	= 0xC02F,
+
+	TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA	= 0xC013,
+	TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA	= 0xC014,
+	TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256	= 0xC027,
+	TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256	= 0xC023,
+
+	TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305	= 0xCCA8,
+	TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305	= 0xCCA9,
+	TLS_DHE_RSA_WITH_CHACHA20_POLY1305	= 0xCCAA,
+
+	GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305		= 0xCC13,
+	GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305	= 0xCC14,
+	GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305		= 0xCC15,
+
+	TLS_PSK_WITH_CHACHA20_POLY1305		= 0xCCAB,
+	TLS_PSK_WITH_AES_128_CBC_SHA256		= 0x00AE,
+	TLS_PSK_WITH_AES_128_CBC_SHA		= 0x008C,
+
+	TLS_FALLBACK_SCSV = 0x5600,
 };
 
 // compression methods
@@ -246,39 +300,91 @@
 	CompressionMax
 };
 
-// extensions
-enum {
-	ExtSigalgs = 0xd,
-};
-
-// signature algorithms
-enum {
-	RSA_PKCS1_SHA1   = 0x0201,
-	RSA_PKCS1_SHA256 = 0x0401,
-	RSA_PKCS1_SHA384 = 0x0501,
-	RSA_PKCS1_SHA512 = 0x0601
-};
-
 static Algs cipherAlgs[] = {
-	{"rc4_128", "md5", 2*(16+MD5dlen), TLS_RSA_WITH_RC4_128_MD5},
-	{"rc4_128", "sha1", 2*(16+SHA1dlen), TLS_RSA_WITH_RC4_128_SHA},
-	{"3des_ede_cbc", "sha1", 2*(4*8+SHA1dlen), TLS_RSA_WITH_3DES_EDE_CBC_SHA},
+	// ECDHE-ECDSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256},
+
+	// ECDHE-RSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA},
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA},
+
+	// DHE-RSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_DHE_RSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_DHE_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_DHE_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_DHE_RSA_WITH_AES_128_CBC_SHA},
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_DHE_RSA_WITH_AES_256_CBC_SHA},
+	{"3des_ede_cbc","sha1",	2*(4*8+SHA1dlen), TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA},
+
+	// RSA
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_256_cbc", "sha256", 2*(32+16+SHA2_256dlen), TLS_RSA_WITH_AES_256_CBC_SHA256},
 	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_RSA_WITH_AES_128_CBC_SHA},
-	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_RSA_WITH_AES_256_CBC_SHA}
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_RSA_WITH_AES_256_CBC_SHA},
+	{"3des_ede_cbc","sha1",	2*(4*8+SHA1dlen), TLS_RSA_WITH_3DES_EDE_CBC_SHA},
+
+	// PSK
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_PSK_WITH_CHACHA20_POLY1305},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_PSK_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_PSK_WITH_AES_128_CBC_SHA},
 };
 
 static uchar compressors[] = {
 	CompressionNull,
 };
 
-static int sigAlgs[] = {
-	RSA_PKCS1_SHA256,
-	RSA_PKCS1_SHA1,
+static Namedcurve namedcurves[] = {
+	0x0017, secp256r1,
+	0x0018, secp384r1,
+};
+
+static uchar pointformats[] = {
+	CompressionNull /* support of uncompressed point format is mandatory */
+};
+
+static struct {
+	DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*);
+	int len;
+} hashfun[] = {
+/*	[0x00]  is reserved for MD5+SHA1 for < TLS1.2 */
+	[0x01]	{md5,		MD5dlen},
+	[0x02]	{sha1,		SHA1dlen},
+	[0x03]	{sha2_224,	SHA2_224dlen},
+	[0x04]	{sha2_256,	SHA2_256dlen},
+	[0x05]	{sha2_384,	SHA2_384dlen},
+	[0x06]	{sha2_512,	SHA2_512dlen},
 };
 
-static TlsConnection *tlsServer2(int ctl, int hand, uchar *cert, int ncert, int (*trace)(char*fmt, ...), PEMChain *chain);
-static TlsConnection *tlsClient2(int ctl, int hand, uchar *csid, int ncsid, int (*trace)(char*fmt, ...));
+// signature algorithms (only RSA and ECDSA at the moment)
+static int sigalgs[] = {
+	0x0603,		/* SHA512 ECDSA */
+	0x0503,		/* SHA384 ECDSA */
+	0x0403,		/* SHA256 ECDSA */
+	0x0203,		/* SHA1 ECDSA */
+
+	0x0601,		/* SHA512 RSA */
+	0x0501,		/* SHA384 RSA */
+	0x0401,		/* SHA256 RSA */
+	0x0201,		/* SHA1 RSA */
+};
 
+static TlsConnection *tlsServer2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	int (*trace)(char*fmt, ...), PEMChain *chain);
+static TlsConnection *tlsClient2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	uchar *ext, int extlen, int (*trace)(char*fmt, ...));
 static void	msgClear(Msg *m);
 static char* msgPrint(char *buf, int n, Msg *m);
 static int	msgRecv(TlsConnection *c, Msg *m);
@@ -286,54 +392,58 @@
 static void	tlsError(TlsConnection *c, int err, char *msg, ...);
 #pragma	varargck argpos	tlsError 3
 static int setVersion(TlsConnection *c, int version);
+static int setSecrets(TlsConnection *c, int isclient);
 static int finishedMatch(TlsConnection *c, Finished *f);
 static void tlsConnectionFree(TlsConnection *c);
 
+static int isDHE(int tlsid);
+static int isECDHE(int tlsid);
+static int isPSK(int tlsid);
+static int isECDSA(int tlsid);
+
 static int setAlgs(TlsConnection *c, int a);
-static int okCipher(Ints *cv);
+static int okCipher(Ints *cv, int ispsk);
 static int okCompression(Bytes *cv);
 static int initCiphers(void);
-static Ints* makeciphers(void);
+static Ints* makeciphers(int ispsk);
+
+static AuthRpc* factotum_rsa_open(RSApub *rsapub);
+static mpint* factotum_rsa_decrypt(AuthRpc *rpc, mpint *cipher);
+static void factotum_rsa_close(AuthRpc *rpc);
 
-static TlsSec* tlsSecInits(int cvers, uchar *csid, int ncsid, uchar *crandom, uchar *ssid, int *nssid, uchar *srandom);
-static int	tlsSecSecrets(TlsSec *sec, int vers, uchar *epm, int nepm, uchar *kd, int nkd);
-static TlsSec*	tlsSecInitc(int cvers, uchar *crandom);
-static int	tlsSecSecretc(TlsSec *sec, uchar *sid, int nsid, uchar *srandom, uchar *cert, int ncert, int vers, uchar **epm, int *nepm, uchar *kd, int nkd);
-static int	tlsSecFinished(TlsSec *sec, HandHash hs, uchar *fin, int nfin, int isclient);
-static void	tlsSecOk(TlsSec *sec);
-static void	tlsSecKill(TlsSec *sec);
-static void	tlsSecClose(TlsSec *sec);
+static void	tlsSecInits(TlsSec *sec, int cvers, uchar *crandom);
+static int	tlsSecRSAs(TlsSec *sec, Bytes *epm);
+static Bytes*	tlsSecECDHEs1(TlsSec *sec, Namedcurve *nc);
+static int	tlsSecECDHEs2(TlsSec *sec, Bytes *Yc);
+static void	tlsSecInitc(TlsSec *sec, int cvers);
+static Bytes*	tlsSecRSAc(TlsSec *sec, uchar *cert, int ncert);
+static Bytes*	tlsSecDHEc(TlsSec *sec, Bytes *p, Bytes *g, Bytes *Ys);
+static Bytes*	tlsSecECDHEc(TlsSec *sec, int curve, Bytes *Ys);
+static void	tlsSecVers(TlsSec *sec, int v);
+static int	tlsSecFinished(TlsSec *sec, HandshakeHash hsh, uchar *fin, int nfin, int isclient);
 static void	setMasterSecret(TlsSec *sec, Bytes *pm);
-static void	serverMasterSecret(TlsSec *sec, uchar *epm, int nepm);
-static void	setSecrets(TlsSec *sec, uchar *kd, int nkd);
-static int	clientMasterSecret(TlsSec *sec, RSApub *pub, uchar **epm, int *nepm);
-static Bytes *pkcs1_encrypt(Bytes* data, RSApub* key, int blocktype);
-static Bytes *pkcs1_decrypt(TlsSec *sec, uchar *epm, int nepm);
-static void	tlsSetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient);
-static void	tls12SetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient);
-static void	sslSetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient);
-static void	sslPRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label,
-			uchar *seed0, int nseed0, uchar *seed1, int nseed1);
-static int setVers(TlsSec *sec, int version);
+static int	digestDHparams(TlsSec *sec, Bytes *par, uchar digest[MAXdlen], int sigalg);
+static char*	verifyDHparams(TlsSec *sec, Bytes *par, Bytes *cert, Bytes *sig, int sigalg);
 
-static AuthRpc* factotum_rsa_open(uchar *cert, int certlen);
-static mpint* factotum_rsa_decrypt(AuthRpc *rpc, mpint *cipher);
-static void factotum_rsa_close(AuthRpc*rpc);
+static Bytes*	pkcs1_encrypt(Bytes* data, RSApub* key);
+static Bytes*	pkcs1_decrypt(TlsSec *sec, Bytes *data);
+static Bytes*	pkcs1_sign(TlsSec *sec, uchar *digest, int digestlen, int sigalg);
 
 static void* emalloc(int);
 static void* erealloc(void*, int);
 static void put32(uchar *p, u32int);
 static void put24(uchar *p, int);
 static void put16(uchar *p, int);
-static u32int get32(uchar *p);
 static int get24(uchar *p);
 static int get16(uchar *p);
 static Bytes* newbytes(int len);
 static Bytes* makebytes(uchar* buf, int len);
+static Bytes* mptobytes(mpint* big, int len);
+static mpint* bytestomp(Bytes* bytes);
 static void freebytes(Bytes* b);
 static Ints* newints(int len);
-static Ints* makeints(int* buf, int len);
 static void freeints(Ints* b);
+static int lookupid(Ints* b, int id);
 
 //================= client/server ========================
 
@@ -349,7 +459,7 @@
 
 	if(conn == nil)
 		return -1;
-	ctl = open("#a/tls/clone", ORDWR);
+	ctl = open("#a/tls/clone", ORDWR|OCEXEC);
 	if(ctl < 0)
 		return -1;
 	n = read(ctl, buf, sizeof(buf)-1);
@@ -358,40 +468,117 @@
 		return -1;
 	}
 	buf[n] = 0;
-	snprint(conn->dir, sizeof conn->dir, "#a/tls/%s", buf);
-	snprint(dname, sizeof dname, "#a/tls/%s/hand", buf);
-	hand = open(dname, ORDWR);
+	snprint(conn->dir, sizeof(conn->dir), "#a/tls/%s", buf);
+	snprint(dname, sizeof(dname), "#a/tls/%s/hand", buf);
+	hand = open(dname, ORDWR|OCEXEC);
 	if(hand < 0){
 		close(ctl);
 		return -1;
 	}
+	data = -1;
 	fprint(ctl, "fd %d 0x%x", fd, ProtocolVersion);
-	tls = tlsServer2(ctl, hand, conn->cert, conn->certlen, conn->trace, conn->chain);
-	snprint(dname, sizeof dname, "#a/tls/%s/data", buf);
-	data = open(dname, ORDWR);
-	close(fd);
+	tls = tlsServer2(ctl, hand,
+		conn->cert, conn->certlen,
+		conn->pskID, conn->psk, conn->psklen,
+		conn->trace, conn->chain);
+	if(tls != nil){
+		snprint(dname, sizeof(dname), "#a/tls/%s/data", buf);
+		data = open(dname, ORDWR);
+	}
 	close(hand);
 	close(ctl);
 	if(data < 0){
+		tlsConnectionFree(tls);
 		return -1;
 	}
-	if(tls == nil){
-		close(data);
-		return -1;
-	}
-	if(conn->cert)
-		free(conn->cert);
-	conn->cert = 0;  // client certificates are not yet implemented
+	free(conn->cert);
+	conn->cert = nil;  // client certificates are not yet implemented
 	conn->certlen = 0;
-	conn->sessionIDlen = tls->sid->len;
-	conn->sessionID = emalloc(conn->sessionIDlen);
-	memcpy(conn->sessionID, tls->sid->data, conn->sessionIDlen);
-	if(conn->sessionKey != nil && conn->sessionType != nil && strcmp(conn->sessionType, "ttls") == 0)
-		tls->sec->prf(conn->sessionKey, conn->sessionKeylen, tls->sec->sec, MasterSecretSize, conn->sessionConst,  tls->sec->crandom, RandomSize, tls->sec->srandom, RandomSize);
+	conn->sessionIDlen = 0;
+	conn->sessionID = nil;
+	if(conn->sessionKey != nil
+	&& conn->sessionType != nil
+	&& strcmp(conn->sessionType, "ttls") == 0)
+		tls->sec->prf(
+			conn->sessionKey, conn->sessionKeylen,
+			tls->sec->sec, MasterSecretSize,
+			conn->sessionConst, 
+			tls->sec->crandom, RandomSize,
+			tls->sec->srandom, RandomSize);
 	tlsConnectionFree(tls);
+	close(fd);
 	return data;
 }
 
+static uchar*
+tlsClientExtensions(TLSconn *conn, int *plen)
+{
+	uchar *b, *p;
+	int i, n, m;
+
+	p = b = nil;
+
+	// RFC6066 - Server Name Identification
+	if(conn->serverName != nil){
+		n = strlen(conn->serverName);
+
+		m = p - b;
+		b = erealloc(b, m + 2+2+2+1+2+n);
+		p = b + m;
+
+		put16(p, 0), p += 2;		/* Type: server_name */
+		put16(p, 2+1+2+n), p += 2;	/* Length */
+		put16(p, 1+2+n), p += 2;	/* Server Name list length */
+		*p++ = 0;			/* Server Name Type: host_name */
+		put16(p, n), p += 2;		/* Server Name length */
+		memmove(p, conn->serverName, n);
+		p += n;
+	}
+
+	// ECDHE
+	if(ProtocolVersion >= TLS10Version){
+		m = p - b;
+		b = erealloc(b, m + 2+2+2+nelem(namedcurves)*2 + 2+2+1+nelem(pointformats));
+		p = b + m;
+
+		n = nelem(namedcurves);
+		put16(p, 0x000a), p += 2;	/* Type: elliptic_curves */
+		put16(p, (n+1)*2), p += 2;	/* Length */
+		put16(p, n*2), p += 2;		/* Elliptic Curves Length */
+		for(i=0; i < n; i++){		/* Elliptic curves */
+			put16(p, namedcurves[i].tlsid);
+			p += 2;
+		}
+
+		n = nelem(pointformats);
+		put16(p, 0x000b), p += 2;	/* Type: ec_point_formats */
+		put16(p, n+1), p += 2;		/* Length */
+		*p++ = n;			/* EC point formats Length */
+		for(i=0; i < n; i++)		/* Elliptic curves point formats */
+			*p++ = pointformats[i];
+	}
+
+	// signature algorithms
+	if(ProtocolVersion >= TLS12Version){
+		n = nelem(sigalgs);
+
+		m = p - b;
+		b = erealloc(b, m + 2+2+2+n*2);
+		p = b + m;
+
+		put16(p, 0x000d), p += 2;
+		put16(p, n*2 + 2), p += 2;
+		put16(p, n*2), p += 2;
+		for(i=0; i < n; i++){
+			put16(p, sigalgs[i]);
+			p += 2;
+		}
+	}
+	
+	*plen = p - b;
+	return b;
+}
+
 //	push TLS onto fd, returning new (application) file descriptor
 //		or -1 if error.
 int
@@ -401,10 +588,11 @@
 	char dname[64];
 	int n, data, ctl, hand;
 	TlsConnection *tls;
+	uchar *ext;
 
-	if(!conn)
+	if(conn == nil)
 		return -1;
-	ctl = open("#a/tls/clone", ORDWR);
+	ctl = open("#a/tls/clone", ORDWR|OCEXEC);
 	if(ctl < 0)
 		return -1;
 	n = read(ctl, buf, sizeof(buf)-1);
@@ -413,38 +601,55 @@
 		return -1;
 	}
 	buf[n] = 0;
-	snprint(conn->dir, sizeof conn->dir, "#a/tls/%s", buf);
-	snprint(dname, sizeof dname, "#a/tls/%s/hand", buf);
-	hand = open(dname, ORDWR);
+	snprint(conn->dir, sizeof(conn->dir), "#a/tls/%s", buf);
+	snprint(dname, sizeof(dname), "#a/tls/%s/hand", buf);
+	hand = open(dname, ORDWR|OCEXEC);
 	if(hand < 0){
 		close(ctl);
 		return -1;
 	}
-	snprint(dname, sizeof dname, "#a/tls/%s/data", buf);
+	snprint(dname, sizeof(dname), "#a/tls/%s/data", buf);
 	data = open(dname, ORDWR);
-	if(data < 0) {
+	if(data < 0){
 		close(hand);
 		close(ctl);
 		return -1;
 	}
 	fprint(ctl, "fd %d 0x%x", fd, ProtocolVersion);
-	tls = tlsClient2(ctl, hand, conn->sessionID, conn->sessionIDlen, conn->trace);
-	close(fd);
+	ext = tlsClientExtensions(conn, &n);
+	tls = tlsClient2(ctl, hand,
+		conn->cert, conn->certlen, 
+		conn->pskID, conn->psk, conn->psklen,
+		ext, n, conn->trace);
+	free(ext);
 	close(hand);
 	close(ctl);
 	if(tls == nil){
 		close(data);
 		return -1;
 	}
-	conn->certlen = tls->cert->len;
-	conn->cert = emalloc(conn->certlen);
-	memcpy(conn->cert, tls->cert->data, conn->certlen);
-	conn->sessionIDlen = tls->sid->len;
-	conn->sessionID = emalloc(conn->sessionIDlen);
-	memcpy(conn->sessionID, tls->sid->data, conn->sessionIDlen);
-	if(conn->sessionKey != nil && conn->sessionType != nil && strcmp(conn->sessionType, "ttls") == 0)
-		tls->sec->prf(conn->sessionKey, conn->sessionKeylen, tls->sec->sec, MasterSecretSize, conn->sessionConst,  tls->sec->crandom, RandomSize, tls->sec->srandom, RandomSize);
+	free(conn->cert);
+	if(tls->cert != nil){
+		conn->certlen = tls->cert->len;
+		conn->cert = emalloc(conn->certlen);
+		memcpy(conn->cert, tls->cert->data, conn->certlen);
+	} else {
+		conn->certlen = 0;
+		conn->cert = nil;
+	}
+	conn->sessionIDlen = 0;
+	conn->sessionID = nil;
+	if(conn->sessionKey != nil
+	&& conn->sessionType != nil
+	&& strcmp(conn->sessionType, "ttls") == 0)
+		tls->sec->prf(
+			conn->sessionKey, conn->sessionKeylen,
+			tls->sec->sec, MasterSecretSize,
+			conn->sessionConst, 
+			tls->sec->crandom, RandomSize,
+			tls->sec->srandom, RandomSize);
 	tlsConnectionFree(tls);
+	close(fd);
 	return data;
 }
 
@@ -461,24 +666,26 @@
 }
 
 static TlsConnection *
-tlsServer2(int ctl, int hand, uchar *cert, int ncert, int (*trace)(char*fmt, ...), PEMChain *chp)
+tlsServer2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	int (*trace)(char*fmt, ...), PEMChain *chp)
 {
+	int cipher, compressor, numcerts, i;
 	TlsConnection *c;
 	Msg m;
-	Bytes *csid;
-	uchar sid[SidSize], kd[MaxKeyData];
-	char *secrets;
-	int cipher, compressor, nsid, rv, numcerts, i;
 
 	if(trace)
 		trace("tlsServer2\n");
 	if(!initCiphers())
 		return nil;
+
 	c = emalloc(sizeof(TlsConnection));
 	c->ctl = ctl;
 	c->hand = hand;
 	c->trace = trace;
 	c->version = ProtocolVersion;
+	c->sendp = c->buf;
 
 	memset(&m, 0, sizeof(m));
 	if(!msgRecv(c, &m)){
@@ -490,25 +697,19 @@
 		tlsError(c, EUnexpectedMessage, "expected a client hello");
 		goto Err;
 	}
-	c->clientVersion = m.u.clientHello.version;
 	if(trace)
-		trace("ClientHello version %x\n", c->clientVersion);
+		trace("ClientHello version %x\n", m.u.clientHello.version);
 	if(setVersion(c, m.u.clientHello.version) < 0) {
 		tlsError(c, EIllegalParameter, "incompatible version");
 		goto Err;
 	}
-
-	memmove(c->crandom, m.u.clientHello.random, RandomSize);
-	cipher = okCipher(m.u.clientHello.ciphers);
-	if(cipher < 0) {
-		// reply with EInsufficientSecurity if we know that's the case
-		if(cipher == -2)
-			tlsError(c, EInsufficientSecurity, "cipher suites too weak");
-		else
-			tlsError(c, EHandshakeFailure, "no matching cipher suite");
+	if(c->version < ProtocolVersion
+	&& lookupid(m.u.clientHello.ciphers, TLS_FALLBACK_SCSV) >= 0){
+		tlsError(c, EInappropriateFallback, "inappropriate fallback");
 		goto Err;
 	}
-	if(!setAlgs(c, cipher)){
+	cipher = okCipher(m.u.clientHello.ciphers, psklen > 0);
+	if(cipher < 0 || !setAlgs(c, cipher)) {
 		tlsError(c, EHandshakeFailure, "no matching cipher suite");
 		goto Err;
 	}
@@ -517,49 +718,84 @@
 		tlsError(c, EHandshakeFailure, "no matching compressor");
 		goto Err;
 	}
-
-	csid = m.u.clientHello.sid;
 	if(trace)
-		trace("  cipher %d, compressor %d, csidlen %d\n", cipher, compressor, csid->len);
-	c->sec = tlsSecInits(c->clientVersion, csid->data, csid->len, c->crandom, sid, &nsid, c->srandom);
-	if(c->sec == nil){
-		tlsError(c, EHandshakeFailure, "can't initialize security: %r");
-		goto Err;
-	}
-	c->sec->rpc = factotum_rsa_open(cert, ncert);
-	if(c->sec->rpc == nil){
-		tlsError(c, EHandshakeFailure, "factotum_rsa_open: %r");
-		goto Err;
+		trace("  cipher %x, compressor %x\n", cipher, compressor);
+
+	tlsSecInits(c->sec, m.u.clientHello.version, m.u.clientHello.random);
+	tlsSecVers(c->sec, c->version);
+	if(psklen > 0){
+		c->sec->psk = psk;
+		c->sec->psklen = psklen;
+	}
+	if(certlen > 0){
+		/* server certificate */
+		c->sec->rsapub = X509toRSApub(cert, certlen, nil, 0);
+		if(c->sec->rsapub == nil){
+			tlsError(c, EHandshakeFailure, "invalid X509/rsa certificate");
+			goto Err;
+		}
+		c->sec->rpc = factotum_rsa_open(c->sec->rsapub);
+		if(c->sec->rpc == nil){
+			tlsError(c, EHandshakeFailure, "factotum_rsa_open: %r");
+			goto Err;
+		}
 	}
-	c->sec->rsapub = X509toRSApub(cert, ncert, nil, 0);
 	msgClear(&m);
 
 	m.tag = HServerHello;
 	m.u.serverHello.version = c->version;
-	memmove(m.u.serverHello.random, c->srandom, RandomSize);
+	memmove(m.u.serverHello.random, c->sec->srandom, RandomSize);
 	m.u.serverHello.cipher = cipher;
 	m.u.serverHello.compressor = compressor;
-	c->sid = makebytes(sid, nsid);
-	m.u.serverHello.sid = makebytes(c->sid->data, c->sid->len);
+	m.u.serverHello.sid = makebytes(nil, 0);
 	if(!msgSend(c, &m, AQueue))
 		goto Err;
-	msgClear(&m);
 
-	m.tag = HCertificate;
-	numcerts = countchain(chp);
-	m.u.certificate.ncert = 1 + numcerts;
-	m.u.certificate.certs = emalloc(m.u.certificate.ncert * sizeof(Bytes));
-	m.u.certificate.certs[0] = makebytes(cert, ncert);
-	for (i = 0; i < numcerts && chp; i++, chp = chp->next)
-		m.u.certificate.certs[i+1] = makebytes(chp->pem, chp->pemlen);
-	if(!msgSend(c, &m, AQueue))
-		goto Err;
-	msgClear(&m);
+	if(certlen > 0){
+		m.tag = HCertificate;
+		numcerts = countchain(chp);
+		m.u.certificate.ncert = 1 + numcerts;
+		m.u.certificate.certs = emalloc(m.u.certificate.ncert * sizeof(Bytes*));
+		m.u.certificate.certs[0] = makebytes(cert, certlen);
+		for (i = 0; i < numcerts && chp; i++, chp = chp->next)
+			m.u.certificate.certs[i+1] = makebytes(chp->pem, chp->pemlen);
+		if(!msgSend(c, &m, AQueue))
+			goto Err;
+	}
+
+	if(isECDHE(cipher)){
+		Namedcurve *nc = &namedcurves[0];	/* secp256r1 */
+
+		m.tag = HServerKeyExchange;
+		m.u.serverKeyExchange.curve = nc->tlsid;
+		m.u.serverKeyExchange.dh_parameters = tlsSecECDHEs1(c->sec, nc);
+		if(m.u.serverKeyExchange.dh_parameters == nil){
+			tlsError(c, EInternalError, "can't set DH parameters");
+			goto Err;
+		}
+
+		/* sign the DH parameters */
+		if(certlen > 0){
+			uchar digest[MAXdlen];
+			int digestlen;
+
+			if(c->version >= TLS12Version)
+				m.u.serverKeyExchange.sigalg = 0x0401;	/* RSA SHA256 */
+			digestlen = digestDHparams(c->sec, m.u.serverKeyExchange.dh_parameters,
+				digest, m.u.serverKeyExchange.sigalg);
+			if((m.u.serverKeyExchange.dh_signature = pkcs1_sign(c->sec, digest, digestlen,
+				m.u.serverKeyExchange.sigalg)) == nil){
+				tlsError(c, EHandshakeFailure, "pkcs1_sign: %r");
+				goto Err;
+			}
+		}
+		if(!msgSend(c, &m, AQueue))
+			goto Err;
+	}
 
 	m.tag = HServerHelloDone;
 	if(!msgSend(c, &m, AFlush))
 		goto Err;
-	msgClear(&m);
 
 	if(!msgRecv(c, &m))
 		goto Err;
@@ -567,26 +803,40 @@
 		tlsError(c, EUnexpectedMessage, "expected a client key exchange");
 		goto Err;
 	}
-	if(tlsSecSecrets(c->sec, c->version, m.u.clientKeyExchange.key->data, m.u.clientKeyExchange.key->len, kd, c->nsecret) < 0){
-		tlsError(c, EHandshakeFailure, "couldn't set secrets: %r");
+	if(pskid != nil){
+		if(m.u.clientKeyExchange.pskid == nil
+		|| m.u.clientKeyExchange.pskid->len != strlen(pskid)
+		|| memcmp(pskid, m.u.clientKeyExchange.pskid->data, m.u.clientKeyExchange.pskid->len) != 0){
+			tlsError(c, EUnknownPSKidentity, "unknown or missing pskid");
+			goto Err;
+		}
+	}
+	if(isECDHE(cipher)){
+		if(tlsSecECDHEs2(c->sec, m.u.clientKeyExchange.key) < 0){
+			tlsError(c, EHandshakeFailure, "couldn't set keys: %r");
+			goto Err;
+		}
+	} else if(certlen > 0){
+		if(tlsSecRSAs(c->sec, m.u.clientKeyExchange.key) < 0){
+			tlsError(c, EHandshakeFailure, "couldn't set keys: %r");
+			goto Err;
+		}
+	} else if(psklen > 0){
+		setMasterSecret(c->sec, newbytes(psklen));
+	} else {
+		tlsError(c, EInternalError, "no psk or certificate");
 		goto Err;
 	}
+
 	if(trace)
 		trace("tls secrets\n");
-	secrets = (char*)emalloc(2*c->nsecret);
-	enc64(secrets, 2*c->nsecret, kd, c->nsecret);
-	rv = fprint(c->ctl, "secret %s %s 0 %s", c->digest, c->enc, secrets);
-	memset(secrets, 0, 2*c->nsecret);
-	free(secrets);
-	memset(kd, 0, c->nsecret);
-	if(rv < 0){
-		tlsError(c, EHandshakeFailure, "can't set keys: %r");
+	if(setSecrets(c, 0) < 0){
+		tlsError(c, EHandshakeFailure, "can't set secrets: %r");
 		goto Err;
 	}
-	msgClear(&m);
 
 	/* no CertificateVerify; skip to Finished */
-	if(tlsSecFinished(c->sec, c->hs, c->finished.verify, c->finished.n, 1) < 0){
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 1) < 0){
 		tlsError(c, EInternalError, "can't set finished: %r");
 		goto Err;
 	}
@@ -608,7 +858,7 @@
 		goto Err;
 	}
 
-	if(tlsSecFinished(c->sec, c->hs, c->finished.verify, c->finished.n, 0) < 0){
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 0) < 0){
 		tlsError(c, EInternalError, "can't set finished: %r");
 		goto Err;
 	}
@@ -616,58 +866,161 @@
 	m.u.finished = c->finished;
 	if(!msgSend(c, &m, AFlush))
 		goto Err;
-	msgClear(&m);
 	if(trace)
 		trace("tls finished\n");
 
 	if(fprint(c->ctl, "opened") < 0)
 		goto Err;
-	tlsSecOk(c->sec);
 	return c;
 
 Err:
 	msgClear(&m);
 	tlsConnectionFree(c);
-	return 0;
+	return nil;
+}
+
+static Bytes*
+tlsSecDHEc(TlsSec *sec, Bytes *p, Bytes *g, Bytes *Ys)
+{
+	DHstate *dh = &sec->dh;
+	mpint *G, *P, *Y, *K;
+	Bytes *Yc;
+	int n;
+
+	if(p == nil || g == nil || Ys == nil)
+		return nil;
+
+	Yc = nil;
+	P = bytestomp(p);
+	G = bytestomp(g);
+	Y = bytestomp(Ys);
+	K = nil;
+
+	if(dh_new(dh, P, nil, G) == nil)
+		goto Out;
+	n = (mpsignif(P)+7)/8;
+	Yc = mptobytes(dh->y, n);
+	K = dh_finish(dh, Y);	/* zeros dh */
+	if(K == nil){
+		freebytes(Yc);
+		Yc = nil;
+		goto Out;
+	}
+	setMasterSecret(sec, mptobytes(K, n));
+
+Out:
+	mpfree(K);
+	mpfree(Y);
+	mpfree(G);
+	mpfree(P);
+
+	return Yc;
+}
+
+static Bytes*
+tlsSecECDHEc(TlsSec *sec, int curve, Bytes *Ys)
+{
+	ECdomain *dom = &sec->ec.dom;
+	ECpriv *Q = &sec->ec.Q;
+	Namedcurve *nc;
+	ECpub *pub;
+	ECpoint K;
+	Bytes *Yc;
+	int n;
+
+	if(Ys == nil)
+		return nil;
+	for(nc = namedcurves; nc != &namedcurves[nelem(namedcurves)]; nc++)
+		if(nc->tlsid == curve)
+			goto Found;
+	return nil;
+
+Found:
+	ecdominit(dom, nc->init);
+	pub = ecdecodepub(dom, Ys->data, Ys->len);
+	if(pub == nil)
+		return nil;
+
+	memset(Q, 0, sizeof(*Q));
+	Q->x = mpnew(0);
+	Q->y = mpnew(0);
+	Q->d = mpnew(0);
+
+	memset(&K, 0, sizeof(K));
+	K.x = mpnew(0);
+	K.y = mpnew(0);
+
+	ecgen(dom, Q);
+	ecmul(dom, pub, Q->d, &K);
+
+	n = (mpsignif(dom->p)+7)/8;
+	setMasterSecret(sec, mptobytes(K.x, n));
+	Yc = newbytes(1 + 2*n);
+	Yc->len = ecencodepub(dom, Q, Yc->data, Yc->len);
+
+	mpfree(K.x);
+	mpfree(K.y);
+
+	ecpubfree(pub);
+
+	return Yc;
 }
 
 static TlsConnection *
-tlsClient2(int ctl, int hand, uchar *csid, int ncsid, int (*trace)(char*fmt, ...))
+tlsClient2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	uchar *ext, int extlen,
+	int (*trace)(char*fmt, ...))
 {
+	int creq, dhx, cipher;
 	TlsConnection *c;
+	Bytes *epm;
 	Msg m;
-	uchar kd[MaxKeyData], *epm;
-	char *secrets;
-	int creq, nepm, rv;
 
 	if(!initCiphers())
 		return nil;
+
 	epm = nil;
+	memset(&m, 0, sizeof(m));
 	c = emalloc(sizeof(TlsConnection));
-	c->version = ProtocolVersion;
+
 	c->ctl = ctl;
 	c->hand = hand;
 	c->trace = trace;
-	c->isClient = 1;
-	c->clientVersion = c->version;
+	c->cert = nil;
+	c->sendp = c->buf;
 
-	c->sec = tlsSecInitc(c->clientVersion, c->crandom);
-	if(c->sec == nil)
-		goto Err;
+	c->version = ProtocolVersion;
+	tlsSecInitc(c->sec, c->version);
+	if(psklen > 0){
+		c->sec->psk = psk;
+		c->sec->psklen = psklen;
+	}
+	if(certlen > 0){
+		/* client certificate */
+		c->sec->rsapub = X509toRSApub(cert, certlen, nil, 0);
+		if(c->sec->rsapub == nil){
+			tlsError(c, EInternalError, "invalid X509/rsa certificate");
+			goto Err;
+		}
+		c->sec->rpc = factotum_rsa_open(c->sec->rsapub);
+		if(c->sec->rpc == nil){
+			tlsError(c, EInternalError, "factotum_rsa_open: %r");
+			goto Err;
+		}
+	}
 
 	/* client hello */
-	memset(&m, 0, sizeof(m));
 	m.tag = HClientHello;
-	m.u.clientHello.version = c->clientVersion;
-	memmove(m.u.clientHello.random, c->crandom, RandomSize);
-	m.u.clientHello.sid = makebytes(csid, ncsid);
-	m.u.clientHello.ciphers = makeciphers();
+	m.u.clientHello.version = c->version;
+	memmove(m.u.clientHello.random, c->sec->crandom, RandomSize);
+	m.u.clientHello.sid = makebytes(nil, 0);
+	m.u.clientHello.ciphers = makeciphers(psklen > 0);
 	m.u.clientHello.compressors = makebytes(compressors,sizeof(compressors));
-	if(c->clientVersion >= TLS12Version)
-		m.u.clientHello.sigAlgs = makeints(sigAlgs, nelem(sigAlgs));
+	m.u.clientHello.extensions = makebytes(ext, extlen);
 	if(!msgSend(c, &m, AFlush))
 		goto Err;
-	msgClear(&m);
 
 	/* server hello */
 	if(!msgRecv(c, &m))
@@ -677,16 +1030,14 @@
 		goto Err;
 	}
 	if(setVersion(c, m.u.serverHello.version) < 0) {
-		tlsError(c, EIllegalParameter, "incompatible version %r");
+		tlsError(c, EIllegalParameter, "incompatible version: %r");
 		goto Err;
 	}
-	memmove(c->srandom, m.u.serverHello.random, RandomSize);
-	c->sid = makebytes(m.u.serverHello.sid->data, m.u.serverHello.sid->len);
-	if(c->sid->len != 0 && c->sid->len != SidSize) {
-		tlsError(c, EIllegalParameter, "invalid server session identifier");
-		goto Err;
-	}
-	if(!setAlgs(c, m.u.serverHello.cipher)) {
+	tlsSecVers(c->sec, c->version);
+	memmove(c->sec->srandom, m.u.serverHello.random, RandomSize);
+
+	cipher = m.u.serverHello.cipher;
+	if((psklen > 0) != isPSK(cipher) || !setAlgs(c, cipher)) {
 		tlsError(c, EIllegalParameter, "invalid cipher suite");
 		goto Err;
 	}
@@ -694,35 +1045,61 @@
 		tlsError(c, EIllegalParameter, "invalid compression");
 		goto Err;
 	}
-	msgClear(&m);
 
-	/* certificate */
-	if(!msgRecv(c, &m) || m.tag != HCertificate) {
-		tlsError(c, EUnexpectedMessage, "expected a certificate");
+	dhx = isDHE(cipher) || isECDHE(cipher);
+	if(!msgRecv(c, &m))
 		goto Err;
-	}
-	if(m.u.certificate.ncert < 1) {
-		tlsError(c, EIllegalParameter, "runt certificate");
+	if(m.tag == HCertificate){
+		if(m.u.certificate.ncert < 1) {
+			tlsError(c, EIllegalParameter, "runt certificate");
+			goto Err;
+		}
+		c->cert = makebytes(m.u.certificate.certs[0]->data, m.u.certificate.certs[0]->len);
+		if(!msgRecv(c, &m))
+			goto Err;
+	} else if(psklen == 0) {
+		tlsError(c, EUnexpectedMessage, "expected a certificate");
 		goto Err;
 	}
-	c->cert = makebytes(m.u.certificate.certs[0]->data, m.u.certificate.certs[0]->len);
-	msgClear(&m);
-
-	/* server key exchange (optional) */
-	if(!msgRecv(c, &m))
-		goto Err;
 	if(m.tag == HServerKeyExchange) {
-		tlsError(c, EUnexpectedMessage, "got an server key exchange");
+		if(dhx){
+			char *err = verifyDHparams(c->sec,
+				m.u.serverKeyExchange.dh_parameters,
+				c->cert,
+				m.u.serverKeyExchange.dh_signature,
+				c->version<TLS12Version ? 0x01 : m.u.serverKeyExchange.sigalg);
+			if(err != nil){
+				tlsError(c, EBadCertificate, "can't verify DH parameters: %s", err);
+				goto Err;
+			}
+			if(isECDHE(cipher))
+				epm = tlsSecECDHEc(c->sec,
+					m.u.serverKeyExchange.curve,
+					m.u.serverKeyExchange.dh_Ys);
+			else
+				epm = tlsSecDHEc(c->sec,
+					m.u.serverKeyExchange.dh_p, 
+					m.u.serverKeyExchange.dh_g,
+					m.u.serverKeyExchange.dh_Ys);
+			if(epm == nil){
+				tlsError(c, EHandshakeFailure, "bad DH parameters");
+				goto Err;
+			}
+		} else if(psklen == 0){
+			tlsError(c, EUnexpectedMessage, "got an server key exchange");
+			goto Err;
+		}
+		if(!msgRecv(c, &m))
+			goto Err;
+	} else if(dhx){
+		tlsError(c, EUnexpectedMessage, "expected server key exchange");
 		goto Err;
-		// If implementing this later, watch out for rollback attack
-		// described in Wagner Schneier 1996, section 4.4.
 	}
 
 	/* certificate request (optional) */
 	creq = 0;
 	if(m.tag == HCertificateRequest) {
 		creq = 1;
-		msgClear(&m);
 		if(!msgRecv(c, &m))
 			goto Err;
 	}
@@ -733,43 +1110,81 @@
 	}
 	msgClear(&m);
 
-	if(tlsSecSecretc(c->sec, c->sid->data, c->sid->len, c->srandom,
-			c->cert->data, c->cert->len, c->version, &epm, &nepm,
-			kd, c->nsecret) < 0){
-		tlsError(c, EBadCertificate, "invalid x509/rsa certificate");
-		goto Err;
+	if(!dhx){
+		if(c->cert != nil){
+			epm = tlsSecRSAc(c->sec, c->cert->data, c->cert->len);
+			if(epm == nil){
+				tlsError(c, EBadCertificate, "bad certificate: %r");
+				goto Err;
+			}
+		} else if(psklen > 0){
+			setMasterSecret(c->sec, newbytes(psklen));
+		} else {
+			tlsError(c, EInternalError, "no psk or certificate");
+			goto Err;
+		}
 	}
-	secrets = (char*)emalloc(2*c->nsecret);
-	enc64(secrets, 2*c->nsecret, kd, c->nsecret);
-	rv = fprint(c->ctl, "secret %s %s 1 %s", c->digest, c->enc, secrets);
-	memset(secrets, 0, 2*c->nsecret);
-	free(secrets);
-	memset(kd, 0, c->nsecret);
-	if(rv < 0){
-		tlsError(c, EHandshakeFailure, "can't set keys: %r");
+
+	if(trace)
+		trace("tls secrets\n");
+	if(setSecrets(c, 1) < 0){
+		tlsError(c, EHandshakeFailure, "can't set secrets: %r");
 		goto Err;
 	}
 
 	if(creq) {
-		/* send a zero length certificate */
 		m.tag = HCertificate;
+		if(certlen > 0){
+			m.u.certificate.ncert = 1;
+			m.u.certificate.certs = emalloc(m.u.certificate.ncert * sizeof(Bytes*));
+			m.u.certificate.certs[0] = makebytes(cert, certlen);
+		}		
 		if(!msgSend(c, &m, AFlush))
 			goto Err;
-		msgClear(&m);
 	}
 
 	/* client key exchange */
 	m.tag = HClientKeyExchange;
-	m.u.clientKeyExchange.key = makebytes(epm, nepm);
-	free(epm);
-	epm = nil;
-	if(m.u.clientKeyExchange.key == nil) {
-		tlsError(c, EHandshakeFailure, "can't set secret: %r");
-		goto Err;
+	if(psklen > 0){
+		if(pskid == nil)
+			pskid = "";
+		m.u.clientKeyExchange.pskid = makebytes((uchar*)pskid, strlen(pskid));
 	}
+	m.u.clientKeyExchange.key = epm;
+	epm = nil;
+	 
 	if(!msgSend(c, &m, AFlush))
 		goto Err;
-	msgClear(&m);
+
+	/* certificate verify */
+	if(creq && certlen > 0) {
+		HandshakeHash hsave;
+		uchar digest[MAXdlen];
+		int digestlen;
+
+		/* save the state for the Finish message */
+		hsave = c->handhash;
+		if(c->version < TLS12Version){
+			md5(nil, 0, digest, &c->handhash.md5);
+			sha1(nil, 0, digest+MD5dlen, &c->handhash.sha1);
+			digestlen = MD5dlen+SHA1dlen;
+		} else {
+			m.u.certificateVerify.sigalg = 0x0401;	/* RSA SHA256 */
+			sha2_256(nil, 0, digest, &c->handhash.sha2_256);
+			digestlen = SHA2_256dlen;
+		}
+		c->handhash = hsave;
+
+		if((m.u.certificateVerify.signature = pkcs1_sign(c->sec, digest, digestlen,
+			m.u.certificateVerify.sigalg)) == nil){
+			tlsError(c, EHandshakeFailure, "pkcs1_sign: %r");
+			goto Err;
+		}
+
+		m.tag = HCertificateVerify;
+		if(!msgSend(c, &m, AFlush))
+			goto Err;
+	} 
 
 	/* change cipher spec */
 	if(fprint(c->ctl, "changecipher") < 0){
@@ -779,32 +1194,26 @@
 
 	// Cipherchange must occur immediately before Finished to avoid
 	// potential hole;  see section 4.3 of Wagner Schneier 1996.
-	if(tlsSecFinished(c->sec, c->hs, c->finished.verify, c->finished.n, 1) < 0){
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 1) < 0){
 		tlsError(c, EInternalError, "can't set finished 1: %r");
 		goto Err;
 	}
 	m.tag = HFinished;
 	m.u.finished = c->finished;
-
 	if(!msgSend(c, &m, AFlush)) {
-		fprint(2, "tlsClient nepm=%d\n", nepm);
 		tlsError(c, EInternalError, "can't flush after client Finished: %r");
 		goto Err;
 	}
-	msgClear(&m);
 
-	if(tlsSecFinished(c->sec, c->hs, c->finished.verify, c->finished.n, 0) < 0){
-		fprint(2, "tlsClient nepm=%d\n", nepm);
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 0) < 0){
 		tlsError(c, EInternalError, "can't set finished 0: %r");
 		goto Err;
 	}
 	if(!msgRecv(c, &m)) {
-		fprint(2, "tlsClient nepm=%d\n", nepm);
 		tlsError(c, EInternalError, "can't read server Finished: %r");
 		goto Err;
 	}
 	if(m.tag != HFinished) {
-		fprint(2, "tlsClient nepm=%d\n", nepm);
 		tlsError(c, EUnexpectedMessage, "expected a Finished msg from server");
 		goto Err;
 	}
@@ -820,43 +1229,37 @@
 			trace("unable to do final open: %r\n");
 		goto Err;
 	}
-	tlsSecOk(c->sec);
 	return c;
 
 Err:
 	free(epm);
 	msgClear(&m);
 	tlsConnectionFree(c);
-	return 0;
+	return nil;
 }
 
 
 //================= message functions ========================
 
-static uchar sendbuf[9000], *sendp;
-
 static void
 msgHash(TlsConnection *c, uchar *p, int n)
 {
-	md5(p, n, 0, &c->hs.md5);
-	sha1(p, n, 0, &c->hs.sha1);
+	md5(p, n, 0, &c->handhash.md5);
+	sha1(p, n, 0, &c->handhash.sha1);
 	if(c->version >= TLS12Version)
-		sha2_256(p, n, 0, &c->hs.sha2_256);
-	else
-		memset(&c->hs.sha2_256, 0, sizeof c->hs.sha2_256);
+		sha2_256(p, n, 0, &c->handhash.sha2_256);
 }
 
 static int
 msgSend(TlsConnection *c, Msg *m, int act)
 {
-	uchar *p; // sendp = start of new message;  p = write pointer
-	int nn, n, i;
+	uchar *p, *e; // sendp = start of new message;  p = write pointer; e = end pointer
+	int n, i;
 
-	if(sendp == nil)
-		sendp = sendbuf;
-	p = sendp;
+	p = c->sendp;
+	e = &c->buf[sizeof(c->buf)];
 	if(c->trace)
-		c->trace("send %s", msgPrint((char*)p, (sizeof sendbuf) - (p-sendbuf), m));
+		c->trace("send %s", msgPrint((char*)p, e - p, m));
 
 	p[0] = m->tag;	// header - fill in size later
 	p += 4;
@@ -866,123 +1269,162 @@
 		tlsError(c, EInternalError, "can't encode a %d", m->tag);
 		goto Err;
 	case HClientHello:
-		// version
-		put16(p, m->u.clientHello.version);
-		p += 2;
-
-		// random
+		if(p+2+RandomSize > e)
+			goto Overflow;
+		put16(p, m->u.clientHello.version), p += 2;
 		memmove(p, m->u.clientHello.random, RandomSize);
 		p += RandomSize;
 
-		// sid
-		n = m->u.clientHello.sid->len;
-		assert(n < 256);
-		p[0] = n;
-		memmove(p+1, m->u.clientHello.sid->data, n);
-		p += n+1;
-
-		n = m->u.clientHello.ciphers->len;
-		assert(n > 0 && n < 200);
-		put16(p, n*2);
-		p += 2;
-		for(i=0; i<n; i++) {
-			put16(p, m->u.clientHello.ciphers->data[i]);
-			p += 2;
-		}
+		if(p+1+(n = m->u.clientHello.sid->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.clientHello.sid->data, n);
+		p += n;
 
-		n = m->u.clientHello.compressors->len;
-		assert(n > 0);
-		p[0] = n;
-		memmove(p+1, m->u.clientHello.compressors->data, n);
-		p += n+1;
-
-		if(m->u.clientHello.sigAlgs != nil) {
-			n = m->u.clientHello.sigAlgs->len;
-			put16(p, 6 + 2*n);   /* length of extensions */
-			put16(p+2, ExtSigalgs);
-			put16(p+4, 2 + 2*n); /* length of extension content */
-			put16(p+6, 2*n);     /* length of algorithm list */
-			p += 8;
-			for(i = 0; i < n; i++) {
-				put16(p, m->u.clientHello.sigAlgs->data[i]);
-				p += 2;
-			}
-		}
+		if(p+2+(n = m->u.clientHello.ciphers->len) > e)
+			goto Overflow;
+		put16(p, n*2), p += 2;
+		for(i=0; i<n; i++)
+			put16(p, m->u.clientHello.ciphers->data[i]), p += 2;
+
+		if(p+1+(n = m->u.clientHello.compressors->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.clientHello.compressors->data, n);
+		p += n;
+
+		if(m->u.clientHello.extensions == nil
+		|| (n = m->u.clientHello.extensions->len) == 0)
+			break;
+		if(p+2+n > e)
+			goto Overflow;
+		put16(p, n), p += 2;
+		memmove(p, m->u.clientHello.extensions->data, n);
+		p += n;
 		break;
 	case HServerHello:
-		put16(p, m->u.serverHello.version);
-		p += 2;
-
-		// random
+		if(p+2+RandomSize > e)
+			goto Overflow;
+		put16(p, m->u.serverHello.version), p += 2;
 		memmove(p, m->u.serverHello.random, RandomSize);
 		p += RandomSize;
 
-		// sid
-		n = m->u.serverHello.sid->len;
-		assert(n < 256);
-		p[0] = n;
-		memmove(p+1, m->u.serverHello.sid->data, n);
-		p += n+1;
-
-		put16(p, m->u.serverHello.cipher);
-		p += 2;
-		p[0] = m->u.serverHello.compressor;
-		p += 1;
+		if(p+1+(n = m->u.serverHello.sid->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.serverHello.sid->data, n);
+		p += n;
+
+		if(p+2+1 > e)
+			goto Overflow;
+		put16(p, m->u.serverHello.cipher), p += 2;
+		*p++ = m->u.serverHello.compressor;
+
+		if(m->u.serverHello.extensions == nil
+		|| (n = m->u.serverHello.extensions->len) == 0)
+			break;
+		if(p+2+n > e)
+			goto Overflow;
+		put16(p, n), p += 2;
+		memmove(p, m->u.serverHello.extensions->data, n);
+		p += n;
 		break;
 	case HServerHelloDone:
 		break;
 	case HCertificate:
-		nn = 0;
+		n = 0;
 		for(i = 0; i < m->u.certificate.ncert; i++)
-			nn += 3 + m->u.certificate.certs[i]->len;
-		if(p + 3 + nn - sendbuf > sizeof(sendbuf)) {
-			tlsError(c, EInternalError, "output buffer too small for certificate");
-			goto Err;
-		}
-		put24(p, nn);
-		p += 3;
+			n += 3 + m->u.certificate.certs[i]->len;
+		if(p+3+n > e)
+			goto Overflow;
+		put24(p, n), p += 3;
 		for(i = 0; i < m->u.certificate.ncert; i++){
-			put24(p, m->u.certificate.certs[i]->len);
-			p += 3;
-			memmove(p, m->u.certificate.certs[i]->data, m->u.certificate.certs[i]->len);
-			p += m->u.certificate.certs[i]->len;
+			n = m->u.certificate.certs[i]->len;
+			put24(p, n), p += 3;
+			memmove(p, m->u.certificate.certs[i]->data, n);
+			p += n;
+		}
+		break;
+	case HCertificateVerify:
+		if(p+2+2+(n = m->u.certificateVerify.signature->len) > e)
+			goto Overflow;
+		if(m->u.certificateVerify.sigalg != 0)
+			put16(p, m->u.certificateVerify.sigalg), p += 2;
+		put16(p, n), p += 2;
+		memmove(p, m->u.certificateVerify.signature->data, n);
+		p += n;
+		break;
+	case HServerKeyExchange:
+		if(m->u.serverKeyExchange.pskid != nil){
+			if(p+2+(n = m->u.serverKeyExchange.pskid->len) > e)
+				goto Overflow;
+			put16(p, n), p += 2;
+			memmove(p, m->u.serverKeyExchange.pskid->data, n);
+			p += n;
 		}
+		if(m->u.serverKeyExchange.dh_parameters == nil)
+			break;
+		if(p+(n = m->u.serverKeyExchange.dh_parameters->len) > e)
+			goto Overflow;
+		memmove(p, m->u.serverKeyExchange.dh_parameters->data, n);
+		p += n;
+		if(m->u.serverKeyExchange.dh_signature == nil)
+			break;
+		if(p+2+2+(n = m->u.serverKeyExchange.dh_signature->len) > e)
+			goto Overflow;
+		if(c->version >= TLS12Version)
+			put16(p, m->u.serverKeyExchange.sigalg), p += 2;
+		put16(p, n), p += 2;
+		memmove(p, m->u.serverKeyExchange.dh_signature->data, n);
+		p += n;
 		break;
 	case HClientKeyExchange:
-		n = m->u.clientKeyExchange.key->len;
-		if(c->version != SSL3Version){
-			put16(p, n);
-			p += 2;
+		if(m->u.clientKeyExchange.pskid != nil){
+			if(p+2+(n = m->u.clientKeyExchange.pskid->len) > e)
+				goto Overflow;
+			put16(p, n), p += 2;
+			memmove(p, m->u.clientKeyExchange.pskid->data, n);
+			p += n;
 		}
+		if(m->u.clientKeyExchange.key == nil)
+			break;
+		if(p+2+(n = m->u.clientKeyExchange.key->len) > e)
+			goto Overflow;
+		if(isECDHE(c->cipher))
+			*p++ = n;
+		else if(isDHE(c->cipher) || c->version != SSL3Version)
+			put16(p, n), p += 2;
 		memmove(p, m->u.clientKeyExchange.key->data, n);
 		p += n;
 		break;
 	case HFinished:
+		if(p+m->u.finished.n > e)
+			goto Overflow;
 		memmove(p, m->u.finished.verify, m->u.finished.n);
 		p += m->u.finished.n;
 		break;
 	}
 
 	// go back and fill in size
-	n = p-sendp;
-	assert(p <= sendbuf+sizeof(sendbuf));
-	put24(sendp+1, n-4);
+	n = p - c->sendp;
+	put24(c->sendp+1, n-4);
 
 	// remember hash of Handshake messages
-	if(m->tag != HHelloRequest) {
-		msgHash(c, sendp, n);
-	}
+	if(m->tag != HHelloRequest)
+		msgHash(c, c->sendp, n);
 
-	sendp = p;
+	c->sendp = p;
 	if(act == AFlush){
-		sendp = sendbuf;
-		if(write(c->hand, sendbuf, p-sendbuf) < 0){
+		c->sendp = c->buf;
+		if(write(c->hand, c->buf, p - c->buf) < 0){
 			fprint(2, "write error: %r\n");
 			goto Err;
 		}
 	}
 	msgClear(m);
 	return 1;
+Overflow:
+	tlsError(c, EInternalError, "not enougth send buffer for message (%d)", m->tag);
 Err:
 	msgClear(m);
 	return 0;
@@ -991,34 +1433,27 @@
 static uchar*
 tlsReadN(TlsConnection *c, int n)
 {
-	uchar *p;
-	int nn, nr;
+	uchar *p, *w, *e;
 
-	nn = c->ep - c->rp;
-	if(nn < n){
-		if(c->rp != c->buf){
-			memmove(c->buf, c->rp, nn);
-			c->rp = c->buf;
-			c->ep = &c->buf[nn];
-		}
-		for(; nn < n; nn += nr) {
-			nr = read(c->hand, &c->rp[nn], n - nn);
-			if(nr <= 0)
-				return nil;
-			c->ep += nr;
-		}
+	e = &c->buf[sizeof(c->buf)];
+	p = e - n;
+	if(n > sizeof(c->buf) || p < c->sendp){
+		tlsError(c, EDecodeError, "handshake message too long %d", n);
+		return nil;
 	}
-	p = c->rp;
-	c->rp += n;
+	for(w = p; w < e; w += n)
+		if((n = read(c->hand, w, e - w)) <= 0)
+			return nil;
 	return p;
 }
 
 static int
 msgRecv(TlsConnection *c, Msg *m)
 {
-	uchar *p;
-	int type, n, nn, nx, i, nsid, nrandom, nciph;
+	uchar *p, *s;
+	int type, n, nn, i;
 
+	msgClear(m);
 	for(;;) {
 		p = tlsReadN(c, 4);
 		if(p == nil)
@@ -1034,15 +1469,12 @@
 		}
 	}
 
-	if(n > sizeof(c->buf)) {
-		tlsError(c, EDecodeError, "handshake message too long %d %d", n, sizeof(c->buf));
-		return 0;
-	}
-
 	if(type == HSSL2ClientHello){
 		/* Cope with an SSL3 ClientHello expressed in SSL2 record format.
 			This is sent by some clients that we must interoperate
 			with, such as Java's JSSE and Microsoft's Internet Explorer. */
+		int nsid, nrandom, nciph;
+
 		p = tlsReadN(c, n);
 		if(p == nil)
 			return 0;
@@ -1059,10 +1491,8 @@
 		p += 6;
 		n -= 6;
 		if(nsid != 0 	/* no sid's, since shouldn't restart using ssl2 header */
-				|| nrandom < 16 || nn % 3)
+		|| nrandom < 16 || nn % 3 || n - nrandom < nn)
 			goto Err;
-		if(c->trace && (n - nrandom != nn))
-			c->trace("n-nrandom!=nn: n=%d nrandom=%d nn=%d\n", n, nrandom, nn);
 		/* ignore ssl2 ciphers and look for {0x00, ssl3 cipher} */
 		nciph = 0;
 		for(i = 0; i < nn; i += 3)
@@ -1083,7 +1513,6 @@
 		m->u.clientHello.compressors->data[0] = CompressionNull;
 		goto Ok;
 	}
-
 	msgHash(c, p, 4);
 
 	p = tlsReadN(c, n);
@@ -1102,14 +1531,12 @@
 		if(n < 2)
 			goto Short;
 		m->u.clientHello.version = get16(p);
-		p += 2;
-		n -= 2;
+		p += 2, n -= 2;
 
 		if(n < RandomSize)
 			goto Short;
 		memmove(m->u.clientHello.random, p, RandomSize);
-		p += RandomSize;
-		n -= RandomSize;
+		p += RandomSize, n -= RandomSize;
 		if(n < 1 || n < p[0]+1)
 			goto Short;
 		m->u.clientHello.sid = makebytes(p+1, p[0]);
@@ -1119,69 +1546,39 @@
 		if(n < 2)
 			goto Short;
 		nn = get16(p);
-		p += 2;
-		n -= 2;
+		p += 2, n -= 2;
 
 		if((nn & 1) || n < nn || nn < 2)
 			goto Short;
 		m->u.clientHello.ciphers = newints(nn >> 1);
 		for(i = 0; i < nn; i += 2)
 			m->u.clientHello.ciphers->data[i >> 1] = get16(&p[i]);
-		p += nn;
-		n -= nn;
+		p += nn, n -= nn;
 
 		if(n < 1 || n < p[0]+1 || p[0] == 0)
 			goto Short;
 		nn = p[0];
-		m->u.clientHello.compressors = newbytes(nn);
-		memmove(m->u.clientHello.compressors->data, p+1, nn);
-		p += nn + 1;
-		n -= nn + 1;
+		m->u.clientHello.compressors = makebytes(p+1, nn);
+		p += nn + 1, n -= nn + 1;
 
-		/* extensions */
-		if(n == 0)
-			break;
 		if(n < 2)
+			break;
+		nn = get16(p);
+		if(nn > n-2)
 			goto Short;
-		nx = get16(p);
-		p += 2;
-		n -= 2;
-		while(nx > 0){
-			if(n < nx || nx < 4)
-				goto Short;
-			i = get16(p);
-			nn = get16(p+2);
-			if(nx < nn+4)
-				goto Short;
-			nx -= nn+4;
-			p += 4;
-			n -= 4;
-			if(i == ExtSigalgs){
-				if(get16(p) != nn-2)
-					goto Short;
-				p += 2;
-				n -= 2;
-				nn -= 2;
-				m->u.clientHello.sigAlgs = newints(nn/2);
-				for(i = 0; i < nn; i += 2)
-					m->u.clientHello.sigAlgs->data[i >> 1] = get16(&p[i]);
-			}
-			p += nn;
-			n -= nn;
-		}
+		m->u.clientHello.extensions = makebytes(p+2, nn);
+		n -= nn + 2;
 		break;
 	case HServerHello:
 		if(n < 2)
 			goto Short;
 		m->u.serverHello.version = get16(p);
-		p += 2;
-		n -= 2;
+		p += 2, n -= 2;
 
 		if(n < RandomSize)
 			goto Short;
 		memmove(m->u.serverHello.random, p, RandomSize);
-		p += RandomSize;
-		n -= RandomSize;
+		p += RandomSize, n -= RandomSize;
 
 		if(n < 1 || n < p[0]+1)
 			goto Short;
@@ -1193,15 +1590,22 @@
 			goto Short;
 		m->u.serverHello.cipher = get16(p);
 		m->u.serverHello.compressor = p[2];
-		n -= 3;
+		p += 3, n -= 3;
+
+		if(n < 2)
+			break;
+		nn = get16(p);
+		if(nn > n-2)
+			goto Short;
+		m->u.serverHello.extensions = makebytes(p+2, nn);
+		n -= nn + 2;
 		break;
 	case HCertificate:
 		if(n < 3)
 			goto Short;
 		nn = get24(p);
-		p += 3;
-		n -= 3;
-		if(n != nn)
+		p += 3, n -= 3;
+		if(nn == 0 && n > 0)
 			goto Short;
 		/* certs */
 		i = 0;
@@ -1209,15 +1613,13 @@
 			if(n < 3)
 				goto Short;
 			nn = get24(p);
-			p += 3;
-			n -= 3;
+			p += 3, n -= 3;
 			if(nn > n)
 				goto Short;
 			m->u.certificate.ncert = i+1;
-			m->u.certificate.certs = erealloc(m->u.certificate.certs, (i+1)*sizeof(Bytes));
+			m->u.certificate.certs = erealloc(m->u.certificate.certs, (i+1)*sizeof(Bytes*));
 			m->u.certificate.certs[i] = makebytes(p, nn);
-			p += nn;
-			n -= nn;
+			p += nn, n -= nn;
 			i++;
 		}
 		break;
@@ -1225,18 +1627,28 @@
 		if(n < 1)
 			goto Short;
 		nn = p[0];
-		p += 1;
-		n -= 1;
-		if(nn < 1 || nn > n)
+		p++, n--;
+		if(nn > n)
 			goto Short;
 		m->u.certificateRequest.types = makebytes(p, nn);
-		p += nn;
-		n -= nn;
+		p += nn, n -= nn;
+		if(c->version >= TLS12Version){
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn & 1)
+				goto Short;
+			m->u.certificateRequest.sigalgs = newints(nn>>1);
+			for(i = 0; i < nn; i += 2)
+				m->u.certificateRequest.sigalgs->data[i >> 1] = get16(&p[i]);
+			p += nn, n -= nn;
+
+		}
 		if(n < 2)
 			goto Short;
 		nn = get16(p);
-		p += 2;
-		n -= 2;
+		p += 2, n -= 2;
 		/* nn == 0 can happen; yahoo's servers do it */
 		if(nn != n)
 			goto Short;
@@ -1246,35 +1658,116 @@
 			if(n < 2)
 				goto Short;
 			nn = get16(p);
-			p += 2;
-			n -= 2;
+			p += 2, n -= 2;
 			if(nn < 1 || nn > n)
 				goto Short;
 			m->u.certificateRequest.nca = i+1;
 			m->u.certificateRequest.cas = erealloc(
-				m->u.certificateRequest.cas, (i+1)*sizeof(Bytes));
+				m->u.certificateRequest.cas, (i+1)*sizeof(Bytes*));
 			m->u.certificateRequest.cas[i] = makebytes(p, nn);
-			p += nn;
-			n -= nn;
+			p += nn, n -= nn;
 			i++;
 		}
 		break;
 	case HServerHelloDone:
 		break;
+	case HServerKeyExchange:
+		if(isPSK(c->cipher)){
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn > n)
+				goto Short;
+			m->u.serverKeyExchange.pskid = makebytes(p, nn);
+			p += nn, n -= nn;
+			if(n == 0)
+				break;
+		}
+		if(n < 2)
+			goto Short;
+		s = p;
+		if(isECDHE(c->cipher)){
+			nn = *p;
+			p++, n--;
+			if(nn != 3 || nn > n) /* not a named curve */
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			m->u.serverKeyExchange.curve = nn;
+
+			nn = *p++, n--;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_Ys = makebytes(p, nn);
+			p += nn, n -= nn;
+		}else if(isDHE(c->cipher)){
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_p = makebytes(p, nn);
+			p += nn, n -= nn;
+	
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_g = makebytes(p, nn);
+			p += nn, n -= nn;
+	
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_Ys = makebytes(p, nn);
+			p += nn, n -= nn;
+		} else {
+			/* should not happen */
+			goto Short;
+		}
+		m->u.serverKeyExchange.dh_parameters = makebytes(s, p - s);
+		if(n >= 2){
+			m->u.serverKeyExchange.sigalg = 0;
+			if(c->version >= TLS12Version){
+				m->u.serverKeyExchange.sigalg = get16(p);
+				p += 2, n -= 2;
+				if(n < 2)
+					goto Short;
+			}
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn > 0 && nn <= n){
+				m->u.serverKeyExchange.dh_signature = makebytes(p, nn);
+				n -= nn;
+			}
+		}
+		break;		
 	case HClientKeyExchange:
-		/*
-		 * this message depends upon the encryption selected
-		 * assume rsa.
-		 */
-		if(c->version == SSL3Version)
-			nn = n;
-		else{
+		if(isPSK(c->cipher)){
 			if(n < 2)
 				goto Short;
 			nn = get16(p);
-			p += 2;
-			n -= 2;
+			p += 2, n -= 2;
+			if(nn > n)
+				goto Short;
+			m->u.clientKeyExchange.pskid = makebytes(p, nn);
+			p += nn, n -= nn;
+			if(n == 0)
+				break;
 		}
+		if(n < 2)
+			goto Short;
+		if(isECDHE(c->cipher))
+			nn = *p++, n--;
+		else if(isDHE(c->cipher) || c->version != SSL3Version)
+			nn = get16(p), p += 2, n -= 2;
+		else
+			nn = n;
 		if(n < nn)
 			goto Short;
 		m->u.clientKeyExchange.key = makebytes(p, nn);
@@ -1289,18 +1782,14 @@
 		break;
 	}
 
-	if(type != HClientHello && n != 0)
+	if(n != 0 && type != HClientHello && type != HServerHello)
 		goto Short;
 Ok:
-	if(c->trace){
-		char *buf;
-		buf = emalloc(8000);
-		c->trace("recv %s", msgPrint(buf, 8000, m));
-		free(buf);
-	}
+	if(c->trace)
+		c->trace("recv %s", msgPrint((char*)c->sendp, &c->buf[sizeof(c->buf)] - c->sendp, m));
 	return 1;
 Short:
-	tlsError(c, EDecodeError, "handshake message has invalid length");
+	tlsError(c, EDecodeError, "handshake message (%d) has invalid length", type);
 Err:
 	msgClear(m);
 	return 0;
@@ -1312,19 +1801,17 @@
 	int i;
 
 	switch(m->tag) {
-	default:
-		sysfatal("msgClear: unknown message type: %d", m->tag);
 	case HHelloRequest:
 		break;
 	case HClientHello:
 		freebytes(m->u.clientHello.sid);
 		freeints(m->u.clientHello.ciphers);
-		m->u.clientHello.ciphers = nil;
 		freebytes(m->u.clientHello.compressors);
-		freeints(m->u.clientHello.sigAlgs);
+		freebytes(m->u.clientHello.extensions);
 		break;
 	case HServerHello:
-		freebytes(m->u.clientHello.sid);
+		freebytes(m->u.serverHello.sid);
+		freebytes(m->u.serverHello.extensions);
 		break;
 	case HCertificate:
 		for(i=0; i<m->u.certificate.ncert; i++)
@@ -1333,13 +1820,26 @@
 		break;
 	case HCertificateRequest:
 		freebytes(m->u.certificateRequest.types);
+		freeints(m->u.certificateRequest.sigalgs);
 		for(i=0; i<m->u.certificateRequest.nca; i++)
 			freebytes(m->u.certificateRequest.cas[i]);
 		free(m->u.certificateRequest.cas);
 		break;
+	case HCertificateVerify:
+		freebytes(m->u.certificateVerify.signature);
+		break;
 	case HServerHelloDone:
 		break;
+	case HServerKeyExchange:
+		freebytes(m->u.serverKeyExchange.pskid);
+		freebytes(m->u.serverKeyExchange.dh_p);
+		freebytes(m->u.serverKeyExchange.dh_g);
+		freebytes(m->u.serverKeyExchange.dh_Ys);
+		freebytes(m->u.serverKeyExchange.dh_parameters);
+		freebytes(m->u.serverKeyExchange.dh_signature);
+		break;
 	case HClientKeyExchange:
+		freebytes(m->u.clientKeyExchange.pskid);
 		freebytes(m->u.clientKeyExchange.key);
 		break;
 	case HFinished:
@@ -1355,13 +1855,14 @@
 
 	if(s0)
 		bs = seprint(bs, be, "%s", s0);
-	bs = seprint(bs, be, "[");
 	if(b == nil)
 		bs = seprint(bs, be, "nil");
-	else
+	else {
+		bs = seprint(bs, be, "<%d> [ ", b->len);
 		for(i=0; i<b->len; i++)
 			bs = seprint(bs, be, "%.2x ", b->data[i]);
-	bs = seprint(bs, be, "]");
+		bs = seprint(bs, be, "]");
+	}
 	if(s1)
 		bs = seprint(bs, be, "%s", s1);
 	return bs;
@@ -1374,13 +1875,14 @@
 
 	if(s0)
 		bs = seprint(bs, be, "%s", s0);
-	bs = seprint(bs, be, "[");
 	if(b == nil)
 		bs = seprint(bs, be, "nil");
-	else
+	else {
+		bs = seprint(bs, be, "[ ");
 		for(i=0; i<b->len; i++)
 			bs = seprint(bs, be, "%x ", b->data[i]);
-	bs = seprint(bs, be, "]");
+		bs = seprint(bs, be, "]");
+	}
 	if(s1)
 		bs = seprint(bs, be, "%s", s1);
 	return bs;
@@ -1406,8 +1908,8 @@
 		bs = bytesPrint(bs, be, "\tsid: ", m->u.clientHello.sid, "\n");
 		bs = intsPrint(bs, be, "\tciphers: ", m->u.clientHello.ciphers, "\n");
 		bs = bytesPrint(bs, be, "\tcompressors: ", m->u.clientHello.compressors, "\n");
-		if(m->u.clientHello.sigAlgs != nil)
-			bs = intsPrint(bs, be, "\tsigAlgs: ", m->u.clientHello.sigAlgs, "\n");
+		if(m->u.clientHello.extensions != nil)
+			bs = bytesPrint(bs, be, "\textensions: ", m->u.clientHello.extensions, "\n");
 		break;
 	case HServerHello:
 		bs = seprint(bs, be, "ServerHello\n");
@@ -1419,6 +1921,8 @@
 		bs = bytesPrint(bs, be, "\tsid: ", m->u.serverHello.sid, "\n");
 		bs = seprint(bs, be, "\tcipher: %.4x\n", m->u.serverHello.cipher);
 		bs = seprint(bs, be, "\tcompressor: %.2x\n", m->u.serverHello.compressor);
+		if(m->u.serverHello.extensions != nil)
+			bs = bytesPrint(bs, be, "\textensions: ", m->u.serverHello.extensions, "\n");
 		break;
 	case HCertificate:
 		bs = seprint(bs, be, "Certificate\n");
@@ -1428,16 +1932,45 @@
 	case HCertificateRequest:
 		bs = seprint(bs, be, "CertificateRequest\n");
 		bs = bytesPrint(bs, be, "\ttypes: ", m->u.certificateRequest.types, "\n");
+		if(m->u.certificateRequest.sigalgs != nil)
+			bs = intsPrint(bs, be, "\tsigalgs: ", m->u.certificateRequest.sigalgs, "\n");
 		bs = seprint(bs, be, "\tcertificateauthorities\n");
 		for(i=0; i<m->u.certificateRequest.nca; i++)
 			bs = bytesPrint(bs, be, "\t\t", m->u.certificateRequest.cas[i], "\n");
 		break;
+	case HCertificateVerify:
+		bs = seprint(bs, be, "HCertificateVerify\n");
+		if(m->u.certificateVerify.sigalg != 0)
+			bs = seprint(bs, be, "\tsigalg: %.4x\n", m->u.certificateVerify.sigalg);
+		bs = bytesPrint(bs, be, "\tsignature: ", m->u.certificateVerify.signature,"\n");
+		break;	
 	case HServerHelloDone:
 		bs = seprint(bs, be, "ServerHelloDone\n");
 		break;
+	case HServerKeyExchange:
+		bs = seprint(bs, be, "HServerKeyExchange\n");
+		if(m->u.serverKeyExchange.pskid != nil)
+			bs = bytesPrint(bs, be, "\tpskid: ", m->u.serverKeyExchange.pskid, "\n");
+		if(m->u.serverKeyExchange.dh_parameters == nil)
+			break;
+		if(m->u.serverKeyExchange.curve != 0){
+			bs = seprint(bs, be, "\tcurve: %.4x\n", m->u.serverKeyExchange.curve);
+		} else {
+			bs = bytesPrint(bs, be, "\tdh_p: ", m->u.serverKeyExchange.dh_p, "\n");
+			bs = bytesPrint(bs, be, "\tdh_g: ", m->u.serverKeyExchange.dh_g, "\n");
+		}
+		bs = bytesPrint(bs, be, "\tdh_Ys: ", m->u.serverKeyExchange.dh_Ys, "\n");
+		if(m->u.serverKeyExchange.sigalg != 0)
+			bs = seprint(bs, be, "\tsigalg: %.4x\n", m->u.serverKeyExchange.sigalg);
+		bs = bytesPrint(bs, be, "\tdh_parameters: ", m->u.serverKeyExchange.dh_parameters, "\n");
+		bs = bytesPrint(bs, be, "\tdh_signature: ", m->u.serverKeyExchange.dh_signature, "\n");
+		break;
 	case HClientKeyExchange:
 		bs = seprint(bs, be, "HClientKeyExchange\n");
-		bs = bytesPrint(bs, be, "\tkey: ", m->u.clientKeyExchange.key, "\n");
+		if(m->u.clientKeyExchange.pskid != nil)
+			bs = bytesPrint(bs, be, "\tpskid: ", m->u.clientKeyExchange.pskid, "\n");
+		if(m->u.clientKeyExchange.key != nil)
+			bs = bytesPrint(bs, be, "\tkey: ", m->u.clientKeyExchange.key, "\n");
 		break;
 	case HFinished:
 		bs = seprint(bs, be, "HFinished\n");
@@ -1461,10 +1994,10 @@
 	va_end(arg);
 	if(c->trace)
 		c->trace("tlsError: %s\n", msg);
-	else if(c->erred)
+	if(c->erred)
 		fprint(2, "double error: %r, %s", msg);
 	else
-		werrstr("tls: local %s", msg);
+		errstr(msg, sizeof(msg));
 	c->erred = 1;
 	fprint(c->ctl, "alert %d", err);
 }
@@ -1473,24 +2006,17 @@
 static int
 setVersion(TlsConnection *c, int version)
 {
-	if(c->verset || version > MaxProtoVersion || version < MinProtoVersion)
+	if(version > MaxProtoVersion || version < MinProtoVersion)
 		return -1;
 	if(version > c->version)
 		version = c->version;
-	switch(version) {
-	case SSL3Version:
+	if(version == SSL3Version) {
+		c->version = version;
 		c->finished.n = SSL3FinishedLen;
-		return -1;
-	case TLS10Version:
-	case TLS11Version:
-	case TLS12Version:
+	}else {
+		c->version = version;
 		c->finished.n = TLSFinishedLen;
-		break;
-	default:
-		return -1;
 	}
-	c->version = version;
-	c->verset = 1;
 	return fprint(c->ctl, "version 0x%x", version);
 }
 
@@ -1498,7 +2024,7 @@
 static int
 finishedMatch(TlsConnection *c, Finished *f)
 {
-	return memcmp(f->verify, c->finished.verify, f->n) == 0;
+	return tsmemcmp(f->verify, c->finished.verify, f->n) == 0;
 }
 
 // free memory associated with TlsConnection struct
@@ -1506,47 +2032,89 @@
 static void
 tlsConnectionFree(TlsConnection *c)
 {
-	tlsSecClose(c->sec);
-	freebytes(c->sid);
+	if(c == nil)
+		return;
+
+	dh_finish(&c->sec->dh, nil);
+
+	mpfree(c->sec->ec.Q.x);
+	mpfree(c->sec->ec.Q.y);
+	mpfree(c->sec->ec.Q.d);
+	ecdomfree(&c->sec->ec.dom);
+
+	factotum_rsa_close(c->sec->rpc);
+	rsapubfree(c->sec->rsapub);
 	freebytes(c->cert);
-	memset(c, 0, sizeof *c);
+
+	memset(c, 0, sizeof(*c));
 	free(c);
 }
 
 
 //================= cipher choices ========================
 
-static int weakCipher[CipherMax] =
+static int
+isDHE(int tlsid)
+{
+	switch(tlsid){
+	case TLS_DHE_RSA_WITH_AES_128_GCM_SHA256:
+	case TLS_DHE_RSA_WITH_AES_128_CBC_SHA256:
+ 	case TLS_DHE_RSA_WITH_AES_128_CBC_SHA:
+ 	case TLS_DHE_RSA_WITH_AES_256_CBC_SHA:
+ 	case TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA:
+	case TLS_DHE_RSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305:
+		return 1;
+	}
+	return 0;
+}
+
+static int
+isECDHE(int tlsid)
 {
-	1,	/* TLS_NULL_WITH_NULL_NULL */
-	1,	/* TLS_RSA_WITH_NULL_MD5 */
-	1,	/* TLS_RSA_WITH_NULL_SHA */
-	1,	/* TLS_RSA_EXPORT_WITH_RC4_40_MD5 */
-	1,	/* TLS_RSA_WITH_RC4_128_MD5 */
-	1,	/* TLS_RSA_WITH_RC4_128_SHA */
-	1,	/* TLS_RSA_EXPORT_WITH_RC2_CBC_40_MD5 */
-	0,	/* TLS_RSA_WITH_IDEA_CBC_SHA */
-	1,	/* TLS_RSA_EXPORT_WITH_DES40_CBC_SHA */
-	0,	/* TLS_RSA_WITH_DES_CBC_SHA */
-	0,	/* TLS_RSA_WITH_3DES_EDE_CBC_SHA */
-	1,	/* TLS_DH_DSS_EXPORT_WITH_DES40_CBC_SHA */
-	0,	/* TLS_DH_DSS_WITH_DES_CBC_SHA */
-	0,	/* TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA */
-	1,	/* TLS_DH_RSA_EXPORT_WITH_DES40_CBC_SHA */
-	0,	/* TLS_DH_RSA_WITH_DES_CBC_SHA */
-	0,	/* TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA */
-	1,	/* TLS_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA */
-	0,	/* TLS_DHE_DSS_WITH_DES_CBC_SHA */
-	0,	/* TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA */
-	1,	/* TLS_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA */
-	0,	/* TLS_DHE_RSA_WITH_DES_CBC_SHA */
-	0,	/* TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA */
-	1,	/* TLS_DH_anon_EXPORT_WITH_RC4_40_MD5 */
-	1,	/* TLS_DH_anon_WITH_RC4_128_MD5 */
-	1,	/* TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA */
-	1,	/* TLS_DH_anon_WITH_DES_CBC_SHA */
-	1,	/* TLS_DH_anon_WITH_3DES_EDE_CBC_SHA */
-};
+	switch(tlsid){
+	case TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305:
+
+	case GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305:
+
+	case TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256:
+
+	case TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:
+	case TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:
+		return 1;
+	}
+	return 0;
+}
+
+static int
+isPSK(int tlsid)
+{
+	switch(tlsid){
+	case TLS_PSK_WITH_CHACHA20_POLY1305:
+	case TLS_PSK_WITH_AES_128_CBC_SHA256:
+	case TLS_PSK_WITH_AES_128_CBC_SHA:
+		return 1;
+	}
+	return 0;
+}
+
+static int
+isECDSA(int tlsid)
+{
+	switch(tlsid){
+	case TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256:
+		return 1;
+	}
+	return 0;
+}
 
 static int
 setAlgs(TlsConnection *c, int a)
@@ -1555,6 +2123,7 @@
 
 	for(i = 0; i < nelem(cipherAlgs); i++){
 		if(cipherAlgs[i].tlsid == a){
+			c->cipher = a;
 			c->enc = cipherAlgs[i].enc;
 			c->digest = cipherAlgs[i].digest;
 			c->nsecret = cipherAlgs[i].nsecret;
@@ -1567,37 +2136,29 @@
 }
 
 static int
-okCipher(Ints *cv)
+okCipher(Ints *cv, int ispsk)
 {
-	int weak, i, j, c;
+	int i, c;
 
-	weak = 1;
-	for(i = 0; i < cv->len; i++) {
-		c = cv->data[i];
-		if(c >= CipherMax)
-			weak = 0;
-		else
-			weak &= weakCipher[c];
-		for(j = 0; j < nelem(cipherAlgs); j++)
-			if(cipherAlgs[j].ok && cipherAlgs[j].tlsid == c)
-				return c;
+	for(i = 0; i < nelem(cipherAlgs); i++) {
+		c = cipherAlgs[i].tlsid;
+		if(!cipherAlgs[i].ok || isECDSA(c) || isDHE(c) || isPSK(c) != ispsk)
+			continue;
+		if(lookupid(cv, c) >= 0)
+			return c;
 	}
-	if(weak)
-		return -2;
 	return -1;
 }
 
 static int
 okCompression(Bytes *cv)
 {
-	int i, j, c;
+	int i, c;
 
-	for(i = 0; i < cv->len; i++) {
-		c = cv->data[i];
-		for(j = 0; j < nelem(compressors); j++) {
-			if(compressors[j] == c)
-				return c;
-		}
+	for(i = 0; i < nelem(compressors); i++) {
+		c = compressors[i];
+		if(memchr(cv->data, c, cv->len) != nil)
+			return c;
 	}
 	return -1;
 }
@@ -1617,18 +2178,16 @@
 		unlock(&ciphLock);
 		return nciphers;
 	}
-	j = open("#a/tls/encalgs", OREAD);
+	j = open("#a/tls/encalgs", OREAD|OCEXEC);
 	if(j < 0){
 		werrstr("can't open #a/tls/encalgs: %r");
-		unlock(&ciphLock);
-		return 0;
+		goto out;
 	}
 	n = read(j, s, MaxAlgF-1);
 	close(j);
 	if(n <= 0){
 		werrstr("nothing in #a/tls/encalgs: %r");
-		unlock(&ciphLock);
-		return 0;
+		goto out;
 	}
 	s[n] = 0;
 	n = getfields(s, flds, MaxAlgs, 1, " \t\r\n");
@@ -1643,18 +2202,16 @@
 		cipherAlgs[i].ok = ok;
 	}
 
-	j = open("#a/tls/hashalgs", OREAD);
+	j = open("#a/tls/hashalgs", OREAD|OCEXEC);
 	if(j < 0){
 		werrstr("can't open #a/tls/hashalgs: %r");
-		unlock(&ciphLock);
-		return 0;
+		goto out;
 	}
 	n = read(j, s, MaxAlgF-1);
 	close(j);
 	if(n <= 0){
 		werrstr("nothing in #a/tls/hashalgs: %r");
-		unlock(&ciphLock);
-		return 0;
+		goto out;
 	}
 	s[n] = 0;
 	n = getfields(s, flds, MaxAlgs, 1, " \t\r\n");
@@ -1670,70 +2227,61 @@
 		if(cipherAlgs[i].ok)
 			nciphers++;
 	}
+out:
 	unlock(&ciphLock);
 	return nciphers;
 }
 
 static Ints*
-makeciphers(void)
+makeciphers(int ispsk)
 {
 	Ints *is;
 	int i, j;
 
 	is = newints(nciphers);
 	j = 0;
-	for(i = 0; i < nelem(cipherAlgs); i++){
-		if(cipherAlgs[i].ok)
+	for(i = 0; i < nelem(cipherAlgs); i++)
+		if(cipherAlgs[i].ok && isPSK(cipherAlgs[i].tlsid) == ispsk)
 			is->data[j++] = cipherAlgs[i].tlsid;
-	}
+	is->len = j;
 	return is;
 }
 
 
-
 //================= security functions ========================
 
-// given X.509 certificate, set up connection to factotum
-//	for using corresponding private key
+// given a public key, set up connection to factotum
+// for using corresponding private key
 static AuthRpc*
-factotum_rsa_open(uchar *cert, int certlen)
+factotum_rsa_open(RSApub *rsapub)
 {
 	int afd;
 	char *s;
-	mpint *pub = nil;
-	RSApub *rsapub;
+	mpint *n;
 	AuthRpc *rpc;
 
 	// start talking to factotum
-	if((afd = open("/mnt/factotum/rpc", ORDWR)) < 0)
+	if((afd = open("/mnt/factotum/rpc", ORDWR|OCEXEC)) < 0)
 		return nil;
 	if((rpc = auth_allocrpc(afd)) == nil){
 		close(afd);
 		return nil;
 	}
 	s = "proto=rsa service=tls role=client";
-	if(auth_rpc(rpc, "start", s, strlen(s)) != ARok){
-		factotum_rsa_close(rpc);
-		return nil;
-	}
-
-	// roll factotum keyring around to match certificate
-	rsapub = X509toRSApub(cert, certlen, nil, 0);
-	while(1){
-		if(auth_rpc(rpc, "read", nil, 0) != ARok){
-			factotum_rsa_close(rpc);
-			rpc = nil;
-			goto done;
-		}
-		pub = strtomp(rpc->arg, nil, 16, nil);
-		assert(pub != nil);
-		if(mpcmp(pub,rsapub->n) == 0)
-			break;
+	if(auth_rpc(rpc, "start", s, strlen(s)) == ARok){
+		// roll factotum keyring around to match public key
+		n = mpnew(0);
+		while(auth_rpc(rpc, "read", nil, 0) == ARok){
+			if(strtomp(rpc->arg, nil, 16, n) != nil
+			&& mpcmp(n, rsapub->n) == 0){
+				mpfree(n);
+				return rpc;
+			}
+		}
+		mpfree(n);
 	}
-done:
-	mpfree(pub);
-	rsapubfree(rsapub);
-	return rpc;
+	factotum_rsa_close(rpc);
+	return nil;
 }
 
 static mpint*
@@ -1742,20 +2290,23 @@
 	char *p;
 	int rv;
 
-	if((p = mptoa(cipher, 16, nil, 0)) == nil)
+	if(cipher == nil)
+		return nil;
+	p = mptoa(cipher, 16, nil, 0);
+	mpfree(cipher);
+	if(p == nil)
 		return nil;
 	rv = auth_rpc(rpc, "write", p, strlen(p));
 	free(p);
 	if(rv != ARok || auth_rpc(rpc, "read", nil, 0) != ARok)
 		return nil;
-	mpfree(cipher);
 	return strtomp(rpc->arg, nil, 16, nil);
 }
 
 static void
-factotum_rsa_close(AuthRpc*rpc)
+factotum_rsa_close(AuthRpc *rpc)
 {
-	if(!rpc)
+	if(rpc == nil)
 		return;
 	close(rpc->afd);
 	auth_freerpc(rpc);
@@ -1820,11 +2371,11 @@
 }
 
 static void
-tlsPsha2_256(uchar *buf, int nbuf, uchar *key, int nkey, uchar *label, int nlabel, uchar *seed, int nseed)
+p_sha256(uchar *buf, int nbuf, uchar *key, int nkey, uchar *label, int nlabel, uchar *seed, int nseed)
 {
 	uchar ai[SHA2_256dlen], tmp[SHA2_256dlen];
-	int n;
 	SHAstate *s;
+	int n;
 
 	// generate a1
 	s = hmac_sha2_256(label, nlabel, key, nkey, nil, nil);
@@ -1847,517 +2398,473 @@
 
 // fill buf with md5(args)^sha1(args)
 static void
-tlsPRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed0, int nseed0, uchar *seed1, int nseed1)
+tls10PRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed0, int nseed0, uchar *seed1, int nseed1)
 {
-	int i;
 	int nlabel = strlen(label);
 	int n = (nkey + 1) >> 1;
 
-	for(i = 0; i < nbuf; i++)
-		buf[i] = 0;
+	memset(buf, 0, nbuf);
 	tlsPmd5(buf, nbuf, key, n, (uchar*)label, nlabel, seed0, nseed0, seed1, nseed1);
 	tlsPsha1(buf, nbuf, key+nkey-n, n, (uchar*)label, nlabel, seed0, nseed0, seed1, nseed1);
 }
 
-void
+static void
 tls12PRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed0, int nseed0, uchar *seed1, int nseed1)
 {
 	uchar seed[2*RandomSize];
-	int nlabel = strlen(label);
 
+	assert(nseed0+nseed1 <= sizeof(seed));
 	memmove(seed, seed0, nseed0);
 	memmove(seed+nseed0, seed1, nseed1);
-	tlsPsha2_256(buf, nbuf, key, nkey, (uchar*)label, nlabel, seed, nseed0+nseed1);
+	p_sha256(buf, nbuf, key, nkey, (uchar*)label, strlen(label), seed, nseed0+nseed1);
 }
 
-/*
- * for setting server session id's
- */
-static Lock	sidLock;
-static long	maxSid = 1;
-
-/* the keys are verified to have the same public components
- * and to function correctly with pkcs 1 encryption and decryption. */
-static TlsSec*
-tlsSecInits(int cvers, uchar *csid, int ncsid, uchar *crandom, uchar *ssid, int *nssid, uchar *srandom)
+static void
+sslPRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed0, int nseed0, uchar *seed1, int nseed1)
 {
-	TlsSec *sec = emalloc(sizeof(*sec));
-
-	USED(csid); USED(ncsid);  // ignore csid for now
-
-	memmove(sec->crandom, crandom, RandomSize);
-	sec->clientVers = cvers;
-
-	put32(sec->srandom, time(0));
-	genrandom(sec->srandom+4, RandomSize-4);
-	memmove(srandom, sec->srandom, RandomSize);
-
-	/*
-	 * make up a unique sid: use our pid, and and incrementing id
-	 * can signal no sid by setting nssid to 0.
-	 */
-	memset(ssid, 0, SidSize);
-	put32(ssid, getpid());
-	lock(&sidLock);
-	put32(ssid+4, maxSid++);
-	unlock(&sidLock);
-	*nssid = SidSize;
-	return sec;
-}
+	uchar sha1dig[SHA1dlen], md5dig[MD5dlen], tmp[26];
+	DigestState *s;
+	int i, n, len;
 
-static int
-tlsSecSecrets(TlsSec *sec, int vers, uchar *epm, int nepm, uchar *kd, int nkd)
-{
-	if(epm != nil){
-		if(setVers(sec, vers) < 0)
-			goto Err;
-		serverMasterSecret(sec, epm, nepm);
-	}else if(sec->vers != vers){
-		werrstr("mismatched session versions");
-		goto Err;
+	USED(label);
+	len = 1;
+	while(nbuf > 0){
+		if(len > 26)
+			return;
+		for(i = 0; i < len; i++)
+			tmp[i] = 'A' - 1 + len;
+		s = sha1(tmp, len, nil, nil);
+		s = sha1(key, nkey, nil, s);
+		s = sha1(seed0, nseed0, nil, s);
+		sha1(seed1, nseed1, sha1dig, s);
+		s = md5(key, nkey, nil, nil);
+		md5(sha1dig, SHA1dlen, md5dig, s);
+		n = MD5dlen;
+		if(n > nbuf)
+			n = nbuf;
+		memmove(buf, md5dig, n);
+		buf += n;
+		nbuf -= n;
+		len++;
 	}
-	setSecrets(sec, kd, nkd);
-	return 0;
-Err:
-	sec->ok = -1;
-	return -1;
-}
-
-static TlsSec*
-tlsSecInitc(int cvers, uchar *crandom)
-{
-	TlsSec *sec = emalloc(sizeof(*sec));
-	sec->clientVers = cvers;
-	put32(sec->crandom, time(0));
-	genrandom(sec->crandom+4, RandomSize-4);
-	memmove(crandom, sec->crandom, RandomSize);
-	return sec;
 }
 
-static int
-tlsSecSecretc(TlsSec *sec, uchar *sid, int nsid, uchar *srandom, uchar *cert, int ncert, int vers, uchar **epm, int *nepm, uchar *kd, int nkd)
+static void
+sslSetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
 {
-	RSApub *pub;
-
-	pub = nil;
-
-	USED(sid);
-	USED(nsid);
-	
-	memmove(sec->srandom, srandom, RandomSize);
-
-	if(setVers(sec, vers) < 0)
-		goto Err;
+	DigestState *s;
+	uchar h0[MD5dlen], h1[SHA1dlen], pad[48];
+	char *label;
 
-	pub = X509toRSApub(cert, ncert, nil, 0);
-	if(pub == nil){
-		werrstr("invalid x509/rsa certificate");
-		goto Err;
-	}
-	if(clientMasterSecret(sec, pub, epm, nepm) < 0)
-		goto Err;
-	rsapubfree(pub);
-	setSecrets(sec, kd, nkd);
-	return 0;
+	if(isclient)
+		label = "CLNT";
+	else
+		label = "SRVR";
 
-Err:
-	if(pub != nil)
-		rsapubfree(pub);
-	sec->ok = -1;
-	return -1;
-}
+	md5((uchar*)label, 4, nil, &hsh.md5);
+	md5(sec->sec, MasterSecretSize, nil, &hsh.md5);
+	memset(pad, 0x36, 48);
+	md5(pad, 48, nil, &hsh.md5);
+	md5(nil, 0, h0, &hsh.md5);
+	memset(pad, 0x5C, 48);
+	s = md5(sec->sec, MasterSecretSize, nil, nil);
+	s = md5(pad, 48, nil, s);
+	md5(h0, MD5dlen, finished, s);
 
-static int
-tlsSecFinished(TlsSec *sec, HandHash hs, uchar *fin, int nfin, int isclient)
-{
-	if(sec->nfin != nfin){
-		sec->ok = -1;
-		werrstr("invalid finished exchange");
-		return -1;
-	}
-	hs.md5.malloced = 0;
-	hs.sha1.malloced = 0;
-	if(sec->setFinished == nil ){
-		sec->ok = -1;
-		werrstr("nil sec->setFinished in tlsSecFinished");
-		return -1;
-	}
-	(*sec->setFinished)(sec, hs, fin, isclient);
-	return 1;
+	sha1((uchar*)label, 4, nil, &hsh.sha1);
+	sha1(sec->sec, MasterSecretSize, nil, &hsh.sha1);
+	memset(pad, 0x36, 40);
+	sha1(pad, 40, nil, &hsh.sha1);
+	sha1(nil, 0, h1, &hsh.sha1);
+	memset(pad, 0x5C, 40);
+	s = sha1(sec->sec, MasterSecretSize, nil, nil);
+	s = sha1(pad, 40, nil, s);
+	sha1(h1, SHA1dlen, finished + MD5dlen, s);
 }
 
+// fill "finished" arg with md5(args)^sha1(args)
 static void
-tlsSecOk(TlsSec *sec)
+tls10SetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
 {
-	if(sec->ok == 0)
-		sec->ok = 1;
-}
+	uchar h0[MD5dlen], h1[SHA1dlen];
+	char *label;
 
-static void
-tlsSecKill(TlsSec *sec)
-{
-	if(!sec)
-		return;
-	factotum_rsa_close(sec->rpc);
-	sec->ok = -1;
+	// get current hash value, but allow further messages to be hashed in
+	md5(nil, 0, h0, &hsh.md5);
+	sha1(nil, 0, h1, &hsh.sha1);
+
+	if(isclient)
+		label = "client finished";
+	else
+		label = "server finished";
+	tls10PRF(finished, TLSFinishedLen, sec->sec, MasterSecretSize, label, h0, MD5dlen, h1, SHA1dlen);
 }
 
 static void
-tlsSecClose(TlsSec *sec)
+tls12SetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
 {
-	if(!sec)
-		return;
-	factotum_rsa_close(sec->rpc);
-	free(sec->server);
-	free(sec);
-}
+	uchar seed[SHA2_256dlen];
+	char *label;
 
-static int
-setVers(TlsSec *sec, int v)
-{
-	switch(v){
-	case SSL3Version:
-		sec->setFinished = sslSetFinished;
-		sec->nfin = SSL3FinishedLen;
-		sec->prf = sslPRF;
-		break;
-	case TLS10Version:
-	case TLS11Version:
-		sec->setFinished = tlsSetFinished;
-		sec->nfin = TLSFinishedLen;
-		sec->prf = tlsPRF;
-		break;
-	case TLS12Version:
-		sec->setFinished = tls12SetFinished;
-		sec->nfin = TLSFinishedLen;
-		sec->prf = tls12PRF;
-		break;
-	default:
-		werrstr("invalid version");
-		sec->setFinished = nil;
-		sec->prf = nil;
-		return -1;
-	}
-	sec->vers = v;
-	return 0;
-}
+	// get current hash value, but allow further messages to be hashed in
+	sha2_256(nil, 0, seed, &hsh.sha2_256);
 
-/*
- * generate secret keys from the master secret.
- *
- * different crypto selections will require different amounts
- * of key expansion and use of key expansion data,
- * but it's all generated using the same function.
- */
-static void
-setSecrets(TlsSec *sec, uchar *kd, int nkd)
-{
-	if (sec->prf == nil) {
-		werrstr("nil sec->prf in setSecrets");
-		return;
-	}
-	(*sec->prf)(kd, nkd, sec->sec, MasterSecretSize, "key expansion",
-			sec->srandom, RandomSize, sec->crandom, RandomSize);
+	if(isclient)
+		label = "client finished";
+	else
+		label = "server finished";
+	p_sha256(finished, TLSFinishedLen, sec->sec, MasterSecretSize, (uchar*)label, strlen(label), seed, SHA2_256dlen);
 }
 
-/*
- * set the master secret from the pre-master secret.
- */
 static void
-setMasterSecret(TlsSec *sec, Bytes *pm)
+tlsSecInits(TlsSec *sec, int cvers, uchar *crandom)
 {
-	(*sec->prf)(sec->sec, MasterSecretSize, pm->data, MasterSecretSize, "master secret",
-			sec->crandom, RandomSize, sec->srandom, RandomSize);
+	memset(sec, 0, sizeof(*sec));
+	sec->clientVers = cvers;
+	memmove(sec->crandom, crandom, RandomSize);
+
+	put32(sec->srandom, time(nil));
+	genrandom(sec->srandom+4, RandomSize-4);
 }
 
-static void
-serverMasterSecret(TlsSec *sec, uchar *epm, int nepm)
+static int
+tlsSecRSAs(TlsSec *sec, Bytes *epm)
 {
 	Bytes *pm;
 
-	pm = pkcs1_decrypt(sec, epm, nepm);
-
+	if(epm == nil){
+		werrstr("no encrypted premaster secret");
+		return -1;
+	}
 	// if the client messed up, just continue as if everything is ok,
 	// to prevent attacks to check for correctly formatted messages.
-	// Hence the fprint(2,) can't be replaced by tlsError(), which sends an Alert msg to the client.
-	if(sec->ok < 0 || pm == nil || get16(pm->data) != sec->clientVers){
-		fprint(2, "serverMasterSecret failed ok=%d pm=%p pmvers=%x cvers=%x nepm=%d\n",
-			sec->ok, pm, pm ? get16(pm->data) : -1, sec->clientVers, nepm);
-		sec->ok = -1;
-		if(pm != nil)
-			freebytes(pm);
+	pm = pkcs1_decrypt(sec, epm);
+	if(pm == nil || pm->len != MasterSecretSize || get16(pm->data) != sec->clientVers){
+		freebytes(pm);
 		pm = newbytes(MasterSecretSize);
-		genrandom(pm->data, MasterSecretSize);
+		genrandom(pm->data, pm->len);
 	}
 	setMasterSecret(sec, pm);
-	memset(pm->data, 0, pm->len);	
-	freebytes(pm);
+	return 0;
 }
 
-static int
-clientMasterSecret(TlsSec *sec, RSApub *pub, uchar **epm, int *nepm)
+static Bytes*
+tlsSecECDHEs1(TlsSec *sec, Namedcurve *nc)
 {
-	Bytes *pm, *key;
+	ECdomain *dom = &sec->ec.dom;
+	ECpriv *Q = &sec->ec.Q;
+	Bytes *par;
+	int n;
 
-	pm = newbytes(MasterSecretSize);
-	put16(pm->data, sec->clientVers);
-	genrandom(pm->data+2, MasterSecretSize - 2);
+	ecdominit(dom, nc->init);
+	memset(Q, 0, sizeof(*Q));
+	Q->x = mpnew(0);
+	Q->y = mpnew(0);
+	Q->d = mpnew(0);
+	ecgen(dom, Q);
+	n = 1 + 2*((mpsignif(dom->p)+7)/8);
+	par = newbytes(1+2+1+n);
+	par->data[0] = 3;
+	put16(par->data+1, nc->tlsid);
+	n = ecencodepub(dom, Q, par->data+4, par->len-4);
+	par->data[3] = n;
+	par->len = 1+2+1+n;
 
-	setMasterSecret(sec, pm);
+	return par;
+}
 
-	key = pkcs1_encrypt(pm, pub, 2);
-	memset(pm->data, 0, pm->len);
-	freebytes(pm);
-	if(key == nil){
-		werrstr("tls pkcs1_encrypt failed");
+static int
+tlsSecECDHEs2(TlsSec *sec, Bytes *Yc)
+{
+	ECdomain *dom = &sec->ec.dom;
+	ECpriv *Q = &sec->ec.Q;
+	ECpoint K;
+	ECpub *Y;
+
+	if(Yc == nil){
+		werrstr("no public key");
 		return -1;
 	}
 
-	*nepm = key->len;
-	*epm = malloc(*nepm);
-	if(*epm == nil){
-		freebytes(key);
-		werrstr("out of memory");
+	if((Y = ecdecodepub(dom, Yc->data, Yc->len)) == nil){
+		werrstr("bad public key");
 		return -1;
 	}
-	memmove(*epm, key->data, *nepm);
 
-	freebytes(key);
+	memset(&K, 0, sizeof(K));
+	K.x = mpnew(0);
+	K.y = mpnew(0);
 
-	return 1;
-}
+	ecmul(dom, Y, Q->d, &K);
 
-static void
-sslSetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient)
-{
-	DigestState *s;
-	uchar h0[MD5dlen], h1[SHA1dlen], pad[48];
-	char *label;
+	setMasterSecret(sec, mptobytes(K.x, (mpsignif(dom->p)+7)/8));
 
-	if(isClient)
-		label = "CLNT";
-	else
-		label = "SRVR";
+	mpfree(K.x);
+	mpfree(K.y);
 
-	md5((uchar*)label, 4, nil, &hs.md5);
-	md5(sec->sec, MasterSecretSize, nil, &hs.md5);
-	memset(pad, 0x36, 48);
-	md5(pad, 48, nil, &hs.md5);
-	md5(nil, 0, h0, &hs.md5);
-	memset(pad, 0x5C, 48);
-	s = md5(sec->sec, MasterSecretSize, nil, nil);
-	s = md5(pad, 48, nil, s);
-	md5(h0, MD5dlen, finished, s);
+	ecpubfree(Y);
 
-	sha1((uchar*)label, 4, nil, &hs.sha1);
-	sha1(sec->sec, MasterSecretSize, nil, &hs.sha1);
-	memset(pad, 0x36, 40);
-	sha1(pad, 40, nil, &hs.sha1);
-	sha1(nil, 0, h1, &hs.sha1);
-	memset(pad, 0x5C, 40);
-	s = sha1(sec->sec, MasterSecretSize, nil, nil);
-	s = sha1(pad, 40, nil, s);
-	sha1(h1, SHA1dlen, finished + MD5dlen, s);
+	return 0;
 }
 
-// fill "finished" arg with md5(args)^sha1(args)
 static void
-tlsSetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient)
+tlsSecInitc(TlsSec *sec, int cvers)
 {
-	uchar h0[MD5dlen], h1[SHA1dlen];
-	char *label;
+	memset(sec, 0, sizeof(*sec));
+	sec->clientVers = cvers;
+	put32(sec->crandom, time(nil));
+	genrandom(sec->crandom+4, RandomSize-4);
+}
 
-	// get current hash value, but allow further messages to be hashed in
-	md5(nil, 0, h0, &hs.md5);
-	sha1(nil, 0, h1, &hs.sha1);
+static Bytes*
+tlsSecRSAc(TlsSec *sec, uchar *cert, int ncert)
+{
+	RSApub *pub;
+	Bytes *pm, *epm;
 
-	if(isClient)
-		label = "client finished";
-	else
-		label = "server finished";
-	if (sec->prf == nil) {
-		werrstr("nil sec->prf in tlsSetFinished");
-		return;
+	pub = X509toRSApub(cert, ncert, nil, 0);
+	if(pub == nil){
+		werrstr("invalid x509/rsa certificate");
+		return nil;
 	}
-	(*sec->prf)(finished, TLSFinishedLen, sec->sec, MasterSecretSize, label, h0, MD5dlen, h1, SHA1dlen);
+	pm = newbytes(MasterSecretSize);
+	put16(pm->data, sec->clientVers);
+	genrandom(pm->data+2, MasterSecretSize - 2);
+	epm = pkcs1_encrypt(pm, pub);
+	setMasterSecret(sec, pm);
+	rsapubfree(pub);
+	return epm;
 }
 
-// fill "finished" arg with sha256(args)
-static void
-tls12SetFinished(TlsSec *sec, HandHash hs, uchar *finished, int isClient)
+static int
+tlsSecFinished(TlsSec *sec, HandshakeHash hsh, uchar *fin, int nfin, int isclient)
 {
-	uchar h[SHA2_256dlen];
-	char *label;
-
-	// get current hash value, but allow further messages to be hashed in
-	sha2_256(nil, 0, h, &hs.sha2_256);
-
-	if(isClient)
-		label = "client finished";
-	else
-		label = "server finished";
-	tlsPsha2_256(finished, TLSFinishedLen, sec->sec, MasterSecretSize, (uchar*)label, strlen(label), h, SHA2_256dlen);
+	if(sec->nfin != nfin){
+		werrstr("invalid finished exchange");
+		return -1;
+	}
+	hsh.md5.malloced = 0;
+	hsh.sha1.malloced = 0;
+	hsh.sha2_256.malloced = 0;
+	(*sec->setFinished)(sec, hsh, fin, isclient);
+	return 0;
 }
 
 static void
-sslPRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed0, int nseed0, uchar *seed1, int nseed1)
+tlsSecVers(TlsSec *sec, int v)
 {
-	DigestState *s;
-	uchar sha1dig[SHA1dlen], md5dig[MD5dlen], tmp[26];
-	int i, n, len;
-
-	USED(label);
-	len = 1;
-	while(nbuf > 0){
-		if(len > 26)
-			return;
-		for(i = 0; i < len; i++)
-			tmp[i] = 'A' - 1 + len;
-		s = sha1(tmp, len, nil, nil);
-		s = sha1(key, nkey, nil, s);
-		s = sha1(seed0, nseed0, nil, s);
-		sha1(seed1, nseed1, sha1dig, s);
-		s = md5(key, nkey, nil, nil);
-		md5(sha1dig, SHA1dlen, md5dig, s);
-		n = MD5dlen;
-		if(n > nbuf)
-			n = nbuf;
-		memmove(buf, md5dig, n);
-		buf += n;
-		nbuf -= n;
-		len++;
+	if(v == SSL3Version){
+		sec->setFinished = sslSetFinished;
+		sec->nfin = SSL3FinishedLen;
+		sec->prf = sslPRF;
+	}else if(v < TLS12Version) {
+		sec->setFinished = tls10SetFinished;
+		sec->nfin = TLSFinishedLen;
+		sec->prf = tls10PRF;
+	}else {
+		sec->setFinished = tls12SetFinished;
+		sec->nfin = TLSFinishedLen;
+		sec->prf = tls12PRF;
 	}
 }
 
-static mpint*
-bytestomp(Bytes* bytes)
+static int
+setSecrets(TlsConnection *c, int isclient)
 {
-	mpint* ans;
+	uchar kd[MaxKeyData];
+	char *secrets;
+	int rv;
 
-	ans = betomp(bytes->data, bytes->len, nil);
-	return ans;
+	assert(c->nsecret <= sizeof(kd));
+	secrets = emalloc(2*c->nsecret);
+
+	/*
+	 * generate secret keys from the master secret.
+	 *
+	 * different cipher selections will require different amounts
+	 * of key expansion and use of key expansion data,
+	 * but it's all generated using the same function.
+	 */
+	(*c->sec->prf)(kd, c->nsecret, c->sec->sec, MasterSecretSize, "key expansion",
+			c->sec->srandom, RandomSize, c->sec->crandom, RandomSize);
+
+	enc64(secrets, 2*c->nsecret, kd, c->nsecret);
+	memset(kd, 0, c->nsecret);
+
+	rv = fprint(c->ctl, "secret %s %s %d %s", c->digest, c->enc, isclient, secrets);
+	memset(secrets, 0, 2*c->nsecret);
+	free(secrets);
+
+	return rv;
 }
 
 /*
- * Convert mpint* to Bytes, putting high order byte first.
+ * set the master secret from the pre-master secret,
+ * destroys premaster.
  */
-static Bytes*
-mptobytes(mpint* big)
+static void
+setMasterSecret(TlsSec *sec, Bytes *pm)
 {
-	int n, m;
-	uchar *a;
-	Bytes* ans;
+	if(sec->psklen > 0){
+		Bytes *opm = pm;
+		uchar *p;
+
+		/* concatenate psk to pre-master secret */
+		pm = newbytes(4 + opm->len + sec->psklen);
+		p = pm->data;
+		put16(p, opm->len), p += 2;
+		memmove(p, opm->data, opm->len), p += opm->len;
+		put16(p, sec->psklen), p += 2;
+		memmove(p, sec->psk, sec->psklen);
 
-	a = nil;
-	n = (mpsignif(big)+7)/8;
-	m = mptobe(big, nil, n, &a);
-	ans = makebytes(a, m);
-	if(a != nil)
-		free(a);
-	return ans;
+		memset(opm->data, 0, opm->len);
+		freebytes(opm);
+	}
+
+	(*sec->prf)(sec->sec, MasterSecretSize, pm->data, pm->len, "master secret",
+			sec->crandom, RandomSize, sec->srandom, RandomSize);
+
+	memset(pm->data, 0, pm->len);	
+	freebytes(pm);
 }
 
-// Do RSA computation on block according to key, and pad
-// result on left with zeros to make it modlen long.
-static Bytes*
-rsacomp(Bytes* block, RSApub* key, int modlen)
+static int
+digestDHparams(TlsSec *sec, Bytes *par, uchar digest[MAXdlen], int sigalg)
 {
-	mpint *x, *y;
-	Bytes *a, *ybytes;
-	int ylen;
-
-	x = bytestomp(block);
-	y = rsaencrypt(key, x, nil);
-	mpfree(x);
-	ybytes = mptobytes(y);
-	ylen = ybytes->len;
+	int hashalg = (sigalg>>8) & 0xFF;
+	int digestlen;
+	Bytes *blob;
+
+	blob = newbytes(2*RandomSize + par->len);
+	memmove(blob->data+0*RandomSize, sec->crandom, RandomSize);
+	memmove(blob->data+1*RandomSize, sec->srandom, RandomSize);
+	memmove(blob->data+2*RandomSize, par->data, par->len);
+	if(hashalg == 0){
+		digestlen = MD5dlen+SHA1dlen;
+		md5(blob->data, blob->len, digest, nil);
+		sha1(blob->data, blob->len, digest+MD5dlen, nil);
+	} else {
+		digestlen = -1;
+		if(hashalg < nelem(hashfun) && hashfun[hashalg].fun != nil){
+			digestlen = hashfun[hashalg].len;
+			(*hashfun[hashalg].fun)(blob->data, blob->len, digest, nil);
+		}
+	}
+	freebytes(blob);
+	return digestlen;
+}
 
-	if(ylen < modlen) {
-		a = newbytes(modlen);
-		memset(a->data, 0, modlen-ylen);
-		memmove(a->data+modlen-ylen, ybytes->data, ylen);
-		freebytes(ybytes);
-		ybytes = a;
-	}
-	else if(ylen > modlen) {
-		// assume it has leading zeros (mod should make it so)
-		a = newbytes(modlen);
-		memmove(a->data, ybytes->data, modlen);
-		freebytes(ybytes);
-		ybytes = a;
+static char*
+verifyDHparams(TlsSec *sec, Bytes *par, Bytes *cert, Bytes *sig, int sigalg)
+{
+	uchar digest[MAXdlen];
+	int digestlen;
+	ECdomain dom;
+	ECpub *ecpk;
+	RSApub *rsapk;
+	char *err;
+
+	if(par == nil || par->len <= 0)
+		return "no DH parameters";
+
+	if(sig == nil || sig->len <= 0){
+		if(sec->psklen > 0)
+			return nil;
+		return "no signature";
+	}
+
+	if(cert == nil)
+		return "no certificate";
+
+	digestlen = digestDHparams(sec, par, digest, sigalg);
+	if(digestlen <= 0)
+		return "unknown signature digest algorithm";
+	
+	switch(sigalg & 0xFF){
+	case 0x01:
+		rsapk = X509toRSApub(cert->data, cert->len, nil, 0);
+		if(rsapk == nil)
+			return "bad certificate";
+		err = X509rsaverifydigest(sig->data, sig->len, digest, digestlen, rsapk);
+		rsapubfree(rsapk);
+		break;
+	case 0x03:
+		ecpk = X509toECpub(cert->data, cert->len, nil, 0, &dom);
+		if(ecpk == nil)
+			return "bad certificate";
+		err = X509ecdsaverifydigest(sig->data, sig->len, digest, digestlen, &dom, ecpk);
+		ecdomfree(&dom);
+		ecpubfree(ecpk);
+		break;
+	default:
+		err = "signaure algorithm not RSA or ECDSA";
 	}
-	mpfree(y);
-	return ybytes;
+
+	return err;
 }
 
 // encrypt data according to PKCS#1, /lib/rfc/rfc2437 9.1.2.1
 static Bytes*
-pkcs1_encrypt(Bytes* data, RSApub* key, int blocktype)
+pkcs1_encrypt(Bytes* data, RSApub* key)
 {
-	Bytes *pad, *eb, *ans;
-	int i, dlen, padlen, modlen;
+	mpint *x, *y;
 
-	modlen = (mpsignif(key->n)+7)/8;
-	dlen = data->len;
-	if(modlen < 12 || dlen > modlen - 11)
+	x = pkcs1padbuf(data->data, data->len, key->n, 2);
+	if(x == nil)
 		return nil;
-	padlen = modlen - 3 - dlen;
-	pad = newbytes(padlen);
-	genrandom(pad->data, padlen);
-	for(i = 0; i < padlen; i++) {
-		if(blocktype == 0)
-			pad->data[i] = 0;
-		else if(blocktype == 1)
-			pad->data[i] = 255;
-		else if(pad->data[i] == 0)
-			pad->data[i] = 1;
-	}
-	eb = newbytes(modlen);
-	eb->data[0] = 0;
-	eb->data[1] = blocktype;
-	memmove(eb->data+2, pad->data, padlen);
-	eb->data[padlen+2] = 0;
-	memmove(eb->data+padlen+3, data->data, dlen);
-	ans = rsacomp(eb, key, modlen);
-	freebytes(eb);
-	freebytes(pad);
-	return ans;
+	y = rsaencrypt(key, x, nil);
+	mpfree(x);
+	data = newbytes((mpsignif(key->n)+7)/8);
+	mptober(y, data->data, data->len);
+	mpfree(y);
+	return data;
 }
 
 // decrypt data according to PKCS#1, with given key.
-// expect a block type of 2.
 static Bytes*
-pkcs1_decrypt(TlsSec *sec, uchar *epm, int nepm)
+pkcs1_decrypt(TlsSec *sec, Bytes *data)
 {
-	Bytes *eb, *ans = nil;
-	int i, modlen;
-	mpint *x, *y;
+	mpint *y;
 
-	modlen = (mpsignif(sec->rsapub->n)+7)/8;
-	if(nepm != modlen)
+	if(data->len != (mpsignif(sec->rsapub->n)+7)/8)
 		return nil;
-	x = betomp(epm, nepm, nil);
-	y = factotum_rsa_decrypt(sec->rpc, x);
+	y = factotum_rsa_decrypt(sec->rpc, bytestomp(data));
 	if(y == nil)
 		return nil;
-	eb = mptobytes(y);
-	if(eb->len < modlen){ // pad on left with zeros
-		ans = newbytes(modlen);
-		memset(ans->data, 0, modlen-eb->len);
-		memmove(ans->data+modlen-eb->len, eb->data, eb->len);
-		freebytes(eb);
-		eb = ans;
-	}
-	if(eb->data[0] == 0 && eb->data[1] == 2) {
-		for(i = 2; i < modlen; i++)
-			if(eb->data[i] == 0)
-				break;
-		if(i < modlen - 1)
-			ans = makebytes(eb->data+i+1, modlen-(i+1));
+	data = mptobytes(y, (mpsignif(y)+7)/8);
+	mpfree(y);
+	if((data->len = pkcs1unpadbuf(data->data, data->len, sec->rsapub->n, 2)) < 0){
+		freebytes(data);
+		return nil;
 	}
-	if (eb != ans)			/* not freed above? */
-		freebytes(eb);
-	return ans;
+	return data;
+}
+
+static Bytes*
+pkcs1_sign(TlsSec *sec, uchar *digest, int digestlen, int sigalg)
+{
+	int hashalg = (sigalg>>8)&0xFF;
+	mpint *signedMP;
+	Bytes *signature;
+	uchar buf[128];
+
+	if(hashalg > 0 && hashalg < nelem(hashfun) && hashfun[hashalg].len == digestlen)
+		digestlen = asn1encodedigest(hashfun[hashalg].fun, digest, buf, sizeof(buf));
+	else if(digestlen == MD5dlen+SHA1dlen)
+		memmove(buf, digest, digestlen);
+	else
+		digestlen = -1;
+	if(digestlen <= 0){
+		werrstr("bad digest algorithm");
+		return nil;
+	}
+
+	signedMP = factotum_rsa_decrypt(sec->rpc, pkcs1padbuf(buf, digestlen, sec->rsapub->n, 1));
+	if(signedMP == nil)
+		return nil;
+	signature = mptobytes(signedMP, (mpsignif(sec->rsapub->n)+7)/8);
+	mpfree(signedMP);
+	return signature;
 }
 
 
@@ -2370,10 +2877,10 @@
 	if(n==0)
 		n=1;
 	p = malloc(n);
-	if(p == nil){
-		exits("out of memory");
-	}
+	if(p == nil)
+		sysfatal("out of memory");
 	memset(p, 0, n);
+	setmalloctag(p, getcallerpc(&n));
 	return p;
 }
 
@@ -2382,11 +2889,11 @@
 {
 	if(ReallocN == 0)
 		ReallocN = 1;
-	if(!ReallocP)
+	if(ReallocP == nil)
 		ReallocP = emalloc(ReallocN);
-	else if(!(ReallocP = realloc(ReallocP, ReallocN))){
-		exits("out of memory");
-	}
+	else if((ReallocP = realloc(ReallocP, ReallocN)) == nil)
+		sysfatal("out of memory");
+	setrealloctag(ReallocP, getcallerpc(&ReallocP));
 	return(ReallocP);
 }
 
@@ -2414,12 +2921,6 @@
 	p[1] = x;
 }
 
-static u32int
-get32(uchar *p)
-{
-	return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
-}
-
 static int
 get24(uchar *p)
 {
@@ -2432,19 +2933,14 @@
 	return (p[0]<<8)|p[1];
 }
 
-#define OFFSET(x, s) offsetof(s, x)
-
-/*
- * malloc and return a new Bytes structure capable of
- * holding len bytes. (len >= 0)
- * Used to use crypt_malloc, which aborts if malloc fails.
- */
 static Bytes*
 newbytes(int len)
 {
 	Bytes* ans;
 
-	ans = (Bytes*)malloc(OFFSET(data[0], Bytes) + len);
+	if(len < 0)
+		abort();
+	ans = emalloc(sizeof(Bytes) + len);
 	ans->len = len;
 	return ans;
 }
@@ -2465,35 +2961,55 @@
 static void
 freebytes(Bytes* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
-/* len is number of ints */
-static Ints*
-newints(int len)
+static mpint*
+bytestomp(Bytes* bytes)
 {
-	Ints* ans;
+	return betomp(bytes->data, bytes->len, nil);
+}
 
-	ans = (Ints*)malloc(OFFSET(data[0], Ints) + len*sizeof(int));
-	ans->len = len;
+/*
+ * Convert mpint* to Bytes, putting high order byte first.
+ */
+static Bytes*
+mptobytes(mpint *big, int len)
+{
+	Bytes* ans;
+
+	if(len == 0) len++;
+	ans = newbytes(len);
+	mptober(big, ans->data, ans->len);
 	return ans;
 }
 
+/* len is number of ints */
 static Ints*
-makeints(int* buf, int len)
+newints(int len)
 {
 	Ints* ans;
 
-	ans = newints(len);
-	if(len > 0)
-		memmove(ans->data, buf, len*sizeof(int));
+	if(len < 0 || len > ((uint)-1>>1)/sizeof(int))
+		abort();
+	ans = emalloc(sizeof(Ints) + len*sizeof(int));
+	ans->len = len;
 	return ans;
 }
 
 static void
 freeints(Ints* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
+}
+
+static int
+lookupid(Ints* b, int id)
+{
+	int i;
+
+	for(i=0; i<b->len; i++)
+		if(b->data[i] == id)
+			return i;
+	return -1;
 }
--- /dev/null	Mon Jun 28 00:12:24 2021
+++ /sys/src/libsec/port/tsmemcmp.c	Sat Jun 26 21:21:42 2021
@@ -0,0 +1,27 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*
+ * timing safe memcmp()
+ */
+int
+tsmemcmp(void *a1, void *a2, ulong n)
+{
+	int lt, gt, c1, c2, r, m;
+	uchar *s1, *s2;
+
+	r = m = 0;
+	s1 = a1;
+	s2 = a2;
+	while(n--){
+		c1 = *s1++;
+		c2 = *s2++;
+		lt = (c1 - c2) >> 8;
+		gt = (c2 - c1) >> 8;
+		r |= (lt - gt) & ~m;
+		m |= lt | gt;
+	}
+	return r;
+}
--- sys/src/libsec/port/x509.c	Thu Jul 15 09:07:50 2021
+++ /sys/src/libsec/port/x509.c	Wed Jul 14 02:08:12 2021
@@ -3,11 +3,6 @@
 #include <mp.h>
 #include <libsec.h>
 
-typedef DigestState*(*DigestFun)(uchar*,ulong,uchar*,DigestState*);
-
-/* ANSI offsetof, backwards. */
-#define	OFFSETOF(a, b)	offsetof(b, a)
-
 /*=============================================================*/
 /*  general ASN1 declarations and parsing
  *
@@ -58,18 +53,18 @@
 
 struct Bytes {
 	int	len;
-	uchar	data[1];
+	uchar	data[];
 };
 
 struct Ints {
 	int	len;
-	int	data[1];
+	int	data[];
 };
 
 struct Bits {
 	int	len;		/* number of bytes */
 	int	unusedbits;	/* unused bits in last byte */
-	uchar	data[1];	/* most-significant bit first */
+	uchar	data[];		/* most-significant bit first */
 };
 
 struct Tag {
@@ -135,14 +130,11 @@
 static int	is_string(Elem* pe, char** pstring);
 static int	is_time(Elem* pe, char** ptime);
 static int	decode(uchar* a, int alen, Elem* pelem);
-static int	decode_seq(uchar* a, int alen, Elist** pelist);
-static int	decode_value(uchar* a, int alen, int kind, int isconstr, Value* pval);
 static int	encode(Elem e, Bytes** pbytes);
 static int	oid_lookup(Ints* o, Ints** tab);
 static void	freevalfields(Value* v);
 static mpint	*asn1mpint(Elem *e);
-
-
+static void	edump(Elem);
 
 #define TAG_MASK 0x1F
 #define CONSTR_MASK 0x20
@@ -169,9 +161,8 @@
 	if(n==0)
 		n=1;
 	p = malloc(n);
-	if(p == nil){
-		exits("out of memory");
-	}
+	if(p == nil)
+		sysfatal("out of memory");
 	memset(p, 0, n);
 	setmalloctag(p, getcallerpc(&n));
 	return p;
@@ -180,14 +171,13 @@
 static char*
 estrdup(char *s)
 {
-	char *d, *d0;
+	char *d;
+	int n;
 
-	if(!s)
-		return 0;
-	d = d0 = emalloc(strlen(s)+1);
-	while(*d++ = *s++)
-		;
-	return d0;
+	n = strlen(s)+1;
+	d = emalloc(n);
+	memmove(d, s, n);
+	return d;
 }
 
 
@@ -201,37 +191,12 @@
 decode(uchar* a, int alen, Elem* pelem)
 {
 	uchar* p = a;
+	int err;
 
-	return  ber_decode(&p, &a[alen], pelem);
-}
-
-/*
- * Like decode, but continue decoding after first element
- * of array ends.
- */
-static int
-decode_seq(uchar* a, int alen, Elist** pelist)
-{
-	uchar* p = a;
-
-	return seq_decode(&p, &a[alen], -1, 1, pelist);
-}
-
-/*
- * Decode the whole array as a BER encoding of an ASN1 value,
- * (i.e., the part after the tag and length).
- * Assume the value is encoded as universal tag "kind".
- * The constr arg is 1 if the value is constructed, 0 if primitive.
- * If there's an error, the return string will contain the error.
- * Depending on the error, the returned value may or may not
- * be nil.
- */
-static int
-decode_value(uchar* a, int alen, int kind, int isconstr, Value* pval)
-{
-	uchar* p = a;
-
-	return value_decode(&p, &a[alen], alen, kind, isconstr, pval);
+	err = ber_decode(&p, &a[alen], pelem);
+	if(err == ASN_OK && p != &a[alen])
+		err = ASN_EVALLEN;
+	return err;
 }
 
 /*
@@ -257,15 +222,14 @@
 	Tag tag;
 	Value val;
 
+	memset(pelem, 0, sizeof(*pelem));
 	err = tag_decode(pp, pend, &tag, &isconstr);
 	if(err == ASN_OK) {
 		err = length_decode(pp, pend, &length);
 		if(err == ASN_OK) {
-			if(tag.class == Universal) {
+			if(tag.class == Universal)
 				err = value_decode(pp, pend, length, tag.num, isconstr, &val);
-				if(val.tag == VSeq || val.tag == VSet)
-					setmalloctag(val.u.seqval, getcallerpc(&pp));
-			}else
+			else
 				err = value_decode(pp, pend, length, OCTET_STRING, 0, &val);
 			if(err == ASN_OK) {
 				pelem->tag = tag;
@@ -400,8 +364,7 @@
 				pval->u.bitstringval = makebits(0, 0, 0);
 				p += 2;
 			}
-			else
-				/* TODO: recurse and concat results */
+			else	/* TODO: recurse and concat results */
 				err = ASN_EUNIMPL;
 		}
 		else {
@@ -502,7 +465,6 @@
 
 	case SEQUENCE:
 		err = seq_decode(&p, pend, length, isconstr, &vl);
-		setmalloctag(vl, getcallerpc(&pp));
 		if(err == ASN_OK) {
 			pval->tag = VSeq ;
 			pval->u.seqval = vl;
@@ -511,12 +473,12 @@
 
 	case SETOF:
 		err = seq_decode(&p, pend, length, isconstr, &vl);
-		setmalloctag(vl, getcallerpc(&pp));
 		if(err == ASN_OK) {
 			pval->tag = VSet;
 			pval->u.setval = vl;
 		}
 		break;
+
 	case UTF8String:
 	case NumericString:
 	case PrintableString:
@@ -530,13 +492,64 @@
 	case GeneralString:
 	case UniversalString:
 	case BMPString:
-		/* TODO: figure out when character set conversion is necessary */
 		err = octet_decode(&p, pend, length, isconstr, &va);
 		if(err == ASN_OK) {
-			pval->tag = VString;
-			pval->u.stringval = (char*)emalloc(va->len+1);
-			memmove(pval->u.stringval, va->data, va->len);
-			pval->u.stringval[va->len] = 0;
+			uchar *s;
+			char *d;
+			Rune r;
+			int n;
+
+			switch(kind){
+			case UniversalString:
+				n = va->len / 4;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<24 | s[1]<<16 | s[2]<<8 | s[3];
+					if(r == 0)
+						break;
+					n--;
+					s += 4;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			case BMPString:
+				n = va->len / 2;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<8 | s[1];
+					if(r == 0)
+						break;
+					n--;
+					s += 2;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			default:
+				n = va->len;
+				d = emalloc(n+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					if((*d = *s) == 0)
+						break;
+					n--;
+					s++;
+					d++;
+				}
+				*d = 0;
+				break;
+			}
+			if(n != 0){
+				err = ASN_EINVAL;
+				free(pval->u.stringval);
+			} else 
+				pval->tag = VString;
 			free(va);
 		}
 		break;
@@ -665,25 +678,27 @@
 			switch(elem.val.tag) {
 			case VOctets:
 				newans = catbytes(ans, elem.val.u.octetsval);
+				freevalfields(&elem.val);
 				freebytes(ans);
 				ans = newans;
 				break;
 
 			case VEOC:
-				if(length != -1) {
-					p = pold;
-					err = ASN_EINVAL;
-				}
-				goto cloop_done;
-
+				if(length == -1)
+					goto cloop_done;
+				/* no break */
 			default:
+				freevalfields(&elem.val);
 				p = pold;
 				err = ASN_EINVAL;
 				goto cloop_done;
 			}
 		}
 cloop_done:
-		;
+		if(err != ASN_OK){
+			freebytes(ans);
+			ans = nil;
+		}
 	}
 	*pp = p;
 	*pbytes = ans;
@@ -736,7 +751,9 @@
 			else
 				lve = mkel(elem, lve);
 		}
-		if(err == ASN_OK) {
+		if(err != ASN_OK)
+			freeelist(lve);
+		else {
 			/* reverse back to original order */
 			while(lve != nil) {
 				lveold = lve;
@@ -748,7 +765,6 @@
 	}
 	*pp = p;
 	*pelist = ans;
-	setmalloctag(ans, getcallerpc(&pp));
 	return err;
 }
 
@@ -977,8 +993,8 @@
 				memmove(p, bb->data, bb->len);
 			p += bb->len;
 		}
-			else
-				err = ASN_EINVAL;
+		else
+			err = ASN_EINVAL;
 		break;
 
 	case NULLTAG:
@@ -1195,21 +1211,8 @@
 static int
 is_bigint(Elem* pe, Bytes** pbigint)
 {
-	int v, n, i;
-
-	if(pe->tag.class == Universal && pe->tag.num == INTEGER) {
-		if(pe->val.tag == VBigInt)
-			*pbigint = pe->val.u.bigintval;
-		else if(pe->val.tag == VInt){
-			v = pe->val.u.intval;
-			for(n = 1; n < 4; n++)
-				if((1 << (8 * n)) > v)
-					break;
-			*pbigint = newbytes(n);
-			for(i = 0; i < n; i++)
-				(*pbigint)->data[i] = (v >> ((n - 1 - i) * 8));
-		}else
-			return 0;
+	if(pe->tag.class == Universal && pe->tag.num == INTEGER && pe->val.tag == VBigInt) {
+		*pbigint = pe->val.u.bigintval;
 		return 1;
 	}
 	return 0;
@@ -1292,7 +1295,9 @@
 {
 	Bytes* ans;
 
-	ans = (Bytes*)emalloc(OFFSETOF(data[0], Bytes) + len);
+	if(len < 0)
+		abort();
+	ans = emalloc(sizeof(Bytes) + len);
 	ans->len = len;
 	return ans;
 }
@@ -1313,8 +1318,7 @@
 static void
 freebytes(Bytes* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 /*
@@ -1352,7 +1356,9 @@
 {
 	Ints* ans;
 
-	ans = (Ints*)emalloc(OFFSETOF(data[0], Ints) + len*sizeof(int));
+	if(len < 0 || len > ((uint)-1>>1)/sizeof(int))
+		abort();
+	ans = emalloc(sizeof(Ints) + len*sizeof(int));
 	ans->len = len;
 	return ans;
 }
@@ -1363,16 +1369,14 @@
 	Ints* ans;
 
 	ans = newints(len);
-	if(len > 0)
-		memmove(ans->data, buf, len*sizeof(int));
+	memmove(ans->data, buf, len*sizeof(int));
 	return ans;
 }
 
 static void
 freeints(Ints* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 /* len is number of bytes */
@@ -1381,7 +1385,9 @@
 {
 	Bits* ans;
 
-	ans = (Bits*)emalloc(OFFSETOF(data[0], Bits) + len);
+	if(len < 0)
+		abort();
+	ans = emalloc(sizeof(Bits) + len);
 	ans->len = len;
 	ans->unusedbits = 0;
 	return ans;
@@ -1401,8 +1407,7 @@
 static void
 freebits(Bits* b)
 {
-	if(b != nil)
-		free(b);
+	free(b);
 }
 
 static Elist*
@@ -1469,24 +1474,35 @@
 		freeints(v->u.objidval);
 		break;
 	case VString:
-		if(v->u.stringval)
-			free(v->u.stringval);
+		free(v->u.stringval);
 		break;
 	case VSeq:
 		el = v->u.seqval;
 		for(l = el; l != nil; l = l->tl)
 			freevalfields(&l->hd.val);
-		if(el)
-			freeelist(el);
+		freeelist(el);
 		break;
 	case VSet:
 		el = v->u.setval;
 		for(l = el; l != nil; l = l->tl)
 			freevalfields(&l->hd.val);
-		if(el)
-			freeelist(el);
+		freeelist(el);
 		break;
 	}
+	memset(v, 0, sizeof(*v));
+}
+
+static mpint*
+asn1mpint(Elem *e)
+{
+	Bytes *b;
+	int v;
+
+	if(is_int(e, &v))
+		return itomp(v, nil);
+	if(is_bigint(e, &b))
+		return betomp(b->data, b->len, nil);
+	return nil;
 }
 
 /* end of general ASN1 functions */
@@ -1569,9 +1585,11 @@
 	char*	validity_end;
 	char*	subject;
 	int	publickey_alg;
-	Bytes*	publickey;
+	Bits*	publickey;
 	int	signature_alg;
-	Bytes*	signature;
+	Bits*	signature;
+	int	curve;
+	Bytes*	ext;
 } CertX509;
 
 /* Algorithm object-ids */
@@ -1580,51 +1598,99 @@
 	ALG_md2WithRSAEncryption,
 	ALG_md4WithRSAEncryption,
 	ALG_md5WithRSAEncryption,
+
 	ALG_sha1WithRSAEncryption,
 	ALG_sha1WithRSAEncryptionOiw,
+
 	ALG_sha256WithRSAEncryption,
 	ALG_sha384WithRSAEncryption,
 	ALG_sha512WithRSAEncryption,
 	ALG_sha224WithRSAEncryption,
+
+	ALG_ecPublicKey,
+	ALG_sha1WithECDSA,
+	ALG_sha256WithECDSA,
+	ALG_sha384WithECDSA,
+	ALG_sha512WithECDSA,
+
 	ALG_md5,
 	ALG_sha1,
 	ALG_sha256,
 	ALG_sha384,
 	ALG_sha512,
 	ALG_sha224,
+
 	NUMALGS
 };
-typedef struct Ints9 {
+
+typedef struct Ints15 {
+	int		len;
+	int		data[15];
+} Ints15;
+
+typedef struct DigestAlg {
+	int		alg;
+	DigestState*	(*fun)(uchar*,ulong,uchar*,DigestState*);
 	int		len;
-	int		data[9];
-} Ints9;
-static Ints9 oid_rsaEncryption = {7, 1, 2, 840, 113549, 1, 1, 1 };
-static Ints9 oid_md2WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 2 };
-static Ints9 oid_md4WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 3 };
-static Ints9 oid_md5WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 4 };
-static Ints9 oid_sha1WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 5 };
-static Ints9 oid_sha1WithRSAEncryptionOiw ={6, 1, 3, 14, 3, 2, 29 };
-static Ints9 oid_sha256WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 11 };
-static Ints9 oid_sha384WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 12 };
-static Ints9 oid_sha512WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 13 };
-static Ints9 oid_sha224WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 14 };
-static Ints9 oid_md5 ={6, 1, 2, 840, 113549, 2, 5, 0 };
-static Ints9 oid_sha1 ={6, 1, 3, 14, 3, 2, 26 };
-static Ints9 oid_sha256 ={9, 2, 16, 840, 1, 101, 3, 4, 2, 1 };
-static Ints9 oid_sha384 ={9, 2, 16, 840, 1, 101, 3, 4, 2, 2 };
-static Ints9 oid_sha512 ={9, 2, 16, 840, 1, 101, 3, 4, 2, 3 };
-static Ints9 oid_sha224 ={9, 2, 16, 840, 1, 101, 3, 4, 2, 4 };
+} DigestAlg;
+
+static DigestAlg alg_md5 = { ALG_md5, md5, MD5dlen};
+static DigestAlg alg_sha1 = { ALG_sha1, sha1, SHA1dlen };
+static DigestAlg alg_sha256 = { ALG_sha256, sha2_256, SHA2_256dlen };
+static DigestAlg alg_sha384 = { ALG_sha384, sha2_384, SHA2_384dlen };
+static DigestAlg alg_sha512 = { ALG_sha512, sha2_512, SHA2_512dlen };
+static DigestAlg alg_sha224 = { ALG_sha224, sha2_224, SHA2_224dlen };
+
+/* maximum length of digest output of the digest algs above */
+enum {
+	MAXdlen = SHA2_512dlen,
+};
+
+static Ints15 oid_rsaEncryption = {7, 1, 2, 840, 113549, 1, 1, 1 };
+
+static Ints15 oid_md2WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 2 };
+static Ints15 oid_md4WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 3 };
+static Ints15 oid_md5WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 4 };
+static Ints15 oid_sha1WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 5 };
+static Ints15 oid_sha1WithRSAEncryptionOiw ={6, 1, 3, 14, 3, 2, 29 };
+static Ints15 oid_sha256WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 11 };
+static Ints15 oid_sha384WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 12 };
+static Ints15 oid_sha512WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 13 };
+static Ints15 oid_sha224WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 14 };
+
+static Ints15 oid_ecPublicKey = {6, 1, 2, 840, 10045, 2, 1 };
+static Ints15 oid_sha1WithECDSA = {6, 1, 2, 840, 10045, 4, 1 };
+static Ints15 oid_sha256WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 2 };
+static Ints15 oid_sha384WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 3 };
+static Ints15 oid_sha512WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 4 };
+
+static Ints15 oid_md5 = {6, 1, 2, 840, 113549, 2, 5 };
+static Ints15 oid_sha1 = {6, 1, 3, 14, 3, 2, 26 };
+static Ints15 oid_sha256= {9, 2, 16, 840, 1, 101, 3, 4, 2, 1 };
+static Ints15 oid_sha384= {9, 2, 16, 840, 1, 101, 3, 4, 2, 2 };
+static Ints15 oid_sha512= {9, 2, 16, 840, 1, 101, 3, 4, 2, 3 };
+static Ints15 oid_sha224= {9, 2, 16, 840, 1, 101, 3, 4, 2, 4 };
+
 static Ints *alg_oid_tab[NUMALGS+1] = {
 	(Ints*)&oid_rsaEncryption,
 	(Ints*)&oid_md2WithRSAEncryption,
 	(Ints*)&oid_md4WithRSAEncryption,
 	(Ints*)&oid_md5WithRSAEncryption,
+
 	(Ints*)&oid_sha1WithRSAEncryption,
 	(Ints*)&oid_sha1WithRSAEncryptionOiw,
+
 	(Ints*)&oid_sha256WithRSAEncryption,
 	(Ints*)&oid_sha384WithRSAEncryption,
 	(Ints*)&oid_sha512WithRSAEncryption,
 	(Ints*)&oid_sha224WithRSAEncryption,
+
+	(Ints*)&oid_ecPublicKey,
+	(Ints*)&oid_sha1WithECDSA,
+	(Ints*)&oid_sha256WithECDSA,
+	(Ints*)&oid_sha384WithECDSA,
+	(Ints*)&oid_sha512WithECDSA,
+
 	(Ints*)&oid_md5,
 	(Ints*)&oid_sha1,
 	(Ints*)&oid_sha256,
@@ -1633,22 +1699,46 @@
 	(Ints*)&oid_sha224,
 	nil
 };
-static DigestFun digestalg[NUMALGS+1] = { md5, md5, md5, md5, sha1, sha1, sha2_256, sha2_384, sha2_512, sha2_224, md5, sha1, sha2_256, sha2_384, sha2_512, sha2_224, nil };
+
+static DigestAlg *digestalg[NUMALGS+1] = {
+	&alg_md5, &alg_md5, &alg_md5, &alg_md5,
+	&alg_sha1, &alg_sha1,
+	&alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
+	&alg_sha256, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512,
+	&alg_md5, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
+	nil
+};
+
+static Bytes* encode_digest(DigestAlg *da, uchar *digest);
+
+static Ints15 oid_secp256r1 = {7, 1, 2, 840, 10045, 3, 1, 7};
+static Ints15 oid_secp384r1 = {5, 1, 3, 132, 0, 34};
+
+static Ints *namedcurves_oid_tab[] = {
+	(Ints*)&oid_secp256r1,
+	(Ints*)&oid_secp384r1,
+	nil,
+};
+static void (*namedcurves[])(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h) = {
+	secp256r1,
+	secp384r1,
+	nil,
+};
+
+static void appendaltnames(char *name, int nname, Bytes *ext, int req);
 
 static void
 freecert(CertX509* c)
 {
-	if(!c) return;
-	if(c->issuer != nil)
-		free(c->issuer);
-	if(c->validity_start != nil)
-		free(c->validity_start);
-	if(c->validity_end != nil)
-		free(c->validity_end);
-	if(c->subject != nil)
-		free(c->subject);
-	freebytes(c->publickey);
-	freebytes(c->signature);
+	if(c == nil)
+		return;
+	free(c->issuer);
+	free(c->validity_start);
+	free(c->validity_end);
+	free(c->subject);
+	freebits(c->publickey);
+	freebits(c->signature);
+	freebytes(c->ext);
 	free(c);
 }
 
@@ -1729,12 +1819,22 @@
 	return oid_lookup(oid, alg_oid_tab);
 }
 
+static int
+parse_curve(Elem* e)
+{
+	Elist* el;
+	Ints* oid;
+
+	if(!is_seq(e, &el) || elistlen(el)<2 || !is_oid(&el->tl->hd, &oid))
+		return -1;
+	return oid_lookup(oid, namedcurves_oid_tab);
+}
+
 static CertX509*
-decode_cert(Bytes* a)
+decode_cert(uchar *buf, int len)
 {
 	int ok = 0;
 	int n;
-	CertX509* c = nil;
 	Elem  ecert;
 	Elem* ecertinfo;
 	Elem* esigalg;
@@ -1752,8 +1852,9 @@
 	Bits* bits = nil;
 	Bytes* b;
 	Elem* e;
+	CertX509* c = nil;
 
-	if(decode(a->data, a->len, &ecert) != ASN_OK)
+	if(decode(buf, len, &ecert) != ASN_OK)
 		goto errret;
 
 	c = (CertX509*)emalloc(sizeof(CertX509));
@@ -1766,6 +1867,7 @@
 	c->publickey = nil;
 	c->signature_alg = -1;
 	c->signature = nil;
+	c->ext = nil;
 
 	/* Certificate */
  	if(!is_seq(&ecert, &elcert) || elistlen(elcert) !=3)
@@ -1803,7 +1905,14 @@
  	esubj = &el->hd;
  	el = el->tl;
  	epubkey = &el->hd;
- 	if(!is_int(eserial, &c->serial)) {
+	if(el->tl != nil
+	&& el->tl->hd.tag.class == Context
+	&& el->tl->hd.tag.num == 3
+	&& el->tl->hd.val.tag == VOctets){
+		c->ext = el->tl->hd.val.u.octetsval;
+		el->tl->hd.val.u.octetsval = nil;	/* transfer ownership */
+	}
+	if(!is_int(eserial, &c->serial)) {
 		if(!is_bigint(eserial, &b))
 			goto errret;
 		c->serial = -1;	/* else we have to change cert struct */
@@ -1831,7 +1940,7 @@
 		goto errret;
 
 	/* SubjectPublicKeyInfo */
- 	if(!is_seq(epubkey, &elpubkey))
+	if(!is_seq(epubkey, &elpubkey))
 		goto errret;
 	if(elistlen(elpubkey) != 2)
 		goto errret;
@@ -1839,18 +1948,25 @@
 	c->publickey_alg = parse_alg(&elpubkey->hd);
 	if(c->publickey_alg < 0)
 		goto errret;
-  	if(!is_bitstring(&elpubkey->tl->hd, &bits))
-		goto errret;
-	if(bits->unusedbits != 0)
+	c->curve = -1;
+	if(c->publickey_alg == ALG_ecPublicKey){
+		c->curve = parse_curve(&elpubkey->hd);
+		if(c->curve < 0)
+			goto errret;
+	}
+	elpubkey = elpubkey->tl;
+	if(!is_bitstring(&elpubkey->hd, &bits))
 		goto errret;
- 	c->publickey = makebytes(bits->data, bits->len);
+	elpubkey->hd.val.u.bitstringval = nil;	/* transfer ownership */
+	c->publickey = bits;
 
 	/*resume Certificate */
 	if(c->signature_alg < 0)
 		goto errret;
- 	if(!is_bitstring(esig, &bits))
+	if(!is_bitstring(esig, &bits))
 		goto errret;
- 	c->signature = makebytes(bits->data, bits->len);
+	esig->val.u.bitstringval = nil;	/* transfer ownership */
+	c->signature = bits;
 	ok = 1;
 
 errret:
@@ -1863,45 +1979,38 @@
 }
 
 /*
- *	RSAPublickKey :: SEQUENCE {
+ *	RSAPublickKey ::= SEQUENCE {
  *		modulus INTEGER,
  *		publicExponent INTEGER
  *	}
  */
-static RSApub*
-decode_rsapubkey(Bytes* a)
+RSApub*
+asn1toRSApub(uchar *buf, int len)
 {
 	Elem e;
-	Elist *el, *l;
-	mpint *mp;
+	Elist *el;
 	RSApub* key;
 
-	l = nil;
-	key = rsapuballoc();
-	if(decode(a->data, a->len, &e) != ASN_OK)
+	key = nil;
+	if(decode(buf, len, &e) != ASN_OK)
 		goto errret;
 	if(!is_seq(&e, &el) || elistlen(el) != 2)
 		goto errret;
 
-	l = el;
-
-	key->n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	key = rsapuballoc();
+	if((key->n = asn1mpint(&el->hd)) == nil)
 		goto errret;
-
 	el = el->tl;
-	key->ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->ek = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
-	if(l != nil)
-		freeelist(l);
+	freevalfields(&e.val);
 	return key;
 errret:
-	if(l != nil)
-		freeelist(l);
+	freevalfields(&e.val);
 	rsapubfree(key);
 	return nil;
+
 }
 
 /*
@@ -1916,65 +2025,72 @@
  *		exponent2 INTEGER, -- d mod (q-1)
  *		coefficient INTEGER -- (inverse of q) mod p }
  */
-static RSApriv*
-decode_rsaprivkey(Bytes* a)
+RSApriv*
+asn1toRSApriv(uchar *buf, int len)
 {
 	int version;
 	Elem e;
 	Elist *el;
-	mpint *mp;
-	RSApriv* key;
+	Bytes *b;
+	RSApriv* key = nil;
 
-	key = rsaprivalloc();
-	if(decode(a->data, a->len, &e) != ASN_OK)
+	if(decode(buf, len, &e) != ASN_OK)
 		goto errret;
-	if(!is_seq(&e, &el) || elistlen(el) != 9)
+	if(!is_seq(&e, &el))
 		goto errret;
+
 	if(!is_int(&el->hd, &version) || version != 0)
 		goto errret;
 
+	if(elistlen(el) != 9){
+		if(elistlen(el) == 3
+		&& parse_alg(&el->tl->hd) == ALG_rsaEncryption
+		&& is_octetstring(&el->tl->tl->hd, &b)){
+			key = asn1toRSApriv(b->data, b->len);
+			if(key != nil)
+				goto done;
+		}
+		goto errret;
+	}
+
+	key = rsaprivalloc();
 	el = el->tl;
-	key->pub.n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->pub.n = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->pub.ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->pub.ek = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->dk = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->dk = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->q = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->q = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->p = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->p = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->kq = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->kq = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->kp = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->kp = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
 	el = el->tl;
-	key->c2 = mp = asn1mpint(&el->hd);
-	if(mp == nil)
+	if((key->c2 = asn1mpint(&el->hd)) == nil)
 		goto errret;
 
+done:
+	freevalfields(&e.val);
 	return key;
 errret:
+	freevalfields(&e.val);
 	rsaprivfree(key);
 	return nil;
 }
@@ -2041,56 +2157,6 @@
 	return nil;
 }
 
-static mpint*
-asn1mpint(Elem *e)
-{
-	Bytes *b;
-	mpint *mp;
-	int v;
-
-	if(is_int(e, &v))
-		return itomp(v, nil);
-	if(is_bigint(e, &b)) {
-		mp = betomp(b->data, b->len, nil);
-		freebytes(b);
-		return mp;
-	}
-	return nil;
-}
-
-static mpint*
-pkcs1pad(Bytes *b, mpint *modulus)
-{
-	int n = (mpsignif(modulus)+7)/8;
-	int pm1, i;
-	uchar *p;
-	mpint *mp;
-
-	pm1 = n - 1 - b->len;
-	p = (uchar*)emalloc(n);
-	p[0] = 0;
-	p[1] = 1;
-	for(i = 2; i < pm1; i++)
-		p[i] = 0xFF;
-	p[pm1] = 0;
-	memcpy(&p[pm1+1], b->data, b->len);
-	mp = betomp(p, n, nil);
-	free(p);
-	return mp;
-}
-
-RSApriv*
-asn1toRSApriv(uchar *kd, int kn)
-{
-	Bytes *b;
-	RSApriv *key;
-
-	b = makebytes(kd, kn);
-	key = decode_rsaprivkey(b);
-	freebytes(b);
-	return key;
-}
-
 DSApriv*
 asn1toDSApriv(uchar *kd, int kn)
 {
@@ -2108,219 +2174,246 @@
  * Our ASN.1 library doesn't return pointers into the original
  * data array, so we need to do a little hand decoding.
  */
-static void
-digest_certinfo(Bytes *cert, DigestFun digestfun, uchar *digest)
+static int
+digest_certinfo(uchar *cert, int ncert, DigestAlg *da, uchar *digest)
 {
 	uchar *info, *p, *pend;
-	ulong infolen;
 	int isconstr, length;
 	Tag tag;
 	Elem elem;
 
-	p = cert->data;
-	pend = cert->data + cert->len;
+	p = cert;
+	pend = cert + ncert;
 	if(tag_decode(&p, pend, &tag, &isconstr) != ASN_OK ||
 	   tag.class != Universal || tag.num != SEQUENCE ||
 	   length_decode(&p, pend, &length) != ASN_OK ||
 	   p+length > pend ||
 	   p+length < p)
-		return;
+		return -1;
 	info = p;
 	if(ber_decode(&p, pend, &elem) != ASN_OK)
-		return;
+		return -1;
 	freevalfields(&elem.val);
 	if(elem.tag.num != SEQUENCE)
-		return;
-	infolen = p - info;
-	(*digestfun)(info, infolen, digest, nil);
+		return -1;
+	(*da->fun)(info, p - info, digest, nil);
+	return da->len;
 }
 
-static char*
-verify_signature(Bytes* signature, RSApub *pk, uchar *edigest, Elem **psigalg)
+mpint*
+pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype)
 {
-	Elem e;
-	Elist *el;
+	int i, n = (mpsignif(modulus)-1)/8;
+	int pad = n - 2 - len;
+	uchar *p;
+	mpint *mp;
+
+	if(pad < 8){
+		werrstr("rsa modulus too small");
+		return nil;
+	}
+	if((p = malloc(n)) == nil)
+		return nil;
+	p[0] = blocktype;
+	switch(blocktype){
+	default:
+	case 1:
+		memset(p+1, 0xFF, pad);
+		break;
+	case 2:
+		for(i=1; i <= pad; i++)
+			p[i] = 1 + nfastrand(255);
+		break;
+	}
+	p[1+pad] = 0;
+	memmove(p+2+pad, buf, len);
+	mp = betomp(p, n, nil);
+	free(p);
+	return mp;
+}
+
+int
+pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype)
+{
+	uchar *p = buf + 1, *e = buf + len;
+
+	if(len < 1 || len != (mpsignif(modulus)-1)/8 || buf[0] != blocktype)
+		return -1;
+	switch(blocktype){
+	default:
+	case 1:
+		while(p < e && *p == 0xFF)
+			p++;
+		break;
+	case 2:
+		while(p < e && *p != 0x00)
+			p++;
+		break;
+	}
+	if(p - buf <= 8 || p >= e || *p++ != 0x00)
+		return -1;
+	memmove(buf, p, len = e - p);
+	return len;
+}
+
+static char Ebadsig[] = "bad signature";
+
+char*
+X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk)
+{
+	mpint *x, *y;
+	DigestAlg **dp;
 	Bytes *digest;
-	uchar *pkcs1buf, *buf;
-	int buflen;
-	mpint *pkcs1;
-	int nlen;
+	uchar *buf;
+	int len;
 	char *err;
 
-	err = nil;
-	pkcs1buf = nil;
+	x = betomp(sig, siglen, nil);
+	y = rsaencrypt(pk, x, nil);
+	mpfree(x);
+	len = mptobe(y, nil, 0, &buf);
+	mpfree(y);	
+
+	err = Ebadsig;
+	len = pkcs1unpadbuf(buf, len, pk->n, 1);
+	if(len == edigestlen && tsmemcmp(buf, edigest, edigestlen) == 0)
+		err = nil;
+	for(dp = digestalg; err != nil && *dp != nil; dp++){
+		if((*dp)->len != edigestlen)
+			continue;
+		digest = encode_digest(*dp, edigest);
+		if(digest->len == len && tsmemcmp(digest->data, buf, len) == 0)
+			err = nil;
+		freebytes(digest);
+	}
+	free(buf);
+	return err;
+}
 
-	/* one less than the byte length of the modulus */
-	nlen = (mpsignif(pk->n)-1)/8;
+char*
+X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub)
+{
+	Elem e;
+	Elist *el;
+	mpint *r, *s;
+	char *err;
 
-	/* see 9.2.1 of rfc2437 */
-	pkcs1 = betomp(signature->data, signature->len, nil);
-	mpexp(pkcs1, pk->ek, pk->n, pkcs1);
-	buflen = mptobe(pkcs1, nil, 0, &pkcs1buf);
-	buf = pkcs1buf;
-	if(buflen != nlen || buf[0] != 1) {
-		err = "expected 1";
+	r = s = nil;
+	err = Ebadsig;
+	if(decode(sig, siglen, &e) != ASN_OK)
 		goto end;
-	}
-	buf++;
-	while(buf[0] == 0xff)
-		buf++;
-	if(buf[0] != 0) {
-		err = "expected 0";
+	if(!is_seq(&e, &el) || elistlen(el) != 2)
 		goto end;
-	}
-	buf++;
-	buflen -= buf-pkcs1buf;
-	if(decode(buf, buflen, &e) != ASN_OK || !is_seq(&e, &el) || elistlen(el) != 2 ||
-			!is_octetstring(&el->tl->hd, &digest)) {
-		err = "signature parse error";
+	r = asn1mpint(&el->hd);
+	if(r == nil)
 		goto end;
-	}
-	*psigalg = &el->hd;
-	if(memcmp(digest->data, edigest, digest->len) == 0)
+	el = el->tl;
+	s = asn1mpint(&el->hd);
+	if(s == nil)
 		goto end;
-	err = "digests did not match";
-
+	if(ecdsaverify(dom, pub, edigest, edigestlen, r, s))
+		err = nil;
 end:
-	if(pkcs1 != nil)
-		mpfree(pkcs1);
-	if(pkcs1buf != nil)
-		free(pkcs1buf);
+	freevalfields(&e.val);
+	mpfree(s);
+	mpfree(r);
 	return err;
 }
 
-RSApub*
-X509toRSApub(uchar *cert, int ncert, char *name, int nname)
+static void
+copysubject(char *name, int nname, char *subject)
 {
 	char *e;
-	Bytes *b;
+
+	if(name == nil)
+		return;
+	memset(name, 0, nname);
+	if(subject == nil)
+		return;
+	strncpy(name, subject, nname-1);
+	e = strchr(name, ',');
+	if(e != nil)
+		*e = 0;	/* take just CN part of Distinguished Name */
+}
+
+ECpub*
+X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom)
+{
 	CertX509 *c;
-	RSApub *pk;
+	ECpub *pub;
 
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil)
 		return nil;
-	if(name != nil && c->subject != nil){
-		e = strchr(c->subject, ',');
-		if(e != nil)
-			*e = 0;	/* take just CN part of Distinguished Name */
-		strncpy(name, c->subject, nname);
+	copysubject(name, nname, c->subject);
+	appendaltnames(name, nname, c->ext, 0);
+	pub = nil;
+	if(c->publickey_alg == ALG_ecPublicKey){
+		ecdominit(dom, namedcurves[c->curve]);
+		pub = ecdecodepub(dom, c->publickey->data, c->publickey->len);
+		if(pub == nil)
+			ecdomfree(dom);
 	}
-	pk = decode_rsapubkey(c->publickey);
 	freecert(c);
-	return pk;
+	return pub;
 }
 
-int
-getalgo(Elem *e)
+char*
+X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pk)
 {
-	Value *v;
-	Elist *el;
-	int a;
+	char *e;
+	CertX509 *c;
+	int digestlen;
+	uchar digest[MAXdlen];
 
-	if((a = parse_alg(e)) >= 0)
-		return a;
-	v = &e->val;
-	if(v->tag == VSeq){
-		print("Seq\n");
-		for(el = v->u.seqval; el!=nil; el = el->tl){
-			if((a = getalgo(&el->hd)) >= 0)
-				return a;
-		}
+	c = decode_cert(cert, ncert);
+	if(c == nil)
+		return "cannot decode cert";
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		return "cannot decode certinfo";
 	}
-	return -1;
+	e = X509ecdsaverifydigest(c->signature->data, c->signature->len, digest, digestlen, dom, pk);
+	freecert(c);
+	return e;
 }
 
-static void edump(Elem e);
-
 RSApub*
-asn1toRSApub(uchar *der, int nder)
+X509toRSApub(uchar *cert, int ncert, char *name, int nname)
 {
-	Elem e;
-	Elist *el, *l;
-	int n;
-	Bits *b;
-	RSApub *key;
-	mpint *mp;
+	CertX509 *c;
+	RSApub *pub;
 
-	if(decode(der, nder, &e) != ASN_OK){
-		print("didn't parse\n");
-		return nil;
-	}
-	if(!is_seq(&e, &el)){
-		print("no seq");
-		return nil;
-	}
-	if((n = elistlen(el)) != 2){
-		print("bad length %d\n", n);
-		return nil;
-	}
-	if((n = getalgo(&el->hd)) < 0){
-		print("no algo\n");
-		return nil;
-	}
-	if(n != 0){
-		print("cant do algorithm %d\n", n);
-		return nil;
-	}
-	if(!is_bitstring(&el->tl->hd, &b)){
-		print("no bits\n");
-		return nil;
-	}
-	if(decode(b->data, b->len, &e) != ASN_OK){
-		print("no second decode\n");
-		return nil;
-	}
-	if(!is_seq(&e, &el)){
-		print("no second seq\n");
-		return nil;
-	}
-	if(elistlen(el) != 2){
-		print("no second length\n");
+	c = decode_cert(cert, ncert);
+	if(c == nil)
 		return nil;
-	}
-	key = rsapuballoc();
-
-	l = el;
-
-	key->n = mp = asn1mpint(&el->hd);
-	if(mp == nil)
-		goto errret;
-
-	el = el->tl;
-	key->ek = mp = asn1mpint(&el->hd);
-	if(mp == nil)
-		goto errret;
-
-	if(l != nil)
-		freeelist(l);
-	return key;
-errret:
-	if(l != nil)
-		freeelist(l);
-	rsapubfree(key);
-	return nil;
+	copysubject(name, nname, c->subject);
+	appendaltnames(name, nname, c->ext, 0);
+	pub = nil;
+	if(c->publickey_alg == ALG_rsaEncryption)
+		pub = asn1toRSApub(c->publickey->data, c->publickey->len);
+	freecert(c);
+	return pub;
 }
 
 char*
-X509verify(uchar *cert, int ncert, RSApub *pk)
+X509rsaverify(uchar *cert, int ncert, RSApub *pk)
 {
 	char *e;
-	Bytes *b;
 	CertX509 *c;
-	uchar digest[SHA1dlen];
-	Elem *sigalg;
+	int digestlen;
+	uchar digest[MAXdlen];
 
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	if(c != nil)
-		digest_certinfo(b, digestalg[c->signature_alg], digest);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil)
 		return "cannot decode cert";
-	e = verify_signature(c->signature, pk, digest, &sigalg);
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		return "cannot decode certinfo";
+	}
+	e = X509rsaverifydigest(c->signature->data, c->signature->len, digest, digestlen, pk);
 	freecert(c);
 	return e;
 }
@@ -2353,25 +2446,50 @@
 mkbigint(mpint *p)
 {
 	Elem e;
-	uchar *buf;
-	int buflen;
 
 	e.tag.class = Universal;
 	e.tag.num = INTEGER;
 	e.val.tag = VBigInt;
-	buflen = mptobe(p, nil, 0, &buf);
-	e.val.u.bigintval = makebytes(buf, buflen);
-	free(buf);
+	e.val.u.bigintval = newbytes((mpsignif(p)+8)/8);
+	if(p->sign < 0){
+		mpint *s = mpnew(e.val.u.bigintval->len*8+1);
+		mpleft(mpone, e.val.u.bigintval->len*8, s);
+		mpadd(p, s, s);
+		mptober(s, e.val.u.bigintval->data, e.val.u.bigintval->len);
+		mpfree(s);
+	} else {
+		mptober(p, e.val.u.bigintval->data, e.val.u.bigintval->len);
+	}
 	return e;
 }
 
+static int
+printable(char *s)
+{
+	int c;
+
+	while((c = (uchar)*s++) != 0){
+		if((c >= 'a' && c <= 'z')
+		|| (c >= 'A' && c <= 'Z')
+		|| (c >= '0' && c <= '9')
+		|| strchr("'=()+,-./:? ", c) != nil)
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+#define DirectoryString 0
+
 static Elem
-mkstring(char *s)
+mkstring(char *s, int t)
 {
 	Elem e;
 
+	if(t == DirectoryString)
+		t = printable(s) ? PrintableString : UTF8String;
 	e.tag.class = Universal;
-	e.tag.num = IA5String;
+	e.tag.num = t;
 	e.val.tag = VString;
 	e.val.u.stringval = estrdup(s);
 	return e;
@@ -2411,7 +2529,7 @@
 	e.tag.class = Universal;
 	e.tag.num = UTCTime;
 	e.val.tag = VString;
-	snprint(utc, 50, "%.2d%.2d%.2d%.2d%.2d%.2dZ",
+	snprint(utc, sizeof(utc), "%.2d%.2d%.2d%.2d%.2d%.2dZ",
 		tm->year % 100, tm->mon+1, tm->mday, tm->hour, tm->min, tm->sec);
 	e.val.u.stringval = estrdup(utc);
 	return e;
@@ -2460,24 +2578,26 @@
 }
 
 typedef struct Ints7pref {
-	int		len;
-	int		data[7];
+	int	len;
+	int	data[7];
 	char	prefix[4];
+	int	stype;
 } Ints7pref;
 Ints7pref DN_oid[] = {
-	{4, 2, 5, 4, 6, 0, 0, 0,  "C="},
-	{4, 2, 5, 4, 8, 0, 0, 0,  "ST="},
-	{4, 2, 5, 4, 7, 0, 0, 0,  "L="},
-	{4, 2, 5, 4, 10, 0, 0, 0, "O="},
-	{4, 2, 5, 4, 11, 0, 0, 0, "OU="},
-	{4, 2, 5, 4, 3, 0, 0, 0,  "CN="},
- 	{7, 1,2,840,113549,1,9,1, "E="},
+	{4, 2, 5, 4, 6, 0, 0, 0,        "C=", PrintableString},
+	{4, 2, 5, 4, 8, 0, 0, 0,        "ST=",DirectoryString},
+	{4, 2, 5, 4, 7, 0, 0, 0,        "L=", DirectoryString},
+	{4, 2, 5, 4, 10, 0, 0, 0,       "O=", DirectoryString},
+	{4, 2, 5, 4, 11, 0, 0, 0,       "OU=",DirectoryString},
+	{4, 2, 5, 4, 3, 0, 0, 0,        "CN=",DirectoryString},
+	{7, 1,2,840,113549,1,9,1,       "E=", IA5String},
+	{7, 0,9,2342,19200300,100,1,25,	"DC=",IA5String},
 };
 
 static Elem
 mkname(Ints7pref *oid, char *subj)
 {
-	return mkset(mkel(mkseq(mkel(mkoid((Ints*)oid), mkel(mkstring(subj), nil))), nil));
+	return mkset(mkel(mkseq(mkel(mkoid((Ints*)oid), mkel(mkstring(subj, oid->stype), nil))), nil));
 }
 
 static Elem
@@ -2501,167 +2621,519 @@
 	return mkseq(el);
 }
 
-uchar*
-RSApubtoasn1(RSApub *pub, int *keylen)
+/*
+ * DigestInfo ::= SEQUENCE {
+ *	digestAlgorithm AlgorithmIdentifier,
+ *	digest OCTET STRING }
+ */
+static Bytes*
+encode_digest(DigestAlg *da, uchar *digest)
 {
-	Elem pubkey;
-	Bytes *pkbytes;
-	uchar *key;
+	Bytes *b = nil;
+	Elem e = mkseq(
+		mkel(mkalg(da->alg),
+		mkel(mkoctet(digest, da->len),
+		nil)));
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
 
-	key = nil;
-	pubkey = mkseq(mkel(mkbigint(pub->n),mkel(mkint(mptoi(pub->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
+int
+asn1encodedigest(DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest, uchar *buf, int len)
+{
+	Bytes *bytes;
+	DigestAlg **dp;
+
+	for(dp = digestalg; *dp != nil; dp++){
+		if((*dp)->fun != fun)
+			continue;
+		bytes = encode_digest(*dp, digest);
+		if(bytes == nil)
+			break;
+		if(bytes->len > len){
+			freebytes(bytes);
+			break;
+		}
+		len = bytes->len;
+		memmove(buf, bytes->data, len);
+		freebytes(bytes);
+		return len;
+	}
+	return -1;
+}
+
+static Elem
+mkcont(int num, Elist *l)
+{
+	Elem e = mkseq(l);
+	e.tag.class = Context;
+	e.tag.num = num;
+	return e;
+}
+
+static Elem
+mkaltname(char *s)
+{
+	Elem e;
+	int i;
+
+	for(i=0; i<nelem(DN_oid); i++){
+		if(strstr(s, DN_oid[i].prefix) != nil)
+			return mkcont(4, mkel(mkDN(s), nil)); /* DN */
+	}
+	e = mkstring(s, IA5String);
+	e.tag.class = Context;
+	e.tag.num = strchr(s, '@') != nil ? 1 : 2; /* email : DNS */
+	return e;
+}
+
+static Elist*
+mkaltnames(char *alts)
+{
+	Elist *el;
+	char *s, *p;
+
+	if(alts == nil)
+		return nil;
+
+	el = nil;
+	alts = estrdup(alts);
+	for(s = alts; s != nil; s = p){
+		while(*s == ' ')
+			s++;
+		if(*s == '\0')
+			break;
+		if((p = strchr(s, ',')) != nil)
+			*p++ = 0;
+		el = mkel(mkaltname(s), el);
+	}
+	free(alts);
+	return el;
+}
+
+static Elist*
+mkextel(Elem e, Ints *oid, Elist *el)
+{
+	Bytes *b = nil;
+
+	if(encode(e, &b) == ASN_OK){
+		el = mkel(mkseq(
+			mkel(mkoid(oid),
+			mkel(mkoctet(b->data, b->len),
+			nil))), el);
+		freebytes(b);
+	}
+	freevalfields(&e.val);
+	return el;
+}
+
+static Ints15 oid_subjectAltName = {4, 2, 5, 29, 17 };
+static Ints15 oid_extensionRequest = { 7, 1, 2, 840, 113549, 1, 9, 14};
+
+static Elist*
+mkextensions(char *alts, int isreq)
+{
+	Elist *sl, *xl;
+
+	xl = nil;
+	if((sl = mkaltnames(alts)) != nil)
+		xl = mkextel(mkseq(sl), (Ints*)&oid_subjectAltName, xl);
+	if(xl != nil){
+		xl = mkel(mkseq(xl), nil);
+		if(isreq)
+			xl = mkel(mkseq(
+				mkel(mkoid((Ints*)&oid_extensionRequest),
+				mkel(mkset(xl), nil))), nil);
+	}
+	if(isreq)
+		xl = mkel(mkcont(0, xl), nil);
+	else if(xl != nil)
+		xl = mkel(mkcont(3, xl), nil);
+	return xl;
+}
+
+static char*
+splitalts(char *s)
+{
+	int q;
+
+	for(q = 0; *s != '\0'; s++){
+		if(*s == '\'')
+			q ^= 1;
+		else if(q == 0 && *s == ','){
+			*s++ = 0;
+			return s;
+		}
+	}
+	return nil;
+}
+
+static void
+appendaltnames(char *name, int nname, Bytes *ext, int isreq)
+{
+	Elem eext, ealt, edn;
+	Elist *el, *l;
+	Ints *oid;
+	char *alt, *e;
+	int len;
+
+	if(name == nil || ext == nil)
+		return;
+	if(decode(ext->data, ext->len, &eext) != ASN_OK)
+		return;
+	if(isreq){
+		if(!is_seq(&eext, &el) || elistlen(el) != 2)
+			goto errext;
+		if(!is_oid(&el->hd, &oid) || !ints_eq(oid, (Ints*)&oid_extensionRequest))
+			goto errext;
+		el = el->tl;
+		if(!is_set(&el->hd, &el))
+			goto errext;
+		if(!is_seq(&el->hd, &el))
+			goto errext;
+	} else {
+		if(!is_seq(&eext, &el))
+			goto errext;
+	}
+	for(; el != nil; el = el->tl){
+		if(!is_seq(&el->hd, &l) || elistlen(l) != 2)
+			goto errext;
+		if(!is_oid(&l->hd, &oid) || !ints_eq(oid, (Ints*)&oid_subjectAltName))
+			continue;
+		el = l->tl;
+		break;
+	}
+	if(el == nil)
+		goto errext;
+	if(!is_octetstring(&el->hd, &ext))
+		goto errext;
+	if(decode(ext->data, ext->len, &ealt) != ASN_OK)
+		goto errext;
+	if(!is_seq(&ealt, &el))
+		goto erralt;
+	for(; el != nil; el = el->tl){
+		ext = el->hd.val.u.octetsval;
+		switch(el->hd.tag.num){
+		default:
+			continue;
+		case 1:	/* email */
+		case 2:	/* DNS */
+			if(el->hd.val.tag != VOctets)
+				goto erralt;
+			alt = smprint("%.*s", ext->len, (char*)ext->data);
+			break;
+		case 4:	/* DN */
+			if(el->hd.val.tag != VOctets
+			|| decode(ext->data, ext->len, &edn) != ASN_OK)
+				goto erralt;
+			alt = parse_name(&edn);
+			freevalfields(&edn.val);
+			break;
+		}
+		if(alt == nil)
+			goto erralt;
+		/* take just CN part of Distinguished Name */
+		if((e = strchr(alt, ',')) != nil)
+			*e = '\0';
+		len = strlen(alt);
+		if(strncmp(name, alt, len) == 0 && strchr(",", name[len]) != nil){
+			free(alt);	/* same as the subject */
+			continue;
+		}
+		if(name[0] != '\0')
+			strncat(name, ", ", nname-1);
+		strncat(name, alt, nname-1);
+		free(alt);
+	}
+erralt:
+	freevalfields(&ealt.val);
+errext:
+	freevalfields(&eext.val);
+}
+	
+static Bytes*
+encode_rsapubkey(RSApub *pk)
+{
+	Bytes *b = nil;
+	Elem e = mkseq(
+		mkel(mkbigint(pk->n),
+		mkel(mpsignif(pk->ek)<32 ? mkint(mptoi(pk->ek)) : mkbigint(pk->ek),
 		nil)));
-	freebytes(pkbytes);
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	if(keylen)
-		*keylen = pkbytes->len;
-	key = malloc(pkbytes->len);
-	memmove(key, pkbytes->data, pkbytes->len);
-	free(pkbytes);
-errret:
-	freevalfields(&pubkey.val);
-	return key;
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
+
+static Bytes*
+encode_rsaprivkey(RSApriv *k)
+{
+	Bytes *b = nil;
+	RSApub *pk = &k->pub;
+	Elem e = mkseq(
+		mkel(mkint(0),
+		mkel(mkbigint(pk->n),
+		mkel(mpsignif(pk->ek)<32 ? mkint(mptoi(pk->ek)) : mkbigint(pk->ek),
+		mkel(mkbigint(k->dk),
+		mkel(mkbigint(k->p),
+		mkel(mkbigint(k->q),
+		mkel(mkbigint(k->kp),
+		mkel(mkbigint(k->kq),
+		mkel(mkbigint(k->c2),
+		nil))))))))));
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
+
+int
+asn1encodeRSApub(RSApub *pk, uchar *buf, int len)
+{
+	Bytes *b = encode_rsapubkey(pk);
+	if(b == nil)
+		return -1;
+	if(b->len > len){
+		freebytes(b);
+		werrstr("buffer too small");
+		return -1;
+	}
+	memmove(buf, b->data, len = b->len);
+	freebytes(b);
+	return len;
+}
+
+int
+asn1encodeRSApriv(RSApriv *k, uchar *buf, int len)
+{
+	Bytes *b;
+	b = encode_rsaprivkey(k);
+	if(b == nil)
+		return -1;
+	if(b->len > len){
+		freebytes(b);
+		werrstr("buffer too small");
+		return -1;
+	}
+	memmove(buf, b->data, len = b->len);
+	freebytes(b);
+	return len;
 }
 
 uchar*
-X509gen(RSApriv *priv, char *subj, ulong valid[2], int *certlen)
+X509rsagen(RSApriv *priv, char *subj, ulong valid[2], int *certlen)
 {
-	int serial = 0;
+	int serial = 0, sigalg = ALG_sha256WithRSAEncryption;
 	uchar *cert = nil;
-	RSApub *pk = rsaprivtopub(priv);
 	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
-	Elem e, certinfo, issuer, subject, pubkey, validity, sig;
-	uchar digest[SHA2_256dlen], *buf;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
 	int buflen;
 	mpint *pkcs1;
+	char *alts;
 
-	e.val.tag = VInt;  /* so freevalfields at errret is no-op */
-	issuer = mkDN(subj);
-	subject = mkDN(subj);
-	pubkey = mkseq(mkel(mkbigint(pk->n),mkel(mkint(mptoi(pk->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
-		nil)));
-	freebytes(pkbytes);
-	validity = mkseq(
-		mkel(mkutc(valid[0]),
-		mkel(mkutc(valid[1]),
-		nil)));
-	certinfo = mkseq(
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
+		mkel(mkcont(0, mkel(mkint(2), nil)),
 		mkel(mkint(serial),
-		mkel(mkalg(ALG_sha256WithRSAEncryption),
-		mkel(issuer,
-		mkel(validity,
-		mkel(subject,
-		mkel(pubkey,
-		nil)))))));
-	if(encode(certinfo, &certinfobytes) != ASN_OK)
+		mkel(mkalg(sigalg),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkutc(valid[0]),
+			mkel(mkutc(valid[1]),
+			nil))),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 0)))))))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
 		goto errret;
-	sha2_256(certinfobytes->data, certinfobytes->len, digest, 0);
+
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
 	freebytes(certinfobytes);
-	sig = mkseq(
-		mkel(mkalg(ALG_sha256),
-		mkel(mkoctet(digest, SHA2_256dlen),
-		nil)));
-	if(encode(sig, &sigbytes) != ASN_OK)
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
 		goto errret;
-	pkcs1 = pkcs1pad(sigbytes, pk->n);
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
 	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
 	rsadecrypt(priv, pkcs1, pkcs1);
 	buflen = mptobe(pkcs1, nil, 0, &buf);
 	mpfree(pkcs1);
 	e = mkseq(
 		mkel(certinfo,
-		mkel(mkalg(ALG_sha256WithRSAEncryption),
+		mkel(mkalg(sigalg),
 		mkel(mkbits(buf, buflen),
 		nil))));
 	free(buf);
 	if(encode(e, &certbytes) != ASN_OK)
 		goto errret;
-	if(certlen)
+	if(certlen != nil)
 		*certlen = certbytes->len;
-	cert = certbytes->data;
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
 errret:
 	freevalfields(&e.val);
+	free(subj);
 	return cert;
 }
 
 uchar*
-X509req(RSApriv *priv, char *subj, int *certlen)
+X509rsareq(RSApriv *priv, char *subj, int *certlen)
 {
 	/* RFC 2314, PKCS #10 Certification Request Syntax */
-	int version = 0;
+	int version = 0, sigalg = ALG_sha256WithRSAEncryption;
 	uchar *cert = nil;
-	RSApub *pk = rsaprivtopub(priv);
 	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
-	Elem e, certinfo, subject, pubkey, sig;
-	uchar digest[SHA2_256dlen], *buf;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
 	int buflen;
 	mpint *pkcs1;
+	char *alts;
 
-	e.val.tag = VInt;  /* so freevalfields at errret is no-op */
-	subject = mkDN(subj);
-	pubkey = mkseq(mkel(mkbigint(pk->n),mkel(mkint(mptoi(pk->ek)),nil)));
-	if(encode(pubkey, &pkbytes) != ASN_OK)
-		goto errret;
-	freevalfields(&pubkey.val);
-	pubkey = mkseq(
-		mkel(mkalg(ALG_rsaEncryption),
-		mkel(mkbits(pkbytes->data, pkbytes->len),
-		nil)));
-	freebytes(pkbytes);
-	certinfo = mkseq(
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
 		mkel(mkint(version),
-		mkel(subject,
-		mkel(pubkey,
-		nil))));
-	if(encode(certinfo, &certinfobytes) != ASN_OK)
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 1)))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
 		goto errret;
-	sha2_256(certinfobytes->data, certinfobytes->len, digest, 0);
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
 	freebytes(certinfobytes);
-	sig = mkseq(
-		mkel(mkalg(ALG_sha256),
-		mkel(mkoctet(digest, SHA2_256dlen),
-		nil)));
-	if(encode(sig, &sigbytes) != ASN_OK)
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
 		goto errret;
-	pkcs1 = pkcs1pad(sigbytes, pk->n);
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
 	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
 	rsadecrypt(priv, pkcs1, pkcs1);
 	buflen = mptobe(pkcs1, nil, 0, &buf);
 	mpfree(pkcs1);
 	e = mkseq(
 		mkel(certinfo,
-		mkel(mkalg(ALG_sha256),
+		mkel(mkalg(sigalg),
 		mkel(mkbits(buf, buflen),
 		nil))));
 	free(buf);
 	if(encode(e, &certbytes) != ASN_OK)
 		goto errret;
-	if(certlen)
+	if(certlen != nil)
 		*certlen = certbytes->len;
-	cert = certbytes->data;
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
 errret:
 	freevalfields(&e.val);
+	free(subj);
 	return cert;
 }
 
+RSApub*
+X509reqtoRSApub(uchar *req, int nreq, char *name, int nname)
+{
+	Elem ereq;
+	Elist *el;
+	char *subject;
+	Bits *bits;
+	RSApub *pub;
+
+	pub = nil;
+	if(decode(req, nreq, &ereq) != ASN_OK)
+		goto errret;
+	if(!is_seq(&ereq, &el) || elistlen(el) != 3)
+		goto errret;
+	if(!is_seq(&el->hd, &el) || elistlen(el) < 3)
+		goto errret;
+ 	el = el->tl;
+	subject = parse_name(&el->hd);
+	if(subject == nil)
+		goto errret;
+	copysubject(name, nname, subject);
+	free(subject);
+	el = el->tl;
+	if(el->tl != nil
+	&& el->tl->hd.tag.class == Context
+	&& el->tl->hd.tag.num == 0
+	&& el->tl->hd.val.tag == VOctets)
+		appendaltnames(name, nname, el->tl->hd.val.u.octetsval, 1);
+	if(!is_seq(&el->hd, &el) || elistlen(el) != 2)
+		goto errret;
+	if(parse_alg(&el->hd) != ALG_rsaEncryption)
+		goto errret;
+	el = el->tl;
+	if(!is_bitstring(&el->hd, &bits))
+		goto errret;
+	pub = asn1toRSApub(bits->data, bits->len);
+	if(pub == nil)
+		goto errret;
+errret:
+	freevalfields(&ereq.val);
+	return pub;
+}
+
+static void
+digestSPKI(int alg, uchar *pubkey, int npubkey, DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest)
+{
+	Bytes *b = nil;
+	Elem e = mkseq(mkel(mkalg(alg), mkel(mkbits(pubkey, npubkey), nil)));
+	encode(e, &b);
+	freevalfields(&e.val);
+	(*fun)(b->data, b->len, digest, nil);
+	freebytes(b);
+}
+
+int
+X509digestSPKI(uchar *cert, int ncert, DigestState* (*fun)(uchar*, ulong, uchar*, DigestState*), uchar *digest)
+{
+	CertX509 *c;
+
+	c = decode_cert(cert, ncert);
+	if(c == nil){
+		werrstr("cannot decode cert");
+		return -1;
+	}
+	digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, fun, digest);
+	freecert(c);
+	return 0;
+}
+
 static char*
 tagdump(Tag tag)
 {
-	if(tag.class != Universal)
-		return smprint("class%d,num%d", tag.class, tag.num);
+	static char buf[32];
+
+	if(tag.class != Universal){
+		snprint(buf, sizeof(buf), "class%d,num%d", tag.class, tag.num);
+		return buf;
+	}
 	switch(tag.num){
 	case BOOLEAN: return "BOOLEAN";
 	case INTEGER: return "INTEGER";
@@ -2690,7 +3162,8 @@
 	case UniversalString: return "UniversalString";
 	case BMPString: return "BMPString";
 	default:
-		return smprint("Universal,num%d", tag.num);
+		snprint(buf, sizeof(buf), "Universal,num%d", tag.num);
+		return buf;
 	}
 }
 
@@ -2710,10 +3183,7 @@
 	case VBigInt: print("BigInt[%d] %.2x%.2x...",v.u.bigintval->len,v.u.bigintval->data[0],v.u.bigintval->data[1]); break;
 	case VReal: print("Real..."); break;
 	case VOther: print("Other..."); break;
-	case VBitString: print("BitString");
-		for(i = 0; i<v.u.bitstringval->len; i++)
-			print(" %02x", v.u.bitstringval->data[i]);
-		break;
+	case VBitString: print("BitString[%d]...", v.u.bitstringval->len*8 - v.u.bitstringval->unusedbits); break;
 	case VNull: print("Null"); break;
 	case VEOC: print("EOC..."); break;
 	case VObjId: print("ObjId");
@@ -2749,20 +3219,24 @@
 X509dump(uchar *cert, int ncert)
 {
 	char *e;
-	Bytes *b;
 	CertX509 *c;
-	RSApub *pk;
-	uchar digest[SHA1dlen];
-	Elem *sigalg;
+	RSApub *rsapub;
+	ECpub *ecpub;
+	ECdomain ecdom;
+	int digestlen;
+	uchar digest[MAXdlen];
 
 	print("begin X509dump\n");
-	b = makebytes(cert, ncert);
-	c = decode_cert(b);
-	if(c != nil)
-		digest_certinfo(b, digestalg[c->signature_alg], digest);
-	freebytes(b);
+	c = decode_cert(cert, ncert);
 	if(c == nil){
-		print("cannot decode cert");
+		print("cannot decode cert\n");
+		return;
+	}
+
+	digestlen = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(digestlen <= 0){
+		freecert(c);
+		print("cannot decode certinfo\n");
 		return;
 	}
 
@@ -2770,20 +3244,47 @@
 	print("issuer %s\n", c->issuer);
 	print("validity %s %s\n", c->validity_start, c->validity_end);
 	print("subject %s\n", c->subject);
-	pk = decode_rsapubkey(c->publickey);
-	print("pubkey e=%B n(%d)=%B\n", pk->ek, mpsignif(pk->n), pk->n);
-
-	print("sigalg=%d digest=%.*H\n", c->signature_alg, SHA2_256dlen, digest);
-	e = verify_signature(c->signature, pk, digest, &sigalg);
-	if(e==nil){
-		e = "nil (meaning ok)";
-		print("sigalg=\n");
-		if(sigalg)
-			edump(*sigalg);
+	print("sigalg=%d digest=%.*H\n", c->signature_alg, digestlen, digest);
+	print("publickey_alg=%d pubkey[%d] %.*H\n", c->publickey_alg, c->publickey->len,
+		c->publickey->len, c->publickey->data);
+
+	switch(c->publickey_alg){
+	case ALG_rsaEncryption:
+		rsapub = asn1toRSApub(c->publickey->data, c->publickey->len);
+		if(rsapub != nil){
+			print("rsa pubkey e=%B n(%d)=%B\n", rsapub->ek, mpsignif(rsapub->n), rsapub->n);
+			e = X509rsaverifydigest(c->signature->data, c->signature->len,
+				digest, digestlen, rsapub);
+			if(e==nil)
+				e = "nil (meaning ok)";
+			print("self-signed X509rsaverifydigest returns: %s\n", e);
+			rsapubfree(rsapub);
+		}
+		break;
+	case ALG_ecPublicKey:
+		ecdominit(&ecdom, namedcurves[c->curve]);
+		ecpub = ecdecodepub(&ecdom, c->publickey->data, c->publickey->len);
+		if(ecpub != nil){
+			e = X509ecdsaverifydigest(c->signature->data, c->signature->len,
+				digest, digestlen, &ecdom, ecpub);
+			if(e==nil)
+				e = "nil (meaning ok)";
+			print("self-signed X509ecdsaverifydigest returns: %s\n", e);
+			ecpubfree(ecpub);
+		}
+		ecdomfree(&ecdom);
+		break;
 	}
-	print("self-signed verify_signature returns: %s\n", e);
 
-	rsapubfree(pk);
+	digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, sha2_256, digest);
+	print("publickey_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha2_256(cert, ncert, digest, nil);
+	print("cert_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha1(cert, ncert, digest, nil);
+	print("cert_thumbprint sha1=%.*H\n", SHA1dlen, digest);
+
 	freecert(c);
 	print("end X509dump\n");
 }