ref: b4d00e3be37d68ea5976a23ee15d1d35c8ffa89a
parent: a50be99567d1b6018a0a1f019a51c0bc7a0f11ff
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Jan 15 14:02:22 EST 2017
libsec: sync with 9front
--- a/include/libsec.h
+++ b/include/libsec.h
@@ -89,6 +89,7 @@
ChachaBsize= 64,
ChachaKeylen= 256/8,
ChachaIVlen= 96/8,
+ XChachaIVlen= 192/8,
};
typedef struct Chachastate Chachastate;
@@ -103,6 +104,7 @@
u32int iv[3];
};
};
+ u32int xkey[8];
int rounds;
int ivwords;
};
@@ -113,6 +115,8 @@
void chacha_encrypt(uchar*, ulong, Chachastate*);
void chacha_encrypt2(uchar*, uchar*, ulong, Chachastate*);
+void hchacha(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds); /* writes 32 bytes to h derived from key and the 16-byte nonce; rounds == 0 selects 20 */
+
void ccpoly_encrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
int ccpoly_decrypt(uchar *dat, ulong ndat, uchar *aad, ulong naad, uchar tag[16], Chachastate *cs);
@@ -131,7 +135,7 @@
struct Salsastate
{
u32int input[16];
- u32int key[8];
+ u32int xkey[8];
int rounds;
int ivwords;
};
@@ -142,6 +146,8 @@
void salsa_encrypt(uchar*, ulong, Salsastate*);
void salsa_encrypt2(uchar*, uchar*, ulong, Salsastate*);
+void salsa_core(u32int in[16], u32int out[16], int rounds); /* out[i] = in[i] + (in after the salsa rounds)[i]; in and out may be the same array */
+
void hsalsa(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds);
/*
@@ -485,7 +491,7 @@
int inf;
mpint *x;
mpint *y;
- mpint *z;
+ mpint *z; /* nil when using affine coordinates */
} ECpoint;
typedef ECpoint ECpub;
@@ -563,6 +569,11 @@
/* password-based key derivation function 2 (rfc2898) */
void pbkdf2_x(uchar *p, ulong plen, uchar *s, ulong slen, ulong rounds, uchar *d, ulong dlen,
DigestState* (*x)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*), int xlen);
+
+/* scrypt password-based key derivation function: derives dlen bytes into d from password p and salt s; returns nil on success, otherwise an error string */
+char* scrypt(uchar *p, ulong plen, uchar *s, ulong slen,
+ ulong N, ulong R, ulong P, /* N: cpu/memory cost (power of two >= 2), R: block size, P: parallelization */
+ uchar *d, ulong dlen);
/* hmac-based key derivation function (rfc5869) */
void hkdf_x(uchar *salt, ulong nsalt, uchar *info, ulong ninfo, uchar *key, ulong nkey, uchar *d, ulong dlen,
--- a/libsec/Makefile
+++ b/libsec/Makefile
@@ -58,6 +58,7 @@
rsagen.$O\
rsaprivtopub.$O\
salsa.$O\
+ scrypt.$O\
secp256k1.$O\
secp256r1.$O\
secp384r1.$O\
--- a/libsec/chacha.c
+++ b/libsec/chacha.c
@@ -15,13 +15,13 @@
};
/* little-endian data order */
-#define GET4(p) ((((((p)[3]<<8) | (p)[2])<<8) | (p)[1])<<8 | (p)[0])
-#define PUT4(p, v) (((p)[0]=v), (v>>=8), ((p)[1]=v), (v>>=8), ((p)[2]=v), (v>>=8), ((p)[3]=v))
+#define GET4(p) ((u32int)(p)[0]|((u32int)(p)[1]<<8)|((u32int)(p)[2]<<16)|((u32int)(p)[3]<<24)) /* read 4 bytes little-endian; widen before shifting so a byte >= 0x80 never overflows int */
+#define PUT4(p,v) do{(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24;}while(0) /* write v as 4 bytes little-endian; one statement, safe under unbraced if/else */
#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c))))
#define QUARTERROUND(ia,ib,ic,id) { \
- u32int a, b, c, d, t;\
+ u32int a, b, c, d, t; \
a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \
a += b; t = d^a; d = ROTATE(t,16); \
c += d; t = b^c; b = ROTATE(t,12); \
@@ -32,12 +32,9 @@
#define ENCRYPT(s, x, y, d) {\
u32int v; \
- uchar *sp, *dp; \
- sp = (s); \
- v = GET4(sp); \
+ v = GET4(s); \
v ^= (x)+(y); \
- dp = (d); \
- PUT4(dp, v); \
+ PUT4(d, v); \
}
static uchar sigma[16] = "expand 32-byte k";
@@ -57,7 +54,8 @@
{
if(keylen != 256/8 && keylen != 128/8)
sysfatal("invalid chacha key length");
- if(ivlen != 96/8 && ivlen != 64/8)
+ if(ivlen != 64/8 && ivlen != 96/8
+ && ivlen != 128/8 && ivlen != 192/8) /* hchacha, xchacha */
sysfatal("invalid chacha iv length");
if(rounds == 0)
rounds = 20;
@@ -70,7 +68,16 @@
load(&s->input[4], key, 4);
load(&s->input[8], key, 4);
}
- s->ivwords = ivlen/sizeof(u32int);
+ s->xkey[0] = s->input[4];
+ s->xkey[1] = s->input[5];
+ s->xkey[2] = s->input[6];
+ s->xkey[3] = s->input[7];
+ s->xkey[4] = s->input[8];
+ s->xkey[5] = s->input[9];
+ s->xkey[6] = s->input[10];
+ s->xkey[7] = s->input[11];
+
+ s->ivwords = ivlen/4;
s->input[12] = 0;
s->input[13] = 0;
if(iv == nil){
@@ -80,9 +87,88 @@
chacha_setiv(s, iv);
}
+static void /* apply the ChaCha rounds to the 16-word state x, in place */
+dorounds(u32int x[Blockwords], int rounds)
+{
+ for(; rounds > 0; rounds -= 2) { /* two rounds per pass, so an odd count is rounded up */
+ QUARTERROUND(0, 4, 8,12) /* first round: the four columns of the 4x4 word matrix */
+ QUARTERROUND(1, 5, 9,13)
+ QUARTERROUND(2, 6,10,14)
+ QUARTERROUND(3, 7,11,15)
+ /* second round: the four diagonals */
+ QUARTERROUND(0, 5,10,15)
+ QUARTERROUND(1, 6,11,12)
+ QUARTERROUND(2, 7, 8,13)
+ QUARTERROUND(3, 4, 9,14)
+ }
+}
+
+static void /* run the rounds on a copy of s->input and write words 0-3 and 12-15 of the result to h, little-endian */
+hchachablock(uchar h[32], Chachastate *s)
+{
+ u32int x[16];
+ /* work on a copy so s->input itself is not modified */
+ x[0] = s->input[0];
+ x[1] = s->input[1];
+ x[2] = s->input[2];
+ x[3] = s->input[3];
+ x[4] = s->input[4];
+ x[5] = s->input[5];
+ x[6] = s->input[6];
+ x[7] = s->input[7];
+ x[8] = s->input[8];
+ x[9] = s->input[9];
+ x[10] = s->input[10];
+ x[11] = s->input[11];
+ x[12] = s->input[12];
+ x[13] = s->input[13];
+ x[14] = s->input[14];
+ x[15] = s->input[15];
+ /* unlike encryptblock, the input words are not added back after the rounds */
+ dorounds(x, s->rounds);
+ /* only 8 of the 16 words are output: 0-3, then 12-15 */
+ PUT4(h+0*4, x[0]);
+ PUT4(h+1*4, x[1]);
+ PUT4(h+2*4, x[2]);
+ PUT4(h+3*4, x[3]);
+ PUT4(h+4*4, x[12]);
+ PUT4(h+5*4, x[13]);
+ PUT4(h+6*4, x[14]);
+ PUT4(h+7*4, x[15]);
+}
+
void /* install iv into the state; iv must hold s->ivwords 32-bit words */
chacha_setiv(Chachastate *s, uchar *iv)
{
+ if(s->ivwords == 192/32){
+ /* xchacha with 192-bit iv */
+ u32int counter[2];
+ uchar h[32];
+ /* start from the key saved in xkey: an earlier call may have replaced input[4..11] with a subkey */
+ s->input[4] = s->xkey[0];
+ s->input[5] = s->xkey[1];
+ s->input[6] = s->xkey[2];
+ s->input[7] = s->xkey[3];
+ s->input[8] = s->xkey[4];
+ s->input[9] = s->xkey[5];
+ s->input[10] = s->xkey[6];
+ s->input[11] = s->xkey[7];
+ /* save the block counter; input[12..13] are about to be overwritten */
+ counter[0] = s->input[12];
+ counter[1] = s->input[13];
+ /* the first 16 iv bytes go to input[12..15] for the hchacha step */
+ load(&s->input[12], iv, 4);
+ /* derive the 32-byte subkey and make it the cipher key in input[4..11] */
+ hchachablock(h, s);
+ load(&s->input[4], h, 8);
+ memset(h, 0, 32); /* wipe the subkey copy on the stack */
+ /* put the block counter back */
+ s->input[12] = counter[0];
+ s->input[13] = counter[1];
+ /* the remaining 8 iv bytes go to input[14..15] */
+ load(&s->input[14], iv+16, 2);
+ return;
+ }
load(&s->input[16 - s->ivwords], iv, s->ivwords); /* other iv sizes: iv fills the last ivwords words of the state */
}
@@ -90,7 +176,7 @@
chacha_setblock(Chachastate *s, u64int blockno)
{
s->input[12] = blockno;
- if(s->ivwords == 2)
+ if(s->ivwords != 3)
s->input[13] = blockno>>32;
}
@@ -98,9 +184,8 @@
encryptblock(Chachastate *s, uchar *src, uchar *dst)
{
u32int x[Blockwords];
- int i, rounds;
+ int i;
- rounds = s->rounds;
x[0] = s->input[0];
x[1] = s->input[1];
x[2] = s->input[2];
@@ -117,37 +202,8 @@
x[13] = s->input[13];
x[14] = s->input[14];
x[15] = s->input[15];
+ dorounds(x, s->rounds);
- for(i = rounds; i > 0; i -= 2) {
- QUARTERROUND(0, 4, 8,12)
- QUARTERROUND(1, 5, 9,13)
- QUARTERROUND(2, 6,10,14)
- QUARTERROUND(3, 7,11,15)
-
- QUARTERROUND(0, 5,10,15)
- QUARTERROUND(1, 6,11,12)
- QUARTERROUND(2, 7, 8,13)
- QUARTERROUND(3, 4, 9,14)
- }
-
-#ifdef FULL_UNROLL
- ENCRYPT(src+0*4, x[0], s->input[0], dst+0*4);
- ENCRYPT(src+1*4, x[1], s->input[1], dst+1*4);
- ENCRYPT(src+2*4, x[2], s->input[2], dst+2*4);
- ENCRYPT(src+3*4, x[3], s->input[3], dst+3*4);
- ENCRYPT(src+4*4, x[4], s->input[4], dst+4*4);
- ENCRYPT(src+5*4, x[5], s->input[5], dst+5*4);
- ENCRYPT(src+6*4, x[6], s->input[6], dst+6*4);
- ENCRYPT(src+7*4, x[7], s->input[7], dst+7*4);
- ENCRYPT(src+8*4, x[8], s->input[8], dst+8*4);
- ENCRYPT(src+9*4, x[9], s->input[9], dst+9*4);
- ENCRYPT(src+10*4, x[10], s->input[10], dst+10*4);
- ENCRYPT(src+11*4, x[11], s->input[11], dst+11*4);
- ENCRYPT(src+12*4, x[12], s->input[12], dst+12*4);
- ENCRYPT(src+13*4, x[13], s->input[13], dst+13*4);
- ENCRYPT(src+14*4, x[14], s->input[14], dst+14*4);
- ENCRYPT(src+15*4, x[15], s->input[15], dst+15*4);
-#else
for(i=0; i<nelem(x); i+=4){
ENCRYPT(src, x[i], s->input[i], dst);
ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
@@ -156,9 +212,8 @@
src += 16;
dst += 16;
}
-#endif
- if(++s->input[12] == 0 && s->ivwords == 2)
+ if(++s->input[12] == 0 && s->ivwords != 3)
s->input[13]++;
}
@@ -183,4 +238,14 @@
chacha_encrypt(uchar *buf, ulong bytes, Chachastate *s)
{
chacha_encrypt2(buf, buf, bytes, s);
+}
+
+void /* write 32 bytes to h, derived from key and the 16-byte nonce; setupChachastate aborts via sysfatal on a bad key length */
+hchacha(uchar h[32], uchar *key, ulong keylen, uchar nonce[16], int rounds)
+{
+ Chachastate s[1];
+ /* iv length 16 gives ivwords == 4, so the nonce fills input[12..15] */
+ setupChachastate(s, key, keylen, nonce, 16, rounds);
+ hchachablock(h, s);
+ memset(s, 0, sizeof(s)); /* clear key material from the temporary state */
}
--- a/libsec/salsa.c
+++ b/libsec/salsa.c
@@ -1,24 +1,17 @@
#include "os.h"
#include <libsec.h>
-enum{
- Blockwords= SalsaBsize/sizeof(u32int)
-};
-
/* little-endian data order */
-#define GET4(p) ((((((p)[3]<<8) | (p)[2])<<8) | (p)[1])<<8 | (p)[0])
-#define PUT4(p, v) (((p)[0]=v), (v>>=8), ((p)[1]=v), (v>>=8), ((p)[2]=v), (v>>=8), ((p)[3]=v))
+#define GET4(p) ((u32int)(p)[0]|((u32int)(p)[1]<<8)|((u32int)(p)[2]<<16)|((u32int)(p)[3]<<24)) /* read 4 bytes little-endian; widen before shifting so a byte >= 0x80 never overflows int */
+#define PUT4(p,v) do{(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24;}while(0) /* write v as 4 bytes little-endian; one statement, safe under unbraced if/else */
#define ROTATE(v,c) (t = v, (u32int)(t << (c)) | (t >> (32 - (c))))
#define ENCRYPT(s, x, y, d) {\
u32int v; \
- uchar *sp, *dp; \
- sp = (s); \
- v = GET4(sp); \
+ v = GET4(s); \
v ^= (x)+(y); \
- dp = (d); \
- PUT4(dp, v); \
+ PUT4(d, v); \
}
static uchar sigma[16] = "expand 32-byte k";
@@ -38,7 +31,8 @@
{
if(keylen != 256/8 && keylen != 128/8)
sysfatal("invalid salsa key length");
- if(ivlen != 64/8 && ivlen != 128/8 && ivlen != 192/8)
+ if(ivlen != 64/8
+ && ivlen != 128/8 && ivlen != 192/8) /* hsalsa, xsalsa */
sysfatal("invalid salsa iv length");
if(rounds == 0)
rounds = 20;
@@ -58,14 +52,14 @@
load(&s->input[11], key, 4);
load(&s->input[15], tau +4*3, 1);
}
- s->key[0] = s->input[1];
- s->key[1] = s->input[2];
- s->key[2] = s->input[3];
- s->key[3] = s->input[4];
- s->key[4] = s->input[11];
- s->key[5] = s->input[12];
- s->key[6] = s->input[13];
- s->key[7] = s->input[14];
+ s->xkey[0] = s->input[1];
+ s->xkey[1] = s->input[2];
+ s->xkey[2] = s->input[3];
+ s->xkey[3] = s->input[4];
+ s->xkey[4] = s->input[11];
+ s->xkey[5] = s->input[12];
+ s->xkey[6] = s->input[13];
+ s->xkey[7] = s->input[14];
s->ivwords = ivlen/4;
s->input[8] = 0;
@@ -78,30 +72,11 @@
}
static void
-hsalsablock(uchar h[32], Salsastate *s)
+dorounds(u32int x[16], int rounds)
{
- u32int x[Blockwords], t;
- int i, rounds;
+ u32int t;
- rounds = s->rounds;
- x[0] = s->input[0];
- x[1] = s->input[1];
- x[2] = s->input[2];
- x[3] = s->input[3];
- x[4] = s->input[4];
- x[5] = s->input[5];
- x[6] = s->input[6];
- x[7] = s->input[7];
- x[8] = s->input[8];
- x[9] = s->input[9];
- x[10] = s->input[10];
- x[11] = s->input[11];
- x[12] = s->input[12];
- x[13] = s->input[13];
- x[14] = s->input[14];
- x[15] = s->input[15];
-
- for(i = rounds; i > 0; i -= 2) {
+ for(; rounds > 0; rounds -= 2) {
x[4] ^= ROTATE( x[0]+x[12], 7);
x[8] ^= ROTATE( x[4]+ x[0], 9);
x[12] ^= ROTATE( x[8]+ x[4],13);
@@ -135,7 +110,32 @@
x[14] ^= ROTATE(x[13]+x[12],13);
x[15] ^= ROTATE(x[14]+x[13],18);
}
+}
+static void
+hsalsablock(uchar h[32], Salsastate *s)
+{
+ u32int x[16];
+
+ x[0] = s->input[0];
+ x[1] = s->input[1];
+ x[2] = s->input[2];
+ x[3] = s->input[3];
+ x[4] = s->input[4];
+ x[5] = s->input[5];
+ x[6] = s->input[6];
+ x[7] = s->input[7];
+ x[8] = s->input[8];
+ x[9] = s->input[9];
+ x[10] = s->input[10];
+ x[11] = s->input[11];
+ x[12] = s->input[12];
+ x[13] = s->input[13];
+ x[14] = s->input[14];
+ x[15] = s->input[15];
+
+ dorounds(x, s->rounds);
+
PUT4(h+0*4, x[0]);
PUT4(h+1*4, x[5]);
PUT4(h+2*4, x[10]);
@@ -150,7 +150,7 @@
salsa_setiv(Salsastate *s, uchar *iv)
{
if(s->ivwords == 128/32){
- /* hsalsa 128-bit iv */
+ /* hsalsa with 128-bit iv */
load(&s->input[6], iv, 4);
return;
}
@@ -162,14 +162,14 @@
counter[0] = s->input[8];
counter[1] = s->input[9];
- s->input[1] = s->key[0];
- s->input[2] = s->key[1];
- s->input[3] = s->key[2];
- s->input[4] = s->key[3];
- s->input[11] = s->key[4];
- s->input[12] = s->key[5];
- s->input[13] = s->key[6];
- s->input[14] = s->key[7];
+ s->input[1] = s->xkey[0];
+ s->input[2] = s->xkey[1];
+ s->input[3] = s->xkey[2];
+ s->input[4] = s->xkey[3];
+ s->input[11] = s->xkey[4];
+ s->input[12] = s->xkey[5];
+ s->input[13] = s->xkey[6];
+ s->input[14] = s->xkey[7];
load(&s->input[6], iv, 4);
@@ -197,10 +197,9 @@
static void
encryptblock(Salsastate *s, uchar *src, uchar *dst)
{
- u32int x[Blockwords], t;
- int i, rounds;
+ u32int x[16];
+ int i;
- rounds = s->rounds;
x[0] = s->input[0];
x[1] = s->input[1];
x[2] = s->input[2];
@@ -218,59 +217,8 @@
x[14] = s->input[14];
x[15] = s->input[15];
- for(i = rounds; i > 0; i -= 2) {
- x[4] ^= ROTATE( x[0]+x[12], 7);
- x[8] ^= ROTATE( x[4]+ x[0], 9);
- x[12] ^= ROTATE( x[8]+ x[4],13);
- x[0] ^= ROTATE(x[12]+ x[8],18);
- x[9] ^= ROTATE( x[5]+ x[1], 7);
- x[13] ^= ROTATE( x[9]+ x[5], 9);
- x[1] ^= ROTATE(x[13]+ x[9],13);
- x[5] ^= ROTATE( x[1]+x[13],18);
- x[14] ^= ROTATE(x[10]+ x[6], 7);
- x[2] ^= ROTATE(x[14]+x[10], 9);
- x[6] ^= ROTATE( x[2]+x[14],13);
- x[10] ^= ROTATE( x[6]+ x[2],18);
- x[3] ^= ROTATE(x[15]+x[11], 7);
- x[7] ^= ROTATE( x[3]+x[15], 9);
- x[11] ^= ROTATE( x[7]+ x[3],13);
- x[15] ^= ROTATE(x[11]+ x[7],18);
- x[1] ^= ROTATE( x[0]+ x[3], 7);
- x[2] ^= ROTATE( x[1]+ x[0], 9);
- x[3] ^= ROTATE( x[2]+ x[1],13);
- x[0] ^= ROTATE( x[3]+ x[2],18);
- x[6] ^= ROTATE( x[5]+ x[4], 7);
- x[7] ^= ROTATE( x[6]+ x[5], 9);
- x[4] ^= ROTATE( x[7]+ x[6],13);
- x[5] ^= ROTATE( x[4]+ x[7],18);
- x[11] ^= ROTATE(x[10]+ x[9], 7);
- x[8] ^= ROTATE(x[11]+x[10], 9);
- x[9] ^= ROTATE( x[8]+x[11],13);
- x[10] ^= ROTATE( x[9]+ x[8],18);
- x[12] ^= ROTATE(x[15]+x[14], 7);
- x[13] ^= ROTATE(x[12]+x[15], 9);
- x[14] ^= ROTATE(x[13]+x[12],13);
- x[15] ^= ROTATE(x[14]+x[13],18);
- }
+ dorounds(x, s->rounds);
-#ifdef FULL_UNROLL
- ENCRYPT(src+0*4, x[0], s->input[0], dst+0*4);
- ENCRYPT(src+1*4, x[1], s->input[1], dst+1*4);
- ENCRYPT(src+2*4, x[2], s->input[2], dst+2*4);
- ENCRYPT(src+3*4, x[3], s->input[3], dst+3*4);
- ENCRYPT(src+4*4, x[4], s->input[4], dst+4*4);
- ENCRYPT(src+5*4, x[5], s->input[5], dst+5*4);
- ENCRYPT(src+6*4, x[6], s->input[6], dst+6*4);
- ENCRYPT(src+7*4, x[7], s->input[7], dst+7*4);
- ENCRYPT(src+8*4, x[8], s->input[8], dst+8*4);
- ENCRYPT(src+9*4, x[9], s->input[9], dst+9*4);
- ENCRYPT(src+10*4, x[10], s->input[10], dst+10*4);
- ENCRYPT(src+11*4, x[11], s->input[11], dst+11*4);
- ENCRYPT(src+12*4, x[12], s->input[12], dst+12*4);
- ENCRYPT(src+13*4, x[13], s->input[13], dst+13*4);
- ENCRYPT(src+14*4, x[14], s->input[14], dst+14*4);
- ENCRYPT(src+15*4, x[15], s->input[15], dst+15*4);
-#else
for(i=0; i<nelem(x); i+=4){
ENCRYPT(src, x[i], s->input[i], dst);
ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
@@ -279,7 +227,6 @@
src += 16;
dst += 16;
}
-#endif
if(++s->input[8] == 0)
s->input[9]++;
@@ -306,6 +253,48 @@
salsa_encrypt(uchar *buf, ulong bytes, Salsastate *s)
{
salsa_encrypt2(buf, buf, bytes, s);
+}
+
+void /* out = in + rounds(in), word by word; rounds is passed unchanged to dorounds */
+salsa_core(u32int in[16], u32int out[16], int rounds)
+{
+ u32int x[16];
+ /* work on a copy: in[] is still needed for the final addition */
+ x[0] = in[0];
+ x[1] = in[1];
+ x[2] = in[2];
+ x[3] = in[3];
+ x[4] = in[4];
+ x[5] = in[5];
+ x[6] = in[6];
+ x[7] = in[7];
+ x[8] = in[8];
+ x[9] = in[9];
+ x[10] = in[10];
+ x[11] = in[11];
+ x[12] = in[12];
+ x[13] = in[13];
+ x[14] = in[14];
+ x[15] = in[15];
+ /* mix the copy in place */
+ dorounds(x, rounds);
+ /* each out[i] reads only in[i], so out may be the same array as in */
+ out[0] = x[0] + in[0];
+ out[1] = x[1] + in[1];
+ out[2] = x[2] + in[2];
+ out[3] = x[3] + in[3];
+ out[4] = x[4] + in[4];
+ out[5] = x[5] + in[5];
+ out[6] = x[6] + in[6];
+ out[7] = x[7] + in[7];
+ out[8] = x[8] + in[8];
+ out[9] = x[9] + in[9];
+ out[10] = x[10] + in[10];
+ out[11] = x[11] + in[11];
+ out[12] = x[12] + in[12];
+ out[13] = x[13] + in[13];
+ out[14] = x[14] + in[14];
+ out[15] = x[15] + in[15];
}
void
--- /dev/null
+++ b/libsec/scrypt.c
@@ -1,0 +1,119 @@
+#include "os.h"
+#include <libsec.h>
+
+#define movw(w, S, D) memmove(D, S, (w)*4) /* copy w 4-byte words from S to D; note the source-first argument order */
+
+static void /* D[i] ^= S[i] for w words; source first, destination second */
+xorw(ulong w, u32int *S, u32int *D)
+{
+ for(w /= 8; w; w--, D += 8, S += 8){ /* 8 words per pass; a remainder of w%8 words would be left untouched */
+ D[0] ^= S[0];
+ D[1] ^= S[1];
+ D[2] ^= S[2];
+ D[3] ^= S[3];
+ D[4] ^= S[4];
+ D[5] ^= S[5];
+ D[6] ^= S[6];
+ D[7] ^= S[7];
+ }
+}
+
+static void /* mix the 2*R 16-word blocks of B into Y using 8-round salsa_core; B is read after Y is written, so they must not overlap */
+scryptBlockMix(ulong R, u32int *B, u32int *Y)
+{
+ u32int X[16];
+ ulong i;
+
+ R *= 2; /* from here on R counts 16-word (64-byte) blocks */
+ movw(16, &B[(R-1)*16], X); /* the chain starts from the last block of B */
+ for(i = 0; i < R; i += 2){ /* two blocks per pass */
+ xorw(16, &B[i*16], X);
+ salsa_core(X, X, 8);
+ movw(16, X, &Y[i*8]); /* even-numbered results are packed into the first half of Y */
+
+ xorw(16, &B[(i+1)*16], X);
+ salsa_core(X, X, 8);
+ movw(16, X, &Y[i*8 + R*8]); /* odd-numbered results are packed into the second half of Y */
+ }
+}
+
+static void /* transform the R*128-byte block at B in place; V needs room for N blocks of R*32 words, X for two */
+scryptROMix(ulong R, ulong N, u32int *V, u32int *X, uchar *B)
+{
+ ulong w, i, d;
+ u32int *Y;
+
+ w = R*32; /* words per block */
+ for(i=0; i<w; i++, B+=4) /* decode B as little-endian words; widen each byte before shifting so B[3]<<24 cannot overflow int */
+ X[i] = (u32int)B[0] | ((u32int)B[1]<<8) | ((u32int)B[2]<<16) | ((u32int)B[3]<<24);
+
+ Y = &X[w]; /* the second half of X receives the scryptBlockMix output */
+ for(i=0; i<N; i += 2){ /* fill V with successive states; X and Y swap roles, so two steps per pass (assumes N is even) */
+ movw(w, X, &V[i*w]);
+ scryptBlockMix(R, X, Y);
+
+ movw(w, Y, &V[(i+1)*w]);
+ scryptBlockMix(R, Y, X);
+ }
+ for(i=0; i<N; i += 2){ /* N data-dependent lookups into V */
+ xorw(w, &V[(X[w-16] & (N-1))*w], X); /* index: first word of the last 16-word block, masked by N-1 (assumes N is a power of two) */
+ scryptBlockMix(R, X, Y);
+
+ xorw(w, &V[(Y[w-16] & (N-1))*w], Y);
+ scryptBlockMix(R, Y, X);
+ }
+
+ B -= w*4; /* rewind to the start of the block */
+ for(i=0; i<w; i++, B+=4) /* encode the result back into B, little-endian */
+ d = X[i], B[0]=d, B[1]=d>>8, B[2]=d>>16, B[3]=d>>24;
+}
+
+char* /* derive dlen bytes into d from password p and salt s; returns nil on success, otherwise a constant error string */
+scrypt(uchar *p, ulong plen, uchar *s, ulong slen,
+ ulong N, ulong R, ulong P,
+ uchar *d, ulong dlen)
+{
+ static char oom[] = "out of memory";
+
+ ulong rb, i;
+ u32int *V, *X;
+ uchar *B;
+
+ if(P < 1)
+ return "invalid parallelization parameter P";
+ if(R < 1 || R >= (1UL<<(31-7))/P) /* keeps P*R*128, the size of B, below 2^31 */
+ return "invalid block size parameter R";
+ if(N < 2 || (N & (N-1)) != 0 || N >= (1UL<<(31-7))/R) /* N must be a power of two; keeps N*R*128, the size of V, below 2^31 */
+ return "invalid cpu/memory cost parameter N";
+
+ rb = R<<7; /* bytes per block: 128*R */
+ if((B = malloc(P*rb)) == nil) /* P blocks, each mixed separately */
+ return oom;
+ if((V = malloc(N*rb)) == nil){ /* lookup table used by scryptROMix */
+ free(B);
+ return oom;
+ }
+ if((X = malloc(2*rb)) == nil){ /* working block plus scryptBlockMix output */
+ free(V);
+ free(B);
+ return oom;
+ }
+
+ pbkdf2_x(p, plen, s, slen, 1, B, P*rb, hmac_sha2_256, SHA2_256dlen); /* expand password and salt into B, one round */
+
+ for(i=0; i<P; i++) /* V and X are reused for every block */
+ scryptROMix(R, N, V, X, &B[i*rb]);
+
+ memset(X, 0, 2*rb); /* NOTE(review): an optimizing compiler may drop a memset that directly precedes free */
+ free(X);
+
+ memset(V, 0, N*rb);
+ free(V);
+
+ pbkdf2_x(p, plen, B, P*rb, 1, d, dlen, hmac_sha2_256, SHA2_256dlen); /* the mixed B serves as the salt for the final key */
+
+ memset(B, 0, P*rb);
+ free(B);
+
+ return nil;
+}