shithub: mc

Download patch

ref: a2644d1d980e0f1c5439dce7764f0d051cf4cc41
parent: 330b2e6a59402d3a290629f409d77c2692973f3c
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Jan 6 18:13:53 EST 2019

Add start of x25519 code.

	Not yet tested, so not enabled.

--- /dev/null
+++ b/lib/crypto/x25519.myr
@@ -1,0 +1,642 @@
+/* Copyright 2008, Google Inc.
+ * Translated to Myrddin by Ori Bernstein in 2018
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * curve25519: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+
+use std
+
+pkg crypto =
+	const fdiff : (out : felem[:], in : felem[:] -> void)
+;;
+
+type felem = uint64
+
+/* Sum two numbers: out += in */
+const fsum = {out, in
+	for var i = 0; i < 10; i += 2
+		out[0 + i] = out[0 + i] + in[0 + i]
+		out[1 + i] = out[1 + i] + in[1 + i]
+	;;
+}
+
+/* Find the difference of two numbers: out = in - out
+ * (note the order of the arguments!)
+ */
+const fdiff = {out, in
+	for var i = 0; i < 10; i++
+		out[i] = (in[i] - out[i])
+	;;
+}
+
+/* Multiply a number my a scalar: out = in * scalar */
+const fscalarproduct = {out, in, scalar
+	for var i = 0; i < 10; i++
+		out[i] = in[i] * scalar
+	;;
+}
+
+/* Multiply two numbers: out = in2 * in
+ *
+ * out must be distinct to both ins. The ins are reduced coefficient
+ * form, the out is not.
+ */
+const fproduct = {out, in, in2
+	out[0] =	in2[0] * in[0]
+	out[1] =	in2[0] * in[1] + \
+			in2[1] * in[0]
+	out[2] =	2 * in2[1] * in[1] + \
+			in2[0] * in[2] + \
+			in2[2] * in[0]
+	out[3] =	in2[1] * in[2] + \
+			in2[2] * in[1] + \
+			in2[0] * in[3] + \
+			in2[3] * in[0]
+	out[4] =	in2[2] * in[2] + \
+	             2 * (in2[1] * in[3] + \
+			in2[3] * in[1]) + \
+			in2[0] * in[4] + \
+			in2[4] * in[0]
+	out[5] =	in2[2] * in[3] + \
+			in2[3] * in[2] + \
+			in2[1] * in[4] + \
+			in2[4] * in[1] + \
+			in2[0] * in[5] + \
+			in2[5] * in[0]
+	out[6] =	2 * (in2[3] * in[3] + \
+			in2[1] * in[5] + \
+			in2[5] * in[1]) + \
+			in2[2] * in[4] + \
+			in2[4] * in[2] + \
+			in2[0] * in[6] + \
+			in2[6] * in[0]
+	out[7] =	in2[3] * in[4] + \
+			in2[4] * in[3] + \
+			in2[2] * in[5] + \
+			in2[5] * in[2] + \
+			in2[1] * in[6] + \
+			in2[6] * in[1] + \
+			in2[0] * in[7] + \
+			in2[7] * in[0]
+	out[8] =	in2[4] * in[4] + \
+	             2 * (in2[3] * in[5] + \
+			in2[5] * in[3] + \
+			in2[1] * in[7] + \
+			in2[7] * in[1]) + \
+			in2[2] * in[6] + \
+			in2[6] * in[2] + \
+			in2[0] * in[8] + \
+			in2[8] * in[0]
+	out[9] =	in2[4] * in[5] + \
+			in2[5] * in[4] + \
+			in2[3] * in[6] + \
+			in2[6] * in[3] + \
+			in2[2] * in[7] + \
+			in2[7] * in[2] + \
+			in2[1] * in[8] + \
+			in2[8] * in[1] + \
+			in2[0] * in[9] + \
+			in2[9] * in[0]
+	out[10] =	2 * (in2[5] * in[5] + \
+			in2[3] * in[7] + \
+			in2[7] * in[3] + \
+			in2[1] * in[9] + \
+			in2[9] * in[1]) + \
+			in2[4] * in[6] + \
+			in2[6] * in[4] + \
+			in2[2] * in[8] + \
+			in2[8] * in[2]
+	out[11] =	in2[5] * in[6] + \
+			in2[6] * in[5] + \
+			in2[4] * in[7] + \
+			in2[7] * in[4] + \
+			in2[3] * in[8] + \
+			in2[8] * in[3] + \
+			in2[2] * in[9] + \
+			in2[9] * in[2]
+	out[12] =	in2[6] * in[6] + \
+	             2 * (in2[5] * in[7] + \
+			in2[7] * in[5] + \
+			in2[3] * in[9] + \
+			in2[9] * in[3]) + \
+			in2[4] * in[8] + \
+			in2[8] * in[4]
+	out[13] =	in2[6] * in[7] + \
+			in2[7] * in[6] + \
+			in2[5] * in[8] + \
+			in2[8] * in[5] + \
+			in2[4] * in[9] + \
+			in2[9] * in[4]
+	out[14] =	2 * (in2[7] * in[7] + \
+			in2[5] * in[9] + \
+			in2[9] * in[5]) + \
+			in2[6] * in[8] + \
+			in2[8] * in[6]
+	out[15] =	in2[7] * in[8] + \
+			in2[8] * in[7] + \
+			in2[6] * in[9] + \
+			in2[9] * in[6]
+	out[16] =	in2[8] * in[8] + \
+	             2 * (in2[7] * in[9] + \
+			in2[9] * in[7])
+	out[17] =	in2[8] * in[9] + \
+			in2[9] * in[8]
+	out[18] =	2 * in2[9] * in[9]
+}
+
+
+/* Reduce a long form to a short form by taking the input mod 2^255 - 19. */
+const freducedegree= {out
+	out[8] += 19 * out[18];
+	out[7] += 19 * out[17];
+	out[6] += 19 * out[16];
+	out[5] += 19 * out[15];
+	out[4] += 19 * out[14];
+	out[3] += 19 * out[13];
+	out[2] += 19 * out[12];
+	out[1] += 19 * out[11];
+	out[0] += 19 * out[10];
+}
+
+/* Reduce all coeff of the short form in to be -2**25 <= x <= 2**25
+ */
+const freducecoeff = {out
+	var over, over2
+
+	while true
+		out[10] = 0
+
+		for var i = 0; i < 10; i += 2
+			over = out[i] / (0x2000000l : felem)
+			over2 = (over + ((over >> 63) * 2) + 1) / 2
+			out[i+1] += over2
+			out[i] -= over2 * (0x4000000l : felem)
+
+			over = out[i+1] / 0x2000000
+			out[i+2] += over
+			out[i+1] -= over * 0x2000000
+		;;
+		out[0] += 19 * out[10]
+		if out[10] == 0
+			break
+		;;
+	;;
+}
+
+/* A helpful wrapper around fproduct: out = in * in2.
+ *
+ * out must be distinct to both ins. The out is reduced degree and
+ * reduced coefficient.
+ */
+const fmul = {out, in, in2
+	var t : felem[19]
+
+	fproduct(t[:], in, in2)
+	freducedegree(t[:])
+	freducecoeff(t[:])
+	std.slcp(out, t[:10])
+}
+
+const fsquareinner = {out, in
+	var tmp : felem
+
+	out[0] =	in[0] * in[0]
+	out[1] =	2 * in[0] * in[1]
+	out[2] =	2 * (in[1] * in[1] + \
+			     in[0] * in[2])
+	out[3] =	2 * (in[1] * in[2] + \
+			     in[0] * in[3])
+	out[4] =	in[2] * in[2] + \
+			4 * in[1] * in[3] + \
+			2 * in[0] * in[4]
+	out[5] =	2 * (in[2] * in[3] + \
+			     in[1] * in[4] + \
+			     in[0] * in[5])
+	out[6] =	2 * (in[3] * in[3] + \
+			     in[2] * in[4] + \
+			     in[0] * in[6] + \
+			     2 * in[1] * in[5])
+	out[7] =	2 * (in[3] * in[4] + \
+			     in[2] * in[5] + \
+		     	in[1] * in[6] + \
+		     	in[0] * in[7])
+	tmp = in[1] * in[7] + in[3] * in[5]
+	out[8] =	in[4] * in[4] + \
+			2 * (in[2] * in[6] + \
+		     	     in[0] * in[8] + \
+		     	     2 * tmp)
+	out[9] =	2 * (in[4] * in[5] + \
+		     	in[3] * in[6] + \
+		     	in[2] * in[7] + \
+		     	in[1] * in[8] + \
+		     	in[0] * in[9])
+	tmp = 		in[3] * in[7] + in[1] * in[9]
+	out[10] =	2 * (in[5] * in[5] + \
+		             in[4] * in[6] + \
+		             in[2] * in[8] + \
+			     2 * tmp)
+	out[11] =	2 * (in[5] * in[6] + \
+		    	     in[4] * in[7] + \
+		     	     in[3] * in[8] + \
+			     in[2] * in[9])
+	out[12] =	in[6] * in[6] + \
+			2 * (in[4] * in[8] + \
+			     2 * (in[5] * in[7] + \
+		     	          in[3] * in[9]))
+	out[13] = 	2 * (in[6] * in[7] + \
+			     in[5] * in[8] + \
+			     in[4] * in[9])
+	out[14] =	2 * (in[7] * in[7] + \
+			in[6] * in[8] + \
+			2 * in[5] * in[9])
+	out[15] =	2 * (in[7] * in[8] + \
+			in[6] * in[9])
+	out[16] =	in[8] * in[8] + \
+			4 * in[7] * in[9]
+	out[17] =	2 * in[8] * in[9]
+	out[18] =	2 * in[9] * in[9]
+}
+
+const fsquare = {out, in
+	var t : felem[19]
+	fsquareinner(t[:], in)
+	freducedegree(t[:])
+	freducecoeff(t[:])
+	std.slcp(out, t[:10])
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+const fexpand = {out, in
+	/*
+	 * #define F(n,start,shift,mask) \
+	 * 	out[n] = (((in[start + 0] : felem) | \
+	 * 		(in[start + 1] : felem) << 8 | \
+	 * 		(in[start + 2] : felem) << 16 | \
+	 * 		(in[start + 3] : felem) << 24) >> shift) & mask
+	 * 	F(0, 0, 0, 0x3ffffff)
+	 * 	F(1, 3, 2, 0x1ffffff)
+	 * 	F(2, 6, 3, 0x3ffffff)
+	 * 	F(3, 9, 5, 0x1ffffff)
+	 * 	F(4, 12, 6, 0x3ffffff)
+	 * 	F(5, 16, 0, 0x1ffffff)
+	 * 	F(6, 19, 1, 0x3ffffff)
+	 * 	F(7, 22, 3, 0x1ffffff)
+	 * 	F(8, 25, 4, 0x3ffffff)
+	 * 	F(9, 28, 6, 0x1ffffff)
+	 * #undef F
+	 */
+	out[0] = (((in[0 + 0] : felem) | (in[0 + 1] : felem) << 8 | (in[0 + 2] : felem) << 16 | (in[0 + 3] : felem) << 24) >> 0) & 0x3ffffff
+	out[1] = (((in[3 + 0] : felem) | (in[3 + 1] : felem) << 8 | (in[3 + 2] : felem) << 16 | (in[3 + 3] : felem) << 24) >> 2) & 0x1ffffff
+	out[2] = (((in[6 + 0] : felem) | (in[6 + 1] : felem) << 8 | (in[6 + 2] : felem) << 16 | (in[6 + 3] : felem) << 24) >> 3) & 0x3ffffff
+	out[3] = (((in[9 + 0] : felem) | (in[9 + 1] : felem) << 8 | (in[9 + 2] : felem) << 16 | (in[9 + 3] : felem) << 24) >> 5) & 0x1ffffff
+	out[4] = (((in[12 + 0] : felem) | (in[12 + 1] : felem) << 8 | (in[12 + 2] : felem) << 16 | (in[12 + 3] : felem) << 24) >> 6) & 0x3ffffff
+	out[5] = (((in[16 + 0] : felem) | (in[16 + 1] : felem) << 8 | (in[16 + 2] : felem) << 16 | (in[16 + 3] : felem) << 24) >> 0) & 0x1ffffff
+	out[6] = (((in[19 + 0] : felem) | (in[19 + 1] : felem) << 8 | (in[19 + 2] : felem) << 16 | (in[19 + 3] : felem) << 24) >> 1) & 0x3ffffff
+	out[7] = (((in[22 + 0] : felem) | (in[22 + 1] : felem) << 8 | (in[22 + 2] : felem) << 16 | (in[22 + 3] : felem) << 24) >> 3) & 0x1ffffff
+	out[8] = (((in[25 + 0] : felem) | (in[25 + 1] : felem) << 8 | (in[25 + 2] : felem) << 16 | (in[25 + 3] : felem) << 24) >> 4) & 0x3ffffff
+	out[9] = (((in[28 + 0] : felem) | (in[28 + 1] : felem) << 8 | (in[28 + 2] : felem) << 16 | (in[28 + 3] : felem) << 24) >> 6) & 0x1ffffff
+
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ */
+const fcontract = {out, in
+	while true
+		for var i = 0; i < 9; ++i
+			if (i & 1) == 1
+				while in[i] < 0
+					in[i] += 0x2000000
+					in[i + 1]--
+				;;
+			else
+				while in[i] < 0
+					in[i] += 0x4000000
+					in[i + 1]--
+				;;
+			;;
+		;;
+		while in[9] < 0
+			in[9] += 0x2000000
+			in[0] -= 19
+		;;
+		if  in[0] >= 0
+			break
+		;;
+	;;
+
+	in[1] <<= 2
+	in[2] <<= 3
+	in[3] <<= 5
+	in[4] <<= 6
+	in[6] <<= 1
+	in[7] <<= 3
+	in[8] <<= 4
+	in[9] <<= 6
+	/*
+	 * #define F(i, s) \
+	 * 	out[s+0] |=  in[i] & 0xff; \
+	 * 	out[s+1]  = (in[i] >> 8) & 0xff; \
+	 * 	out[s+2]  = (in[i] >> 16) & 0xff; \
+	 * 	out[s+3]  = (in[i] >> 24) & 0xff
+	 * 	out[0] = 0
+	 * 	out[16] = 0
+	 * 	F(0,0)
+	 * 	F(1,3)
+	 * 	F(2,6)
+	 * 	F(3,9)
+	 * 	F(4,12)
+	 * 	F(5,16)
+	 * 	F(6,19)
+	 * 	F(7,22)
+	 * 	F(8,25)
+	 * 	F(9,28)
+	 * #undef F
+	 */
+	out[0] = 0
+	out[16] = 0
+	out[ 0 + 0] |= (in[0] : byte); out[ 0 +1] = (in[0] >> 8 : byte); out[ 0 +2] = (in[0] >> 16 : byte); out[ 0 +3] = (in[0] >> 24 : byte)
+	out[ 3 + 0] |= (in[1] : byte); out[ 3 +1] = (in[1] >> 8 : byte); out[ 3 +2] = (in[1] >> 16 : byte); out[ 3 +3] = (in[1] >> 24 : byte)
+	out[ 6 + 0] |= (in[2] : byte); out[ 6 +1] = (in[2] >> 8 : byte); out[ 6 +2] = (in[2] >> 16 : byte); out[ 6 +3] = (in[2] >> 24 : byte)
+	out[ 9 + 0] |= (in[3] : byte); out[ 9 +1] = (in[3] >> 8 : byte); out[ 9 +2] = (in[3] >> 16 : byte); out[ 9 +3] = (in[3] >> 24 : byte)
+	out[12 + 0] |= (in[4] : byte); out[12 +1] = (in[4] >> 8 : byte); out[12 +2] = (in[4] >> 16 : byte); out[12 +3] = (in[4] >> 24 : byte)
+	out[16 + 0] |= (in[5] : byte); out[16 +1] = (in[5] >> 8 : byte); out[16 +2] = (in[5] >> 16 : byte); out[16 +3] = (in[5] >> 24 : byte)
+	out[19 + 0] |= (in[6] : byte); out[19 +1] = (in[6] >> 8 : byte); out[19 +2] = (in[6] >> 16 : byte); out[19 +3] = (in[6] >> 24 : byte)
+	out[22 + 0] |= (in[7] : byte); out[22 +1] = (in[7] >> 8 : byte); out[22 +2] = (in[7] >> 16 : byte); out[22 +3] = (in[7] >> 24 : byte)
+	out[25 + 0] |= (in[8] : byte); out[25 +1] = (in[8] >> 8 : byte); out[25 +2] = (in[8] >> 16 : byte); out[25 +3] = (in[8] >> 24 : byte)
+	out[28 + 0] |= (in[9] : byte); out[28 +1] = (in[9] >> 8 : byte); out[28 +2] = (in[9] >> 16 : byte); out[28 +3] = (in[9] >> 24 : byte)
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z3: long form, out 2Q
+ *   x3 z3: long form, out Q + Q'
+ *   x z: short form, destroyed, in Q
+ *   xprime zprime: short form, destroyed, in Q'
+ *   qmqp: short form, preserved, in Q - Q'
+ */
+const fmonty = {x2, z2, x3, z3, x, z, xprime, zprime, qmqp
+	var origx : felem[10]
+	var origxprime : felem[10]
+	var zzz : felem [19]
+	var xx : felem[19]
+	var zz : felem[19]
+	var xxprime : felem[19]
+	var zzprime : felem[19]
+	var zzzprime : felem[19]
+	var xxxprime : felem[19]
+
+	std.slcp(origx[:], x[:10])
+	fsum(x, z)
+	fdiff(z, origx[:]);  // does x - z
+
+	std.slcp(origxprime[:], xprime[:10])
+	fsum(xprime, zprime)
+	fdiff(zprime, origxprime[:])
+	fproduct(xxprime[:], xprime, z)
+	fproduct(zzprime[:], x, zprime)
+	freducedegree(xxprime[:])
+	freducecoeff(xxprime[:])
+	freducedegree(zzprime[:])
+	freducecoeff(zzprime[:])
+	std.slcp(origxprime[:], xxprime[:10])
+	fsum(xxprime[:], zzprime[:])
+	fdiff(zzprime[:], origxprime[:])
+	fsquare(xxxprime[:], xxprime[:])
+	fsquare(zzzprime[:], zzprime[:])
+	fproduct(zzprime[:], zzzprime[:], qmqp)
+	freducedegree(zzprime[:])
+	freducecoeff(zzprime[:])
+	std.slcp(x3, xxxprime[:10])
+	std.slcp(z3, zzprime[:10])
+
+	fsquare(xx[:], x)
+	fsquare(zz[:], z)
+	fproduct(x2, xx[:], zz[:])
+	freducedegree(x2)
+	freducecoeff(x2)
+	fdiff(zz[:], xx[:]);  // does zz = xx - zz
+	std.slfill(zzz[10:], 0)
+	fscalarproduct(zzz, zz, 121665)
+	freducedegree(zzz[:])
+	freducecoeff(zzz[:])
+	fsum(zzz[:], xx[:])
+	fproduct(z2, zz[:], zzz[:])
+	freducedegree(z2)
+	freducecoeff(z2)
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+const cmult = {resultx, resultz, n, q
+	var a : felem[19] = [0:0, 18:0]
+	var b : felem[19] = [0:1, 18:0]
+	var c : felem[19] = [0:1, 18:0]
+	var d : felem[19] = [0:0, 18:0]
+	var e : felem[19] = [0:0, 18:0]
+	var f : felem[19] = [0:1, 18:0]
+	var g : felem[19] = [0:0, 18:0]
+	var h : felem[19] = [0:1, 18:0]
+	var nqpqx = a[:]
+	var nqpqz = b[:]
+	var nqx = c[:]
+	var nqz = d[:]
+	var nqpqx2 = e[:]
+	var nqpqz2 = f[:]
+	var nqx2 = g[:]
+	var nqz2 = h[:]
+	var t
+
+	std.slcp(nqpqx[:10], q[:10])
+	for var i = 0; i < 32; ++i
+		var byte = n[31 - i]
+		for var j = 0; j < 8; ++j
+			if byte & 0x80 != 0
+				fmonty(nqpqx2, nqpqz2,
+				    nqx2, nqz2,
+				    nqpqx, nqpqz,
+				    nqx, nqz,
+				    q)
+			else
+				fmonty(nqx2, nqz2,
+				    nqpqx2, nqpqz2,
+				    nqx, nqz,
+				    nqpqx, nqpqz,
+				    q)
+			;;
+
+			t = nqx
+			nqx = nqx2
+			nqx2 = t
+			t = nqz
+			nqz = nqz2
+			nqz2 = t
+			t = nqpqx
+			nqpqx = nqpqx2
+			nqpqx2 = t
+			t = nqpqz
+			nqpqz = nqpqz2
+			nqpqz2 = t
+
+			byte <<= 1
+		;;
+	;;
+
+	std.slcp(resultx, nqx[:10])
+	std.slcp(resultz, nqz[:10])
+}
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+const crecip = {out, z
+	var z2 : felem[10]
+	var z9 : felem[10]
+	var z11 : felem[10]
+	var z2_5_0 : felem[10]
+	var z2_10_0 : felem[10]
+	var z2_20_0 : felem[10]
+	var z2_50_0 : felem[10]
+	var z2_100_0 : felem[10]
+	var t0 : felem[10]
+	var t1 : felem[10]
+	var i
+
+	/* 2 */ fsquare(z2[:], z[:])
+	/* 4 */ fsquare(t1[:], z2[:])
+	/* 8 */ fsquare(t0[:], t1[:])
+	/* 9 */ fmul(z9[:] ,t0[:], z[:])
+	/* 11 */ fmul(z11[:], z9[:], z2[:])
+	/* 22 */ fsquare(t0[:], z11[:])
+	/* 2^5 - 2^0 = 31 */ fmul(z2_5_0[:], t0[:], z9[:])
+
+	/* 2^6 - 2^1 */ fsquare(t0[:], z2_5_0[:])
+	/* 2^7 - 2^2 */ fsquare(t1[:], t0[:])
+	/* 2^8 - 2^3 */ fsquare(t0[:], t1[:])
+	/* 2^9 - 2^4 */ fsquare(t1[:], t0[:])
+	/* 2^10 - 2^5 */ fsquare(t0[:],t1[:])
+	/* 2^10 - 2^0 */ fmul(z2_10_0[:], t0[:], z2_5_0[:])
+
+	/* 2^11 - 2^1 */ fsquare(t0[:], z2_10_0[:])
+	/* 2^12 - 2^2 */ fsquare(t1[:], t0[:])
+	/* 2^20 - 2^10 */
+	for i = 2;i < 10;i += 2
+		fsquare(t0[:],t1[:])
+		fsquare(t1[:],t0[:])
+	;;
+	/* 2^20 - 2^0 */ fmul(z2_20_0[:], t1[:], z2_10_0[:])
+
+	/* 2^21 - 2^1 */ fsquare(t0[:], z2_20_0[:])
+	/* 2^22 - 2^2 */ fsquare(t1[:], t0[:])
+	/* 2^40 - 2^20 */
+	for var i = 2;i < 20;i += 2
+		fsquare(t0[:], t1[:])
+		fsquare(t1[:], t0[:])
+	;;
+	/* 2^40 - 2^0 */ fmul(t0[:], t1[:], z2_20_0[:])
+
+	/* 2^41 - 2^1 */ fsquare(t1[:],t0[:])
+	/* 2^42 - 2^2 */ fsquare(t0[:],t1[:])
+	/* 2^50 - 2^10 */
+	for var i = 2;i < 10;i += 2
+		fsquare(t1[:],t0[:])
+		fsquare(t0[:],t1[:])
+	;;
+	/* 2^50 - 2^0 */ fmul(z2_50_0[:], t0[:], z2_10_0[:])
+
+	/* 2^51 - 2^1 */ fsquare(t0[:], z2_50_0[:])
+	/* 2^52 - 2^2 */ fsquare(t1[:], t0[:])
+	/* 2^100 - 2^50 */
+	for i = 2;i < 50;i += 2
+		fsquare(t0[:],t1[:])
+		fsquare(t1[:],t0[:])
+	;;
+	/* 2^100 - 2^0 */ fmul(z2_100_0[:], t1[:], z2_50_0[:])
+
+	/* 2^101 - 2^1 */ fsquare(t1[:], z2_100_0[:])
+	/* 2^102 - 2^2 */ fsquare(t0[:], t1[:])
+	/* 2^200 - 2^100 */
+	for i = 2;i < 100;i += 2
+		fsquare(t1[:],t0[:])
+		fsquare(t0[:],t1[:])
+	;;
+	/* 2^200 - 2^0 */ fmul(t1[:],t0[:], z2_100_0[:])
+
+	/* 2^201 - 2^1 */ fsquare(t0[:], t1[:])
+	/* 2^202 - 2^2 */ fsquare(t1[:], t0[:])
+	/* 2^250 - 2^50 */
+	for i = 2;i < 50;i += 2
+		fsquare(t0[:], t1[:])
+		fsquare(t1[:], t0[:])
+	;;
+	/* 2^250 - 2^0 */ fmul(t0[:], t1[:], z2_50_0[:])
+
+	/* 2^251 - 2^1 */ fsquare(t1[:], t0[:])
+	/* 2^252 - 2^2 */ fsquare(t0[:], t1[:])
+	/* 2^253 - 2^3 */ fsquare(t1[:], t0[:])
+	/* 2^254 - 2^4 */ fsquare(t0[:], t1[:])
+	/* 2^255 - 2^5 */ fsquare(t1[:], t0[:])
+	/* 2^255 - 21 */ fmul(out,t1[:], z11[:])
+}
+
+const curve25519 = {pub : byte[:/*32*/], secret : byte[:/*32*/], basepoint : byte[:/*32*/]
+	var bp : felem[10]
+	var x : felem[10]
+	var z : felem[10]
+	var zmone : felem[10]
+
+	std.assert(pub.len == 32 && secret.len == 32 && basepoint.len == 32, "wrong key sizes")
+	fexpand(bp[:], basepoint[:])
+	cmult(x[:], z[:], secret[:], bp[:])
+	crecip(zmone[:], z[:])
+	fmul(z[:], x[:], zmone[:])
+	fcontract(pub[:], z[:])
+}