ref: d159e8c6b102a1c5d9a7625ecc8176f08290c259
dir: /lib/crypto/ctbig.myr/
use std use iter use "ct" pkg crypto = type ctbig = struct nbit : std.size dig : uint32[:] /* little endian, no leading zeros. */ ;; generic mkctbign : (v : @a, nbit : std.size -> ctbig#) :: numeric,integral @a const bitcount : (buf : byte[:] -> std.size) pkglocal const ctzero : (nbit : std.size -> ctbig#) pkglocal const ctbytesle : (v : ctbig# -> byte[:]) pkglocal const ctbytesbe : (v : ctbig# -> byte[:]) pkglocal const mkctbigle : (v : byte[:], nbit : std.size -> ctbig#) pkglocal const mkctbigbe : (v : byte[:], nbit : std.size -> ctbig#) pkglocal const ctfree : (v : ctbig# -> void) pkglocal const ctbigdup : (v : ctbig# -> ctbig#) pkglocal const ctlike : (v : ctbig# -> ctbig#) pkglocal const ct2big : (v : ctbig# -> std.bigint#) pkglocal const big2ct : (v : std.bigint#, nbit : std.size -> ctbig#) /* arithmetic */ pkglocal const ctadd : (r : ctbig#, a : ctbig#, b : ctbig# -> void) pkglocal const ctsub : (r : ctbig#, a : ctbig#, b : ctbig# -> void) pkglocal const ctmul : (r : ctbig#, a : ctbig#, b : ctbig# -> void) pkglocal const ctmodpow : (r : ctbig#, a : ctbig#, b : ctbig#, m : ctbig# -> void) pkglocal const ctiszero : (v : ctbig# -> bool) pkglocal const cteq : (a : ctbig#, b : ctbig# -> bool) pkglocal const ctne : (a : ctbig#, b : ctbig# -> bool) pkglocal const ctgt : (a : ctbig#, b : ctbig# -> bool) pkglocal const ctge : (a : ctbig#, b : ctbig# -> bool) pkglocal const ctlt : (a : ctbig#, b : ctbig# -> bool) pkglocal const ctle : (a : ctbig#, b : ctbig# -> bool) /* for testing */ pkglocal const growmod : (r : ctbig#, a : ctbig#, k : uint32, m : ctbig# -> void) pkglocal const clip : (v : ctbig# -> ctbig#) impl std.equatable ctbig# ;; const Bits = 32 const Base = 0x100000000ul impl std.equatable ctbig# = eq = {a, b -> cteq(a, b) } ;; const __init__ = { var ct : ctbig# ct = ctzero(0) std.fmtinstall(std.typeof(ct), ctfmt) ctfree(ct) } const ctfmt = {sb, ap, opts var ct : ctbig# ct = std.vanext(ap) for d : iter.byreverse(ct.dig) std.sbfmt(sb, "{w=8,p=0,x}.", d) ;; } const bitcount = {buf const bits = [ 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, ] var last, nbit nbit = 8*buf.len last = buf[buf.len - 1] for b : bits[:] if last & b != 0 break ;; nbit-- ;; -> nbit } generic mkctbign = {v : @a, nbit : std.size :: integral,numeric @a var a var val a = std.zalloc() val = (v : uint64) a.nbit = nbit a.dig = std.slalloc(ndig(nbit)) if nbit > 0 a.dig[0] = (val : uint32) ;; if nbit > 32 a.dig[1] = (val >> 32 : uint32) ;; -> clip(a) } const ctzero = {nbit -> std.mk([ .nbit=nbit, .dig=std.slzalloc(ndig(nbit)), ]) } const ctdup = {v -> std.mk([ .nbit=v.nbit, .dig=std.sldup(v.dig) ]) } const ct2big = {ct -> std.mk([ .sign=1, .dig=std.sldup(ct.dig) ]) } const big2ct = {big, nbit var v, n, l n = ndig(nbit) l = std.min(n, big.dig.len) v = std.slzalloc(n) std.slcp(v[:l], big.dig[:l]) -> clip(std.mk([ .nbit=nbit, .dig=v, ])) } const mkctbigle = {v, nbit var a, last, i, o, off /* It's ok to depend on the length of v here: we can leak the size of the numbers. */ o = 0 a = std.slzalloc(ndig(nbit)) for i = 0; i + 4 <= v.len; i += 4 a[o++] = \ ((v[i + 0] : uint32) << 0) | \ ((v[i + 1] : uint32) << 8) | \ ((v[i + 2] : uint32) << 16) | \ ((v[i + 3] : uint32) << 24) ;; if i != v.len last = 0 for i; i < v.len; i++ off = i & 0x3 last |= (v[i] : uint32) << (8 *off) ;; a[o++] = last ;; -> clip(std.mk([.nbit=nbit, .dig=a])) } const mkctbigbe = {v, nbit var a, i, o, tail : byte[4] /* It's ok to depend on the length of v here: we can leak the size of the numbers. */ o = 0 a = std.slzalloc(ndig(nbit)) for i = v.len ; i >= 4; i -= 4 a[o++] = std.getbe32(v[i-4:i]) ;; if i != 0 std.slfill(tail[:], 0) std.slcp(tail[4-i:], v[:i]) a[o++] = std.getbe32(tail[:]) ;; -> clip(std.mk([.nbit=nbit, .dig=a])) } const ctbytesle = {v var d, i, n, o, ret o = 0 n = (v.nbit + 7) / 8 ret = std.slalloc(n) for i = 0; i * 4 < n; i++ d = v.dig[i] ret[o++] = (d >> 0 : byte) ret[o++] = (d >> 8 : byte) ret[o++] = (d >> 16 : byte) ret[o++] = (d >> 24 : byte) ;; if i * 4 != n d = v.dig[i] for ; i < n; i++ ret[o++] = (d : byte) d >>= 8 ;; ;; -> ret } const ctbytesbe = {v : ctbig# var d : uint32, i, n, o, ret i = v.dig.len - 1 o = 0 n = (v.nbit + 7) / 8 ret = std.slalloc(n) if n & 0x3 != 0 d = v.dig[i--] for var j = n & 0x3 + 1; j > 0; j-- ret[o++] = (d >> 8*(j - 1 : uint32): byte) ;; ;; for ; i >= 0 ; i-- d = v.dig[i] ret[o++] = (d >> 24 : byte) ret[o++] = (d >> 16 : byte) ret[o++] = (d >> 8 : byte) ret[o++] = (d >> 0 : byte) ;; -> ret } const ctlike = {v -> std.mk([ .nbit = v.nbit, .dig=std.slzalloc(v.dig.len), ]) } const ctbigdup = {v -> std.mk([ .nbit=v.nbit, .dig=std.sldup(v.dig), ]) } const ctfree = {v std.slfree(v.dig) std.free(v) } const ctadd = {r, a, b ctaddcc(r, a, b, 1) } const ctaddcc = {r, a, b, ctl var v, i, carry checksz(a, b) checksz(a, r) carry = 0 for i = 0; i < a.dig.len; i++ v = (a.dig[i] : uint64) + (b.dig[i] : uint64) + carry; r.dig[i] = mux(ctl, (v : uint32), r.dig[i]) carry = v >> 32 ;; clip(r) } const ctsub = {r, a, b ctsubcc(r, a, b, 1) } const ctsubcc = {r, a, b, ctl var borrow, v, i checksz(a, b) checksz(a, r) borrow = 0 for i = 0; i < a.dig.len; i++ v = (a.dig[i] : uint64) - (b.dig[i] : uint64) - borrow borrow = (v & (1<<63)) >> 63 r.dig[i] = mux(ctl, (v : uint32), r.dig[i]) ;; clip(r) -> borrow } const ctmul = {r, a, b var i, j var ai, bj, wij var carry, t var w checksz(a, b) checksz(a, r) w = std.slzalloc(a.dig.len + b.dig.len) for j = 0; j < b.dig.len; j++ carry = 0 for i = 0; i < a.dig.len; i++ ai = (a.dig[i] : uint64) bj = (b.dig[j] : uint64) wij = (w[i+j] : uint64) t = ai * bj + wij + carry w[i+j] = (t : uint32) carry = t >> 32 ;; w[i + j] = (carry : uint32) ;; /* safe to leak that a == r; not data dependent */ std.slgrow(&w, a.dig.len) if a == r std.slfree(a.dig) ;; r.dig = w[:a.dig.len] clip(r) } /* * Returns the top digit in the number that has * a bit set. This is useful for finding our division. */ const topfull = {n : ctbig# var top top = 0 for var i = 0; i < n.dig.len; i++ top = mux(n.dig[i], i, top) ;; -> 0 } /* * Multiplies by 2**32 mod m */ const growmod = {r, a, k, m var a0, a1, b0, hi, g, q, tb, e var chf, clow, under, over var cc : uint64 checksz(a, m) std.assert(a.dig.len > 1, "bad modulus") std.assert(a.nbit % 32 == 0, "ragged sizes not yet supported") std.assert(m.dig[m.dig.len - 1] & (1 << 31) != 0, "top of mod not set") a0 = (a.dig[m.dig.len - 1] : uint64) << 32 a1 = (a.dig[m.dig.len - 2] : uint64) << 0 b0 = (m.dig[m.dig.len - 1] : uint64) /* * We hold the top digit here, so * this keeps the number of digits the same, and * as a result, keeps checksz() happy. */ hi = a.dig[a.dig.len - 1] /* Do the multiplication of x by 2**32 */ std.slcp(r.dig[1:], a.dig[:a.dig.len-1]) r.dig[0] = k g = ((a0 + a1) / b0 : uint32) e = eq(a0, b0) q = mux((e : uint32), 0xffffffff, mux(eq(g, 0), 0, g - 1)); cc = 0; tb = 1; for var u = 0; u < r.dig.len; u++ var mw, zw, xw, nxw var zl : uint64 mw = m.dig[u]; zl = (mw : uint64) * (q : uint64) + cc cc = zl >> 32 zw = (zl : uint32) xw = r.dig[u] nxw = xw - zw; cc += (gt(nxw, xw) : uint64) r.dig[u] = nxw; tb = mux(eq(nxw, mw), tb, gt(nxw, mw)); ;; /* * We can either underestimate or overestimate q, * - If we overestimated, either cc < hi, or cc == hi && tb != 0. * - If we overestimated, cc > hi. * - Otherwise, we got it exactly right. * * If we overestimated, we need to subtract 'm' once. If we * underestimated, we need to add it once. */ chf = (cc >> 32 : uint32) clow = (cc >> 0 : uint32) over = chf | gt(clow, hi); under = ~over & (tb | (~chf & lt(clow, hi))); ctaddcc(r, r, m, over); ctsubcc(r, r, m, under); } const tomonty = {r, x, m checksz(x, r) checksz(x, m) std.slcp(r.dig, x.dig) for var i = 0; i < m.dig.len; i++ growmod(r, r, 0, m) ;; } const ccopy = {r, v, ctl checksz(r, v) for var i = 0; i < r.dig.len; i++ r.dig[i] = mux(ctl, v.dig[i], r.dig[i]) ;; } const muladd = {a, b, k -> (a : uint64) * (b : uint64) + (k : uint64) } const montymul = {r : ctbig#, x : ctbig#, y : ctbig#, m : ctbig#, m0i : uint32 var dh : uint64 var s checksz(x, y) checksz(x, m) checksz(x, r) std.slfill(r.dig, 0) dh = 0 for var u = 0; u < x.dig.len; u++ var f : uint32, xu : uint32 var r1 : uint64, r2 : uint64, zh : uint64 xu = x.dig[u] f = (r.dig[0] + x.dig[u] * y.dig[0]) * m0i; r1 = 0; r2 = 0; for var v = 0; v < y.dig.len; v++ var z : uint64 var t : uint32 z = muladd(xu, y.dig[v], r.dig[v]) + r1 r1 = z >> 32 t = (z : uint32) z = muladd(f, m.dig[v], t) + r2 r2 = z >> 32 if v != 0 r.dig[v - 1] = (z : uint32) ;; ;; zh = dh + r1 + r2; r.dig[r.dig.len - 1] = (zh : uint32) dh = zh >> 32; ;; /* * r may still be greater than m at that point; notably, the * 'dh' word may be non-zero. */ s = ne(dh, 0) | (ctge(r, m) : uint64) ctsubcc(r, r, m, (s : uint32)) } const ninv32 = {x var y y = 2 - x y *= 2 - y * x y *= 2 - y * x y *= 2 - y * x y *= 2 - y * x -> mux(x & 1, -y, 0) } const ctmodpow = {r, a, e, m var t1, t2, m0i, ctl var n = 0 t1 = ctdup(a) t2 = ctzero(a.nbit) m0i = ninv32(m.dig[0]) tomonty(t1, a, m); std.slfill(r.dig, 0); r.dig[0] = 1; for var i = 0; i < e.nbit; i++ ctl = (e.dig[i>>5] >> (i & 0x1f : uint32)) & 1 montymul(t2, r, t1, m, m0i) ccopy(r, t2, ctl); montymul(t2, t1, t1, m, m0i); std.slcp(t1.dig, t2.dig); ;; ctfree(t1) ctfree(t2) } const ctiszero = {a var z, zz z = 1 for var i = 0; i < a.dig.len; i++ zz = mux(a.dig[i], 0, 1) z = mux(zz, z, 0) ;; -> (z : bool) } const cteq = {a, b var nz checksz(a, b) nz = 0 for var i = 0; i < a.dig.len; i++ nz = nz | a.dig[i] - b.dig[i] ;; -> (eq(nz, 0) : bool) } const ctne = {a, b var v v = (cteq(a, b) : byte) -> (not(v) : bool) } const ctgt = {a, b -> (ctsubcc(b, b, a, 0) : bool) } const ctge = {a, b var v v = (ctlt(a, b) : byte) -> (not(v) : bool) } const ctlt = {a, b -> (ctsubcc(a, a, b, 0) : bool) } const ctle = {a, b var v v = (ctgt(a, b) : byte) -> (not(v) : bool) } const ndig = {nbit -> (nbit + 8*sizeof(uint32) - 1)/(8*sizeof(uint32)) } const checksz = {a, b std.assert(a.nbit == b.nbit, "mismatched bit sizes") std.assert(a.dig.len == b.dig.len, "mismatched backing sizes") } const clip = {v var mask, edge : uint64 edge = (v.nbit : uint64) & (Bits - 1) mask = mux(edge, (1 << edge) - 1, ~0) v.dig[v.dig.len - 1] &= (mask : uint32) -> v }