org.bouncycastle.pqc.crypto.cmce.CMCEEngine Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bcprov-ext-debug-jdk18on Show documentation
Show all versions of bcprov-ext-debug-jdk18on Show documentation
The Bouncy Castle Crypto package is a Java implementation of cryptographic algorithms. This jar contains the JCE provider and the lightweight API for the Bouncy Castle Cryptography APIs for Java 1.8 and later, with debug enabled.
The newest version!
package org.bouncycastle.pqc.crypto.cmce;
import java.security.SecureRandom;
import org.bouncycastle.crypto.Xof;
import org.bouncycastle.crypto.digests.SHAKEDigest;
import org.bouncycastle.util.Arrays;
class CMCEEngine
{
// Parameter-set values. All of them are assigned in the constructor; the "= ..." notes in the
// trailing comments are the example values / formulas recorded by the original author.
private int SYS_N; // = 3488; (constructor argument n)
private int SYS_T; // = 64; (constructor argument t)
private int GFBITS; // = 12; (constructor argument m)
private int IRR_BYTES; // = SYS_T * 2;
private int COND_BYTES; // = (1 << (GFBITS-4))*(2*GFBITS - 1);
private int PK_NROWS; // = SYS_T*GFBITS;
private int PK_NCOLS; // = SYS_N - PK_NROWS;
private int PK_ROW_BYTES;// = (PK_NCOLS + 7)/8;
private int SYND_BYTES;// = (PK_NROWS + 7)/8;
private int GFMASK; // = (1 << GFBITS) - 1;
private int[] poly; // only needed for key pair gen
// Session key size handed back unchanged by getDefaultSessionKeySize().
private final int defaultKeySize;
// Field arithmetic and Benes-network helpers; GF12/BENES12 when GFBITS == 12, otherwise GF13/BENES13.
private GF gf;
private BENES benes;
// True when SYS_T is not a multiple of 8; enables the padding checks and bit-shifted row handling.
private boolean usePadding;
// True when (1 << GFBITS) > SYS_N; sampled error indices must then be filtered to the range [0, SYS_N).
private boolean countErrorIndices;
private boolean usePivots; // used for compression
/**
 * Returns the number of private-key bytes that hold the polynomial coefficients
 * (two bytes per coefficient, i.e. {@code SYS_T * 2}).
 *
 * @return the value of {@code IRR_BYTES}
 */
public int getIrrBytes()
{
    final int irrBytes = this.IRR_BYTES;
    return irrBytes;
}
/**
 * Returns the number of private-key bytes that hold the control bits,
 * computed in the constructor as {@code (1 << (GFBITS - 4)) * (2 * GFBITS - 1)}.
 *
 * @return the value of {@code COND_BYTES}
 */
public int getCondBytes()
{
    final int condBytes = this.COND_BYTES;
    return condBytes;
}
/**
 * Returns the size in bytes of a full private key: a 40-byte header (32-byte seed plus an
 * 8-byte word), the polynomial bytes, the control bits and a trailing {@code SYS_N / 8}-byte string.
 *
 * @return total private key length in bytes
 */
public int getPrivateKeySize()
{
    final int headerBytes = 40;
    final int trailingStringBytes = SYS_N / 8;
    return COND_BYTES + IRR_BYTES + trailingStringBytes + headerBytes;
}
/**
 * Returns the size in bytes of the encoded public key.
 * <p>
 * Without padding this is {@code PK_NROWS * PK_NCOLS / 8}. With padding every row occupies
 * {@code SYS_N / 8 - (PK_NROWS - 1) / 8} bytes.
 *
 * @return public key length in bytes
 */
public int getPublicKeySize()
{
    if (!usePadding)
    {
        return PK_NROWS * PK_NCOLS / 8;
    }
    final int bytesPerRow = SYS_N / 8 - ((PK_NROWS - 1) / 8);
    return PK_NROWS * bytesPerRow;
}
/**
 * Returns the ciphertext length in bytes, which equals {@code SYND_BYTES}
 * ({@code (PK_NROWS + 7) / 8}).
 *
 * @return ciphertext length in bytes
 */
public int getCipherTextSize()
{
    final int cipherTextBytes = this.SYND_BYTES;
    return cipherTextBytes;
}
/**
 * Builds an engine for one parameter set and precomputes all derived sizes.
 *
 * @param m              field size in bits (stored as {@code GFBITS}); 12 selects GF12/BENES12,
 *                       any other value selects GF13/BENES13
 * @param n              stored as {@code SYS_N}
 * @param t              stored as {@code SYS_T}
 * @param p              stored as {@code poly}; used during key pair generation
 * @param usePivots      whether pk_gen performs the column-moving (pivot) step
 * @param defaultKeySize value reported by {@link #getDefaultSessionKeySize()}
 */
public CMCEEngine(int m, int n, int t, int[] p, boolean usePivots, int defaultKeySize)
{
    // raw parameters
    this.GFBITS = m;
    this.SYS_N = n;
    this.SYS_T = t;
    this.poly = p;
    this.usePivots = usePivots;
    this.defaultKeySize = defaultKeySize;

    // derived sizes
    this.GFMASK = (1 << GFBITS) - 1;
    this.IRR_BYTES = SYS_T * 2; // t * ceil(m/8)
    this.COND_BYTES = (1 << (GFBITS - 4)) * (2 * GFBITS - 1);
    this.PK_NROWS = SYS_T * GFBITS;
    this.PK_NCOLS = SYS_N - PK_NROWS;
    this.PK_ROW_BYTES = (PK_NCOLS + 7) / 8;
    this.SYND_BYTES = (PK_NROWS + 7) / 8;

    // field / Benes helpers: everything that is not the 12-bit field uses the 13-bit variants
    if (GFBITS != 12)
    {
        this.gf = new GF13();
        this.benes = new BENES13(SYS_N, SYS_T, GFBITS);
    }
    else
    {
        this.gf = new GF12();
        this.benes = new BENES12(SYS_N, SYS_T, GFBITS);
    }

    this.usePadding = (SYS_T % 8) != 0;
    this.countErrorIndices = SYS_N < (1 << GFBITS);
}
/**
 * Recomputes the public key that belongs to a full private key.
 * <p>
 * The seed (first 32 bytes of {@code sk}) is expanded with SHAKE256 exactly as in
 * {@link #kem_keypair}: the first {@code SYS_N / 8} output bytes are the string s and the next
 * {@code (1 << GFBITS) * 4} bytes are the field-ordering integers. Only that prefix of the
 * XOF output is needed here, so the buffer is shorter than the one used during key generation.
 *
 * @param sk full private key; the polynomial is read from offset 40 by pk_gen
 * @return a newly allocated public key of {@link #getPublicKeySize()} bytes
 */
public byte[] generate_public_key_from_private_key(byte[] sk)
{
    final int q = 1 << GFBITS;
    byte[] pk = new byte[getPublicKeySize()];
    short[] pi = new short[q];
    long[] pivots = {0};

    // regenerate the integers that were sorted to obtain the field ordering
    int[] perm = new int[q];
    byte[] hash = new byte[(SYS_N / 8) + (q * 4)];

    // The ordering integers start right after s. kem_keypair and decompress_private_key read
    // them from this same offset; the previous computation here additionally subtracted
    // 32 + IRR_BYTES although those sections are not part of this shortened buffer.
    int hash_idx = SYS_N / 8;

    Xof digest = new SHAKEDigest(256);
    digest.update((byte)64);
    digest.update(sk, 0, 32);
    digest.doFinal(hash, 0, hash.length);

    for (int i = 0; i < q; i++)
    {
        perm[i] = Utils.load4(hash, hash_idx + i * 4);
    }

    // NOTE(review): the status returned by pk_gen is not checked (unchanged from before).
    pk_gen(pk, sk, perm, pi, pivots);
    return pk;
}
/**
 * Generates the rest of the private key given (at least) the first 40 bytes.
 * <p>
 * All of {@code sk} is first copied into a fresh array of {@link #getPrivateKeySize()} bytes.
 * The seed in {@code sk[0..32)} is expanded with SHAKE256 (prefixed by the byte 64). Sections
 * are regenerated depending on {@code sk.length}: the polynomial when {@code sk.length <= 40},
 * the control bits when {@code sk.length <= 40 + IRR_BYTES}. The trailing {@code SYS_N / 8}
 * bytes are always rewritten from the start of the hash.
 *
 * @param sk truncated (or full) private key
 * @return the expanded private key
 */
public byte[] decompress_private_key(byte[] sk)
{
byte[] reg_sk = new byte[getPrivateKeySize()];
System.arraycopy(sk, 0, reg_sk, 0, sk.length);
// s: n/8 (random string)
// a: COND_BYTES (field ordering) ((2m-1) * 2^(m-4))
// g: IRR_BYTES (polynomial) (t * 2)
// generate hash using the seed given in the sk (64 || first 32 bytes)
// hash layout: [ s | ordering integers (4 bytes each) | polynomial input | 32 trailing bytes ]
byte[] hash = new byte[(SYS_N / 8) + ((1 << GFBITS) * 4) + IRR_BYTES + 32];
int hash_idx = 0;
Xof digest;
digest = new SHAKEDigest(256);
digest.update((byte)64);
digest.update(sk, 0, 32); // input
digest.doFinal(hash, 0, hash.length);
// generate g
if (sk.length <= 40)
{
short[] field = new short[SYS_T];
byte[] reg_g = new byte[IRR_BYTES];
// polynomial input sits just before the trailing 32 bytes
hash_idx = hash.length - 32 - IRR_BYTES;
for (int i = 0; i < SYS_T; i++)
{
field[i] = Utils.load_gf(hash, hash_idx + i * 2, GFMASK);
}
// NOTE(review): the return value (-1 on failure) is ignored here
generate_irr_poly(field);
for (int i = 0; i < SYS_T; i++)
{
Utils.store_gf(reg_g, i * 2, field[i]);
}
System.arraycopy(reg_g, 0, reg_sk, 40, IRR_BYTES);
}
// generate a
if (sk.length <= 40 + IRR_BYTES)
{
int[] perm = new int[1 << GFBITS];
short[] pi = new short[1 << GFBITS];
// ordering integers start right after s (this evaluates to SYS_N / 8)
hash_idx = hash.length - 32 - IRR_BYTES - ((1 << GFBITS) * 4);
for (int i = 0; i < (1 << GFBITS); i++)
{
perm[i] = Utils.load4(hash, hash_idx + i * 4);
}
if (usePivots)
{
// pk_gen with a null pk only computes pi (including the column moves); its status is ignored
long[] pivots = {0};
pk_gen(null, reg_sk, perm, pi, pivots);
}
else
{
// same (value << 31 | index) sort as in pk_gen, without building the matrix
long[] buf = new long[1 << GFBITS];
for (int i = 0; i < (1 << GFBITS); i++)
{
buf[i] = perm[i];
buf[i] <<= 31;
buf[i] |= i;
buf[i] &= 0x7fffffffffffffffL; // getting rid of signed longs
}
sort64(buf, 0, buf.length);
for (int i = 0; i < (1 << GFBITS); i++)
{
pi[i] = (short)(buf[i] & GFMASK);
}
}
byte[] out = new byte[COND_BYTES];
controlbitsfrompermutation(out, pi, GFBITS, 1 << GFBITS);
//copy the controlbits from the permutation to the private key
System.arraycopy(out, 0, reg_sk, IRR_BYTES + 40, out.length);
}
// reg s
System.arraycopy(hash, 0, reg_sk, getPrivateKeySize() - SYS_N / 8, SYS_N / 8);
return reg_sk;
}
/**
 * Generates a key pair.
 * <p>
 * A random 32-byte seed is expanded with SHAKE256 (prefixed by the byte 64) into E. If the
 * polynomial generation or the public-key generation fails, the last 32 bytes of E become the
 * next seed and the loop restarts.
 * <p>
 * Private key layout written here: seed at 0, 8-byte word at 32, polynomial at 40, control bits
 * at {@code 40 + IRR_BYTES}, and the string s in the last {@code SYS_N / 8} bytes of {@code sk}.
 *
 * @param pk     output buffer for the public key (passed to pk_gen)
 * @param sk     output buffer for the private key; s is placed relative to {@code sk.length}
 * @param random source of the initial seed
 */
public void kem_keypair(byte[] pk, byte[] sk, SecureRandom random)
{
// 1. Generate a uniform random l-bit string δ. (This is called a seed.)
byte[] seed_a = new byte[1];
byte[] seed_b = new byte[32];
seed_a[0] = 64;
random.nextBytes(seed_b);
//2. Output SeededKeyGen(δ).
// SeededKeyGen
byte[] E = new byte[(SYS_N / 8) + ((1 << GFBITS) * 4) + (SYS_T * 2) + 32];
int seedIndex, skIndex = 0;
byte[] prev_sk = seed_b;
long[] pivots = {0};
Xof digest = new SHAKEDigest(256);
while (true)
{
// SeededKeyGen - 1. Compute E = G(δ), a string of n + σ2q + σ1t + l bits. (3488 + 32*4096 + 16*64 + 256)
digest.update(seed_a, 0, seed_a.length);
digest.update(seed_b, 0, seed_b.length);
digest.doFinal(E, 0, E.length);
// Store the seeds generated
// SeededKeyGen - 2. Define δ′ as the last l bits of E.
// Update seed using the last 32 bytes (l) of E
// If anything fails, this set δ = δ′ (the next last 32 bytes of E) and restart the algorithm.
seedIndex = E.length - 32;
seed_b = Arrays.copyOfRange(E, seedIndex, seedIndex + 32);
// store the previous last 32 bytes used as δ
// (sk[0..32) therefore always holds the seed that produced the current E)
System.arraycopy(prev_sk, 0, sk, 0, 32);
prev_sk = Arrays.copyOfRange(seed_b, 0, 32);
// (step 5 and 4 are swapped)
// SeededKeyGen - 5. Compute g from the next σ1t bits of E by the Irreducible algorithm. If this fails,
// set δ = δ′ and restart the algorithm.
// Create Field which is an element in gf2^mt
// 2.4.1 Irreducible-polynomial generation
short[] field = new short[SYS_T];
int sigma1_t = E.length - 32 - (2 * SYS_T);
seedIndex = sigma1_t;
// Irreducible 2.4.1 - 1. Define βj = ∑m−1
// i=0 dσ1j+izi for each j ∈ {0,1,...,t −1}. (Within each group of σ1
// input bits, this uses only the first m bits.
for (int i = 0; i < SYS_T; i++)
{
field[i] = Utils.load_gf(E, sigma1_t + i * 2, GFMASK);
}
if (generate_irr_poly(field) == -1)
{
continue;
}
// storing poly to sk
skIndex = 32 + 8;
for (int i = 0; i < SYS_T; i++)
{
Utils.store_gf(sk, skIndex + i * 2, field[i]);
}
// SeededKeyGen - 4. Compute α1,...,αq from the next σ2q bits of E by the FieldOrdering algorithm.
// If this fails, set δ = δ′ and restart the algorithm.
// Generate permutation
int[] perm = new int[(1 << GFBITS)];
// step back over the ordering integers; seedIndex is now SYS_N / 8
seedIndex -= (1 << GFBITS) * 4;
// FieldOrdering 2.4.2 - 1. Take the first σ2 input bits b0,b1,...,bσ2−1 as a σ2-bit integer a0 =
// b0 + 2b1 + ··· + 2σ2−1bσ2−1, take the next σ2 bits as a σ2-bit integer a1, and so on through aq−1.
for (int i = 0; i < (1 << GFBITS); i++)
{
perm[i] = Utils.load4(E, seedIndex + i * 4);
}
// generating public key
short[] pi = new short[1 << GFBITS];
//8. Write Γ′ as (g,α′1,α′2,...,α′n)
if (pk_gen(pk, sk, perm, pi, pivots) == -1)
{
// System.out.println("FAILED GENERATING PUBLIC KEY");
continue;
}
// computing c using Nassimi-Sahni algorithm which is a
// parallel algorithms to set up the Benes permutation network
byte[] out = new byte[COND_BYTES];
controlbitsfrompermutation(out, pi, GFBITS, 1 << GFBITS);
//copy the controlbits from the permutation to the private key
System.arraycopy(out, 0, sk, IRR_BYTES + 40, out.length);
// storing the random string s
// step back over s; seedIndex is now 0
seedIndex -= SYS_N / 8;
System.arraycopy(E, seedIndex, sk, sk.length - SYS_N / 8, SYS_N / 8);
// This part is reserved for compression which is not implemented and is not required
// bytes 32..39 of sk: a fixed 0xFFFFFFFF word without pivots, otherwise the pivot bitmap from pk_gen
if (!usePivots)
{
Utils.store8(sk, 32, 0xFFFFFFFFL);
}
else
{
Utils.store8(sk, 32, pivots[0]);
}
// 9. Output T as public key and (δ,c,g,α,s) as private key, where c = (cn−k−μ+1,...,cn−k)
// and α = (α′1,...,α′n,αn+1,...,αq
break;
}
}
// 2.2.3 Encoding subroutine
/**
 * Computes the syndrome of {@code error_vector} under the public key and writes it, one bit per
 * public-key row, into the first {@code SYND_BYTES} bytes of {@code cipher_text}.
 *
 * @param cipher_text  output; its first SYND_BYTES bytes are overwritten
 * @param pk           public key, read as PK_NROWS rows of PK_ROW_BYTES bytes
 * @param error_vector input vector of SYS_N / 8 bytes
 */
private void syndrome(byte[] cipher_text, byte[] pk, byte[] error_vector)
{
/*
2.2.3 Encoding subroutine
1. Define H = (In−k |T)
2. Compute and return C0 = He ∈Fn−k2 .
*/
short[] row = new short[SYS_N / 8];
int i, j, pk_ptr = 0;
byte b;
int tail = PK_NROWS % 8;
for (i = 0; i < SYND_BYTES; i++)
{
cipher_text[i] = 0;
}
for (i = 0; i < PK_NROWS; i++)
{
// rebuild row i of H: zeros, then the public-key row right-aligned in the buffer
for (j = 0; j < SYS_N / 8; j++)
{
row[j] = 0;
}
for (j = 0; j < PK_ROW_BYTES; j++)
{
row[SYS_N / 8 - PK_ROW_BYTES + j] = pk[pk_ptr + j];
}
if (usePadding)
{
// shift the copied bytes left by `tail` bits, carrying bits in from the preceding byte
for (j = SYS_N / 8 - 1; j >= SYS_N / 8 - PK_ROW_BYTES; j--)
{
row[j] = (short)((((row[j] & 0xff) << tail) | ((row[j - 1] & 0xff) >>> (8 - tail))) & 0xff);
// System.out.printf("%04x ", row[j]);
}
}
// set the identity-part bit for this row
row[i / 8] |= 1 << (i % 8);
b = 0;
for (j = 0; j < SYS_N / 8; j++)
{
b ^= row[j] & error_vector[j];
}
// fold the byte down to its parity in bit 0; only bit 0 is kept afterwards
b ^= b >>> 4;
b ^= b >>> 2;
b ^= b >>> 1;
b &= 1;
cipher_text[i / 8] |= (b << (i % 8));
pk_ptr += PK_ROW_BYTES;
}
}
// 2.4.4 Fixed-weight-vector generation
/**
 * Fills {@code error_vector} (SYS_N / 8 bytes) with a bit vector that has exactly SYS_T
 * distinct bit positions set. Candidate positions are drawn from {@code random} and the draw is
 * repeated until SYS_T in-range, pairwise distinct positions are found.
 *
 * @param error_vector output buffer; bytes 0 .. SYS_N / 8 - 1 are overwritten
 * @param random       randomness source
 */
private void generate_error_vector(byte[] error_vector, SecureRandom random)
{
byte[] buf_bytes;
short[] buf_nums = new short[SYS_T * 2];
short[] ind = new short[SYS_T];
byte[] val = new byte[SYS_T];
/*
2.4.4 Fixed-weight-vector generation
1. Generate σ1τ uniform random bits b0,b1,...,bσ1τ−1.
*/
while (true)
{
/*
2.4.4 Fixed-weight-vector generation
2. Define dj = ∑m−1
i=0 bσ1j+i2i for each j ∈{0,1,...,τ −1}.
*/
if (countErrorIndices)
{
// (1 << GFBITS) > SYS_N: draw twice as many candidates and keep the first SYS_T that are < SYS_N
buf_bytes = new byte[SYS_T * 4];
random.nextBytes(buf_bytes);
for (int i = 0; i < SYS_T * 2; i++)
{
buf_nums[i] = Utils.load_gf(buf_bytes, i * 2, GFMASK);
}
/*
2.4.4 Fixed-weight-vector generation
3. Define a0,a1,...,at−1 as the first t entries in d0,d1,...,dτ−1 in the range
{0,1,...,n −1}. If there are fewer than t such entries, restart the algorithm
*/
// moving and counting indices in the correct range
int count = 0;
for (int i = 0; i < SYS_T * 2 && count < SYS_T; i++)
{
if (buf_nums[i] < SYS_N)
{
ind[count++] = buf_nums[i];
}
}
if (count < SYS_T)
{
// System.out.println("Failed Encrypt indices wrong range");
continue;
}
}
else
{
// every masked value is used directly as an index
buf_bytes = new byte[SYS_T * 2];
random.nextBytes(buf_bytes);
for (int i = 0; i < SYS_T; i++)
{
ind[i] = Utils.load_gf(buf_bytes, i * 2, GFMASK);
}
}
/*
2.4.4 Fixed-weight-vector generation
4. If a0,a1,...,at−1 are not all distinct, restart the algorithm.
*/
int eq = 0;
// check for repetition
for (int i = 1; i < SYS_T && eq != 1; i++)
{
for (int j = 0; j < i; j++)
{
if (ind[i] == ind[j])
{
eq = 1;
break;
}
}
}
if (eq == 0)
{
break;
}
else
{
// System.out.println("Failed Encrypt found duplicate");
}
}
/*
2.4.4 Fixed-weight-vector generation
5. Define e = (e0,e1,...,en−1) ∈ Fn2 as the weight-t vector such that eai = 1 for each i.
(Implementors are cautioned to compute e through arithmetic rather than variable-
time RAM lookups.)
*/
// val[i] is the single-bit byte for index i (bit position = index mod 8)
for (int i = 0; i < SYS_T; i++)
{
val[i] = (byte)(1 << (ind[i] & 7));
}
// System.out.print("e: ");
// every output byte is combined with every index through a mask instead of indexing by ind[j]
for (short i = 0; i < SYS_N / 8; i++)
{
error_vector[i] = 0;
for (int j = 0; j < SYS_T; j++)
{
short mask = same_mask32(i, (short)(ind[j] >> 3));
mask &= 0xff;
error_vector[i] |= val[j] & mask;
// System.out.printf("%02x ", mask);
}
}
}
/**
 * Encapsulation steps 1 and 2 (section 2.4.5): draws a fresh fixed-weight error vector and
 * encodes it with the public key.
 *
 * @param cipher_text  receives the syndrome C0 = Encode(e, T)
 * @param pk           public key T
 * @param error_vector receives the generated vector e
 * @param random       randomness source for the fixed-weight vector generation (section 2.4.4)
 */
private void encrypt(byte[] cipher_text, byte[] pk, byte[] error_vector, SecureRandom random)
{
    // step 1: e <- FixedWeight()
    this.generate_error_vector(error_vector, random);

    // step 2: C0 <- Encode(e, T)
    this.syndrome(cipher_text, pk, error_vector);
}
// 2.4.5 Encapsulation
/**
 * Encapsulates a session key under the given public key.
 * <p>
 * The session key is SHAKE256(0x01 || e || C) truncated to {@code key.length} bytes, where e is
 * the freshly generated error vector and C the ciphertext. When padding is in use and the
 * padding bits of {@code pk} are not all zero, both outputs are cleared to zero and -1 is
 * returned.
 *
 * @param cipher_text output ciphertext; all of it is hashed and its first SYND_BYTES bytes are written
 * @param key         output session key; its whole length is filled (and cleared on failure)
 * @param pk          public key
 * @param random      randomness source for the error vector
 * @return 0 on success, or the (non-zero) padding check result when the public key padding is invalid
 */
public int kem_enc(byte[] cipher_text, byte[] key, byte[] pk, SecureRandom random)
{
    byte[] error_vector = new byte[SYS_N / 8];
    int padding_ok = 0;

    if (usePadding)
    {
        // 0 when all padding bits are zero, -1 otherwise
        padding_ok = check_pk_padding(pk);
    }

    /*
    2.4.5 Encapsulation
        1. Use FixedWeight to generate a vector e of weight t.
        2. Compute C0 = Encode(e,T).
     */
    encrypt(cipher_text, pk, error_vector, random);

    /*
    2.4.5 Encapsulation
        4. Compute K = H(1,e,C)
     */
    Xof digest = new SHAKEDigest(256);
    digest.update((byte)0x01);
    digest.update(error_vector, 0, error_vector.length);
    digest.update(cipher_text, 0, cipher_text.length);
    digest.doFinal(key, 0, key.length);

    if (!usePadding)
    {
        return 0;
    }

    // mask is 0xFF when the padding is valid (outputs kept) and 0x00 otherwise (outputs zeroed)
    byte mask = (byte)padding_ok;
    mask ^= 0xFF;

    for (int i = 0; i < SYND_BYTES; i++)
    {
        cipher_text[i] &= mask;
    }
    // Cover the whole key buffer, matching kem_dec; a fixed count of 32 would overrun shorter
    // keys and leave the tail of longer keys untouched.
    for (int i = 0; i < key.length; i++)
    {
        key[i] &= mask;
    }

    return padding_ok;
}
// 2.3.3 Decapsulation
/**
 * Recovers the session key from a ciphertext.
 * <p>
 * The key is SHAKE256(b || e || C): on successful decoding b = 1 and e is the decoded error
 * vector; otherwise b = 0 and e is replaced by the string s stored at the end of the private
 * key. The selection is done with a byte mask rather than a branch. With padding enabled and
 * invalid ciphertext padding, every key byte is OR-ed with 0xFF and -1 is returned.
 *
 * @param key         output session key (whole length is filled)
 * @param cipher_text ciphertext; its first SYND_BYTES bytes are used
 * @param sk          full private key
 * @return 0, or the padding check result when padding is in use
 */
public int kem_dec(byte[] key, byte[] cipher_text, byte[] sk)
{
    final int vectorBytes = SYS_N / 8;
    final int sOffset = 40 + IRR_BYTES + COND_BYTES;

    byte[] error_vector = new byte[vectorBytes];
    byte[] preimage = new byte[1 + vectorBytes + SYND_BYTES];

    int padding_ok = 0;
    if (usePadding)
    {
        padding_ok = check_c_padding(cipher_text);
    }

    // step 4: e = Decode(C0); decrypt yields 0 on success and 1 on failure
    int decodeStatus = (byte)decrypt(error_vector, sk, cipher_text);

    // step 6: 0xff when decoding succeeded, 0x00 when it failed
    int select = ((decodeStatus - 1) >> 8) & 0xff;

    // b followed by either e or s, picked byte-wise through the mask
    preimage[0] = (byte)(select & 1);
    for (int pos = 0; pos < vectorBytes; pos++)
    {
        int fromSecret = ~select & sk[sOffset + pos];
        int fromDecode = select & error_vector[pos];
        preimage[1 + pos] = (byte)(fromSecret | fromDecode);
    }
    System.arraycopy(cipher_text, 0, preimage, 1 + vectorBytes, SYND_BYTES);

    // step 7: K = H(b, e, C)
    Xof digest = new SHAKEDigest(256);
    digest.update(preimage, 0, preimage.length);
    digest.doFinal(key, 0, key.length);

    if (!usePadding)
    {
        return 0;
    }

    // set every output byte to all 1's if the padding bits are not all zero
    byte fill = (byte)padding_ok;
    for (int pos = 0; pos < key.length; pos++)
    {
        key[pos] |= fill;
    }
    return padding_ok;
}
// 2.2.4 Decoding subroutine
// Niederreiter decryption with the Berlekamp decoder
/**
 * Decodes a ciphertext into an error vector.
 *
 * @param error_vector output, SYS_N / 8 bytes; bit i is set when the locator evaluates to zero at L[i]
 * @param sk           private key; the polynomial is read from offset 40 and the whole key is
 *                     passed to {@code benes.support_gen}
 * @param cipher_text  input syndrome, SYND_BYTES bytes
 * @return 0 when the recovered vector has weight SYS_T and reproduces the syndrome, 1 otherwise
 */
private int decrypt(byte[] error_vector, byte[] sk, byte[] cipher_text)
{
short[] g = new short[SYS_T + 1];
short[] L = new short[SYS_N];
short[] s = new short[SYS_T * 2];
short[] s_cmp = new short[SYS_T * 2];
short[] locator = new short[SYS_T + 1];
short[] images = new short[SYS_N];
short t;
byte[] r = new byte[SYS_N / 8];
/*
2.2.4 Decoding subroutine
1. Extend C0 to v = (C0,0,...,0) ∈Fn2 by appending k zeros.
*/
for (int i = 0; i < SYND_BYTES; i++)
{
r[i] = cipher_text[i];
}
for (int i = SYND_BYTES; i < SYS_N / 8; i++)
{
r[i] = 0;
}
// load the polynomial coefficients from the key and make it monic
for (int i = 0; i < SYS_T; i++)
{
g[i] = Utils.load_gf(sk, 40 + i * 2, GFMASK);
}
g[SYS_T] = 1;
/*
2.2.4 Decoding subroutine
2. Find the unique codeword c in the Goppa code defined by Γ′ that is at distance ≤t
from v. If there is no such codeword, return ⊥.
*/
// support gen
benes.support_gen(L, sk);
// compute syndrome
synd(s, g, L, r);
// compute minimal polynomial of syndrome
bm(locator, s);
// calculate the root for locator in L
root(images, locator, L);
/*
2.2.4 Decoding subroutine
3. Set e = v + c.
*/
for (int i = 0; i < SYS_N / 8; i++)
{
error_vector[i] = 0;
}
// w counts the set bits (weight of the recovered vector)
int w = 0;
for (int i = 0; i < SYS_N; i++)
{
t = (short)(gf.gf_iszero(images[i]) & 1);
error_vector[i / 8] |= t << (i % 8);
w += t;
}
// compute syndrome
synd(s_cmp, g, L, error_vector);
/*
2.2.4 Decoding subroutine
4. If wt(e) = t and C0 = He, return e. Otherwise return ⊥
*/
// check accumulates every difference; it stays 0 only if the weight and all syndrome entries match
int check;
check = w;
check ^= SYS_T;
for (int i = 0; i < SYS_T * 2; i++)
{
check |= s[i] ^ s_cmp[i];
}
// map 0 -> 1 and non-zero -> 0 without branching
check -= 1;
check >>= 15;
check &= 0x1;
if ((check ^ 1) != 0)
{
//TODO throw exception?
// System.out.println("Decryption failed");
}
return check ^ 1;
}
/**
 * Returns the smaller of {@code a} (widened to int) and {@code b}.
 */
private static int min(short a, int b)
{
    return Math.min(a, b);
}
/* the Berlekamp-Massey algorithm */
/* input: s, sequence of field elements */
/* output: out, minimal polynomial of s */
/* Updates are selected with the all-ones / all-zeros masks mne and mle instead of branches. */
private void bm(short[] out, short[] s)
{
short N = 0;
short L = 0;
short mle;
short mne;
short[] T = new short[SYS_T + 1];
short[] C = new short[SYS_T + 1];
short[] B = new short[SYS_T + 1];
short b = 1, d, f;
//
for (int i = 0; i < SYS_T + 1; i++)
{
C[i] = B[i] = 0;
}
B[1] = C[0] = 1;
//
for (N = 0; N < 2 * SYS_T; N++)
{
// discrepancy d = sum over i of C[i] * s[N - i], reduced once after accumulation
int d_ext = 0;
for (int i = 0; i <= min(N, SYS_T); i++)
{
d_ext ^= gf.gf_mul_ext(C[i], s[N - i]);
}
d = gf.gf_reduce(d_ext);
// mne: all ones when d != 0, zero otherwise
mne = d;
mne -= 1;
mne >>= 15;
mne &= 0x1;
mne -= 1;
// mle: all ones when N >= 2 * L and d != 0, zero otherwise
mle = N;
mle -= 2 * L;
mle >>= 15;
mle &= 0x1;
mle -= 1;
mle &= mne;
// keep a copy of C for the conditional update of B below
for (int i = 0; i <= SYS_T; i++)
{
T[i] = C[i];
}
f = gf.gf_frac(b, d);
for (int i = 0; i <= SYS_T; i++)
{
C[i] ^= gf.gf_mul(f, B[i]) & mne;
}
L = (short)((L & ~mle) | ((N + 1 - L) & mle));
// B becomes either B or the saved C, shifted up by one position
for (int i = SYS_T - 1; i >= 0; i--)
{
B[i + 1] = (short)((B[i] & ~mle) | (T[i] & mle));
}
B[0] = 0;
b = (short)((b & ~mle) | (d & mle));
}
// output the coefficients of C in reversed order
for (int i = 0; i <= SYS_T; i++)
{
out[i] = C[SYS_T - i];
}
}
/* input: Goppa polynomial f, support L, received word r */
/* output: out, the syndrome of length 2t */
/* For every set bit i of r, out[j] accumulates L[i]^j / f(L[i])^2 for j = 0 .. 2t-1. */
private void synd(short[] out, short[] f, short[] L, byte[] r)
{
// position 0 is handled separately: it initialises out[] instead of accumulating into it
{
short c = (short)(r[0] & 1);
short L_i = L[0];
short e = eval(f, L_i);
short e_inv = gf.gf_inv(gf.gf_sq(e));
// -c is all ones when the bit is set, so this keeps e_inv or yields 0
short c_div_e = (short)(e_inv & -c);
out[0] = c_div_e;
for (int j = 1; j < 2 * SYS_T; j++)
{
c_div_e = gf.gf_mul(c_div_e, L_i);
out[j] = c_div_e;
}
}
for (int i = 1; i < SYS_N; i++)
{
// c is bit i of r
short c = (short)((r[i / 8] >> (i % 8)) & 1);
short L_i = L[i];
short e = eval(f, L_i);
short e_inv = gf.gf_inv(gf.gf_sq(e));
short c_div_e = gf.gf_mul(e_inv, c);
out[0] ^= c_div_e;
for (int j = 1; j < 2 * SYS_T; j++)
{
c_div_e = gf.gf_mul(c_div_e, L_i);
out[j] ^= c_div_e;
}
}
}
/**
 * Column-moving step used by pk_gen when pivots are enabled. It works on the 64 columns that
 * start at bit position {@code PK_NROWS - 32}: a 32x64 block taken from the last 32 rows is
 * reduced to find 32 pivot columns, then {@code pi} and every row of {@code mat} are permuted
 * so that those pivot columns come first.
 *
 * @param mat    the matrix being reduced by pk_gen; modified in place
 * @param pi     permutation; entries {@code row .. row + 63} are modified in place
 * @param pivots single-element output; bit k of pivots[0] is set when column k was chosen as a pivot
 * @return 0 on success, -1 if the 32x64 block is not of full rank
 */
private int mov_columns(byte[][] mat, short[] pi, long[] pivots)
{
int i, j, k, s, block_idx, row, tail;
long[] buf = new long[64],
ctz_list = new long[32];
long t, d, mask, one = 1;
byte[] tmp = new byte[9]; // Used for padding
row = PK_NROWS - 32;
block_idx = row / 8;
tail = row % 8;
// extract the 32x64 matrix
if (usePadding)
{
// the block is not byte aligned: read 9 bytes and shift right by `tail` bits to get 64 bits
for (i = 0; i < 32; i++)
{
for (j = 0; j < 9; j++)
{
tmp[j] = mat[row + i][block_idx + j];
}
for (j = 0; j < 8; j++)
{
tmp[j] = (byte)(((tmp[j] & 0xff) >> tail) | (tmp[j + 1] << (8 - tail)));
}
buf[i] = Utils.load8(tmp, 0);
}
}
else
{
for (i = 0; i < 32; i++)
{
buf[i] = Utils.load8(mat[row + i], block_idx);
}
}
// compute the column indices of pivots by Gaussian elimination.
// the indices are stored in ctz_list
pivots[0] = 0;
for (i = 0; i < 32; i++)
{
// OR of the remaining rows: its lowest set bit is the next pivot column
t = buf[i];
for (j = i + 1; j < 32; j++)
{
t |= buf[j];
}
if (t == 0)
{
return -1; // return if buf is not full rank
}
ctz_list[i] = s = ctz(t);
pivots[0] |= one << ctz_list[i];
// make sure row i has a 1 in column s (adds later rows only while that bit is still 0)
for (j = i + 1; j < 32; j++)
{
mask = (buf[i] >> s) & 1;
mask -= 1;
buf[i] ^= buf[j] & mask;
}
// clear column s in all later rows
for (j = i + 1; j < 32; j++)
{
mask = (buf[j] >> s) & 1;
mask = -mask;
buf[j] ^= buf[i] & mask;
}
}
// updating permutation
// swap pi[row + j] with pi[row + ctz_list[j]] via a masked exchange over all candidates k
for (j = 0; j < 32; j++)
{
for (k = j + 1; k < 64; k++)
{
d = pi[row + j] ^ pi[row + k];
d &= same_mask64((short)k, (short)ctz_list[j]);
pi[row + j] ^= d;
pi[row + k] ^= d;
}
}
// moving columns of mat according to the column indices of pivots
for (i = 0; i < PK_NROWS; i++)
{
if (usePadding)
{
for (k = 0; k < 9; k++)
{
tmp[k] = mat[i][block_idx + k];
}
for (k = 0; k < 8; k++)
{
tmp[k] = (byte)(((tmp[k] & 0xff) >> tail) | (tmp[k + 1] << (8 - tail)));
}
t = Utils.load8(tmp, 0);
}
else
{
t = Utils.load8(mat[i], block_idx);
}
// exchange bit j with bit ctz_list[j] of the 64-bit window
for (j = 0; j < 32; j++)
{
d = t >> j;
d ^= t >> ctz_list[j];
d &= 1;
t ^= d << ctz_list[j];
t ^= d << j;
}
if (usePadding)
{
// write the window back at its unaligned position, preserving the bits outside it
Utils.store8(tmp, 0, t);
mat[i][block_idx + 8] = (byte)(((mat[i][block_idx + 8] & 0xff) >>> tail << tail) | ((tmp[7] & 0xff) >>> (8 - tail)));
mat[i][block_idx + 0] = (byte)(((tmp[0] & 0xff) << tail) | ((mat[i][block_idx] & 0xff) << (8 - tail) >>> (8 - tail)));
for (k = 7; k >= 1; k--)
{
mat[i][block_idx + k] = (byte)(((tmp[k] & 0xff) << tail) | ((tmp[k - 1] & 0xff) >>> (8 - tail)));
}
}
else
{
Utils.store8(mat[i], block_idx, t);
}
}
return 0;
}
/* return number of trailing zeros of the non-zero input in */
/* Both loops have fixed iteration counts and no branch depends on the value of `in`. */
private static int ctz(long in)
{
// Earlier bit-by-bit version, kept for reference:
// int i, b, m = 0, r = 0;
//
// for (i = 0; i < 64; i++)
// {
// b = (int)((in >> i) & 1);
// m |= b;
// r += (m ^ 1) & (b ^ 1);
// }
//
// return r;
// Stage 1: work on all 8 bytes in parallel. After the loop each byte of r8 holds the
// number of trailing zero bits of the corresponding byte of `in` (0..8).
long m1 = 0x0101010101010101L, r8 = 0, x = ~in;
for (int i = 0; i < 8; ++i)
{
m1 &= x >>> i;
r8 += m1;
}
// Stage 2: m8 marks bytes that are entirely zero (count == 8) and is widened to the low
// nibble so it can act as a mask for the next byte's count.
long m8 = r8 & 0x0808080808080808L;
m8 |= m8 >>> 1;
m8 |= m8 >>> 2;
// The low byte of r accumulates the counts of successive bytes for as long as all lower
// bytes were zero.
long r = r8;
r8 >>>= 8;
r += r8 & m8;
for (int i = 2; i < 8; ++i)
{
m8 &= m8 >>> 8;
r8 >>>= 8;
r += r8 & m8;
}
return (int)r & 0xFF;
}
/* Used in mov columns */
/* Branch-free equality test: returns all ones (-1L) when x == y and 0 for the non-negative, unequal inputs mov_columns passes. */
static private long same_mask64(short x, short y)
{
    long diff = x ^ y;
    // diff - 1 has its top bit set exactly when diff is 0 (or negative)
    long equalBit = (diff - 1) >>> 63;
    return -equalBit;
}
/* Used in error vector generation */
/* Branch-free equality test: returns (byte)0xFF when x == y and 0 for the non-negative, unequal inputs generate_error_vector passes. */
private static byte same_mask32(short x, short y)
{
    int diff = x ^ y;
    // diff - 1 has its top bit set exactly when diff is 0 (or negative)
    int equalBit = (diff - 1) >>> 31;
    return (byte)((-equalBit) & 0xFF);
}
/**
 * Applies one layer of conditional swaps to {@code p}. Elements {@code stride = 1 << s} apart
 * are paired up; the k-th pair (in visiting order) is swapped when bit k of the control-bit
 * stream starting at {@code out[ptrIndex]} is set. The swap is done with an XOR mask, not a branch.
 *
 * @param p        array permuted in place
 * @param out      control bits, least significant bit of each byte first
 * @param ptrIndex byte offset of this layer's control bits in {@code out}
 * @param s        log2 of the pairing distance
 * @param n        number of elements of {@code p} covered
 */
private static void layer(short[] p, byte[] out, int ptrIndex, int s, int n)
{
    final int stride = 1 << s;
    int bit = 0;
    for (int base = 0; base < n; base += stride * 2)
    {
        for (int lo = base; lo < base + stride; lo++, bit++)
        {
            final int hi = lo + stride;
            int swap = p[lo] ^ p[hi];
            int control = (out[ptrIndex + (bit >> 3)] >> (bit & 7)) & 1;
            swap &= -control;
            p[lo] ^= swap;
            p[hi] ^= swap;
        }
    }
}
/**
 * Computes the control bits for the permutation {@code pi} and verifies them by applying the
 * resulting layers to the identity permutation.
 *
 * @param out receives {@code ((2 * w - 1) * n / 2 + 7) / 8} bytes of control bits
 * @param pi  permutation of {@code n} elements
 * @param w   number of bits, i.e. the caller passes {@code n = 1 << w}
 * @param n   number of elements
 */
private static void controlbitsfrompermutation(byte[] out, short[] pi, long w, long n)
{
int[] temp = new int[(int)(2 * n)];
short[] pi_test = new short[(int)n];
short diff;
int i;
int ptrIndex;
// NOTE(review): the loop repeats the identical computation until the self-check passes; there
// is no other exit, so a persistent mismatch would not terminate.
while (true)
{
for (i = 0; i < (((2 * w - 1) * n / 2) + 7) / 8; i++)
{
out[i] = 0;
}
cbrecursion(out, 0, 1, pi, 0, w, n, temp);
// check for correctness
for (i = 0; i < n; i++)
{
pi_test[i] = (short)i;
}
// apply layers 0 .. w-1 and then w-2 .. 0; each layer consumes n / 16 bytes (n / 2 bits)
ptrIndex = 0;
for (i = 0; i < w; i++)
{
layer(pi_test, out, ptrIndex, i, (int)n);
ptrIndex += n >> 4;
}
for (i = (int)(w - 2); i >= 0; i--)
{
layer(pi_test, out, ptrIndex, i, (int)n);
ptrIndex += n >> 4;
}
// diff stays 0 only if the layers reproduce pi exactly
diff = 0;
for (i = 0; i < n; i++)
{
diff |= pi[i] ^ pi_test[i];
}
if (diff == 0)
{
break;
}
}
}
/**
 * Reads a 16-bit value from an int array that is treated as a packed array of shorts:
 * even indices select the low half of {@code temp[q_index / 2]}, odd indices the high half.
 *
 * @param temp    packed storage
 * @param q_index index counted in 16-bit units
 * @return the selected 16-bit half as a short
 */
static short get_q_short(int[] temp, int q_index)
{
    final int word = temp[q_index / 2];
    final boolean lowHalf = (q_index % 2 == 0);
    return lowHalf ? (short)word : (short)(word >> 16);
}
/**
 * Recursive step of the control-bit computation. XORs the first and last layer of control bits
 * for the current permutation into {@code out} and recurses on two half-size permutations.
 * <p>
 * The permutation comes from {@code pi} when it is non-null (top-level call), otherwise it is
 * read as packed 16-bit values from {@code temp} starting at short index {@code qIndex}.
 * The inline "A = ..." / "B = ..." comments describe the packed contents of
 * {@code temp[0 .. n)} (A) and {@code temp[n .. 2n)} (B) after each step.
 *
 * @param out    control-bit buffer, updated by XOR
 * @param pos    bit position in {@code out} of this call's first control bit
 * @param step   distance in bits between consecutive control bits of this call
 * @param pi     permutation for the top-level call, or null in recursive calls
 * @param qIndex short index into {@code temp} of the permutation when {@code pi} is null
 * @param w      number of bits; the recursion stops at w == 1
 * @param n      number of elements (callers pass n = 1 << w)
 * @param temp   scratch storage shared by all recursion levels
 */
static void cbrecursion(byte[] out, long pos, long step, short[] pi, int qIndex, long w, long n, int[] temp)
{
long x, i, j, k;
// base case: a single control bit, taken from the first permutation entry
if (w == 1)
{
out[(int)(pos >> 3)] ^= get_q_short(temp, qIndex) << (pos & 7);
return;
}
if (pi != null)
{
for (x = 0; x < n; ++x)
{
temp[(int)x] = ((pi[(int)x] ^ 1) << 16) | pi[(int)(x ^ 1)];
}
}
else
{
for (x = 0; x < n; ++x)
{
temp[(int)x] = ((get_q_short(temp, (int)(qIndex + x)) ^ 1) << 16) | get_q_short(temp, (int)((qIndex) + (x ^ 1)));
}
}
sort32(temp, 0, (int)n); /* A = (id<<16)+pibar */
for (x = 0; x < n; ++x)
{
int Ax = temp[(int)x];
int px = Ax & 0xffff;
int cx = px;
if (x < cx)
{
cx = (int)x;
}
temp[(int)(n + x)] = (px << 16) | cx;
}
for (x = 0; x < n; ++x)
{
temp[(int)x] = (int)((temp[(int)x] << 16) | x); /* A = (pibar<<16)+id */
}
sort32(temp, 0, (int)n); /* A = (id<<16)+pibar^-1 */
for (x = 0; x < n; ++x)
{
temp[(int)x] = (temp[(int)x] << 16) + (temp[(int)(n + x)] >> 16); /* A = (pibar^(-1)<<16)+pibar */
}
sort32(temp, 0, (int)n); /* A = (id<<16)+pibar^2 */
// Two variants of the same iteration: for w <= 10 the values fit in 10 bits, so p and c are
// packed into one word; otherwise 16-bit fields are used.
if (w <= 10)
{
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] = ((temp[(int)x] & 0xffff) << 10) | (temp[(int)(n + x)] & 0x3ff);
}
for (i = 1; i < w - 1; ++i)
{
/* B = (p<<10)+c */
for (x = 0; x < n; ++x)
{
temp[(int)x] = (int)(((temp[(int)(n + x)] & ~0x3ff) << 6) | x); /* A = (p<<16)+id */
}
sort32(temp, 0, (int)n); /* A = (id<<16)+p^{-1} */
for (x = 0; x < n; ++x)
{
temp[(int)x] = (temp[(int)x] << 20) | temp[(int)(n + x)]; /* A = (p^{-1}<<20)+(p<<10)+c */
}
sort32(temp, 0, (int)n); /* A = (id<<20)+(pp<<10)+cp */
for (x = 0; x < n; ++x)
{
int ppcpx = temp[(int)x] & 0xfffff;
int ppcx = (temp[(int)x] & 0xffc00) | (temp[(int)(n + x)] & 0x3ff);
if (ppcpx < ppcx)
{
ppcx = ppcpx;
}
temp[(int)(n + x)] = ppcx;
}
}
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] &= 0x3ff;
}
}
else
{
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] = (temp[(int)x] << 16) | (temp[(int)(n + x)] & 0xffff);
}
for (i = 1; i < w - 1; ++i)
{
/* B = (p<<16)+c */
for (x = 0; x < n; ++x)
{
temp[(int)x] = (int)((temp[(int)(n + x)] & ~0xffff) | x);
}
sort32(temp, 0, (int)n); /* A = (id<<16)+p^(-1) */
for (x = 0; x < n; ++x)
{
temp[(int)x] = (temp[(int)x] << 16) | (temp[(int)(n + x)] & 0xffff);
}
/* A = p^(-1)<<16+c */
if (i < w - 2)
{
//if loop 1 B
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] = (temp[(int)x] & ~0xffff) | (temp[(int)(n + x)] >> 16);
}
/* B = (p^(-1)<<16)+p */
sort32(temp, (int)n, (int)(n * 2)); /* B = (id<<16)+p^(-2) */
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] = (temp[(int)(n + x)] << 16) | (temp[(int)x] & 0xffff);
}
/* B = (p^(-2)<<16)+c */
}
sort32(temp, 0, (int)n);
/* A = id<<16+cp */
for (x = 0; x < n; ++x)
{
int cpx = (temp[(int)(n + x)] & ~0xffff) | (temp[(int)x] & 0xffff);
if (cpx < temp[(int)(n + x)])
{
temp[(int)(n + x)] = cpx;
}
}
}
for (x = 0; x < n; ++x)
{
temp[(int)(n + x)] &= 0xffff;
}
}
if (pi != null)
{
for (x = 0; x < n; ++x)
{
temp[(int)x] = (int)((((int)pi[(int)x]) << 16) + x);
}
}
else
{
for (x = 0; x < n; ++x)
{
temp[(int)x] = (int)(((get_q_short(temp, (int)(qIndex + x))) << 16) + x);
}
}
sort32(temp, 0, (int)n); /* A = (id<<16)+pi^(-1) */
// first layer: one control bit f[j] per pair, written `step` bits apart
for (j = 0; j < n / 2; ++j)
{
long _x = 2 * j;
int fj = temp[(int)(n + _x)] & 1; /* f[j] */
int Fx = (int)(_x + fj); /* F[x] */
int Fx1 = Fx ^ 1; /* F[x+1] */
out[(int)(pos >> 3)] ^= fj << (pos & 7);
pos += step;
temp[(int)(n + _x)] = (temp[(int)_x] << 16) | Fx;
temp[(int)(n + _x + 1)] = (temp[(int)(_x + 1)] << 16) | Fx1;
}
/* B = (pi^(-1)<<16)+F */
sort32(temp, (int)n, (int)(n * 2)); /* B = (id<<16)+F(pi) */
// skip ahead to the position of this call's last layer
pos += (2 * w - 3) * step * (n / 2);
// last layer: one control bit l[k] per pair
for (k = 0; k < n / 2; ++k)
{
long y = 2 * k;
int lk = temp[(int)(n + y)] & 1; /* l[k] */
int Ly = (int)(y + lk); /* L[y] */
int Ly1 = Ly ^ 1; /* L[y+1] */
out[(int)(pos >> 3)] ^= lk << (pos & 7);
pos += step;
temp[(int)y] = (Ly << 16) | (temp[(int)(n + y)] & 0xffff);
temp[(int)(y + 1)] = (Ly1 << 16) | (temp[(int)(n + y + 1)] & 0xffff);
}
/* A = (L<<16)+F(pi) */
sort32(temp, 0, (int)n); /* A = (id<<16)+F(pi(L)) = (id<<16)+M */
// rewind pos to just after this call's first control bit position (start + n/2 * step - ... )
pos -= (2 * w - 2) * step * (n / 2);
// q is a short view of temp; its first n entries are then replaced by the two half-size
// permutations (even positions first, odd positions second), each value halved
short[] q = new short[(int)n * 4];
for (i = 0/*n + n/4*/; i < n * 2; i++)
{
q[(int)(i * 2 + 0)] = (short)temp[(int)i];
q[(int)(i * 2 + 1)] = (short)((temp[(int)i] & 0xffff0000) >> 16);
}
for (j = 0; j < n / 2; ++j)
{
q[(int)j] = (short)((temp[(int)(2 * j)] & 0xffff) >>> 1);
q[(int)(j + n / 2)] = (short)((temp[(int)(2 * j + 1)] & 0xffff) >>> 1);
}
// pack the n sub-permutation entries back into temp starting at int index n + n/4
for (i = 0; i < n / 2; i++)
{
temp[(int)(n + n / 4 + i)] = (((int)q[(int)(i * 2 + 1)]) << 16) | ((int)q[(int)(i * 2)]);
}
// recurse on both halves; their control bits interleave (positions pos and pos + step, stride 2 * step)
cbrecursion(out, pos, step * 2, null, (int)(n + n / 4) * 2, w - 1, n / 2, temp);
cbrecursion(out, pos + step, step * 2, null, (int)((n + n / 4) * 2 + n / 2), w - 1, n / 2, temp);
}
/**
 * Derives the field ordering and the public key from the polynomial stored in {@code sk}.
 *
 * @param pk     output public key, or null to skip writing it (pi and pivots are still computed)
 * @param sk     private key buffer; the SYS_T polynomial coefficients are read from offset 40
 * @param perm   {@code 1 << GFBITS} integers that are sorted to obtain the permutation
 * @param pi     output permutation, {@code 1 << GFBITS} entries
 * @param pivots single-element output filled by mov_columns when {@code usePivots} is set
 * @return 0 on success; -1 if two entries of {@code perm} are equal, if mov_columns fails, or
 *         if the matrix cannot be brought into systematic form
 */
private int pk_gen(byte[] pk, byte[] sk, int[] perm, short[] pi, long[] pivots)
{
short[] g = new short[SYS_T + 1]; // Goppa polynomial
int i, j, k;
g[SYS_T] = 1;
for (i = 0; i < SYS_T; i++)
{
g[i] = Utils.load_gf(sk, 40 + i * 2, GFMASK);
}
// Create buffer
// each entry is (perm value << 31) | index, so sorting orders by value and carries the index along
long[] buf = new long[1 << GFBITS];
for (i = 0; i < (1 << GFBITS); i++)
{
buf[i] = perm[i];
buf[i] <<= 31;
buf[i] |= i;
buf[i] &= 0x7fffffffffffffffL; // getting rid of signed longs
}
// sort32 the buffer
// FieldOrdering 2.4.2 - 3. sort32 the pairs (ai,i) in lexicographic order to obtain pairs (aπ(i),π(i))
// where π is a permutation of {0,1,...,q −1}
sort64(buf, 0, buf.length);
// FieldOrdering 2.4.2 - 2. If a0,a1,...,aq−1 are not distinct, return ⊥.
// (after sorting, equal values are adjacent)
for (i = 1; i < (1 << GFBITS); i++)
{
if ((buf[i - 1] >> 31) == (buf[i] >> 31))
{
// System.out.println("FAIL 1");
return -1;
}
}
// FieldOrdering 2.4.2 - 4.
short[] L = new short[SYS_N];
for (i = 0; i < (1 << GFBITS); i++)
{
pi[i] = (short)(buf[i] & GFMASK);
}
for (i = 0; i < SYS_N; i++)
{
L[i] = Utils.bitrev(pi[i], GFBITS);
}
// filling matrix
// inv[j] = 1 / g(L[j])
short[] inv = new short[SYS_N];
root(inv, g, L);
for (i = 0; i < SYS_N; i++)
{
inv[i] = gf.gf_inv(inv[i]);
}
byte[][] mat = new byte[PK_NROWS][(SYS_N / 8)];
byte b;
for (i = 0; i < PK_NROWS; i++)
{
for (j = 0; j < SYS_N / 8; j++)
{
mat[i][j] = 0;
}
}
// block i of GFBITS rows holds the bits of L[j]^i / g(L[j]); 8 columns are packed per byte
for (i = 0; i < SYS_T; i++)
{
for (j = 0; j < SYS_N; j += 8)
{
for (k = 0; k < GFBITS; k++)
{
b = (byte)((inv[j + 7] >>> k) & 1);
b <<= 1;
b |= (inv[j + 6] >>> k) & 1;
b <<= 1;
b |= (inv[j + 5] >>> k) & 1;
b <<= 1;
b |= (inv[j + 4] >>> k) & 1;
b <<= 1;
b |= (inv[j + 3] >>> k) & 1;
b <<= 1;
b |= (inv[j + 2] >>> k) & 1;
b <<= 1;
b |= (inv[j + 1] >>> k) & 1;
b <<= 1;
b |= (inv[j + 0] >>> k) & 1;
mat[i * GFBITS + k][j / 8] = b;
}
}
for (j = 0; j < SYS_N; j++)
{
inv[j] = gf.gf_mul(inv[j], L[j]);
}
}
// gaussian elimination
int row, c;
byte mask;
for (row = 0; row < PK_NROWS; row++)
{
// byte index and bit index of the diagonal position for this row
i = row >>> 3;
j = row & 7;
if (usePivots)
{
// the column-moving step runs once, when the last 32 rows are reached
if (row == PK_NROWS - 32)
{
if (mov_columns(mat, pi, pivots) != 0)
{
// System.out.println("failed mov column!");
return -1;
}
}
}
// add later rows into this row while its diagonal bit is still 0 (mask-selected, no branch)
for (k = row + 1; k < PK_NROWS; k++)
{
mask = (byte)(mat[row][i] ^ mat[k][i]);
mask >>= j;
mask &= 1;
mask = (byte)-mask;
for (c = 0; c < SYS_N / 8; c++)
{
mat[row][c] ^= mat[k][c] & mask;
}
}
// 7. Compute (T,cn−k−μ+1,...,cn−k,Γ′) = MatGen(Γ). If this fails, set δ = δ′ and
// restart the algorithm.
if (((mat[row][i] >> j) & 1) == 0) // return if not systematic
{
// System.out.println("FAIL 2\n");
return -1;
}
// clear this column in every other row
for (k = 0; k < PK_NROWS; k++)
{
if (k != row)
{
mask = (byte)(mat[k][i] >> j);
mask &= 1;
mask = (byte)-mask;
for (c = 0; c < SYS_N / 8; c++)
{
mat[k][c] ^= mat[row][c] & mask;
}
}
}
}
// FieldOrdering 2.4.2 - 5. Output (α1,α2,...,αq)
if (pk != null)
{
if (usePadding)
{
int pk_index = 0, tail = PK_NROWS % 8;
if (tail == 0)
{
// NOTE(review): this branch uses `i` as left over from the elimination loop and copies
// SYS_N / 8 bytes starting at a non-zero source offset, which looks like it would overrun
// mat[i]. It is only reached when usePadding is set and PK_NROWS is a multiple of 8 —
// confirm whether any supported parameter set can get here.
System.arraycopy(mat[i], (PK_NROWS - 1) / 8, pk, pk_index, SYS_N / 8);
pk_index += SYS_N / 8;
}
else
{
// the non-identity part starts mid-byte: shift each row right by `tail` bits while copying
for (i = 0; i < PK_NROWS; i++)
{
for (j = (PK_NROWS - 1) / 8; j < SYS_N / 8 - 1; j++)
{
pk[pk_index++] = (byte)(((mat[i][j] & 0xff) >>> tail) | (mat[i][j + 1] << (8 - tail)));
}
pk[pk_index++] = (byte)((mat[i][j] & 0xff) >>> tail);
}
}
}
else
{
// Earlier byte-by-byte copy, kept for reference:
// for (i = 0; i < PK_NROWS; i++)
// {
// k = 0;
// for (j = 0; j < (((SYS_N - PK_NROWS) + 7) / 8); j++)
// {
// pk[i * (((SYS_N - PK_NROWS) + 7) / 8) + k] = mat[i][j + PK_NROWS / 8];
// k++;
// }
// }
// byte-aligned case: copy the part of each row that follows the identity block
int count = (SYS_N - PK_NROWS + 7) / 8;
for (i = 0; i < PK_NROWS; i++)
{
System.arraycopy(mat[i], PK_NROWS / 8, pk, count * i, count);
}
}
}
return 0;
}
/**
 * Evaluates the degree-SYS_T polynomial {@code f} at the field element {@code a} using
 * Horner's rule, starting from the leading coefficient {@code f[SYS_T]}.
 *
 * @param f coefficients, lowest degree first, SYS_T + 1 entries
 * @param a evaluation point
 * @return f(a)
 */
private short eval(short[] f, short a)
{
    short acc = f[SYS_T];
    int degree = SYS_T;
    while (degree-- > 0)
    {
        acc = (short)(gf.gf_mul(acc, a) ^ f[degree]);
    }
    return acc;
}
/**
 * Evaluates the polynomial {@code f} at each of the first SYS_N entries of {@code L}.
 *
 * @param out receives f(L[i]) at index i, for i in [0, SYS_N)
 * @param f   polynomial coefficients as expected by {@link #eval}
 * @param L   evaluation points
 */
private void root(short[] out, short[] f, short[] L)
{
    int idx = 0;
    while (idx < SYS_N)
    {
        out[idx] = eval(f, L[idx]);
        idx++;
    }
}
/**
 * Replaces {@code field} by the low SYS_T coefficients of the minimal polynomial of the
 * element it encodes.
 * <p>
 * The successive powers of the element (1, the element itself, ..., up to exponent SYS_T) are
 * stored as the rows of a matrix, which is then reduced by Gaussian elimination; the last row
 * of the reduced matrix is the result.
 *
 * @param field input element as SYS_T coefficients; overwritten with the result on success
 * @return 0 on success, -1 if the elimination finds a zero pivot
 */
private int generate_irr_poly(short[] field)
{
// Irreducible 2.4.1 - 2. Define β = β0 + β1y + ···+ βt−1yt−1 ∈Fq[y]/F(y).
// generating poly
short[][] m = new short[SYS_T + 1][SYS_T];
// filling matrix
{
// m[0] = 1 (the remaining entries are already zero), m[1] = the input element
m[0][0] = 1;
// for (int i = 1; i < SYS_T; i++)
// {
// m[0][i] = 0;
// }
System.arraycopy(field, 0, m[1], 0, SYS_T);
int[] temp = new int[SYS_T * 2 - 1];
// even powers by squaring m[j / 2], odd powers by one more multiplication with the element
int j = 2;
while (j < SYS_T)
{
gf.gf_sqr_poly(SYS_T, poly, m[j], m[j >>> 1], temp);
gf.gf_mul_poly(SYS_T, poly, m[j + 1], m[j], field, temp);
j += 2;
}
// when SYS_T is even the loop stops at j == SYS_T and the last power is still missing
if (j == SYS_T)
{
gf.gf_sqr_poly(SYS_T, poly, m[j], m[j >>> 1], temp);
}
}
// Irreducible 2.4.1 - 3. Compute the minimal polynomial g of β over Fq. (By definition g is monic and irre-
// ducible, and g(β) = 0.)
// gaussian
for (int j = 0; j < SYS_T; j++)
{
// add later columns into column j while the pivot m[j][j] is still zero (mask-selected)
for (int k = j + 1; k < SYS_T; k++)
{
short mask = gf.gf_iszero(m[j][j]);
for (int c = j; c < SYS_T + 1; c++)
{
m[c][j] ^= (short)(m[c][k] & mask);
}
}
// Irreducible 2.4.1 - 4. Return g if g has degree t. Otherwise return ⊥
if (m[j][j] == 0) // return if not systematic
{
// System.out.println("FAILED GENERATING IRR POLY");
return -1;
}
// scale column j so that the pivot becomes 1
short inv = gf.gf_inv(m[j][j]);
for (int c = j; c < SYS_T + 1; c++)
{
m[c][j] = gf.gf_mul(m[c][j], inv);
}
// eliminate the entry in row j from every other column
for (int k = 0; k < SYS_T; k++)
{
if (k != j)
{
short t = m[j][k];
for (int c = j; c <= SYS_T; c++)
{
m[c][k] ^= gf.gf_mul(m[c][j], t);
}
}
}
}
System.arraycopy(m[SYS_T], 0, field, 0, SYS_T);
return 0;
}
/* check if the padding bits of pk are all zero */
/* Returns 0 when they are all zero and -1 otherwise; computed without branching on the key bytes. */
int check_pk_padding(byte[] pk)
{
    // OR together the last byte of every public-key row
    int lastBytes = 0;
    for (int row = 0; row < PK_NROWS; row++)
    {
        lastBytes |= pk[row * PK_ROW_BYTES + PK_ROW_BYTES - 1];
    }

    // keep only the bits above the PK_NCOLS % 8 used bits
    int padding = (lastBytes & 0xff) >>> (PK_NCOLS % 8);

    // 8-bit arithmetic: (padding - 1) has bit 7 set exactly when padding is 0
    int isZero = ((padding - 1) & 0xff) >>> 7;
    return isZero - 1;
}
/* check if the padding bits of c are all zero */
/* Returns 0 when they are all zero and -1 otherwise; computed without branching on the ciphertext. */
int check_c_padding(byte[] c)
{
    // bits above the PK_NROWS % 8 used bits of the last syndrome byte
    int padding = (c[SYND_BYTES - 1] & 0xff) >>> (PK_NROWS % 8);

    // 8-bit arithmetic: (padding - 1) has bit 7 set exactly when padding is 0
    int isZero = ((padding - 1) & 0xff) >>> 7;
    return isZero - 1;
}
/**
 * Returns the default session key size that was passed to the constructor.
 *
 * @return the value of {@code defaultKeySize}, unchanged
 */
public int getDefaultSessionKeySize()
{
    final int keySize = this.defaultKeySize;
    return keySize;
}
/**
 * In-place sort of {@code temp[from .. to)} as signed 32-bit integers, built from
 * compare-exchange steps whose positions depend only on the length, not on the data.
 * Each step leaves the smaller value at the lower index.
 * NOTE(review): the loop structure looks like a port of the djbsort int32 network — confirm.
 *
 * @param temp array to sort
 * @param from first index (inclusive)
 * @param to   end index (exclusive)
 */
private static void sort32(int[] temp, int from, int to)
{
int top,p,q,r,i;
int n = to - from;
if (n < 2) return;
// top ends up as the smallest power of two that is >= n / 2
top = 1;
while (top < n - top) top += top;
for (p = top;p > 0;p >>>= 1)
{
for (i = 0;i < n - p;++i)
{
if ((i & p) == 0)
{
// branch-free min/max of the pair (i, i + p): c becomes ab when temp[i + p] < temp[i],
// else 0; the extra XOR step corrects the sign of the difference on overflow
int ab = temp[from + i + p] ^ temp[from + i];
int c = temp[from + i + p] - temp[from + i];
c ^= ab & (c ^ temp[from + i + p]);
c >>= 31;
c &= ab;
temp[from + i] ^= c;
temp[from + i + p] ^= c;
}
}
i = 0;
for (q = top;q > p;q >>>= 1)
{
for (;i < n - q;++i)
{
if ((i & p) == 0)
{
// a is compared against the elements at distances q, q/2, ..., 2p and keeps the minimum
int a = temp[from + i + p];
for (r = q;r > p;r >>>= 1)
{
int ab = temp[from + i + r] ^ a;
int c = temp[from + i + r] - a;
c ^= ab & (c ^ temp[from + i + r]);
c >>= 31;
c &= ab;
a ^= c;
temp[from + i + r] ^= c;
}
temp[from + i + p] = a;
}
}
}
}
}
/**
 * In-place sort of {@code temp[from .. to)}, using the same data-independent compare-exchange
 * schedule as sort32.
 * <p>
 * The comparison uses only the sign bit of the plain difference, so it orders correctly only
 * when that difference cannot overflow. The callers in this file clear bit 63 of every entry
 * before sorting, which keeps all values non-negative.
 *
 * @param temp array to sort
 * @param from first index (inclusive)
 * @param to   end index (exclusive)
 */
private static void sort64(long[] temp, int from, int to)
{
int top,p,q,r,i;
int n = to - from;
if (n < 2) return;
// top ends up as the smallest power of two that is >= n / 2
top = 1;
while (top < n - top) top += top;
for (p = top;p > 0;p >>>= 1)
{
for (i = 0;i < n - p;++i)
{
if ((i & p) == 0)
{
// c becomes (a ^ b) when temp[i + p] < temp[i], else 0; XOR-ing it into both swaps them
long c = temp[from + i + p] - temp[from + i];
c >>>= 63;
c = -c;
c &= temp[from + i] ^ temp[from + i + p];
temp[from + i] ^= c;
temp[from + i + p] ^= c;
}
}
i = 0;
for (q = top;q > p;q >>>= 1)
{
for (;i < n - q;++i)
{
if ((i & p) == 0)
{
// a is compared against the elements at distances q, q/2, ..., 2p and keeps the minimum
long a = temp[from + i + p];
for (r = q;r > p;r >>>= 1)
{
long c = temp[from + i + r] - a;
c >>>= 63;
c = -c;
c &= a ^ temp[from + i + r];
a ^= c;
temp[from + i + r] ^= c;
}
temp[from + i + p] = a;
}
}
}
}
}
}