mirror of
https://github.com/Next-Flip/Momentum-Firmware.git
synced 2026-04-25 03:29:58 -07:00
350 lines
11 KiB
C
350 lines
11 KiB
C
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
|
|
|
|
#include "aesopt.h"
|
|
#include "aestab.h"
|
|
|
|
#if defined(USE_INTEL_AES_IF_PRESENT)
|
|
#include "aes_ni.h"
|
|
#else
|
|
/* map names here to provide the external API ('name' -> 'aes_name') */
|
|
#define aes_xi(x) aes_##x
|
|
#endif
|
|
|
|
#if defined(__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* si: load column c of the input block x into state y, XORing in the
   round-key word (k)[c]. The value of the expression is the word stored.
   s() and word_in() are supplied by the included AES option headers. */
#define si(y, x, k, c) (s(y, c) = word_in(x, c) ^ (k)[c])
/* so: write column c of state x to the output block y through word_out(). */
#define so(y, x, c) word_out(y, c, s(x, c))

/* locals: declarator list for the two working states used by the cipher
   routines. With ARRAYS defined it declares two 4-element arrays; otherwise
   it declares eight scalars named x0..x3 and y0..y3. */
#if defined(ARRAYS)
#define locals(y, x) x[4], y[4]
#else
#define locals(y, x) x##0, x##1, x##2, x##3, y##0, y##1, y##2, y##3
#endif
|
|
|
|
/* The four helpers below each touch all four columns (0..3) of the state.
   They are wrapped in do { } while(0) so that each expands to exactly one
   statement and stays correct under an unbraced if/else. Column indices are
   written as literals because s() may paste them into variable names. */

/* l_copy: copy every column of state x into state y. */
#define l_copy(y, x)       \
    do {                   \
        s(y, 0) = s(x, 0); \
        s(y, 1) = s(x, 1); \
        s(y, 2) = s(x, 2); \
        s(y, 3) = s(x, 3); \
    } while(0)

/* state_in: load the input block x into state y, XORing in the round key k
   column by column (see si()). */
#define state_in(y, x, k) \
    do {                  \
        si(y, x, k, 0);   \
        si(y, x, k, 1);   \
        si(y, x, k, 2);   \
        si(y, x, k, 3);   \
    } while(0)

/* state_out: write state x to the output block y column by column
   (see so()). */
#define state_out(y, x) \
    do {                \
        so(y, x, 0);    \
        so(y, x, 1);    \
        so(y, x, 2);    \
        so(y, x, 3);    \
    } while(0)

/* round: apply the per-column round macro rm (for example fwd_rnd or
   inv_lrnd) to produce all four columns of y from state x and round key k. */
#define round(rm, y, x, k) \
    do {                   \
        rm(y, x, k, 0);    \
        rm(y, x, k, 1);    \
        rm(y, x, k, 2);    \
        rm(y, x, k, 3);    \
    } while(0)
|
|
|
|
#if(FUNCS_IN_C & ENCRYPTION_IN_C)
|
|
|
|
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
|
|
|
|
#if defined(_MSC_VER) && !defined(_WIN64) && !defined(__clang__)
|
|
#pragma optimize("s", on)
|
|
#endif
|
|
|
|
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/
|
|
|
|
/* fwd_var: select the input state word that feeds row r of output column c
   in a forward round. The table below picks column (c + r) mod 4 of x.
   Every s() index is a literal, so when r and c are constants the whole
   expression can fold to a single state variable. r and c are parenthesized
   so that expression arguments are compared as a whole. */
#define fwd_var(x, r, c)                \
    ((r) == 0 ? ((c) == 0 ? s(x, 0) :   \
                 (c) == 1 ? s(x, 1) :   \
                 (c) == 2 ? s(x, 2) :   \
                            s(x, 3)) :  \
     (r) == 1 ? ((c) == 0 ? s(x, 1) :   \
                 (c) == 1 ? s(x, 2) :   \
                 (c) == 2 ? s(x, 3) :   \
                            s(x, 0)) :  \
     (r) == 2 ? ((c) == 0 ? s(x, 2) :   \
                 (c) == 1 ? s(x, 3) :   \
                 (c) == 2 ? s(x, 0) :   \
                            s(x, 1)) :  \
                ((c) == 0 ? s(x, 3) :   \
                 (c) == 1 ? s(x, 0) :   \
                 (c) == 2 ? s(x, 1) :   \
                            s(x, 2)))
|
|
|
|
/* fwd_rnd: compute column c of state y for a normal forward round from
   state x and round key k. The implementation depends on which forward
   round tables were configured:
     FT4_SET - four_tables() lookup;
     FT1_SET - one_table() lookup using upr;
     neither - no_table() over the s-box followed by an explicit fwd_mcol().
   The table-driven variants do not call fwd_mcol(), so dec_fmvars is
   undefined there and the encrypt routine skips declaring its temporaries. */
#if defined(FT4_SET)
#undef dec_fmvars
#define fwd_rnd(y, x, k, c) (s(y, c) = (k)[c] ^ four_tables(x, t_use(f, n), fwd_var, rf1, c))
#elif defined(FT1_SET)
#undef dec_fmvars
#define fwd_rnd(y, x, k, c) (s(y, c) = (k)[c] ^ one_table(x, upr, t_use(f, n), fwd_var, rf1, c))
#else
#define fwd_rnd(y, x, k, c) \
    (s(y, c) = (k)[c] ^ fwd_mcol(no_table(x, t_use(s, box), fwd_var, rf1, c)))
#endif
|
|
|
|
/* fwd_lrnd: compute column c of state y for the last forward round from
   state x and round key k. Unlike fwd_rnd, no variant applies fwd_mcol().
   The implementation depends on which last-round tables were configured:
     FL4_SET - four_tables() lookup;
     FL1_SET - one_table() lookup using ups;
     neither - no_table() over the s-box. */
#if defined(FL4_SET)
#define fwd_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ four_tables(x, t_use(f, l), fwd_var, rf1, c))
#elif defined(FL1_SET)
#define fwd_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ one_table(x, ups, t_use(f, l), fwd_var, rf1, c))
#else
#define fwd_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ no_table(x, t_use(s, box), fwd_var, rf1, c))
#endif
|
|
|
|
AES_RETURN
|
|
aes_xi(encrypt)(const unsigned char* in, unsigned char* out, const aes_encrypt_ctx cx[1]) {
|
|
uint32_t locals(b0, b1);
|
|
const uint32_t* kp = NULL;
|
|
#if defined(dec_fmvars)
|
|
dec_fmvars; /* declare variables for fwd_mcol() if needed */
|
|
#endif
|
|
|
|
if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE &&
|
|
cx->inf.b[0] != 14 * AES_BLOCK_SIZE)
|
|
return EXIT_FAILURE;
|
|
|
|
kp = cx->ks;
|
|
state_in(b0, in, kp);
|
|
|
|
#if(ENC_UNROLL == FULL)
|
|
|
|
switch(cx->inf.b[0]) {
|
|
case 14 * AES_BLOCK_SIZE:
|
|
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
|
|
kp += 2 * N_COLS;
|
|
//-fallthrough
|
|
case 12 * AES_BLOCK_SIZE:
|
|
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
|
|
kp += 2 * N_COLS;
|
|
//-fallthrough
|
|
case 10 * AES_BLOCK_SIZE:
|
|
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
|
|
round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
|
|
round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
|
|
round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
|
|
round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
|
|
round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
|
|
round(fwd_lrnd, b0, b1, kp + 10 * N_COLS);
|
|
//-fallthrough
|
|
}
|
|
|
|
#else
|
|
|
|
#if(ENC_UNROLL == PARTIAL)
|
|
{
|
|
uint32_t rnd;
|
|
for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) {
|
|
kp += N_COLS;
|
|
round(fwd_rnd, b1, b0, kp);
|
|
kp += N_COLS;
|
|
round(fwd_rnd, b0, b1, kp);
|
|
}
|
|
kp += N_COLS;
|
|
round(fwd_rnd, b1, b0, kp);
|
|
#else
|
|
{
|
|
uint32_t rnd;
|
|
for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) {
|
|
kp += N_COLS;
|
|
round(fwd_rnd, b1, b0, kp);
|
|
l_copy(b0, b1);
|
|
}
|
|
#endif
|
|
kp += N_COLS;
|
|
round(fwd_lrnd, b0, b1, kp);
|
|
}
|
|
#endif
|
|
|
|
state_out(out, b0);
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|
|
#endif
|
|
|
|
#if(FUNCS_IN_C & DECRYPTION_IN_C)
|
|
|
|
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimization with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
|
|
|
|
#if defined(_MSC_VER) && !defined(_WIN64) && !defined(__clang__)
|
|
#pragma optimize("t", on)
|
|
#endif
|
|
|
|
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/
|
|
|
|
/* inv_var: select the input state word that feeds row r of output column c
   in an inverse round. The table below picks column (c - r) mod 4 of x.
   Every s() index is a literal, so when r and c are constants the whole
   expression can fold to a single state variable. r and c are parenthesized
   so that expression arguments are compared as a whole. */
#define inv_var(x, r, c)                \
    ((r) == 0 ? ((c) == 0 ? s(x, 0) :   \
                 (c) == 1 ? s(x, 1) :   \
                 (c) == 2 ? s(x, 2) :   \
                            s(x, 3)) :  \
     (r) == 1 ? ((c) == 0 ? s(x, 3) :   \
                 (c) == 1 ? s(x, 0) :   \
                 (c) == 2 ? s(x, 1) :   \
                            s(x, 2)) :  \
     (r) == 2 ? ((c) == 0 ? s(x, 2) :   \
                 (c) == 1 ? s(x, 3) :   \
                 (c) == 2 ? s(x, 0) :   \
                            s(x, 1)) :  \
                ((c) == 0 ? s(x, 1) :   \
                 (c) == 1 ? s(x, 2) :   \
                 (c) == 2 ? s(x, 3) :   \
                            s(x, 0)))
|
|
|
|
/* inv_rnd: compute column c of state y for a normal inverse round from
   state x and round key k. The implementation depends on which inverse
   round tables were configured:
     IT4_SET - four_tables() lookup, then XOR with the round key;
     IT1_SET - one_table() lookup using upr, then XOR with the round key;
     neither - no_table() over the inverse s-box is XORed with the round key
               first and inv_mcol() is applied to the result.
   The table-driven variants do not call inv_mcol(), so dec_imvars is
   undefined there and the decrypt routine skips declaring its temporaries. */
#if defined(IT4_SET)
#undef dec_imvars
#define inv_rnd(y, x, k, c) (s(y, c) = (k)[c] ^ four_tables(x, t_use(i, n), inv_var, rf1, c))
#elif defined(IT1_SET)
#undef dec_imvars
#define inv_rnd(y, x, k, c) (s(y, c) = (k)[c] ^ one_table(x, upr, t_use(i, n), inv_var, rf1, c))
#else
#define inv_rnd(y, x, k, c) \
    (s(y, c) = inv_mcol((k)[c] ^ no_table(x, t_use(i, box), inv_var, rf1, c)))
#endif
|
|
|
|
/* inv_lrnd: compute column c of state y for the last inverse round from
   state x and round key k. No variant applies inv_mcol().
   The implementation depends on which last-round tables were configured:
     IL4_SET - four_tables() lookup;
     IL1_SET - one_table() lookup using ups;
     neither - no_table() over the inverse s-box. */
#if defined(IL4_SET)
#define inv_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ four_tables(x, t_use(i, l), inv_var, rf1, c))
#elif defined(IL1_SET)
#define inv_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ one_table(x, ups, t_use(i, l), inv_var, rf1, c))
#else
#define inv_lrnd(y, x, k, c) (s(y, c) = (k)[c] ^ no_table(x, t_use(i, box), inv_var, rf1, c))
#endif
|
|
|
|
/* This code can work with the decryption key schedule in the   */
/* order that is used for encryption (where the 1st decryption  */
/* round key is at the high end of the schedule) or with a key  */
/* schedule that has been reversed to put the 1st decryption    */
/* round key at the low end of the schedule in memory (when     */
/* AES_REV_DKS is defined)                                      */
|
|
|
|
/* key_ofs: 0 when AES_REV_DKS is defined, 1 otherwise. The decrypt routine
   uses it to decide which end of the schedule its key pointer starts from.
   rnd_key(n): address of the round key n steps along from the `kp` that is
   in scope where the macro is expanded. The step direction is forwards
   through memory with AES_REV_DKS and backwards without it. `n` is
   parenthesized so that expression arguments such as `i + 1` scale as a
   whole. */
#ifdef AES_REV_DKS
#define key_ofs 0
#define rnd_key(n) (kp + (n) * N_COLS)
#else
#define key_ofs 1
#define rnd_key(n) (kp - (n) * N_COLS)
#endif
|
|
|
|
AES_RETURN
|
|
aes_xi(decrypt)(const unsigned char* in, unsigned char* out, const aes_decrypt_ctx cx[1]) {
|
|
uint32_t locals(b0, b1);
|
|
#if defined(dec_imvars)
|
|
dec_imvars; /* declare variables for inv_mcol() if needed */
|
|
#endif
|
|
const uint32_t* kp = NULL;
|
|
|
|
if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE &&
|
|
cx->inf.b[0] != 14 * AES_BLOCK_SIZE)
|
|
return EXIT_FAILURE;
|
|
|
|
kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
|
|
state_in(b0, in, kp);
|
|
|
|
#if(DEC_UNROLL == FULL)
|
|
|
|
kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
|
|
switch(cx->inf.b[0]) {
|
|
case 14 * AES_BLOCK_SIZE:
|
|
round(inv_rnd, b1, b0, rnd_key(-13));
|
|
round(inv_rnd, b0, b1, rnd_key(-12));
|
|
//-fallthrough
|
|
case 12 * AES_BLOCK_SIZE:
|
|
round(inv_rnd, b1, b0, rnd_key(-11));
|
|
round(inv_rnd, b0, b1, rnd_key(-10));
|
|
//-fallthrough
|
|
case 10 * AES_BLOCK_SIZE:
|
|
round(inv_rnd, b1, b0, rnd_key(-9));
|
|
round(inv_rnd, b0, b1, rnd_key(-8));
|
|
round(inv_rnd, b1, b0, rnd_key(-7));
|
|
round(inv_rnd, b0, b1, rnd_key(-6));
|
|
round(inv_rnd, b1, b0, rnd_key(-5));
|
|
round(inv_rnd, b0, b1, rnd_key(-4));
|
|
round(inv_rnd, b1, b0, rnd_key(-3));
|
|
round(inv_rnd, b0, b1, rnd_key(-2));
|
|
round(inv_rnd, b1, b0, rnd_key(-1));
|
|
round(inv_lrnd, b0, b1, rnd_key(0));
|
|
//-fallthrough
|
|
}
|
|
|
|
#else
|
|
|
|
#if(DEC_UNROLL == PARTIAL)
|
|
{
|
|
uint32_t rnd;
|
|
for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) {
|
|
kp = rnd_key(1);
|
|
round(inv_rnd, b1, b0, kp);
|
|
kp = rnd_key(1);
|
|
round(inv_rnd, b0, b1, kp);
|
|
}
|
|
kp = rnd_key(1);
|
|
round(inv_rnd, b1, b0, kp);
|
|
#else
|
|
{
|
|
uint32_t rnd;
|
|
for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) {
|
|
kp = rnd_key(1);
|
|
round(inv_rnd, b1, b0, kp);
|
|
l_copy(b0, b1);
|
|
}
|
|
#endif
|
|
kp = rnd_key(1);
|
|
round(inv_lrnd, b0, b1, kp);
|
|
}
|
|
#endif
|
|
|
|
state_out(out, b0);
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|
|
#endif
|
|
|
|
#if defined(__cplusplus)
|
|
}
|
|
#endif
|