Fix ARM CE byte ordering, expand C/C++ API, and harden build
ARM backends: fix round key byte-swap on little-endian (vrev32q_u8), rewrite decrypt to pre-process middle keys with InvMixColumns, fix GHASH PMULL reflect and reduction ordering. API: add nonce/IV-generating convenience overloads for CTR, CBC, and GCM (library generates and prepends nonce, appends tag). Add C API for IV/nonce generation. Rename error codes (TINYAES_OK, Result::Ok, Result::AuthenticationFailed, etc.). Build: add MinGW GCC AVX-512 debug alignment fix, harden bench/fuzz CMake targets (warnings-as-errors, linker hardening), align with tinysha CMake conventions. Add README. Tests: expand coverage for nonce-generating API overloads, add NIST GCM test vectors, improve fuzz target differential testing.
This commit is contained in:
@@ -13,8 +13,20 @@ set_target_properties(tinyaes_benchmarks PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
)
|
||||
|
||||
# Warning flags for benchmarks
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
||||
target_compile_options(tinyaes_benchmarks PRIVATE -Wall -Wextra -Wpedantic)
|
||||
target_compile_options(tinyaes_benchmarks PRIVATE -Wall -Wextra -Wpedantic -Werror)
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
set_property(TARGET tinyaes_benchmarks APPEND_STRING PROPERTY LINK_FLAGS
|
||||
" -Wl,-z,relro,-z,now -Wl,-z,noexecstack")
|
||||
endif()
|
||||
# macOS: -bind_at_load is deprecated on modern macOS (eager binding is the default)
|
||||
if(MINGW)
|
||||
set_property(TARGET tinyaes_benchmarks APPEND_STRING PROPERTY LINK_FLAGS
|
||||
" -Wl,--nxcompat -Wl,--dynamicbase -Wl,--high-entropy-va")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
target_compile_options(tinyaes_benchmarks PRIVATE /W4)
|
||||
target_compile_options(tinyaes_benchmarks PRIVATE /W4 /WX)
|
||||
set_property(TARGET tinyaes_benchmarks APPEND_STRING PROPERTY LINK_FLAGS
|
||||
" /DYNAMICBASE /NXCOMPAT /HIGHENTROPYVA")
|
||||
endif()
|
||||
|
||||
@@ -2,84 +2,307 @@
|
||||
// BSD 3-Clause License (see LICENSE)
|
||||
|
||||
#include "tinyaes.h"
|
||||
#include "internal/aes_impl.h"
|
||||
#include "internal/ghash.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
static constexpr size_t BENCH_SIZE = 1024 * 1024; // 1 MiB
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
|
||||
#define HAS_RDTSC 1
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
static inline uint64_t rdtsc()
|
||||
{
|
||||
return __rdtsc();
|
||||
}
|
||||
#else
|
||||
static inline uint64_t rdtsc()
|
||||
{
|
||||
uint32_t lo, hi;
|
||||
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
|
||||
return (static_cast<uint64_t>(hi) << 32) | lo;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define HAS_RDTSC 0
|
||||
#endif
|
||||
|
||||
static constexpr int ITERATIONS = 100;
|
||||
|
||||
template<typename Fn> static double bench(const char *name, Fn &&fn)
|
||||
struct BenchResult
|
||||
{
|
||||
double mib_per_sec;
|
||||
double cycles_per_byte; // 0 if rdtsc not available
|
||||
};
|
||||
|
||||
template<typename Fn> static BenchResult bench(const char *name, size_t data_size, Fn &&fn)
|
||||
{
|
||||
// Warmup
|
||||
fn();
|
||||
|
||||
#if HAS_RDTSC
|
||||
uint64_t tsc_start = rdtsc();
|
||||
#endif
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
for (int i = 0; i < ITERATIONS; ++i)
|
||||
{
|
||||
fn();
|
||||
}
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
#if HAS_RDTSC
|
||||
uint64_t tsc_end = rdtsc();
|
||||
#endif
|
||||
|
||||
double ms = std::chrono::duration<double, std::milli>(end - start).count();
|
||||
double total_bytes = static_cast<double>(BENCH_SIZE) * ITERATIONS;
|
||||
double total_bytes = static_cast<double>(data_size) * ITERATIONS;
|
||||
double mib_per_sec = (total_bytes / (1024.0 * 1024.0)) / (ms / 1000.0);
|
||||
std::printf("%-30s %8.2f MiB/s (%6.2f ms total)\n", name, mib_per_sec, ms);
|
||||
return mib_per_sec;
|
||||
|
||||
double cpb = 0.0;
|
||||
#if HAS_RDTSC
|
||||
if (total_bytes > 0)
|
||||
cpb = static_cast<double>(tsc_end - tsc_start) / total_bytes;
|
||||
#endif
|
||||
|
||||
if (cpb > 0)
|
||||
std::printf(" %-40s %8.2f MiB/s %6.2f c/B (%6.2f ms)\n", name, mib_per_sec, cpb, ms);
|
||||
else
|
||||
std::printf(" %-40s %8.2f MiB/s (%6.2f ms)\n", name, mib_per_sec, ms);
|
||||
|
||||
return {mib_per_sec, cpb};
|
||||
}
|
||||
|
||||
// Bench a single AES encrypt_block function at low level
|
||||
template<typename EncFn>
|
||||
static void bench_block_fn(const char *name, tinyaes::internal::key_expand_fn key_expand, EncFn enc_fn,
|
||||
const uint8_t *key, size_t key_len)
|
||||
{
|
||||
int rounds = tinyaes::internal::aes_rounds(key_len);
|
||||
uint32_t rk[tinyaes::internal::AES_MAX_RK_WORDS];
|
||||
key_expand(key, key_len, rk);
|
||||
|
||||
uint8_t block[16] = {0};
|
||||
bench(
|
||||
name, 16,
|
||||
[&]()
|
||||
{
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
enc_fn(rk, rounds, block, block);
|
||||
});
|
||||
}
|
||||
|
||||
static void run_mode_benchmarks()
|
||||
{
|
||||
static const size_t sizes[] = {16, 256, 1024, 4096, 16384, 65536, 262144, 1048576};
|
||||
static const size_t key_lens[] = {16, 24, 32};
|
||||
static const char *key_names[] = {"128", "192", "256"};
|
||||
|
||||
for (size_t ki = 0; ki < 3; ++ki)
|
||||
{
|
||||
size_t kl = key_lens[ki];
|
||||
std::vector<uint8_t> key(kl, 0x42);
|
||||
std::vector<uint8_t> iv_16(16, 0x01);
|
||||
std::vector<uint8_t> iv_12(12, 0x01);
|
||||
std::vector<uint8_t> aad = {0xAA, 0xBB, 0xCC, 0xDD};
|
||||
|
||||
std::printf("\n=== AES-%s ===\n", key_names[ki]);
|
||||
|
||||
for (size_t sz : sizes)
|
||||
{
|
||||
// ECB needs block-aligned
|
||||
size_t ecb_sz = (sz / 16) * 16;
|
||||
if (ecb_sz == 0)
|
||||
ecb_sz = 16;
|
||||
std::vector<uint8_t> pt_ecb(ecb_sz, 0x55);
|
||||
std::vector<uint8_t> pt(sz, 0x55);
|
||||
|
||||
char label[128];
|
||||
|
||||
std::printf("\n--- %zu bytes ---\n", sz);
|
||||
|
||||
std::snprintf(label, sizeof(label), "ECB-%s encrypt", key_names[ki]);
|
||||
bench(
|
||||
label, ecb_sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ecb_encrypt(key, pt_ecb, ct);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "CBC-%s encrypt", key_names[ki]);
|
||||
bench(
|
||||
label, ecb_sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::cbc_encrypt(key, iv_16, pt_ecb, ct);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "CTR-%s encrypt", key_names[ki]);
|
||||
bench(
|
||||
label, sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ctr_crypt(key, iv_16, pt, ct);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "GCM-%s encrypt", key_names[ki]);
|
||||
bench(
|
||||
label, sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> ct, tag;
|
||||
tinyaes::gcm_encrypt(key, iv_12, aad, pt, ct, tag);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "GCM-%s decrypt", key_names[ki]);
|
||||
// Pre-encrypt for decrypt bench
|
||||
std::vector<uint8_t> ct_pre, tag_pre;
|
||||
tinyaes::gcm_encrypt(key, iv_12, aad, pt, ct_pre, tag_pre);
|
||||
bench(
|
||||
label, sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> dec;
|
||||
tinyaes::gcm_decrypt(key, iv_12, aad, ct_pre, tag_pre, dec);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void run_gcm_aad_benchmarks()
|
||||
{
|
||||
std::printf("\n=== GCM AAD Variation (AES-128, 4096B plaintext) ===\n");
|
||||
|
||||
std::vector<uint8_t> key(16, 0x42);
|
||||
std::vector<uint8_t> iv(12, 0x01);
|
||||
std::vector<uint8_t> pt(4096, 0x55);
|
||||
|
||||
static const size_t aad_sizes[] = {0, 16, 64, 256, 1024, 4096};
|
||||
for (size_t aad_sz : aad_sizes)
|
||||
{
|
||||
std::vector<uint8_t> aad(aad_sz, 0xAA);
|
||||
char label[128];
|
||||
std::snprintf(label, sizeof(label), "GCM-128 AAD=%zuB", aad_sz);
|
||||
bench(
|
||||
label, 4096 + aad_sz,
|
||||
[&]()
|
||||
{
|
||||
std::vector<uint8_t> ct, tag;
|
||||
tinyaes::gcm_encrypt(key, iv, aad, pt, ct, tag);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void run_key_expansion_benchmarks()
|
||||
{
|
||||
std::printf("\n=== Key Expansion ===\n");
|
||||
|
||||
static const size_t key_lens[] = {16, 24, 32};
|
||||
static const char *key_names[] = {"128", "192", "256"};
|
||||
|
||||
auto key_expand = tinyaes::internal::get_key_expand();
|
||||
|
||||
for (size_t ki = 0; ki < 3; ++ki)
|
||||
{
|
||||
std::vector<uint8_t> key(key_lens[ki], 0x42);
|
||||
uint32_t rk[tinyaes::internal::AES_MAX_RK_WORDS];
|
||||
|
||||
char label[128];
|
||||
std::snprintf(label, sizeof(label), "Key expand AES-%s (dispatched)", key_names[ki]);
|
||||
bench(
|
||||
label, key_lens[ki],
|
||||
[&]()
|
||||
{
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
key_expand(key.data(), key.size(), rk);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "Key expand AES-%s (portable)", key_names[ki]);
|
||||
bench(
|
||||
label, key_lens[ki],
|
||||
[&]()
|
||||
{
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
tinyaes::internal::aes_key_expand_portable(key.data(), key.size(), rk);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void run_ghash_benchmarks()
|
||||
{
|
||||
std::printf("\n=== GHASH ===\n");
|
||||
|
||||
// Compute H from a fixed key
|
||||
uint8_t key_raw[16] = {0x42};
|
||||
uint32_t rk[tinyaes::internal::AES_MAX_RK_WORDS];
|
||||
tinyaes::internal::aes_key_expand_portable(key_raw, 16, rk);
|
||||
uint8_t H[16] = {0};
|
||||
uint8_t zero[16] = {0};
|
||||
tinyaes::internal::aes_encrypt_block_portable(rk, 10, zero, H);
|
||||
|
||||
auto ghash_dispatch = tinyaes::internal::get_ghash();
|
||||
|
||||
static const size_t sizes[] = {16, 256, 1024, 4096, 65536};
|
||||
for (size_t sz : sizes)
|
||||
{
|
||||
std::vector<uint8_t> data(sz, 0x55);
|
||||
|
||||
char label[128];
|
||||
std::snprintf(label, sizeof(label), "GHASH %zuB (dispatched)", sz);
|
||||
bench(
|
||||
label, sz,
|
||||
[&]()
|
||||
{
|
||||
uint8_t Y[16] = {0};
|
||||
ghash_dispatch(H, data.data(), data.size(), Y);
|
||||
});
|
||||
|
||||
std::snprintf(label, sizeof(label), "GHASH %zuB (portable)", sz);
|
||||
bench(
|
||||
label, sz,
|
||||
[&]()
|
||||
{
|
||||
uint8_t Y[16] = {0};
|
||||
tinyaes::internal::ghash_portable(H, data.data(), data.size(), Y);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void run_backend_comparison()
|
||||
{
|
||||
std::printf("\n=== Backend Comparison (single block encrypt x1000) ===\n");
|
||||
|
||||
uint8_t key128[16] = {0x42};
|
||||
uint8_t key256[32] = {0x42};
|
||||
|
||||
// Portable
|
||||
bench_block_fn("AES-128 portable", tinyaes::internal::aes_key_expand_portable,
|
||||
tinyaes::internal::aes_encrypt_block_portable, key128, 16);
|
||||
bench_block_fn("AES-256 portable", tinyaes::internal::aes_key_expand_portable,
|
||||
tinyaes::internal::aes_encrypt_block_portable, key256, 32);
|
||||
|
||||
// Dispatched (may be AES-NI, ARM CE, or portable)
|
||||
bench_block_fn("AES-128 dispatched", tinyaes::internal::get_key_expand(), tinyaes::internal::get_encrypt_block(),
|
||||
key128, 16);
|
||||
bench_block_fn("AES-256 dispatched", tinyaes::internal::get_key_expand(), tinyaes::internal::get_encrypt_block(),
|
||||
key256, 32);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
std::vector<uint8_t> key_128(16, 0x42);
|
||||
std::vector<uint8_t> key_256(32, 0x42);
|
||||
std::vector<uint8_t> iv_16(16, 0x01);
|
||||
std::vector<uint8_t> iv_12(12, 0x01);
|
||||
std::vector<uint8_t> plaintext(BENCH_SIZE, 0x55);
|
||||
std::vector<uint8_t> aad = {0xAA, 0xBB, 0xCC, 0xDD};
|
||||
|
||||
std::printf("TinyAES Benchmarks (%zu bytes x %d iterations)\n", BENCH_SIZE, ITERATIONS);
|
||||
std::printf("TinyAES Benchmarks (%d iterations per measurement)\n", ITERATIONS);
|
||||
std::printf("================================================================\n");
|
||||
|
||||
// ECB
|
||||
bench("ECB-128 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ecb_encrypt(key_128, plaintext, ct);
|
||||
});
|
||||
bench("ECB-256 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ecb_encrypt(key_256, plaintext, ct);
|
||||
});
|
||||
|
||||
// CBC
|
||||
bench("CBC-128 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::cbc_encrypt(key_128, iv_16, plaintext, ct);
|
||||
});
|
||||
bench("CBC-256 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::cbc_encrypt(key_256, iv_16, plaintext, ct);
|
||||
});
|
||||
|
||||
// CTR
|
||||
bench("CTR-128 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ctr_crypt(key_128, iv_16, plaintext, ct);
|
||||
});
|
||||
bench("CTR-256 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct;
|
||||
tinyaes::ctr_crypt(key_256, iv_16, plaintext, ct);
|
||||
});
|
||||
|
||||
// GCM
|
||||
bench("GCM-128 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct, tag;
|
||||
tinyaes::gcm_encrypt(key_128, iv_12, aad, plaintext, ct, tag);
|
||||
});
|
||||
bench("GCM-256 encrypt", [&]() {
|
||||
std::vector<uint8_t> ct, tag;
|
||||
tinyaes::gcm_encrypt(key_256, iv_12, aad, plaintext, ct, tag);
|
||||
});
|
||||
run_backend_comparison();
|
||||
run_key_expansion_benchmarks();
|
||||
run_ghash_benchmarks();
|
||||
run_mode_benchmarks();
|
||||
run_gcm_aad_benchmarks();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user