From a071426bc8ec23e8bf9ea1561342340787c9cfb1 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 14 Nov 2025 09:43:59 -0800 Subject: [PATCH] Migrate wolfAsyncCrypt repo into wolfSSL proper --- .github/workflows/async.yml | 2 +- .gitignore | 10 - .wolfssl_known_macro_extras | 41 +- Makefile.am | 1 + README-async.md | 558 ++ SCRIPTS-LIST | 2 - async-check.sh | 104 - autogen.sh | 12 +- configure.ac | 9 - examples/async/README.md | 5 +- wolfcrypt/src/async.c | 1158 ++++ wolfcrypt/src/port/cavium/README.md | 265 +- wolfcrypt/src/port/cavium/README_Octeon.md | 236 +- wolfcrypt/src/port/cavium/cavium_nitrox.c | 1234 ++++ wolfcrypt/src/port/intel/README.md | 410 +- wolfcrypt/src/port/intel/quickassist.c | 5092 +++++++++++++++++ wolfcrypt/src/port/intel/quickassist_mem.c | 1131 ++++ wolfssl/wolfcrypt/async.h | 442 ++ wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h | 217 + wolfssl/wolfcrypt/port/intel/quickassist.h | 520 ++ .../wolfcrypt/port/intel/quickassist_mem.h | 64 + 21 files changed, 11352 insertions(+), 161 deletions(-) create mode 100644 README-async.md delete mode 100755 async-check.sh create mode 100644 wolfcrypt/src/async.c create mode 100644 wolfcrypt/src/port/cavium/cavium_nitrox.c create mode 100644 wolfcrypt/src/port/intel/quickassist.c create mode 100644 wolfcrypt/src/port/intel/quickassist_mem.c create mode 100644 wolfssl/wolfcrypt/async.h create mode 100644 wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h create mode 100644 wolfssl/wolfcrypt/port/intel/quickassist.h create mode 100644 wolfssl/wolfcrypt/port/intel/quickassist_mem.h diff --git a/.github/workflows/async.yml b/.github/workflows/async.yml index 567d7b693..168450a95 100644 --- a/.github/workflows/async.yml +++ b/.github/workflows/async.yml @@ -33,7 +33,7 @@ jobs: - name: Test wolfSSL async run: | - ./async-check.sh install + ./autogen.sh ./configure ${{ matrix.config }} make check diff --git a/.gitignore b/.gitignore index 2b91e077e..1898fc31f 100644 --- a/.gitignore +++ b/.gitignore @@ -50,16 
+50,6 @@ wolfcrypt_first.c wolfcrypt_last.c selftest.c fipsv2.c -src/async.c -wolfssl/async.h -wolfcrypt/src/async.c -wolfssl/wolfcrypt/async.h -wolfcrypt/src/port/intel/quickassist.c -wolfcrypt/src/port/intel/quickassist_mem.c -wolfcrypt/src/port/cavium/cavium_nitrox.c -wolfssl/wolfcrypt/port/intel/quickassist.h -wolfssl/wolfcrypt/port/intel/quickassist_mem.h -wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h ctaocrypt/benchmark/benchmark ctaocrypt/test/testctaocrypt wolfcrypt/benchmark/benchmark diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 1c7ed51d2..bf80567dc 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -180,6 +180,7 @@ CONFIG_WOLFTPM_EXAMPLE_NAME_ESPRESSIF CONFIG_X86 CONV_WITH_DIV CPA_CY_API_VERSION_NUM_MAJOR +CPA_CY_API_VERSION_NUM_MINOR CPU_MIMXRT1176DVMAA_cm7 CPU_MK82FN256VLL15 CRLDP_VALIDATE_DATA @@ -412,6 +413,7 @@ NO_PKCS7_STREAM NO_POLY1305_ASM NO_PUBLIC_CCM_SET_NONCE NO_PUBLIC_GCM_SET_IV +NO_QAT_RNG NO_RESUME_SUITE_CHECK NO_RNG NO_RNG_MUTEX @@ -475,6 +477,7 @@ RTC_ALARMSUBSECONDMASK_ALL RTE_CMSIS_RTOS_RTX RTOS_MODULE_NET_AVAIL RTPLATFORM +SAL_IOMMU_CODE SA_INTERRUPT SCEKEY_INSTALLED SHA256_MANY_REGISTERS @@ -565,8 +568,10 @@ USE_CONTESTMUTEX USE_ECDSA_KEYSZ_HASH_ALGO USE_FULL_ASSERT USE_HAL_DRIVER +USE_LAC_SESSION_FOR_STRUCT_OFFSET USE_NXP_LTC USE_NXP_MMCAU +USE_QAE_STATIC_MEM USE_QAE_THREAD_LS USE_SECRET_CALLBACK USE_STSAFE_RNG_SEED @@ -580,24 +585,26 @@ WC_AES_BS_WORD_SIZE WC_AES_GCM_DEC_AUTH_EARLY WC_ASN_HASH_SHA256 WC_ASN_RUNTIME_DATE_CHECK_CONTROL -WC_ASYNC_ENABLE_3DES -WC_ASYNC_ENABLE_AES -WC_ASYNC_ENABLE_ARC4 -WC_ASYNC_ENABLE_DH -WC_ASYNC_ENABLE_ECC WC_ASYNC_ENABLE_ECC_KEYGEN -WC_ASYNC_ENABLE_HMAC -WC_ASYNC_ENABLE_MD5 -WC_ASYNC_ENABLE_RSA -WC_ASYNC_ENABLE_RSA_KEYGEN -WC_ASYNC_ENABLE_SHA -WC_ASYNC_ENABLE_SHA224 -WC_ASYNC_ENABLE_SHA256 -WC_ASYNC_ENABLE_SHA3 -WC_ASYNC_ENABLE_SHA384 -WC_ASYNC_ENABLE_SHA512 +WC_ASYNC_NO_3DES +WC_ASYNC_NO_AES +WC_ASYNC_NO_ARC4 WC_ASYNC_NO_CRYPT 
-WC_ASYNC_NO_HASH +WC_ASYNC_NO_DH +WC_ASYNC_NO_ECC +WC_ASYNC_NO_HMAC +WC_ASYNC_NO_MD5 +WC_ASYNC_NO_PKI +WC_ASYNC_NO_RNG +WC_ASYNC_NO_RSA +WC_ASYNC_NO_RSA_KEYGEN +WC_ASYNC_NO_SHA +WC_ASYNC_NO_SHA224 +WC_ASYNC_NO_SHA256 +WC_ASYNC_NO_SHA3 +WC_ASYNC_NO_SHA384 +WC_ASYNC_NO_SHA512 +WC_ASYNC_THREAD_BIND WC_CACHE_RESISTANT_BASE64_TABLE WC_DILITHIUM_CACHE_PRIV_VECTORS WC_DILITHIUM_CACHE_PUB_VECTORS @@ -607,6 +614,7 @@ WC_ECC_NONBLOCK_ONLY WC_FLAG_DONT_USE_AESNI WC_FORCE_LINUXKM_FORTIFY_SOURCE WC_LMS_FULL_HASH +WC_NO_ASYNC_SLEEP WC_NO_RNG_SIMPLE WC_NO_STATIC_ASSERT WC_PKCS11_FIND_WITH_ID_ONLY @@ -977,6 +985,7 @@ __BCPLUSPLUS__ __BIG_ENDIAN__ __BORLANDC__ __CCRX__ +__CC_ARM __COMPILER_VER__ __COUNTER__ __CYGWIN__ diff --git a/Makefile.am b/Makefile.am index 3fa836759..6635fde15 100644 --- a/Makefile.am +++ b/Makefile.am @@ -148,6 +148,7 @@ EXTRA_DIST+= valgrind-bash.supp EXTRA_DIST+= fips-hash.sh EXTRA_DIST+= gencertbuf.pl EXTRA_DIST+= README.md +EXTRA_DIST+= README-async.md EXTRA_DIST+= README EXTRA_DIST+= ChangeLog.md EXTRA_DIST+= LICENSING diff --git a/README-async.md b/README-async.md new file mode 100644 index 000000000..774506688 --- /dev/null +++ b/README-async.md @@ -0,0 +1,558 @@ +# wolfSSL / wolfCrypt Asynchronous Support + +The asynchronous code was previously maintained at https://github.com/wolfSSL/wolfAsyncCrypt but was integrated into wolfSSL proper starting with release v5.8.4. + +This feature is enabled using: +`./configure --enable-asynccrypt` or `#define WOLFSSL_ASYNC_CRYPT`. + +If async crypto is enabled but no hardware backend is enabled or if `WOLFSSL_ASYNC_CRYPT_SW` is defined, a software backend using wolfCrypt is used instead. This software backend can simulate periodic hardware delays using the macro `WOLF_ASYNC_SW_SKIP_MOD`, which is on by default if `DEBUG_WOLFSSL` is defined. + +## Design + +Each crypto algorithm has its own `WC_ASYNC_DEV` structure, which contains a `WOLF_EVENT`, local crypto context and local hardware context. 
+ +For SSL/TLS the `WOLF_EVENT` context is the `WOLFSSL*` and the type is `WOLF_EVENT_TYPE_ASYNC_WOLFSSL`. For wolfCrypt operations the `WOLF_EVENT` context is the `WC_ASYNC_DEV*` and the type is `WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT`. + +A generic event system has been created using a `WOLF_EVENT` structure when `HAVE_WOLF_EVENT` is defined. The event structure resides in the `WC_ASYNC_DEV`. + +The asynchronous crypto system is modeled after epoll. The implementation uses `wolfSSL_AsyncPoll` or `wolfSSL_CTX_AsyncPoll` to check if any async operations are complete. + +## Hardware Backends + +Supported hardware backends: + +* Intel QuickAssist with QAT 1.6 or QAT 1.7 driver. See `wolfcrypt/src/port/intel/README.md`. +* Cavium Nitrox III and V. See `wolfcrypt/src/port/cavium/README.md`. + +## wolfCrypt Backend + +The wolfCrypt backend uses the same API as the hardware backends do. Once an asynchronous operation is initiated with the software backend, subsequent calls to `wolfSSL_AsyncPoll` will call into wolfCrypt to complete the operation. If non-blocking is enabled, for example, for ECC (via `WC_ECC_NONBLOCK`), each `wolfSSL_AsyncPoll` will do a chunk of work for the operation and return, to minimize blocking time. + +## APIs + +### ```wolfSSL_AsyncPoll``` +``` +int wolfSSL_AsyncPoll(WOLFSSL* ssl, WOLF_EVENT_FLAG flags); +``` + +Polls the provided WOLFSSL object's reference to the WOLFSSL_CTX's event queue to see if any operations outstanding for the WOLFSSL object are done. Returns the completed event count on success. + +### ```wolfSSL_CTX_AsyncPoll``` +``` +int wolfSSL_CTX_AsyncPoll(WOLFSSL_CTX* ctx, WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount); +``` + +Polls the provided WOLFSSL_CTX context event queue to see if any pending events are done. If the `events` argument is provided then a pointer to the `WOLF_EVENT` will be returned up to `maxEvents`. 
If `eventCount` is provided then the number of events populated will be returned. The `flags` argument allows for `WOLF_POLL_FLAG_CHECK_HW` to indicate if the crypto backend (i.e. hardware or wolfCrypt, if the software implementation is being used) should be polled again or just return more events. + +### ```wolfAsync_DevOpen``` +``` +int wolfAsync_DevOpen(int *devId); +``` + +Opens the async device and returns an `int` device id for it. + +### ```wolfAsync_DevOpenThread``` +``` +int wolfAsync_DevOpenThread(int *devId, void* threadId); +``` +Opens the async device for a specific thread. A crypto instance is assigned and thread affinity set. + +### ```wolfAsync_DevClose``` +``` +void wolfAsync_DevClose(int *devId); +``` + +Closes the async device. + +### ```wolfAsync_DevCopy``` +``` +int wolfAsync_DevCopy(WC_ASYNC_DEV* src, WC_ASYNC_DEV* dst); +``` + +Safely copies the async device memory (the copy does not keep pointers to the old device). + +### ```wolfAsync_DevCtxInit``` +``` +int wolfAsync_DevCtxInit(WC_ASYNC_DEV* asyncDev, word32 marker, void* heap, int devId); +``` + +Initializes the device context and opens the device hardware using the provided `WC_ASYNC_DEV` pointer, marker and device id (from wolfAsync_DevOpen). + +### ```wolfAsync_DevCtxFree``` +``` +void wolfAsync_DevCtxFree(WC_ASYNC_DEV* asyncDev); +``` + +Closes and frees the device context. + + +### ```wolfAsync_EventInit``` +``` +int wolfAsync_EventInit(WOLF_EVENT* event, enum WOLF_EVENT_TYPE type, void* context, word32 flags); +``` + +Initializes an event structure with the provided type and context. Sets the pending flag and the status code to `WC_PENDING_E`. Current flag options are `WC_ASYNC_FLAG_NONE` and `WC_ASYNC_FLAG_CALL_AGAIN` (indicates the crypto operation needs to be called again after WC_PENDING_E). + +### ```wolfAsync_EventWait ``` +``` +int wolfAsync_EventWait(WOLF_EVENT* event); +``` + +Waits for the provided event to complete. 
+ +### ```wolfAsync_EventPoll``` +``` +int wolfAsync_EventPoll(WOLF_EVENT* event, WOLF_EVENT_FLAG event_flags); +``` + +Polls the provided event to determine if it's done. + +### ```wolfAsync_EventPop ``` + +``` +int wolfAsync_EventPop(WOLF_EVENT* event, enum WOLF_EVENT_TYPE event_type); +``` + +This will check the event to see if the event type matches and the event is complete. If it is then the async return code is returned. If not then `WC_NO_PENDING_E` is returned. + + +### ```wolfAsync_EventQueuePush``` +``` +int wolfAsync_EventQueuePush(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event); +``` + +Pushes an event to the provided event queue and assigns the provided event. + +### ```wolfAsync_EventQueuePoll``` +``` +int wolfAsync_EventQueuePoll(WOLF_EVENT_QUEUE* queue, void* context_filter, + WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG event_flags, int* eventCount); +``` + +Polls all events in the provided event queue. Optionally filters by context. Will return pointers to the done events. + +### ```wc_AsyncHandle``` +``` +int wc_AsyncHandle(WC_ASYNC_DEV* asyncDev, WOLF_EVENT_QUEUE* queue, word32 flags); +``` + +This will push the event inside asyncDev into the provided queue. + +### ```wc_AsyncWait``` +``` +int wc_AsyncWait(int ret, WC_ASYNC_DEV* asyncDev, word32 flags); +``` + +This will wait until the provided asyncDev is done (or error). + +### ```wolfAsync_HardwareStart``` +``` +int wolfAsync_HardwareStart(void); +``` + +If using multiple threads this allows a way to start the hardware before using `wolfAsync_DevOpen` to ensure the memory system is set up. Ensure that `wolfAsync_HardwareStop` is called on exit. Internally there is a start/stop counter, so this can be called multiple times, but stop must also be called the same number of times to shut down the hardware. + +### ```wolfAsync_HardwareStop``` +``` +void wolfAsync_HardwareStop(void); +``` + +Stops hardware if internal `--start_count == 0`. 
+ +## Examples + +### TLS Server Example + +```c +int devId = INVALID_DEVID; + +ret = wolfAsync_DevOpen(&devId); +if (ret != 0) { + err_sys("Async device open failed"); +} +wolfSSL_CTX_SetDevId(ctx, devId); + +do { + err = 0; /* reset error */ + ret = wolfSSL_accept(ssl); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + } +} while (err == WC_PENDING_E); +if (ret != WOLFSSL_SUCCESS) { + err_sys("SSL_accept failed"); +} + +wolfAsync_DevClose(&devId); +``` + +### wolfCrypt RSA Example + +```c +static int devId = INVALID_DEVID; +RsaKey key; + +ret = wolfAsync_DevOpen(&devId); +if (ret != 0) + err_sys("Async device open failed"); + +ret = wc_InitRsaKey_ex(&key, HEAP_HINT, devId); +if (ret == 0) { + ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes); + do { + ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN); + if (ret >= 0) + ret = wc_RsaPublicEncrypt(in, inLen, out, outSz, &key, &rng); + } while (ret == WC_PENDING_E); + wc_FreeRsaKey(&key); +} + +wolfAsync_DevClose(&devId); +``` + +## Build Options + +1. Async multi-threading can be disabled by defining `WC_NO_ASYNC_THREADING`. This only disables internal async threading functions. You are free to use other threading APIs or paradigms in your application. +2. Software benchmarks can be disabled by defining `NO_SW_BENCH`. +3. The `WC_ASYNC_THRESH_NONE` define can be used to disable the cipher thresholds, which are tunable values to determine at what size hardware should be used vs. software. +4. Use `WOLFSSL_DEBUG_MEMORY` and `WOLFSSL_TRACK_MEMORY` to help debug memory issues. QAT also supports `WOLFSSL_DEBUG_MEMORY_PRINT`. 
+ + +## References + +### TLS Client/Server Async Example + +We have a full TLS client/server async examples here: + +* [https://github.com/wolfSSL/wolfssl-examples/blob/master/tls/server-tls-epoll-perf.c](https://github.com/wolfSSL/wolfssl-examples/blob/master/tls/server-tls-epoll-perf.c) + +* [https://github.com/wolfSSL/wolfssl-examples/blob/master/tls/client-tls-perf.c](https://github.com/wolfSSL/wolfssl-examples/blob/master/tls/client-tls-perf.c) + +#### TLS Threaded epoll Example Building + +```sh +git clone git@github.com:wolfSSL/wolfssl-examples.git +cd wolfssl-examples +cd tls +# For QuickAssist: Uncomment QAT lines at top of Makefile +make +``` + +#### TLS Threaded epoll Example Usage + +```sh +$ ./client-tls-perf -? +perf 4.5.0 (NOTE: All files relative to wolfSSL home dir) +-? Help, print this usage +-p Port to listen on, not 0, default 11111 +-v SSL version [0-3], SSLv3(0) - TLS1.2(3)), default 3 +-l Cipher suite list (: delimited) +-c Certificate file, default ../certs/client-cert.pem +-k Key file, default ../certs/client-key.pem +-A Certificate Authority file, default ../certs/ca-cert.pem +-r Resume session +-n Benchmark connections +-N concurrent connections +-R bytes read from client +-W bytes written to client +-B Benchmark written bytes +``` + +#### TLS Threaded epoll Example Output + +```sh +$ sudo ./server-tls-epoll-threaded -n 10000 +$ sudo ./client-tls-perf -n 10000 + +wolfSSL Server Benchmark 16384 bytes + Num Conns : 10000 + Total : 18575.800 ms + Total Avg : 1.858 ms + t/s : 538.335 + Accept : 35848.428 ms + Accept Avg : 3.585 ms + Total Read bytes : 163840000 bytes + Total Write bytes : 163840000 bytes + Read : 402.212 ms ( 388.476 MBps) + Write : 591.469 ms ( 264.173 MBps) +``` + +## Change Log + +### wolfSSL Async Release v5.8.0 (May 01, 2025) +* Includes all wolfSSL v5.8.0 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-580-apr-24-2025 +* Update for libwolfssl_sources.h refactor. 
(https://github.com/wolfSSL/wolfAsyncCrypt/pull/77) + +### wolfSSL Async Release v5.7.4 (Oct 29, 2024) +* Includes all wolfSSL v5.7.4 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-574-oct-24-2024 + - Plus fixes for asynchronous release - SHA3/HMAC devId (https://github.com/wolfSSL/wolfssl/pull/8119) +* Fix for Intel QuickAssist RSA Key generation exponent result. (https://github.com/wolfSSL/wolfAsyncCrypt/pull/75) + +### wolfSSL Async Release v5.7.0 (Mar 21, 2024) +* Includes all wolfSSL v5.7.0 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-570-mar-20-2024 + +### wolfSSL Async Release v5.6.6 (Dec 20, 2023) +* Includes all wolfSSL v5.6.6 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-566-dec-19-2023 + - Plus wolfSSL PR 7085 fix for invalid `dh_ffdhe_test` for even P when using Intel QuickAssist. https://github.com/wolfSSL/wolfssl/pull/7085 +* Fix for missing `IntelQaFreeFlatBuffer` with DH enabled and no keygen. (broken in PR #71) +* Add return code checking for wc_AsyncThreadCreate_ex in exit_fail section for pthread_attr_destroy. (PR #72) + +### wolfSSL Async Release v5.6.4 (Oct 30, 2023) +* Fixes for supporting async with crypto or PK callbacks. +* Rename `WC_NOT_PENDING_E` -> `WC_NO_PENDING_E` + +### wolfSSL Async Release v5.6.3 (June 16, 2023) +* Includes all wolfSSL v5.6.3 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-563-jun-16-2023 +* Add sanity check of index devId before accessing array +* Use the blocking call from the async test + +### wolfSSL Async Release v5.6.0 (Mar 29, 2023) +* Includes all wolfSSL v5.6.0 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-560-mar-24-2023 +* wolfAsyncCrypt github repository became public. 
+ +### wolfSSL Async Release v5.5.4 (Dec 22, 2022) + +* Includes all wolfSSL v5.5.4 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-554-dec-21-2022 +* Use the `wc_ecc_shared_secret_ex` version for async test. Requires https://github.com/wolfSSL/wolfssl/pull/5868 + +### wolfSSL Async Release v5.5.3 (Nov 8, 2022) + +* Includes all wolfSSL v5.5.1-v5.5.3 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-553-nov-2-2022 +* Fix for Intel QAT handling of sign R when cofactor is not 1. https://github.com/wolfSSL/wolfssl/pull/5737 and https://github.com/wolfSSL/wolfAsyncCrypt/pull/54 +* Fix check scalar bits for ECC cofactor. https://github.com/wolfSSL/wolfssl/pull/5737 +* Fixes for async sniffer: https://github.com/wolfSSL/wolfssl/pull/5734 + - Handling of packets with multiple TLS messages. + - Multiple back to back sessions. + - Ensure all pending queued packets are finished before ending pcap processing. +* Fix for various tests that do not properly handle `WC_PENDING_E`. https://github.com/wolfSSL/wolfssl/pull/5773 +* Revert "Fix for sniffer to decode out of order packets". https://github.com/wolfSSL/wolfssl/pull/5771 + +### wolfSSL Async Release v5.5.0 (Sep 2, 2022) + +* Includes all wolfSSL v5.5.0 fixes. See ChangeLog.md here: https://github.com/wolfSSL/wolfssl/blob/master/ChangeLog.md#wolfssl-release-550-aug-30-2022 +* Fix for handling return codes from `pthread_attr_destroy`. +* Fix for async session tickets. https://github.com/wolfSSL/wolfssl/pull/5534 +* Fix for async with OCSP non-blocking in ProcessPeerCerts. https://github.com/wolfSSL/wolfssl/pull/5539 + +### wolfSSL Async Release v5.4.0 (July 11, 2022) +* Fix for DH trim of leading zeros to use memmove. +* Fix to print errors to stderr. +* Fix to consistently return the status of failed pthreads funcs. 
+* Move async device pointer (https://github.com/wolfSSL/wolfssl/pull/5149) + +### wolfSSL Async Release v5.3.0 (May 5, 2022) + +* Added Intel QuickAssist ECC Key Generation acceleration. Specifically point multiplication similar to our `wc_ecc_mulmod_ex2`. +* Fix for building Intel QAT with SP math all +* Fix for `error: unused function 'IntelQaFreeFlatBuffer'`. +* Fix for handling the Koblitz curve param "a", which is all zeros. +* Fixes for scan-build warnings. +* Includes wolfSSL PR https://github.com/wolfSSL/wolfssl/pull/5101 + +### wolfSSL Async Release v5.2.0 (Feb 21, 2022) + +* Adds `WC_NO_ASYNC_SLEEP` option to hide wc_AsyncSleep for platforms that do not need it. +* Fix for async test anonymous union on some platforms (`#pragma anon_unions` and `HAVE_ANONYMOUS_INLINE_AGGREGATES`) +* Fixes for invalidPrintfArgType_sint (cppcheck) and readability-redundant-preprocessor (clang-tidy). + +### wolfSSL Async Release v5.1.0 (Jan 3rd, 2022) + + +### wolfSSL Async Release v5.0.0 (11/01/2021) + +* Fix for issue with QAT AES GCM input buffer already NUMA and not aligned. + +### wolfSSL Async Release v4.8.0 (07/14/2021) + +* Fix for new QAT 1.7 hash types warning. +* Updated Intel QAT 1.7 build instructions. +* Includes possible HAVE_WOLF_BIGINT leaks in PR https://github.com/wolfSSL/wolfssl/pull/4208 + +### wolfSSL Async Release v4.7.0 (02/20/2021) + +* Fix for ARC4 macro typo + +### wolfSSL Async Release v4.6.0 (12/21/2020) + +* Documentation updates. +* Fixes for Cavium Nitrox and Intel Quick Assist (wolfSSL/wolfssl#3577) with TLS v1.3 + +### wolfSSL Async Release v4.4.0 (04/24/2020) + +* Fix for uninitialized `supSha3` warning. +* Fix for use of incorrect devId for wolfSSL_SHA3_256_Init. +* Fix for QAT with Shake256. +* Fix for QAT example `./build.sh`. + +### wolfSSL Async Release v4.3.0 (12/20/2019) + +* Fix for async date override callback issue. +* Updates to Octeon README. 
+ +### wolfSSL Async Release v4.2.0 (10/22/2019) + +* Fix for QuickAssist DH Agree issue with leading zero bytes. +* Fix for QuickAssist AES CBC issue with previous IV on back-to-back operations. +* Updates to QuickAssist README.md for latest QAT v1.7 driver. +* Instructions for Octeon III (CN7300) use. + +### wolfSSL Async Release v4.0.0 (03/25/2019) + +* Fix for building with QuickAssist v1.7 driver (4.4.0-00023) (was missing usdm_drv during configure with check). +* Fix for building async with file system disabled. +* Fix for SHA-3 runtime detection for not supported in hardware. + +### wolfSSL Async Release v3.15.8 (03/01/2019) - Intermediate release + +* Performance improvements for QuickAssist. +* Added new build option `QAT_POLL_RESP_QUOTA` to indicate maximum number of callbacks to service per poll. The default is 0 (all), was previously 8. +* Added useful QAT_DEBUG logging for ECC and DH operations. +* Cleanup whitespace in quickassist.c. +* Enhanced the Cavium macros for `CAVIUM_MAX_PENDING` and `CAVIUM_MAX_POLL` over-ridable. +* Added build-time override for benchmark thread count `WC_ASYNC_BENCH_THREAD_COUNT`. +* Fixes for wolfCrypt test with asynchronous support enabled and `--enable-nginx`. +* Fix to use QAT for ECC sign and verify when SP is enabled and key was initialized with devId. +* Fixes issues with wolfCrypt test and QAT not properly calling "again" for the ECC sign, verify and shared secret. +* Correct the output for multi-threaded benchmark using `-base10` option. +* Fixes to QAT HMAC enables in benchmark tool. +* Adds new `NO_HW_BENCH` to support using multi-threaded software only benchmarks. + +### wolfSSL Async Release v3.15.7 (12/27/2018) + +* Fixes for various analysis warnings (https://github.com/wolfSSL/wolfssl/pull/2003). +* Added QAT v1.7 driver support. +* Added QAT SHA-3 support. +* Added QAT RSA Key Generation support. +* Added support for new usdm memory driver. +* Added support for detecting QAT version and features. 
+* Added `QAT_ENABLE_RNG` option to disable QAT TRNG/DRBG. +* Added alternate hashing method to cache all updates (avoids using partial updates). + +### wolfSSL Async Release v3.15.5 (11/09/2018) + +* Fixes for various analysis warnings (https://github.com/wolfSSL/wolfssl/pull/1918). +* Fix for QAT possible double free case where `ctx->symCtx` is not trapped. +* Improved QAT debug messages when using `QAT_DEBUG`. +* Fix for QAT RNG to allow zero length. This resolves PSS case where `wc_RNG_GenerateBlock` is called for saltLen == 0. + + +### wolfSSL Async Release v3.15.3 (06/20/2018) + +* Fixes for fsanitize tests with Cavium Nitrox V. +* Removed typedef for `CspHandle`, since it's already defined. +* Fixes for a couple of fsanitize warnings. +* Fix for possible leak with large request to `IntelQaDrbg`. + +### wolfSSL Async Release v3.14.4 (04/13/2018) + +* Added Nitrox V ECC. +* Added Nitrox V SHA-224 and SHA-3 +* Added Nitrox V AES GCM +* Added Nitrox III SHA2 384/512 support for HMAC. +* Added error code handling for signature check failure. +* Added error translate for `ERR_PKCS_DECRYPT_INCORRECT` +* Added useful `WOLFSSL_NITROX_DEBUG` and show count for pending checks. +* Cleanup of Nitrox symmetric processing to use single while loops. +* Cleanup to only include some headers in cavium_nitrox.c port. +* Fixes for building against Nitrox III and V SDK. +* Updates to README.md with required CFLAGS/LDFLAGS when building without ./configure. +* Fix for Intel QuickAssist HMAC to use software for unsupported hash algorithms. + + +### wolfSSL Async Release v3.12.2 (10/22/2017) + +* Fix for HMAC QAT when block size aligned. The QAT HMAC final without any buffers will fail incorrectly (bug in QAT 1.6). +* Nitrox fix for rename of `ContextType` to `context_type_t`. Updates to Nitrox README.md. +* Workaround for `USE_QAE_THREAD_LS` issue with realloc from a different thread. +* Fix for hashing to allow zero length. This resolves issue with new empty hash tests. 
+* Fix bug with blocking async where operation was being free'd before completion. Set freeFunc prior to performing operation and check ret code in poll. +* Fix leak with cipher symmetric context close. +* Fix QAT_DEBUG partialState offset. +* Fixes for symmetric context caching. +* Refactored async event initialization so its done prior to making possible async calls. +* Fix to resolve issue with QAT callbacks and multi-threading. +* The cleanup is now handled in polling function and the event is only marked done from the polling thread that matches the originating thread. +* Fix possible mem leak with multiple threads `g_qatEcdhY` and `g_qatEcdhCofactor1`. +* Fix the block polling to use `ret` instead of `status`. +* Change order of `IntelQaDevClear` and setting `event->ret`. +* Fixes to better handle threading with async. +* Refactor of async event state. +* Refactor to initialize event prior to operation (in case it finishes before adding to queue). +* Fixes issues with AES GCM decrypt that can corrupt up to authTag bytes at end of output buffer provided. +* Optimize the Hmac struct to replace keyRaw with ipad. +* Enhancement to allow reuse of the symmetric context for ciphers. +* Fixes for QuickAssist (QAT) multi-threading. Fix to not set return code until after callback cleanup. +* Disable thread binding to specific CPU by default (enabled now with `WC_ASYNC_THREAD_BIND`). +* Added optional define `QAT_USE_POLLING_CHECK ` to have only one thread polling at a time (not required and doesn't improve performance). +* Reduced default QAT_MAX_PENDING for benchmark to 15 (120/num_threads). +* Fix for IntelQaDrbg to handle buffer over 0xFFFF in length. +* Added working DRBG and TRNG implementations for QAT. +* Fix to set callback status after ret and output have been set. Cleanup of the symmetric context. +* Updates to support refactored dynamic types. +* Fix for QAT symmetric to allow NULL authTag. +* Fix GCC 7 build warning with braces. +* Cleanup formatting. 
+ +### wolfSSL Async Release v3.11.0 (05/05/2017) + +* Fixes for Cavium Nitrox III/V. + - Fix with possible crash when using a request Id that is already complete, due to partial submissions not marking event done. + - Improvements to max buffer lengths. + - Fixes to handle various return code patterns with CNN55XX-SDK. + - All Nitrox V tests and benchmarks pass. Bench: RSA 2048-bit public 336,674 ops/sec and private (CRT) 66,524 ops/sec. + +* Intel QuickAssist support and various async fixes/improvements: + - Added support for Intel QuickAssist v1.6 driver with QuickAssist 8950 hardware + - Added QAE memory option to use static memory list instead of dynamic list using `USE_QAE_STATIC_MEM`. + - Added tracking of deallocs and made the values signed long. + - Improved code for wolf header check and expanded to 16-byte alignment for performance improvement with TLS. + - Added ability to override limit dev access parameters and all configurable QAT fields. + - Added async simulator tests for DH, DES3 CBC and AES CBC/GCM. + - Rename AsyncCryptDev to WC_ASYNC_DEV. + - Refactor to move WOLF_EVENT into WC_ASYNC_DEV. + - Refactor the async struct/enum names to use WC_ naming. + - Refactor of the async event->context to use WOLF_EVENT_TYPE_ASYNC_WOLFSSL or WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT to indicate the type of context pointer. + - Added flag to WOLF_EVENT which is used to determine if the async complete should call into operation again or goto next `WC_ASYNC_FLAG_CALL_AGAIN`. + - Cleanup of the "wolfAsync_DevCtxInit" calls to make sure asyncDev is always cleared if invalid device id is used. + - Eliminated WOLFSSL_ASYNC_CRYPT_STATE. + - Removed async event type WOLF_EVENT_TYPE_ASYNC_ANY. + - Enable the random extra delay option by default for simulator as it helps catch bugs. + - Cleanup for async free to also check marker. + - Refactor of the async wait and handle to reduce duplicate code. + - Added async simulator test for RSA make key. 
+ - Added WC_ASYNC_THRESH_NONE to allow bypass of threshold for testing + - Added static numbers for the async sim test types, for easier debugging of the “testDev->type” value. + - Populate heap hint into asyncDev struct. + - Enhancement to cache the asyncDev to improve poll performance. + - Added async threading helpers and new wolfAsync_DevOpenThread. + - Added WC_NO_ASYNC_THREADING to prevent async threading. + - Added new API “wc_AsyncGetNumberOfCpus” for getting number of CPU’s. + - Added new “wc_AsyncThreadYield” API. + - Added WOLF_ASYNC_MAX_THREADS. + - Added new API for wolfAsync_DevCopy. + - Fix to make sure an async init failure sets the deviceId to INVALID_DEVID. + - Fix for building with async threading support on Mac. + - Fix for using simulator so it supports multiple threads. + +* Moved Intel QuickAssist and Cavium Nitrox III/V code into async repo. +* Added new WC_ASYNC_NO_* options to allow disabling of individual async algorithms. + - New defines are: WC_ASYNC_NO_CRYPT, WC_ASYNC_NO_PKI and WC_ASYNC_NO_HASH. + - Additionally each algorithm has a WC_ASYNC_NO_[ALGO] define. + + +### wolfSSL Async Release v3.9.8 (07/25/2016) + +* Asynchronous wolfCrypt and Cavium Nitrox V support. + +### wolfSSL Async Release v3.9.0 (03/04/2016) + +* Initial version with async simulator and README.md. + + +## Support + +For questions email wolfSSL support at support@wolfssl.com diff --git a/SCRIPTS-LIST b/SCRIPTS-LIST index f99b7ce1a..b22794dc2 100644 --- a/SCRIPTS-LIST +++ b/SCRIPTS-LIST @@ -11,8 +11,6 @@ fips-hash.sh - updates the verifyCore hash in fips_test.c fips-check.sh - checks if current wolfSSL version works against FIPS wolfCrypt comment out last line to leave working directory -async-check.sh - internal script for validating wolfSSL Async using the simulator. 
- gencertbuf.pl - creates certs_test.h, our certs / keys C array for easy non filesystem testing diff --git a/async-check.sh b/async-check.sh deleted file mode 100755 index bb24bded2..000000000 --- a/async-check.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env bash - -# This script creates symbolic links to the required asynchronous -# file for using the asynchronous simulator and make check - -# Fail on any error in script -set -e - -ASYNC_REPO=https://github.com/wolfSSL/wolfAsyncCrypt.git -ASYNC_DIR=${ASYNC_DIR:-wolfAsyncCrypt} - -function Usage() { - printf "Usage: $0 [install|uninstall|test|remove]\n" - printf "\tinstall - get and set up links to wolfAsyncCrypt files\n" - printf "\tuninstall - remove the links to wolfAsyncCrypt\n" - printf "\ttest - install and run 'make check'\n" - printf "\tremove - uninstall and remove wolfAsyncCrypt\n" -} - -function UnlinkFiles() { - unlink ./wolfcrypt/src/async.c - unlink ./wolfssl/wolfcrypt/async.h - unlink ./wolfcrypt/src/port/intel/quickassist.c - unlink ./wolfcrypt/src/port/intel/quickassist_mem.c - unlink ./wolfcrypt/src/port/intel/README.md - unlink ./wolfssl/wolfcrypt/port/intel/quickassist.h - unlink ./wolfssl/wolfcrypt/port/intel/quickassist_mem.h - unlink ./wolfcrypt/src/port/cavium/cavium_nitrox.c - unlink ./wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h - unlink ./wolfcrypt/src/port/cavium/README.md - - # restore original README.md files - git checkout -- wolfcrypt/src/port/cavium/README.md - git checkout -- wolfcrypt/src/port/intel/README.md -} - -function LinkFiles() { - # link files - ln -s -f ../../${ASYNC_DIR}/wolfcrypt/src/async.c ./wolfcrypt/src/async.c - ln -s -f ../../${ASYNC_DIR}/wolfssl/wolfcrypt/async.h ./wolfssl/wolfcrypt/async.h - ln -s -f ../../../../${ASYNC_DIR}/wolfcrypt/src/port/intel/quickassist.c ./wolfcrypt/src/port/intel/quickassist.c - ln -s -f ../../../../${ASYNC_DIR}/wolfcrypt/src/port/intel/quickassist_mem.c ./wolfcrypt/src/port/intel/quickassist_mem.c - ln -s -f 
../../../../${ASYNC_DIR}/wolfcrypt/src/port/intel/README.md ./wolfcrypt/src/port/intel/README.md - ln -s -f ../../../../${ASYNC_DIR}/wolfssl/wolfcrypt/port/intel/quickassist.h ./wolfssl/wolfcrypt/port/intel/quickassist.h - ln -s -f ../../../../${ASYNC_DIR}/wolfssl/wolfcrypt/port/intel/quickassist_mem.h ./wolfssl/wolfcrypt/port/intel/quickassist_mem.h - ln -s -f ../../../../${ASYNC_DIR}/wolfcrypt/src/port/cavium/cavium_nitrox.c ./wolfcrypt/src/port/cavium/cavium_nitrox.c - ln -s -f ../../../../${ASYNC_DIR}/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h ./wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h - ln -s -f ../../../../${ASYNC_DIR}/wolfcrypt/src/port/cavium/README.md ./wolfcrypt/src/port/cavium/README.md -} - -function Install() { - if [ -d $ASYNC_DIR ]; - then - echo "Using existing async repo" - else - # make a clone of the wolfAsyncCrypt repository - git clone --depth 1 $ASYNC_REPO $ASYNC_DIR - fi - -# setup auto-conf - ./autogen.sh - LinkFiles -} - -function Uninstall() { - UnlinkFiles -} - -function Test() { - Install - ./configure --enable-asynccrypt --enable-all - make check -} - -function Remove() { - UnlinkFiles - - rm -rf ${ASYNC_DIR} -} - -if [ "$#" -gt 1 ]; then - Usage - exit 1 -fi - -case "x$1" in - "xinstall") - Install - ;; - "xuninstall") - Uninstall - ;; - "xremove") - Remove - ;; - "xtest") - Test - ;; - *) - Usage - exit 1 - ;; -esac - diff --git a/autogen.sh b/autogen.sh index 854aef453..6c2232cc5 100755 --- a/autogen.sh +++ b/autogen.sh @@ -17,7 +17,7 @@ if [ -n "$WSL_DISTRO_NAME" ]; then fi # if and as needed, create empty dummy versions of various files, mostly -# associated with fips/self-test and asynccrypt: +# associated with fips/self-test: for dir in \ ./wolfssl/wolfcrypt/port/intel \ @@ -35,15 +35,7 @@ for file in \ ./wolfcrypt/src/wolfcrypt_first.c \ ./wolfcrypt/src/wolfcrypt_last.c \ ./wolfssl/wolfcrypt/fips.h \ - ./wolfcrypt/src/selftest.c \ - ./wolfcrypt/src/async.c \ - ./wolfssl/wolfcrypt/async.h \ - 
./wolfcrypt/src/port/intel/quickassist.c \ - ./wolfcrypt/src/port/intel/quickassist_mem.c \ - ./wolfcrypt/src/port/cavium/cavium_nitrox.c \ - ./wolfssl/wolfcrypt/port/intel/quickassist.h \ - ./wolfssl/wolfcrypt/port/intel/quickassist_mem.h \ - ./wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h + ./wolfcrypt/src/selftest.c do if [ ! -e "$file" ]; then > "$file" || exit $? diff --git a/configure.ac b/configure.ac index 5b1f8368e..4c82ce6db 100644 --- a/configure.ac +++ b/configure.ac @@ -9535,10 +9535,6 @@ fi if test "$ENABLED_ASYNCCRYPT" = "yes" then AC_MSG_NOTICE([Enabling asynchronous support]) - if ! test -f ${srcdir}/wolfcrypt/src/async.c || ! test -f ${srcdir}/wolfssl/wolfcrypt/async.h - then - AC_MSG_ERROR([--enable-asynccrypt requested, but WOLFSSL_ASYNC_CRYPT source files are missing.]) - fi AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT -DHAVE_WOLF_BIGINT -DWOLFSSL_NO_HASH_RAW" @@ -11623,11 +11619,6 @@ fi # $silent != yes # Show warnings at bottom so they are noticed ################################################################################ -if test "$ENABLED_ASYNCCRYPT" = "yes" && ! test -s $srcdir/wolfcrypt/src/async.c -then - AC_MSG_WARN([Make sure real async files are loaded. See async-check.sh or the wolfssl/wolfAsyncCrypt GitHub repo.]) -fi - # MinGW static vs shared library # Reference URL from libtool for MinGW is located at # http://www.gnu.org/software/libtool/manual/libtool.html#Cygwin-to-MinGW-Cross diff --git a/examples/async/README.md b/examples/async/README.md index 2f7e5f5a3..45229b6ab 100644 --- a/examples/async/README.md +++ b/examples/async/README.md @@ -6,9 +6,6 @@ Supported with: * Crypto Callbacks (`--enable-cryptocb`) * PK Callbacks (`--enable-pkcallbacks`) -Requires files from https://github.com/wolfSSL/wolfAsyncCrypt -See `async-check.sh` for how to setup. 
- Tested with: * `./configure --enable-asynccrypt --enable-rsa --disable-ecc` * `./configure --enable-asynccrypt --disable-rsa --enable-ecc` @@ -25,7 +22,7 @@ make ## Asynchronous Cryptography Design -When a cryptogaphic call is handed off to hardware it return `WC_PENDING_E` up to caller. Then it can keep calling until the operation completes. For some platforms it is required to call `wolfSSL_AsyncPoll`. At the TLS layer a "devId" (Device ID) must be set using `wolfSSL_CTX_SetDevId` to indicate desire to offload cryptography. +When a cryptographic call is handed off to hardware it returns `WC_PENDING_E` up to the caller. The caller can then keep calling until the operation completes. For some platforms it is required to call `wolfSSL_AsyncPoll`. At the TLS layer a "devId" (Device ID) must be set using `wolfSSL_CTX_SetDevId` to indicate desire to offload cryptography. For further design details please see: https://github.com/wolfSSL/wolfAsyncCrypt#design diff --git a/wolfcrypt/src/async.c b/wolfcrypt/src/async.c new file mode 100644 index 000000000..efc06eef5 --- /dev/null +++ b/wolfcrypt/src/async.c @@ -0,0 +1,1158 @@ +/* async.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#include + +#ifdef WOLFSSL_ASYNC_CRYPT + +#include +#include + +#include + + +static WC_ASYNC_DEV* wolfAsync_GetDev(WOLF_EVENT* event) +{ + WC_ASYNC_DEV* dev = NULL; + + if (event && event->context) { + switch (event->type) { + /* context is WOLFSSL* */ + case WOLF_EVENT_TYPE_ASYNC_WOLFSSL: + { + WOLFSSL* ssl = (WOLFSSL*)event->context; + dev = ssl->asyncDev; + break; + } + + /* context is WC_ASYNC_DEV */ + case WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT: + { + dev = (WC_ASYNC_DEV*)event->context; + break; + } + + case WOLF_EVENT_TYPE_NONE: + default: + WOLFSSL_MSG("Unhandled event->type context!"); + dev = NULL; + break; + } + } + + return dev; +} + + +#if defined(WOLFSSL_ASYNC_CRYPT_SW) + +/* Allow way to have async SW code included, and disabled at run-time */ +static int wolfAsyncSwDisabled = 0; /* default off */ + + +static int wolfAsync_DoSw(WC_ASYNC_DEV* asyncDev) +{ + int ret = 0; + WC_ASYNC_SW* sw; + + if (asyncDev == NULL) { + return BAD_FUNC_ARG; + } + sw = &asyncDev->sw; + + switch (sw->type) { +#ifdef HAVE_ECC + case ASYNC_SW_ECC_MAKE: + { + ret = wc_ecc_make_key_ex( + (WC_RNG*)sw->eccMake.rng, + sw->eccMake.size, + (ecc_key*)sw->eccMake.key, + sw->eccMake.curve_id + ); + break; + } + #ifdef HAVE_ECC_SIGN + case ASYNC_SW_ECC_SIGN: + { + ret = wc_ecc_sign_hash_ex( + sw->eccSign.in, + sw->eccSign.inSz, + (WC_RNG*)sw->eccSign.rng, + (ecc_key*)sw->eccSign.key, + (mp_int*)sw->eccSign.r, + (mp_int*)sw->eccSign.s + ); + break; + } + #endif /* HAVE_ECC_SIGN */ + #ifdef HAVE_ECC_VERIFY + case ASYNC_SW_ECC_VERIFY: + { + ret = wc_ecc_verify_hash_ex( + (mp_int*)sw->eccVerify.r, + (mp_int*)sw->eccVerify.s, + sw->eccVerify.hash, + sw->eccVerify.hashlen, + sw->eccVerify.stat, + (ecc_key*)sw->eccVerify.key + ); + break; + } + #endif /* HAVE_ECC_VERIFY */ 
+ #ifdef HAVE_ECC_DHE + case ASYNC_SW_ECC_SHARED_SEC: + { + ret = wc_ecc_shared_secret_gen_sync( + (ecc_key*)sw->eccSharedSec.private_key, + (ecc_point*)sw->eccSharedSec.public_point, + sw->eccSharedSec.out, + sw->eccSharedSec.outLen + ); + break; + } + #endif /* HAVE_ECC_DHE */ +#endif /* HAVE_ECC */ +#ifndef NO_RSA + #ifdef WOLFSSL_KEY_GEN + case ASYNC_SW_RSA_MAKE: + { + ret = wc_MakeRsaKey( + (RsaKey*)sw->rsaMake.key, + sw->rsaMake.size, + sw->rsaMake.e, + (WC_RNG*)sw->rsaMake.rng + ); + break; + } + #endif /* WOLFSSL_KEY_GEN */ + case ASYNC_SW_RSA_FUNC: + { + ret = wc_RsaFunction( + sw->rsaFunc.in, + sw->rsaFunc.inSz, + sw->rsaFunc.out, + sw->rsaFunc.outSz, + sw->rsaFunc.type, + (RsaKey*)sw->rsaFunc.key, + (WC_RNG*)sw->rsaFunc.rng + ); + break; + } +#endif /* !NO_RSA */ +#ifndef NO_DH + case ASYNC_SW_DH_AGREE: + { + ret = wc_DhAgree( + (DhKey*)sw->dhAgree.key, + sw->dhAgree.agree, + sw->dhAgree.agreeSz, + sw->dhAgree.priv, + sw->dhAgree.privSz, + sw->dhAgree.otherPub, + sw->dhAgree.pubSz + ); + break; + } + case ASYNC_SW_DH_GEN: + { + ret = wc_DhGenerateKeyPair( + (DhKey*)sw->dhGen.key, + (WC_RNG*)sw->dhGen.rng, + sw->dhGen.priv, + sw->dhGen.privSz, + sw->dhGen.pub, + sw->dhGen.pubSz + ); + break; + } +#endif /* !NO_DH */ +#ifndef NO_AES + case ASYNC_SW_AES_CBC_ENCRYPT: + { + ret = wc_AesCbcEncrypt( + (Aes*)sw->aes.aes, + sw->aes.out, + sw->aes.in, + sw->aes.sz + ); + break; + } + #ifdef HAVE_AES_DECRYPT + case ASYNC_SW_AES_CBC_DECRYPT: + { + ret = wc_AesCbcDecrypt( + (Aes*)sw->aes.aes, + sw->aes.out, + sw->aes.in, + sw->aes.sz + ); + break; + } + #endif /* HAVE_AES_DECRYPT */ + + #ifdef HAVE_AESGCM + case ASYNC_SW_AES_GCM_ENCRYPT: + { + ret = wc_AesGcmEncrypt( + (Aes*)sw->aes.aes, + sw->aes.out, + sw->aes.in, + sw->aes.sz, + sw->aes.iv, + sw->aes.ivSz, + sw->aes.authTag, + sw->aes.authTagSz, + sw->aes.authIn, + sw->aes.authInSz + ); + break; + } + #ifdef HAVE_AES_DECRYPT + case ASYNC_SW_AES_GCM_DECRYPT: + { + ret = wc_AesGcmDecrypt( + (Aes*)sw->aes.aes, + 
sw->aes.out, + sw->aes.in, + sw->aes.sz, + sw->aes.iv, + sw->aes.ivSz, + sw->aes.authTag, + sw->aes.authTagSz, + sw->aes.authIn, + sw->aes.authInSz + ); + break; + } + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ +#ifndef NO_DES3 + case ASYNC_SW_DES3_CBC_ENCRYPT: + { + ret = wc_Des3_CbcEncrypt( + (Des3*)sw->des.des, + sw->des.out, + sw->des.in, + sw->des.sz + ); + break; + } + case ASYNC_SW_DES3_CBC_DECRYPT: + { + ret = wc_Des3_CbcDecrypt( + (Des3*)sw->des.des, + sw->des.out, + sw->des.in, + sw->des.sz + ); + break; + } +#endif /* !NO_DES3 */ + default: + WOLFSSL_MSG("Invalid async crypt SW type!"); + ret = BAD_FUNC_ARG; + break; + }; + + /* Reset test type */ + if (ret == FP_WOULDBLOCK) { + ret = WC_PENDING_E; + } + else if (ret == 0) { + sw->type = ASYNC_SW_NONE; + } + + return ret; +} + +int wc_AsyncSwInit(WC_ASYNC_DEV* dev, int type) +{ + if (dev) { + WC_ASYNC_SW* sw = &dev->sw; + if (sw->type == ASYNC_SW_NONE) { + sw->type = type; + return 1; + } + } + return 0; +} + +#endif /* WOLFSSL_ASYNC_CRYPT_SW */ + +int wolfAsync_DevOpenThread(int *pDevId, void* threadId) +{ + int ret = 0; + int devId = INVALID_DEVID; + +#ifdef HAVE_CAVIUM + ret = NitroxOpenDeviceDefault(); + if (ret >= 0) + devId = ret; + else + ret = ASYNC_INIT_E; +#elif defined(HAVE_INTEL_QA) + ret = IntelQaInit(threadId); + if (ret >= 0) + devId = ret; + else + ret = ASYNC_INIT_E; +#elif defined(WOLFSSL_ASYNC_CRYPT_SW) + if (!wolfAsyncSwDisabled) { + /* For SW use any value 0 or greater */ + devId = 0; + } +#endif + + (void)threadId; + + /* return devId if requested */ + if (*pDevId) + *pDevId = devId; + + return ret; +} + +int wolfAsync_HardwareStart(void) +{ + int ret = 0; + + #ifdef HAVE_CAVIUM + /* nothing to do */ + #elif defined(HAVE_INTEL_QA) + ret = IntelQaHardwareStart(QAT_PROCESS_NAME, QAT_LIMIT_DEV_ACCESS); + #endif + + return ret; +} + +void wolfAsync_HardwareStop(void) +{ + #ifdef HAVE_CAVIUM + /* nothing to do */ + #elif defined(HAVE_INTEL_QA) + 
IntelQaHardwareStop(); + #endif +} + +int wolfAsync_DevOpen(int *devId) +{ + return wolfAsync_DevOpenThread(devId, NULL); +} + +void wolfAsync_DevClose(int *devId) +{ + if (devId && *devId != INVALID_DEVID) { + #ifdef HAVE_CAVIUM + NitroxCloseDevice(*devId); + #elif defined(HAVE_INTEL_QA) + IntelQaDeInit(*devId); + #endif + *devId = INVALID_DEVID; + } +} + +int wolfAsync_DevCtxInit(WC_ASYNC_DEV* asyncDev, word32 marker, void* heap, + int devId) +{ + int ret = 0; + + if (asyncDev == NULL) { + return BAD_FUNC_ARG; + } + + /* always clear async device context */ + XMEMSET(asyncDev, 0, sizeof(WC_ASYNC_DEV)); + + /* negative device Id's are invalid */ + if (devId >= 0) { + asyncDev->marker = marker; + asyncDev->heap = heap; + + #ifdef HAVE_CAVIUM + ret = NitroxAllocContext(asyncDev, devId, CONTEXT_SSL); + #elif defined(HAVE_INTEL_QA) + ret = IntelQaOpen(asyncDev, devId); + #endif + } + + return ret; +} + +void wolfAsync_DevCtxFree(WC_ASYNC_DEV* asyncDev, word32 marker) +{ + if (asyncDev && asyncDev->marker == marker) { + #ifdef HAVE_CAVIUM + NitroxFreeContext(asyncDev); + #elif defined(HAVE_INTEL_QA) + IntelQaClose(asyncDev); + #endif + asyncDev->marker = WOLFSSL_ASYNC_MARKER_INVALID; + } +} + +int wolfAsync_DevCopy(WC_ASYNC_DEV* src, WC_ASYNC_DEV* dst) +{ + int ret = 0; + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + + /* make sure we aren't copying to self */ + if (src == dst) + return ret; + +#ifdef HAVE_CAVIUM + /* nothing to do here */ +#elif defined(HAVE_INTEL_QA) + ret = IntelQaDevCopy(src, dst); +#endif + + return ret; +} + +/* called from `wolfSSL_AsyncPop` to check if event is done and deliver + * async return code */ +int wolfAsync_EventPop(WOLF_EVENT* event, enum WOLF_EVENT_TYPE event_type) +{ + int ret; + + if (event == NULL) { + return BAD_FUNC_ARG; + } + + if (event->type == event_type) { + /* Trap the scenario where event is not done */ + if (event->state == WOLF_EVENT_STATE_PENDING) { + return WC_PENDING_E; + } + + /* Get async return code 
*/ + ret = event->ret; + + /* Reset state */ + event->state = WOLF_EVENT_STATE_READY; + } + else { + ret = WC_NO_PENDING_E; + } + + return ret; +} + +int wolfAsync_EventQueuePush(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event) +{ + if (queue == NULL) { + return BAD_FUNC_ARG; + } + + /* Setup event and push to event queue */ + event->dev.async = wolfAsync_GetDev(event); + return wolfEventQueue_Push(queue, event); +} + +#ifdef HAVE_CAVIUM +static int wolfAsync_NitroxCheckReq(WC_ASYNC_DEV* asyncDev, WOLF_EVENT* event) +{ + int ret; + + /* populate event requestId */ + event->reqId = asyncDev->nitrox.reqId; + if (event->reqId == 0) + return WC_INIT_E; + + /* poll specific request */ + ret = NitroxCheckRequest(asyncDev, event); + +#ifdef WOLFSSL_NITROX_DEBUG + if (event->ret == WC_NO_ERR_TRACE(WC_PENDING_E)) + event->pendCount++; + else + printf("NitroxCheckRequest: ret %x, req %lx, count %u\n", + ret, + event->reqId, + event->pendCount); +#else + (void)ret; +#endif + + /* if not pending then clear requestId */ + if (event->ret != WC_NO_ERR_TRACE(WC_PENDING_E)) { + event->reqId = 0; + } + + return 0; +} +#endif /* HAVE_CAVIUM */ + +int wolfAsync_EventPoll(WOLF_EVENT* event, WOLF_EVENT_FLAG flags) +{ + int ret = 0; + WC_ASYNC_DEV* asyncDev; + + (void)flags; + + if (event == NULL) { + return BAD_FUNC_ARG; + } + asyncDev = event->dev.async; + if (asyncDev == NULL) { + return WC_INIT_E; + } + + if (flags & WOLF_POLL_FLAG_CHECK_HW) { + #if defined(HAVE_CAVIUM) + ret = wolfAsync_NitroxCheckReq(asyncDev, event); + #elif defined(HAVE_INTEL_QA) + /* poll QAT hardware, callback returns data, IntelQaPoll sets event */ + ret = IntelQaPoll(asyncDev); + #elif defined(WOLFSSL_ASYNC_CRYPT_SW) + event->ret = wolfAsync_DoSw(asyncDev); + #endif + + /* If not pending then mark as done */ + if (event->ret != WC_NO_ERR_TRACE(WC_PENDING_E)) { + event->state = WOLF_EVENT_STATE_DONE; + } + } + + return ret; +} + + +#ifdef HAVE_CAVIUM +static int wolfAsync_NitroxCheckMultiReqBuf(WC_ASYNC_DEV* 
asyncDev, + WOLF_EVENT_QUEUE* queue, void* context_filter, + CspMultiRequestStatusBuffer* multi_req, int req_count) +{ + WOLF_EVENT* event; + int ret = 0, i; + + if (asyncDev == NULL || queue == NULL || multi_req == NULL) { + return BAD_FUNC_ARG; + } + + /* Perform multi hardware poll */ + ret = NitroxCheckRequests(asyncDev, multi_req); + if (ret != 0) { + return ret; + } + + /* Iterate event queue */ + for (event = queue->head; event != NULL; event = event->next) { + if (event->type >= WOLF_EVENT_TYPE_ASYNC_FIRST && + event->type <= WOLF_EVENT_TYPE_ASYNC_LAST) + { + /* optional filter based on context */ + if (context_filter == NULL || event->context == context_filter) { + /* find request */ + for (i = 0; i < req_count; i++) { + if (event->reqId == multi_req->req[i].request_id) { + + event->ret = NitroxTranslateResponseCode( + multi_req->req[i].status); + + #ifdef WOLFSSL_NITROX_DEBUG + if (event->ret == WC_NO_ERR_TRACE(WC_PENDING_E)) + event->pendCount++; + else + printf("NitroxCheckRequests: " + "ret %x, req %lx, count %u\n", + multi_req->req[i].status, + multi_req->req[i].request_id, + event->pendCount); + #endif + + /* If not pending then mark as done */ + if (event->ret != WC_NO_ERR_TRACE(WC_PENDING_E)) { + event->state = WOLF_EVENT_STATE_DONE; + event->reqId = 0; + } + break; + } + } + } + } + } + + /* reset multi request buffer */ + XMEMSET(multi_req, 0, sizeof(CspMultiRequestStatusBuffer)); + multi_req->count = CAVIUM_MAX_POLL; + + return ret; +} +#endif /* HAVE_CAVIUM */ + +int wolfAsync_EventQueuePoll(WOLF_EVENT_QUEUE* queue, void* context_filter, + WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount) +{ + WOLF_EVENT* event; + int ret = 0, count = 0; + WC_ASYNC_DEV* asyncDev = NULL; +#if defined(HAVE_CAVIUM) + CspMultiRequestStatusBuffer multi_req; + int req_count = 0; + + /* reset multi request buffer */ + XMEMSET(&multi_req, 0, sizeof(CspMultiRequestStatusBuffer)); + multi_req.count = CAVIUM_MAX_POLL; +#endif + + /* possible 
un-used variable */ + (void)asyncDev; + + if (queue == NULL) { + return BAD_FUNC_ARG; + } + +#ifndef SINGLE_THREADED + /* In single threaded mode "event_queue.lock" doesn't exist */ + if ((ret = wc_LockMutex(&queue->lock)) != 0) { + return ret; + } +#endif + + if (flags & WOLF_POLL_FLAG_CHECK_HW) { + /* check event queue */ + for (event = queue->head; event != NULL; event = event->next) { + if (event->type >= WOLF_EVENT_TYPE_ASYNC_FIRST && + event->type <= WOLF_EVENT_TYPE_ASYNC_LAST) + { + /* optional filter based on context */ + if (context_filter == NULL || + event->context == context_filter) { + asyncDev = event->dev.async; + + if (asyncDev == NULL) { + ret = WC_INIT_E; + break; + } + + count++; + + #if defined(HAVE_CAVIUM) + /* populate event requestId */ + event->reqId = asyncDev->nitrox.reqId; + + /* add entry to multi-request buffer for polling */ + if (event->reqId > 0) { + multi_req.req[req_count++].request_id = event->reqId; + } + /* submit filled multi-request query */ + if (req_count == CAVIUM_MAX_POLL) { + ret = wolfAsync_NitroxCheckMultiReqBuf(asyncDev, + queue, context_filter, &multi_req, req_count); + if (ret != 0) + break; + req_count = 0; /* buffer was flushed; refill from 0 */ + } + #else + #if defined(HAVE_INTEL_QA) + /* poll QAT hardware, callback returns data, + * IntelQaPoll sets event */ + ret = IntelQaPoll(asyncDev); + if (ret != 0) { + break; + } + + #elif defined(WOLFSSL_ASYNC_CRYPT_SW) + #ifdef WOLF_ASYNC_SW_SKIP_MOD + /* Simulate random hardware not done */ + if (count % WOLF_ASYNC_SW_SKIP_MOD) + #endif + { + event->ret = wolfAsync_DoSw(asyncDev); + } + #elif defined(WOLF_CRYPTO_CB) || defined(HAVE_PK_CALLBACKS) + /* Use crypto or PK callbacks */ + + #else + #warning No async crypt device defined!
+ #endif + + /* If not pending then mark as done */ + if (event->ret != WC_NO_ERR_TRACE(WC_PENDING_E)) { + event->state = WOLF_EVENT_STATE_DONE; + } + #endif + } + } + } /* for */ + + #if defined(HAVE_CAVIUM) + /* submit partial multi-request query (if no prev errors) */ + if (ret == 0 && req_count > 0) { + ret = wolfAsync_NitroxCheckMultiReqBuf(asyncDev, + queue, context_filter, &multi_req, req_count); + } + #endif + } /* flag WOLF_POLL_FLAG_CHECK_HW */ + + /* process event queue */ + count = 0; + for (event = queue->head; event != NULL; event = event->next) { + if (event->type >= WOLF_EVENT_TYPE_ASYNC_FIRST && + event->type <= WOLF_EVENT_TYPE_ASYNC_LAST) + { + /* optional filter based on context */ + if (context_filter == NULL || event->context == context_filter) { + /* If event is done then process */ + if (event->state == WOLF_EVENT_STATE_DONE) { + /* remove from queue */ + ret = wolfEventQueue_Remove(queue, event); + if (ret < 0) break; /* exit for */ + + /* return pointer in 'events' arg */ + if (events) { + events[count] = event; /* return pointer */ + } + count++; + + /* check to make sure our event list isn't full */ + if (events && count >= maxEvents) { + break; /* exit for */ + } + } + } + } + } + +#ifndef SINGLE_THREADED + wc_UnLockMutex(&queue->lock); +#endif + + /* Return number of properly populated events */ + if (eventCount) { + *eventCount = count; + } + + return ret; +} + +int wolfAsync_EventInit(WOLF_EVENT* event, WOLF_EVENT_TYPE type, void* context, + word32 flags) +{ + int ret = 0; + WC_ASYNC_DEV* asyncDev; + + if (event == NULL) { + return BAD_FUNC_ARG; + } + + event->type = type; + event->context = context; +#ifndef WC_NO_ASYNC_THREADING + event->threadId = wc_AsyncThreadId(); +#endif + event->ret = WC_PENDING_E; + event->state = WOLF_EVENT_STATE_PENDING; + + asyncDev = wolfAsync_GetDev(event); + event->dev.async = asyncDev; + event->flags = flags; +#ifdef HAVE_CAVIUM + event->reqId = 0; +#endif + + return ret; +} + +int 
wolfAsync_EventWait(WOLF_EVENT* event) +{ + int ret = 0; + + if (event == NULL) { + return BAD_FUNC_ARG; + } + + /* wait for completion */ + while (ret == 0 && event->ret == WC_NO_ERR_TRACE(WC_PENDING_E)) { + ret = wolfAsync_EventPoll(event, WOLF_POLL_FLAG_CHECK_HW); + } + + return ret; +} + +int wc_AsyncHandle(WC_ASYNC_DEV* asyncDev, WOLF_EVENT_QUEUE* queue, + word32 event_flags) +{ + int ret; + WOLF_EVENT* event; + + if (asyncDev == NULL || queue == NULL) { + return BAD_FUNC_ARG; + } + + /* setup the event and push to queue */ + event = &asyncDev->event; + ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, + asyncDev, event_flags); + if (ret == 0) { + ret = wolfEventQueue_Push(queue, event); + } + + /* check for error (helps with debugging) */ + if (ret != 0) { + WOLFSSL_MSG("wc_AsyncHandle failed"); + } + + return ret; +} + +int wc_AsyncWait(int ret, WC_ASYNC_DEV* asyncDev, word32 event_flags) +{ + if (ret == WC_NO_ERR_TRACE(WC_PENDING_E)) { + WOLF_EVENT* event; + + if (asyncDev == NULL) + return BAD_FUNC_ARG; + + event = &asyncDev->event; + ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, + asyncDev, event_flags); + if (ret == 0) { + ret = wolfAsync_EventWait(event); + if (ret == 0) { + ret = event->ret; + + /* clear event */ + event->state = WOLF_EVENT_STATE_READY; + } + } + } + return ret; +} + +#ifndef WC_NO_ASYNC_SLEEP +int wc_AsyncSleep(word32 ms) +{ + int ret = 0; + struct timespec resTime, remTime; + resTime.tv_sec = ms/1000; + resTime.tv_nsec = (ms%1000)*1000000; + do { + ret = nanosleep(&resTime, &remTime); + resTime = remTime; + } while ((ret!=0) && (errno == EINTR)); + + if (ret != 0) { + fprintf(stderr, "nanoSleep failed with code %d\n", ret); + return BAD_FUNC_ARG; + } + + return ret; +} +#endif + +/* Pthread Helpers */ +#ifndef WC_NO_ASYNC_THREADING + +int wc_AsyncGetNumberOfCpus(void) +{ + int numCpus; + + numCpus = (int)sysconf(_SC_NPROCESSORS_ONLN); + + return numCpus; +} + +int wc_AsyncThreadCreate_ex(pthread_t 
*thread, + word32 priority, int policy, + AsyncThreadFunc_t function, void* params) +{ + int status = 1; + pthread_attr_t attr; + struct sched_param param; + + status = pthread_attr_init(&attr); + if (status !=0) { + fprintf(stderr, "pthread_attr_init error: %d\n", status); + return ASYNC_OP_E; + } + + /* Setting scheduling parameter will fail for non root user, + * as the default value of inheritsched is PTHREAD_EXPLICIT_SCHED in + * POSIX. It is not required to set it explicitly before setting the + * scheduling policy */ + + /* Set scheduling policy based on values provided */ + if ((policy != SCHED_RR) && + (policy != SCHED_FIFO) && + (policy != SCHED_OTHER)) + { + policy = SCHED_OTHER; + } + + status = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (status != 0) { + goto exit_fail; + } + + status = pthread_attr_setschedpolicy(&attr, policy); + if (status != 0) { + goto exit_fail; + } + + /* Set priority based on value in threadAttr */ + memset(¶m, 0, sizeof(param)); + param.sched_priority = priority; + if (policy != SCHED_OTHER) { + status = pthread_attr_setschedparam(&attr, ¶m); + if (status != 0) { + goto exit_fail; + } + } + + status = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + if (status != 0) { + goto exit_fail; + } + + status = pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + if (status != 0) { + goto exit_fail; + } + + status = pthread_create(thread, &attr, function, params); + if (status != 0) { + goto exit_fail; + } + + /*destroy the thread attributes as they are no longer required, this does + * not affect the created thread*/ + status = pthread_attr_destroy(&attr); + if (status != 0) { + fprintf(stderr, "AsyncThreadCreate error: %d\n", status); + return ASYNC_OP_E; + } else { + return 0; + } + +exit_fail: + + fprintf(stderr, "AsyncThreadCreate error: %d\n", status); + status = pthread_attr_destroy(&attr); + if (status != 0) + fprintf(stderr, "AsyncThreadCreate cleanup error: %d\n", status); + return 
ASYNC_OP_E; +} + +int wc_AsyncThreadCreate(pthread_t *thread, + AsyncThreadFunc_t function, void* params) +{ + return wc_AsyncThreadCreate_ex(thread, THREAD_DEFAULT_PRIORITY, + THREAD_DEFAULT_POLICY, function, params); +} + +#ifdef __MACH__ + #include + #include + + /* native MACH API wrappers */ + #define SYSCTL_CORE_COUNT "machdep.cpu.core_count" + + typedef struct cpu_set { + uint32_t count; + } cpu_set_t; + + static WC_INLINE void CPU_ZERO(cpu_set_t *cs) { + cs->count = 0; + } + static WC_INLINE void CPU_SET(int num, cpu_set_t *cs) { + cs->count |= (1 << num); + } + static WC_INLINE int CPU_ISSET(int num, cpu_set_t *cs) { + return (cs->count & (1 << num)); + } + + static int pthread_setaffinity_np(pthread_t thread, size_t cpu_size, + cpu_set_t *cpu_set) + { + thread_port_t mach_thread; + thread_affinity_policy_data_t policy; + int core = 0; + + for (core = 0; core < 8 * (int)cpu_size; core++) { + if (CPU_ISSET(core, cpu_set)) + break; + } + + policy.affinity_tag = core; + mach_thread = pthread_mach_thread_np(thread); + thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, + (thread_policy_t)&policy, 1); + + return 0; + } +#endif /* __MACH__ */ + +int wc_AsyncThreadBind(pthread_t *thread, word32 logicalCore) +{ + int status = 0; + cpu_set_t cpuset; + + if (!thread) return BAD_FUNC_ARG; + + CPU_ZERO(&cpuset); + CPU_SET(logicalCore, &cpuset); + + status = pthread_setaffinity_np(*thread, sizeof(cpu_set_t), &cpuset); + if (status != 0) { + fprintf(stderr, "pthread_setaffinity_np error: %d\n", status); + } + + return status; +} + +int wc_AsyncThreadStart(pthread_t *thread) +{ + (void)thread; + return 0; +} + +__attribute__((noreturn)) +void wc_AsyncThreadExit(void *retval) +{ + pthread_exit(retval); +} + +int wc_AsyncThreadKill(pthread_t *thread) +{ + int status; + + if (!thread) return BAD_FUNC_ARG; + + status = pthread_cancel(*thread); + if (status != 0) { + fprintf(stderr, "pthread_cancel fail with status %d\n", status); + } + + return status; +} + + +int 
wc_AsyncThreadPrioritySet(pthread_t *thread, word32 priority) +{ + int status; + struct sched_param param; + int policy; + word32 minPrio; + word32 maxPrio; + + if (!thread) return BAD_FUNC_ARG; + + status = pthread_getschedparam(*thread, &policy, ¶m); + if (status != 0) { + fprintf(stderr, "pthread_getschedparam, failed with status %d\n", + status); + return status; + } + + minPrio = sched_get_priority_min(policy); + maxPrio = sched_get_priority_max(policy); + + if ((priority < minPrio) || (priority > maxPrio)) { + fprintf(stderr, "priority %u outside valid range\n", priority); + return BAD_FUNC_ARG; + } + + param.sched_priority = priority; + + status = pthread_setschedparam(*thread, policy, ¶m); + if (status != 0) { + fprintf(stderr, "pthread_setschedparam, failed with status %d\n", + status); + return status; + } + + return status; +} + +int wc_AsyncThreadSetPolicyAndPriority(pthread_t *thread, word32 policy, + word32 priority) +{ + int status; + struct sched_param param; + word32 minPrio, maxPrio; + int policy1; + + if (!thread) return BAD_FUNC_ARG; + + /* check for a valid value for 'policy' */ + if ((policy != SCHED_RR) && + (policy != SCHED_FIFO) && + (policy != SCHED_OTHER)) + { + fprintf(stderr, "wc_AsyncThreadSetPolicyAndPriority: " + "invalid policy %u\n", policy); + return BAD_FUNC_ARG; + } + + memset(¶m, 0, sizeof(param)); + + status = pthread_getschedparam(*thread, &policy1, ¶m); + if (status != 0) { + fprintf(stderr, "pthread_getschedparam error: %d\n", status); + return status; + } + + minPrio = sched_get_priority_min(policy); + maxPrio = sched_get_priority_max(policy); + + if ((priority < minPrio) || (priority > maxPrio)) { + return BAD_FUNC_ARG; + } + + param.sched_priority = priority; + + status = pthread_setschedparam(*thread, policy, ¶m); + if (status != 0) { + fprintf(stderr, "pthread_setschedparam error: %d\n", status); + return status; + } + + return 0; +} + +int wc_AsyncThreadJoin(pthread_t *thread) +{ + int status; + status = 
pthread_join(*thread, NULL); + if (status != 0) { + fprintf(stderr, "pthread_join failed, status: %d\n", status); + } + return status; +} + +void wc_AsyncThreadYield(void) +{ + sched_yield(); +} + +pthread_t wc_AsyncThreadId(void) +{ + return pthread_self(); +} + +#endif /* WC_NO_ASYNC_THREADING */ + +#endif /* WOLFSSL_ASYNC_CRYPT */ diff --git a/wolfcrypt/src/port/cavium/README.md b/wolfcrypt/src/port/cavium/README.md index b98d866dd..ceda95753 100644 --- a/wolfcrypt/src/port/cavium/README.md +++ b/wolfcrypt/src/port/cavium/README.md @@ -1,3 +1,264 @@ -# Cavium Nitrox III/V Support +# Cavium Nitrox V Support -Please contact wolfSSL at info@wolfssl.com to request an evaluation. +## Directory Structure: +`/` + `/CNN55XX-SDK` + `/wolfssl` + +## Building Cavium Driver + +Tested using `CNN55XX-Driver-Linux-KVM-XEN-PF-SDK-1.4.14.tar` + +### Installation + +```sh +$ cd CN55XX-SDK +$ make clean +$ make +$ cd bin +$ sudo perl ./init_nitrox.pl + +NITROX-V devices found: 1 +NITROX-V driver(nitrox_drv.ko) load: SUCCESS +NITROX-V Device-0 part: CNN5560-900BG676-C45-G + +Reading config file: ../microcode/ssl.conf +Device count: 1 Config file device count: 2 + + NITROX Model: 0x1200 [ CNN55XX PASS 1.0 ] + + Microcode Details: + Version : CNN5x-MC-AE-MAIN-0001 + Core Count : 80 + Code length : 9514 + Block number: 0 + + Microcode Details: + Version : CNN5x-MC-SE-SSL-0004 + Core Count : 64 + Code length : 23738 + Block number: 1 + + Microcode Load Succeed on device: 0 + + [ AE ] Microcode: CNN5x-MC-AE-MAIN-0001 + Group : 0 + Core Mask [Hi Low]: ffff ffffffffffffffff [ 80 ] + + [ SE ] Microcode: CNN5x-MC-SE-SSL-0004 + Group : 0 + Core Mask : ffffffffffffffff [ 64 ] + +Microcode Load success +``` + +```sh +$ lspci | grep Cavium +09:00.0 Network and computing encryption device: Cavium, Inc. Nitrox XL NPX (rev 01) +81:00.0 Network and computing encryption device: Cavium, Inc. Device 0012 +``` + +#### Issues + +1. Fixes to Nitrox Driver for includes into wolfSSL + +a. 
Modify `include/vf_defs.h:120` -> `vf_config_mode_str()` function to: + +```c +static inline const char *vf_config_mode_str(vf_config_type_t vf_mode) +{ + const char *vf_mode_str; +``` + +b. Add `case PF:` to `include/vf_defs.h:82` above `default:` in `vf_config_mode_to_num_vfs()`. + +c. In `include/linux/sysdep.h:46` rename `__BYTED_ORDER` to `__BYTE_ORDER`. + + +2. If the CNN55XX driver is not extracted on the Linux box it can cause issues with the symbolic links in the microcode folder. Fix was to resolve the symbolic links in `./microcode`. + +```sh +NITROX Model: 0x1200 [ CNN55XX PASS 1.0 ] +Invalid microcode +ucode_dload: failed to initialize +``` + +Resolve Links: +```sh +cd microcode +rm main_asym.out +ln -s ./build/main_ae.out ./main_asym.out +rm main_ipsec.out +ln -s ./build/main_ipsec.out ./main_ipsec.out +rm main_ssl.out +ln -s ./build/main_ssl.out ./main_ssl.out +``` + + +## Building wolfSSL + +```sh +./configure --with-cavium-v=../CNN55XX-SDK --enable-asynccrypt --enable-aesni --enable-intelasm +make +sudo make install +``` + +### CFLAGS + +`CFLAGS+= -DHAVE_CAVIUM -DHAVE_CAVIUM_V -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT -DHAVE_WOLF_BIGINT` +`CFLAGS+= -I../CNN55XX-SDK/include -lrt -lcrypto` + +* `HAVE_CAVIUM`: The Cavium define +* `HAVE_CAVIUM_V`: Nitrox V +* `WOLFSSL_ASYNC_CRYPT`: Enable asynchronous wolfCrypt. +* `HAVE_WOLF_EVENT`: Enable wolf event support (required for async) +* `HAVE_WOLF_BIGINT`: Enable wolf big integer support (required for async) + + +### LDFLAGS + +Include the libnitrox static library: +`LDFLAGS+= ../CNN55XX-SDK/lib/libnitrox.a` + + +### wolfSSL Build Issues + +a. If building with debug `-g` and using an older binutils LD version 2.23 or less you may see a linker crash. Example of error: `BFD (GNU Binutils) 2.23.2 internal error, aborting at merge.c line 873 in _bfd_merged_section_offset`. Resolution is to use this in the CFLAGS `-g -fno-merge-debug-strings -fdebug-types-section`.
+ + +## Usage + +Note: Must run applications with `sudo` to access device. + +``` +sudo ./wolfcrypt/benchmark/benchmark +sudo ./wolfcrypt/test/testwolfcrypt +``` + + +## TLS Code Template + +```c +/* GLOBAL DEVICE IDENTIFIER */ +#ifdef WOLFSSL_ASYNC_CRYPT + static int devId = INVALID_DEVID; +#endif + + +/* DONE AT INIT */ +#ifdef WOLFSSL_ASYNC_CRYPT + if (wolfAsync_DevOpen(&devId) != 0) { + fprintf(stderr, "Async device open failed\nRunning without async\n"); + } + + wolfSSL_CTX_UseAsync(ctx, devId); +#endif + +/* DONE IN YOUR WORKER LOOP IN WC_PENDING_E CASES AGAINST YOUR WOLFSSL_CTX */ +#ifdef WOLFSSL_ASYNC_CRYPT + int ret; + WOLF_EVENT* wolfEvents[MAX_WOLF_EVENTS]; + int eventCount, i; + + /* get list of events that are done (not pending) */ + ret = wolfSSL_CTX_AsyncPoll(ctx, wolfEvents, MAX_WOLF_EVENTS, WOLF_POLL_FLAG_CHECK_HW, &eventCount); + if (ret != 0) + goto error; + + for (i = 0; i < eventCount; i++) { + WOLFSSL* ssl = (WOLFSSL*)wolfEvents[i]->context; + if (ssl) { + /* your SSL object is ready to be called again */ + } + } +#endif + +/* DONE AT CLEANUP */ +#ifdef WOLFSSL_ASYNC_CRYPT + wolfAsync_DevClose(&devId); +#endif +``` + +## Benchmarks + +Nitrox V: CNN5560-900-C45 +Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz +CentOS: Kernel 3.10.0-514.16.1.el7.x86_64 +Single Thread + +``` +./configure --with-cavium-v=../CNN55XX-SDK --enable-asynccrypt --enable-aesni --enable-intelasm --enable-sp --enable-sp-asm CFLAGS="-DWC_NO_ASYNC_THREADING" && make + +sudo ./wolfcrypt/benchmark/benchmark + +wolfCrypt Benchmark (block bytes 1048576, min 1.0 sec each) +RNG SW 135 MB took 1.012 seconds, 133.356 MB/s Cycles per byte = 25.69 +RNG HW 85 MB took 1.049 seconds, 81.039 MB/s Cycles per byte = 42.27 +AES-128-CBC-enc SW 845 MB took 1.001 seconds, 844.293 MB/s Cycles per byte = 4.06 +AES-128-CBC-dec SW 6060 MB took 1.001 seconds, 6055.102 MB/s Cycles per byte = 0.57 +AES-192-CBC-enc SW 710 MB took 1.004 seconds, 707.248 MB/s Cycles per byte = 4.84 +AES-192-CBC-dec SW 5055 MB 
took 1.001 seconds, 5050.086 MB/s Cycles per byte = 0.68 +AES-256-CBC-enc SW 610 MB took 1.003 seconds, 608.296 MB/s Cycles per byte = 5.63 +AES-256-CBC-dec SW 4330 MB took 1.001 seconds, 4326.604 MB/s Cycles per byte = 0.79 +AES-128-CBC-enc HW 240 MB took 1.018 seconds, 235.801 MB/s Cycles per byte = 14.53 +AES-128-CBC-dec HW 240 MB took 1.011 seconds, 237.312 MB/s Cycles per byte = 14.43 +AES-192-CBC-enc HW 220 MB took 1.021 seconds, 215.411 MB/s Cycles per byte = 15.90 +AES-192-CBC-dec HW 215 MB took 1.002 seconds, 214.516 MB/s Cycles per byte = 15.97 +AES-256-CBC-enc HW 200 MB took 1.016 seconds, 196.910 MB/s Cycles per byte = 17.40 +AES-256-CBC-dec HW 200 MB took 1.016 seconds, 196.758 MB/s Cycles per byte = 17.41 +AES-128-GCM-enc SW 3095 MB took 1.000 seconds, 3093.571 MB/s Cycles per byte = 1.11 +AES-128-GCM-dec SW 3090 MB took 1.001 seconds, 3087.702 MB/s Cycles per byte = 1.11 +AES-192-GCM-enc SW 2825 MB took 1.002 seconds, 2820.654 MB/s Cycles per byte = 1.21 +AES-192-GCM-dec SW 2815 MB took 1.000 seconds, 2814.153 MB/s Cycles per byte = 1.22 +AES-256-GCM-enc SW 2550 MB took 1.001 seconds, 2548.379 MB/s Cycles per byte = 1.34 +AES-256-GCM-dec SW 2555 MB took 1.002 seconds, 2550.183 MB/s Cycles per byte = 1.34 +AES-128-GCM-enc HW 135 MB took 1.018 seconds, 132.618 MB/s Cycles per byte = 25.83 +AES-128-GCM-dec HW 130 MB took 1.022 seconds, 127.202 MB/s Cycles per byte = 26.93 +AES-192-GCM-enc HW 135 MB took 1.019 seconds, 132.435 MB/s Cycles per byte = 25.86 +AES-192-GCM-dec HW 130 MB took 1.025 seconds, 126.789 MB/s Cycles per byte = 27.02 +AES-256-GCM-enc HW 135 MB took 1.019 seconds, 132.418 MB/s Cycles per byte = 25.87 +AES-256-GCM-dec HW 130 MB took 1.023 seconds, 127.071 MB/s Cycles per byte = 26.96 +CHACHA SW 3245 MB took 1.001 seconds, 3241.680 MB/s Cycles per byte = 1.06 +CHA-POLY SW 1930 MB took 1.000 seconds, 1929.817 MB/s Cycles per byte = 1.77 +MD5 SW 710 MB took 1.005 seconds, 706.678 MB/s Cycles per byte = 4.85 +POLY1305 SW 4850 MB took 1.000 
seconds, 4849.127 MB/s Cycles per byte = 0.71 +SHA SW 560 MB took 1.008 seconds, 555.558 MB/s Cycles per byte = 6.17 +SHA-224 SW 460 MB took 1.002 seconds, 459.021 MB/s Cycles per byte = 7.46 +SHA-256 SW 460 MB took 1.002 seconds, 459.013 MB/s Cycles per byte = 7.46 +SHA-384 SW 690 MB took 1.002 seconds, 688.368 MB/s Cycles per byte = 4.98 +SHA-512 SW 690 MB took 1.002 seconds, 688.414 MB/s Cycles per byte = 4.98 +SHA3-224 SW 330 MB took 1.007 seconds, 327.713 MB/s Cycles per byte = 10.45 +SHA3-256 SW 310 MB took 1.000 seconds, 309.909 MB/s Cycles per byte = 11.05 +SHA3-384 SW 235 MB took 1.007 seconds, 233.355 MB/s Cycles per byte = 14.68 +SHA3-512 SW 170 MB took 1.027 seconds, 165.547 MB/s Cycles per byte = 20.69 +HMAC-MD5 SW 705 MB took 1.002 seconds, 703.344 MB/s Cycles per byte = 4.87 +HMAC-MD5 HW 62670 MB took 1.000 seconds,62666.115 MB/s Cycles per byte = 0.05 +HMAC-SHA SW 555 MB took 1.000 seconds, 554.964 MB/s Cycles per byte = 6.17 +HMAC-SHA HW 62745 MB took 1.000 seconds,62744.312 MB/s Cycles per byte = 0.05 +HMAC-SHA224 SW 475 MB took 1.005 seconds, 472.870 MB/s Cycles per byte = 7.24 +HMAC-SHA224 HW 62415 MB took 1.000 seconds,62412.262 MB/s Cycles per byte = 0.05 +HMAC-SHA256 SW 475 MB took 1.005 seconds, 472.710 MB/s Cycles per byte = 7.25 +HMAC-SHA256 HW 63185 MB took 1.000 seconds,63180.255 MB/s Cycles per byte = 0.05 +HMAC-SHA384 SW 690 MB took 1.005 seconds, 686.794 MB/s Cycles per byte = 4.99 +HMAC-SHA384 HW 62575 MB took 1.000 seconds,62573.195 MB/s Cycles per byte = 0.05 +HMAC-SHA512 SW 690 MB took 1.004 seconds, 687.563 MB/s Cycles per byte = 4.98 +HMAC-SHA512 HW 62430 MB took 1.000 seconds,62428.497 MB/s Cycles per byte = 0.05 +RSA 2048 public SW 3900 ops took 1.026 sec, avg 0.263 ms, 3801.211 ops/sec +RSA 2048 private SW 300 ops took 1.035 sec, avg 3.452 ms, 289.722 ops/sec +RSA 2048 public HW 140900 ops took 1.001 sec, avg 0.007 ms, 140825.228 ops/sec +RSA 2048 private HW 8300 ops took 1.004 sec, avg 0.121 ms, 8267.789 ops/sec +DH 2048 key 
gen SW 1010 ops took 1.004 sec, avg 0.994 ms, 1005.939 ops/sec +DH 2048 agree SW 1000 ops took 1.005 sec, avg 1.005 ms, 995.404 ops/sec +ECC 256 key gen SW 1090 ops took 1.001 sec, avg 0.918 ms, 1089.153 ops/sec +ECDHE 256 agree SW 1400 ops took 1.038 sec, avg 0.742 ms, 1348.211 ops/sec +ECDSA 256 sign SW 1400 ops took 1.076 sec, avg 0.769 ms, 1300.595 ops/sec +ECDSA 256 verify SW 1900 ops took 1.016 sec, avg 0.535 ms, 1870.353 ops/sec +ECDHE 256 agree HW 10500 ops took 1.001 sec, avg 0.095 ms, 10485.383 ops/sec +ECDSA 256 sign HW 22200 ops took 1.001 sec, avg 0.045 ms, 22169.233 ops/sec +ECDSA 256 verify HW 7500 ops took 1.012 sec, avg 0.135 ms, 7408.213 ops/sec +``` + + +## Support + +For questions or issues email us at support@wolfssl.com. diff --git a/wolfcrypt/src/port/cavium/README_Octeon.md b/wolfcrypt/src/port/cavium/README_Octeon.md index b2670d02e..588f00072 100644 --- a/wolfcrypt/src/port/cavium/README_Octeon.md +++ b/wolfcrypt/src/port/cavium/README_Octeon.md @@ -1,3 +1,235 @@ -# Cavium Octeon III CN7300 +# Octeon III -Please contact wolfSSL at info@wolfssl.com to request an evaluation. +Guide for setting up wolfSSL on the Octeon III CN7300 + +## Octeon SDK + +```sh +sudo yum install libuuid-devel +sudo yum install perl-Env + +sudo rpm -i OCTEON-SDK-5.1.0-609.x86_64.rpm +``` + +The OCTEON-SDK package has been successfully installed under the +/usr/local/Cavium_Networks directory. + +The installation requires the OCTEON_MODEL environment variable +to be set. To set this environment variable, cd to the +/usr/local/Cavium_Networks/OCTEON-SDK directory, and invoke + + source env-setup + +script. Valid OCTEON_MODELs are listed in octeon-models.txt file +under OCTEON-SDK directory. + +You may want to copy the OCTEON-SDK package to your home directory to allow +modification without root privileges. 
+ +For more information please refer to the online SDK documentation: +file:///usr/local/Cavium_Networks/OCTEON-SDK/docs/html/index.html + +```sh +sudo rpm -i OCTEON-CRYPTO-CORE-5.1.0-01.x86_64.rpm +``` + +The OCTEON-CRYPTO-CORE is installed under +/usr/local/Cavium_Networks/OCTEON-SDK/components/crypto-api/core directory. +This package installs the following sources. + * Crypto-Core API Sources + * Sample Crypto-Core Test Applications + +Please refer to following documentation under +/usr/local/Cavium_Networks/OCTEON-SDK/components/crypto-api/core directory + * README.txt - contains build instructions and other details + * Release_Notes.txt - contains change history + +```sh +sudo rpm -i OCTEON-LINUX-5.1.0-609.x86_64.rpm +``` + +The Linux Kernel has been successfully installed under the directory +/usr/local/Cavium_Networks/OCTEON-SDK/linux + +Please refer to file:///usr/local/Cavium_Networks/OCTEON-SDK/docs/html/linux.html +on how to use Linux on the OCTEON. + + +Final Setup: + +```sh +cp -r /usr/local/Cavium_Networks/OCTEON-SDK/ ~ +cd OCTEON-SDK +source env-setup OCTEON_CN73XX + +cd examples +make +``` + +Setup for CN73XX board EVB7304 + +```sh +setenv qlm2_mode pcie +setenv pcie2_mode ep +setenv pcie2_gen 2 +setenv pcie2_lanes 4 +setenv qlm2_mode sgmii + +setenv qlm4_mode sata + +#setenv ethact octeth0 +#setenv ethprime octeth0 +dhcp + +saveenv +``` + +## Building bootloader + +```sh +# On Host +cd OCTEON-SDK/bootloader/u-boot +make distclean +make octeon_ebb7304 +make + +cp *.bin /mnt/cf1 + +# On Target +fatls mmc 1 +fatload mmc 1 $(loadaddr) u-boot-octeon_ebb7304.bin + +bootloadervalidate +bootloaderupdate +``` + +Note: You must run `source env-setup OCTEON_CN73XX` anytime a new shell is opened to setup the build environment. + +## Building Linux Kernel (Busybox) + +```sh +# On Host +su root +cd OCTEON-SDK +source env-setup OCTEON_CN73XX +cd linux +make clean +cd embedded_rootfs +make menuconfig +cd .. 
+make kernel +make strip + +cp ./kernel/linux/vmlinux.64 /mnt/cf1 +``` + +```sh +# On Target +telnet 192.168.0.114 9761 + +fatls mmc 1 +fatload mmc 1 $(loadaddr) vmlinux.64 +bootoctlinux $(loadaddr) coremask=0xffff root=/dev/sda2 mem=0 +``` + +Shortcut macro from U-Boot: + +```sh +# On Target +setenv linux_mmc 'fatload mmc 1 $(loadaddr) vmlinux.64;bootoctlinux $(loadaddr) coremask=0xffff root=/dev/sda2 mem=0' +run linux_mmc +saveenv +``` + +## Building Linux Debian + +```sh +su root +cd OCTEON-SDK +source env-setup OCTEON_CN73XX + +cd linux +make kernel-deb + +# Identify external /dev/sd* +fdisk -l + +# Edit /etc/fstab. Replace "sdb" with the device name determined from above. +/dev/sdb1 /mnt/cf1 auto noauto,noatime,user 0 0 +/dev/sdb2 /mnt/cf2 ext3 noauto,noatime,user 0 0 + +mkdir -p /mnt/cf1 +mkdir -p /mnt/cf2 + +cd debian +make DISK=/dev/sdb compact-flash +cd .. +make kernel-deb flash + +usb start +fatls usb 0 +fatload usb 0 $(loadaddr) vmlinux.64 + +fatls mmc 0 +fatload mmc 0 $(loadaddr) vmlinux.64 +bootoctlinux $(loadaddr) coremask=0xffff root=/dev/mmcblk0p2 mem=0 rootdelay=5 +``` + +### Setting up default boot + +```sh +setenv bootcmd 'fatload mmc 0 $(loadaddr) vmlinux.64; bootoctlinux $(loadaddr) coremask=0xffff root=/dev/mmcblk0p2 mem=0 rootdelay=5' +saveenv +reset +``` + +### Debian Packages + +```sh +vi /etc/apt/sources.list +deb http://archive.debian.org/debian/ jessie main contrib non-free +deb-src http://archive.debian.org/debian/ jessie main contrib non-free +#deb-src http://archive.debian.org/ jessie/updates main contrib non-free +#deb http://archive.debian.org/ jessie/updates main contrib non-free +``` + +## wolfSSL Building for Octeon + +```sh +cd examples +ln -s ../../wolfssl wolfssl +cd wolfssl +./autogen.sh +./configure --host=mips64 CC="mips64-octeon-linux-gnu-gcc -mabi=64" \ + --with-octeon-sync=../OCTEON-SDK OCTEON_OBJ=obj-octeon3 \ + --enable-cryptocb --enable-des3 CPPFLAGS="-DWOLFSSL_AES_DIRECT" \ + CFLAGS="-Wno-error=redundant-decls" +make +
+``` + +Installing to USB media for use on Octeon Board: + +```sh +cp -r src /run/media/dgarske/OCTEON/ +cp -r wolfcrypt/ /run/media/dgarske/OCTEON/ +cp -r wolfssl /run/media/dgarske/OCTEON/ +cp -r certs /run/media/dgarske/OCTEON/ +``` + + +## Remote Access + +### UART and Telnet + +EBB7304_DEFAULT D8-80-39-7D-6D-0B + +telnet 192.168.0.114 9760 +telnet 192.168.0.114 9761 + +date 070216502019 + + +## Support + +For questions please email wolfSSL at support@wolfssl.com diff --git a/wolfcrypt/src/port/cavium/cavium_nitrox.c b/wolfcrypt/src/port/cavium/cavium_nitrox.c new file mode 100644 index 000000000..4876af261 --- /dev/null +++ b/wolfcrypt/src/port/cavium/cavium_nitrox.c @@ -0,0 +1,1234 @@ +/* cavium_nitrox.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_CAVIUM + +#include +#include +#include +#include +#ifndef NO_RSA + #include +#endif +#ifndef NO_AES + #include +#endif +#ifdef HAVE_ECC + #include +#endif +#include + +#include +#ifndef HAVE_CAVIUM_V + #include "cavium_ioctl.h" +#else + #include "cavium_sym_crypto.h" + #include "cavium_asym_crypto.h" +#endif +#include +#include /* For ntohs */ + +static CspHandle mLastDevHandle = INVALID_DEVID; + +#ifndef NITROX_MAX_BUF_LEN + /* max buffer pool size is 32768, but need to leave room for request */ + #define NITROX_MAX_BUF_LEN (32768U / 2) +#endif + +/* Map a Nitrox driver status onto a wolfCrypt error code. 0 and 1 are treated as success; unrecognized codes are printed and reported as ASYNC_INIT_E. */ +int NitroxTranslateResponseCode(int ret) +{ + switch (ret) { + case EAGAIN: + case ERR_REQ_PENDING: + case REQUEST_PENDING: + ret = WC_PENDING_E; + break; + case ERR_REQ_TIMEOUT: + ret = WC_TIMEOUT_E; + break; + case ERR_DATA_LEN_INVALID: + ret = BAD_FUNC_ARG; + break; + case ERR_ECC_SIGNATURE_MISMATCH: + ret = SIG_VERIFY_E; + break; + case ERR_PKCS_DECRYPT_INCORRECT: + ret = ASN_SIG_CONFIRM_E; /* RSA_PAD_E */ + break; + case ERR_GC_ICV_MISCOMPARE: + ret = AES_GCM_AUTH_E; + break; + case 0: + case 1: + ret = 0; /* treat as success */ + break; + default: + printf("NitroxTranslateResponseCode Unknown ret=0x%x\n", ret); + ret = ASYNC_INIT_E; + } + return ret; +} + +/* Reset the event fields of an async device to the pending state before a request is submitted. */ +static WC_INLINE void NitroxDevClear(WC_ASYNC_DEV* dev) +{ + /* values that must be reset prior to calling algo */ + /* this is because operation may complete before added to event list */ + dev->event.ret = WC_PENDING_E; + dev->event.state = WOLF_EVENT_STATE_PENDING; + dev->event.reqId = 0; +} + +/* Return the handle recorded by the last NitroxOpenDevice call (initially INVALID_DEVID). */ +CspHandle NitroxGetDeviceHandle(void) +{ + return mLastDevHandle; +} + +/* Open a Nitrox device and record its handle in mLastDevHandle; returns the handle or -1 on failure. */ +CspHandle NitroxOpenDevice(int dma_mode, int dev_id) +{ + mLastDevHandle = INVALID_DEVID; + +#ifdef
HAVE_CAVIUM_V + (void)dma_mode; + + if (CspInitialize(dev_id, &mLastDevHandle)) { + return -1; + } + +#else + Csp1CoreAssignment core_assign; + Uint32 device; + + if (CspInitialize(CAVIUM_DIRECT, CAVIUM_DEV_ID)) { + return -1; + } + if (Csp1GetDevType(&device)) { + return -1; + } + if (device != NPX_DEVICE) { + if (ioctl(gpkpdev_hdlr[CAVIUM_DEV_ID], IOCTL_CSP1_GET_CORE_ASSIGNMENT, + (Uint32 *)&core_assign)!= 0) { + return -1; + } + } + CspShutdown(CAVIUM_DEV_ID); + + /* a zero return from CspInitialize is replaced by dev_id as the stored handle */ + mLastDevHandle = CspInitialize(dma_mode, dev_id); + if (mLastDevHandle == 0) { + mLastDevHandle = dev_id; + } + +#endif /* HAVE_CAVIUM_V */ + + return mLastDevHandle; +} + +/* Open the device identified by CAVIUM_DEV_ID in CAVIUM_DIRECT mode. */ +CspHandle NitroxOpenDeviceDefault(void) +{ + return NitroxOpenDevice(CAVIUM_DIRECT, CAVIUM_DEV_ID); +} + + +/* Allocate a driver context of the given type for dev and record type and device id in dev->nitrox. Returns 0 on success, -1 on a NULL dev or allocation failure. */ +int NitroxAllocContext(WC_ASYNC_DEV* dev, CspHandle devId, + context_type_t type) +{ + int ret; + + if (dev == NULL) { + return -1; + } + + /* If invalid handle provided, use last open one */ + if (devId == INVALID_DEVID) { + devId = NitroxGetDeviceHandle(); + } + +#ifdef HAVE_CAVIUM_V + ret = CspAllocContext(devId, type, &dev->nitrox.contextHandle); +#else + ret = CspAllocContext(type, &dev->nitrox.contextHandle, devId); +#endif + if (ret != 0) { + return -1; + } + + dev->nitrox.type = type; + dev->nitrox.devId = devId; + + return 0; +} + +/* Release the driver context recorded in dev->nitrox; a NULL dev is ignored. */ +void NitroxFreeContext(WC_ASYNC_DEV* dev) +{ + if (dev == NULL) { + return; + } + +#ifdef HAVE_CAVIUM_V + CspFreeContext(dev->nitrox.devId, dev->nitrox.type, + dev->nitrox.contextHandle); +#else + CspFreeContext(dev->nitrox.type, dev->nitrox.contextHandle, + dev->nitrox.devId); +#endif +} + +/* Shut down the device for a non-negative handle; mLastDevHandle is not modified here. */ +void NitroxCloseDevice(CspHandle devId) +{ + if (devId >= 0) { + CspShutdown(devId); + } +} + +#if defined(WOLFSSL_ASYNC_CRYPT) + +/* Poll one request for completion. The translated status is stored in event->ret; the value returned is the raw driver status (BAD_FUNC_ARG when dev or event is NULL). */ +int NitroxCheckRequest(WC_ASYNC_DEV* dev, WOLF_EVENT* event) +{ + int ret = BAD_FUNC_ARG; + if (dev && event) { + ret = CspCheckForCompletion(dev->nitrox.devId, event->reqId); + event->ret = NitroxTranslateResponseCode(ret); + } + return ret; +} + +int
NitroxCheckRequests(WC_ASYNC_DEV* dev, + CspMultiRequestStatusBuffer* req_stat_buf) +{ + /* Collect all available results for dev into req_stat_buf and return the translated driver status (BAD_FUNC_ARG on NULL arguments). */ + int ret; + + if (dev == NULL || req_stat_buf == NULL) + return BAD_FUNC_ARG; + +#ifdef HAVE_CAVIUM_V + ret = CspGetAllResults(req_stat_buf, dev->nitrox.devId); +#else + word32 res_count = 0; + word32 buf_size = sizeof(req_stat_buf->req); + ret = CspGetAllResults(req_stat_buf->req, buf_size, &res_count, + dev->nitrox.devId); + /* store the result count in the caller's buffer (was an undeclared identifier) */ + req_stat_buf->count = res_count; +#endif + + return NitroxTranslateResponseCode(ret); +} + + +#ifndef NO_RSA + +/* Submit a modular exponentiation request for key. outLen is not used for the operation and may be NULL. Returns the translated driver status. */ +int NitroxRsaExptMod(const byte* in, word32 inLen, + byte* exponent, word32 expLen, + byte* modulus, word32 modLen, + byte* out, word32* outLen, RsaKey* key) +{ + int ret; + + if (key == NULL || in == NULL || inLen == 0 || exponent == NULL || + modulus == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + + (void)outLen; + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspMe(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, + CAVIUM_DPORT, modLen, expLen, inLen, modulus, exponent, (Uint8*)in, + out, &key->asyncDev.nitrox.reqId); + #if 0 + /* TODO: Try MeCRT */ + ret = CspMeCRT(); + #endif +#else + /* Not implemented/supported */ + ret = NOT_COMPILED_IN; +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + /* outLen is optional here, so do not dereference it blindly */ + printf("NitroxRsaExptMod: ret %x, req %lx in %p (%d), out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, in, inLen, out, + (outLen != NULL) ? *outLen : 0); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return ret; +} + +int NitroxRsaPublicEncrypt(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + int ret; + + if (key == NULL || in == NULL || out == NULL || + outLen < (word32)key->n.raw.len) { + return BAD_FUNC_ARG; + } + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspPkcs1v15Enc(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, BT2, key->n.raw.len, key->e.raw.len, + (word16)inLen,
key->n.raw.buf, key->e.raw.buf, (byte*)in, out, + &key->asyncDev.nitrox.reqId); +#else + ret = CspPkcs1v15Enc(CAVIUM_REQ_MODE, BT2, key->n.raw.len, key->e.raw.len, + (word16)inLen, key->n.raw.buf, key->e.raw.buf, (byte*)in, out, + &key->asyncDev.nitrox.reqId, key->asyncDev.nitrox.devId); +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxRsaPublicEncrypt: ret %x, req %lx in %p (%d), out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, in, inLen, out, outLen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return key->n.raw.len; +} + + +int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen, byte* out, + word32* outLen, RsaKey* key) +{ + int ret; + + if (key == NULL || in == NULL || out == NULL || + inLen != (word32)key->n.raw.len) { + return BAD_FUNC_ARG; + } + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspPkcs1v15CrtDec(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, BT2, key->n.raw.len, key->q.raw.buf, + key->dQ.raw.buf, key->p.raw.buf, key->dP.raw.buf, key->u.raw.buf, + (byte*)in, (Uint16*)outLen, out, &key->asyncDev.nitrox.reqId); +#else + ret = CspPkcs1v15CrtDec(CAVIUM_REQ_MODE, BT2, key->n.raw.len, + key->q.raw.buf, key->dQ.raw.buf, key->p.raw.buf, key->dP.raw.buf, + key->u.raw.buf, (byte*)in, &outLen, out, &key->asyncDev.nitrox.reqId, + key->asyncDev.nitrox.devId); +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxRsaPrivateDecrypt: ret %x, req %lx in %p (%d), out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, in, inLen, out, *outLen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + *outLen = ntohs(*outLen); + + return *outLen; +} + + +int NitroxRsaSSL_Sign(const byte* in, word32 inLen, byte* out, + word32 outLen, RsaKey* key) +{ + int ret; + + if (key == NULL || in == NULL || out == NULL || inLen == 0 || outLen < + (word32)key->n.raw.len) { + return BAD_FUNC_ARG; + } + + /* init 
return codes */ + NitroxDevClear(&key->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspPkcs1v15CrtEnc(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, BT1, key->n.raw.len, (word16)inLen, + key->q.raw.buf, key->dQ.raw.buf, key->p.raw.buf, key->dP.raw.buf, + key->u.raw.buf, (byte*)in, out, &key->asyncDev.nitrox.reqId); +#else + ret = CspPkcs1v15CrtEnc(CAVIUM_REQ_MODE, BT1, key->n.raw.len, (word16)inLen, + key->q.raw.buf, key->dQ.raw.buf, key->p.raw.buf, key->dP.raw.buf, + key->u.raw.buf, (byte*)in, out, &key->asyncDev.nitrox.reqId, + key->asyncDev.nitrox.devId); +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxRsaSSL_Sign: ret %x, req %lx in %p (%d), out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, in, inLen, out, outLen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return key->n.raw.len; +} + + +int NitroxRsaSSL_Verify(const byte* in, word32 inLen, byte* out, + word32* outLen, RsaKey* key) +{ + int ret; + + if (key == NULL || in == NULL || out == NULL || + inLen != (word32)key->n.raw.len) { + return BAD_FUNC_ARG; + } + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspPkcs1v15Dec(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, BT1, key->n.raw.len, key->e.raw.len, + key->n.raw.buf, key->e.raw.buf, (byte*)in, (Uint16*)outLen, out, + &key->asyncDev.nitrox.reqId); +#else + ret = CspPkcs1v15Dec(CAVIUM_REQ_MODE, BT1, key->n.raw.len, key->e.raw.len, + key->n.raw.buf, key->e.raw.buf, (byte*)in, &outLen, out, + &key->asyncDev.nitrox.reqId, key->asyncDev.nitrox.devId); +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxRsaSSL_Verify: ret %x, req %lx in %p (%d), out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, in, inLen, out, *outLen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + *outLen = ntohs(*outLen); + + return *outLen; +} +#endif /* !NO_RSA */ + + + +#if 
defined(HAVE_ECC) && defined(HAVE_CAVIUM_V) + + +/* Translate the key's curve id into the driver's CurveId; BAD_FUNC_ARG for a NULL key/dp or a curve not listed below. */ +static int NitroxEccGetCid(ecc_key* key, CurveId* cid) +{ + int ret = 0; + + if (key == NULL || key->dp == NULL) + return BAD_FUNC_ARG; + + switch (key->dp->id) { + #if 0 /* ECDH P521 appears to be broken on Nitrox V v1.4 SDK */ + case ECC_SECP521R1: + *cid = P521; + break; + #endif + case ECC_SECP384R1: + *cid = P384; + break; + case ECC_SECP256R1: + *cid = P256; + break; + case ECC_SECP224R1: + *cid = P224; + break; + case ECC_SECP192R1: + *cid = P192; + break; + default: + ret = BAD_FUNC_ARG; + break; + } + + return ret; +} +/* Return 1 when NitroxEccGetCid accepts the key's curve, 0 otherwise. */ +int NitroxEccIsCurveSupported(ecc_key* key) +{ + CurveId cid; + return NitroxEccGetCid(key, &cid) == 0 ? 1 : 0; +} + +/* Curve size passed through ROUNDUP8; key and key->dp are not checked here. */ +int NitroxEccGetSize(ecc_key* key) +{ + return ROUNDUP8(key->dp->size); +} + +/* Left-pad bi with zero bytes, in place, until it is padTo bytes long. No capacity check is made on bi->buf. Always returns 0. */ +int NitroxEccPad(WC_BIGINT* bi, word32 padTo) +{ + if (bi->len < padTo) { + int x = padTo - bi->len; + /* source and destination overlap whenever x < bi->len, so memmove semantics are required */ + XMEMMOVE(bi->buf + x, bi->buf, bi->len); + XMEMSET(bi->buf, 0, x); + bi->len = padTo; + } + return 0; +} + +/* For a supported curve, copy s out of the combined buffer held in r and trim r to the curve size. Always returns 0. */ +int NitroxEccRsSplit(ecc_key* key, WC_BIGINT* r, WC_BIGINT* s) +{ + if (NitroxEccIsCurveSupported(key)) { + int rSz = NitroxEccGetSize(key); + + /* split r and s */ + XMEMCPY(s->buf, r->buf + rSz, key->dp->size); + XMEMSET(r->buf + key->dp->size, 0, key->dp->size); + r->len = key->dp->size; + s->len = key->dp->size; + } + return 0; +} + +#ifdef HAVE_ECC_DHE +int NitroxEcdh(ecc_key* key, + WC_BIGINT* k, WC_BIGINT* xG, WC_BIGINT* yG, + byte* out, word32* outlen, WC_BIGINT* q) +{ + int ret; + CurveId cid; + word32 curveSz; + + ret = NitroxEccGetCid(key, &cid); + if (ret < 0) + return ret; + + /* out buffer requires spaces for X and Y even though only X is used */ + curveSz = NitroxEccGetSize(key); + if (*outlen < curveSz * 2) + return BUFFER_E; + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + + ret = CspECPointMul(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, cid, + xG->buf, yG->buf, q->buf, k->len, k->buf, out, out + curveSz,
+ &key->asyncDev.nitrox.reqId); + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxEcdh: ret %x, req %lx out %p (%d)\n", + ret, key->asyncDev.nitrox.reqId, out, *outlen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return ret; +} +#endif /* HAVE_ECC_DHE */ + +#ifdef HAVE_ECC_SIGN +/* Submit an ECDSA sign request; the driver writes its output into r->buf (s is unused here). Returns the translated driver status. */ +int NitroxEcdsaSign(ecc_key* key, + WC_BIGINT* m, WC_BIGINT* d, WC_BIGINT* k, + WC_BIGINT* r, WC_BIGINT* s, WC_BIGINT* q, WC_BIGINT* n) +{ + int ret; + CurveId cid; + + ret = NitroxEccGetCid(key, &cid); + if (ret < 0) + return ret; + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + + (void)s; /* placed at end of R */ + + ret = CspECDSASign(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, cid, q->buf, n->buf, k->len, k->buf, + m->len, m->buf, d->buf, r->buf, &key->asyncDev.nitrox.reqId); + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxEcdsaSign: ret %x, req %lx msg %p (%d), r %p\n", + ret, key->asyncDev.nitrox.reqId, m->buf, m->len, r->buf); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return ret; +} +#endif /* HAVE_ECC_SIGN */ + +#ifdef HAVE_ECC_VERIFY +/* Submit an ECDSA verify request after zero-padding r and s to the curve size. *stat is set to 1 unconditionally; a failed verify is reported through the return value. */ +int NitroxEcdsaVerify(ecc_key* key, + WC_BIGINT* m, WC_BIGINT* xp, WC_BIGINT* yp, + WC_BIGINT* r, WC_BIGINT* s, + WC_BIGINT* q, WC_BIGINT* n, int* stat) +{ + int ret; + CurveId cid; + int curveSz; + + ret = NitroxEccGetCid(key, &cid); + if (ret < 0) + return ret; + + /* read the curve size only after key and key->dp were validated above */ + curveSz = key->dp->size; + + /* init return codes */ + NitroxDevClear(&key->asyncDev); + + /* adjust r and s for leading zero pad */ + NitroxEccPad(r, curveSz); + NitroxEccPad(s, curveSz); + + ret = CspECDSAVerify(key->asyncDev.nitrox.devId, CAVIUM_REQ_MODE, + CAVIUM_SSL_GRP, CAVIUM_DPORT, cid, r->buf, s->buf, m->len, m->buf, + n->buf, q->buf, xp->buf, yp->buf, &key->asyncDev.nitrox.reqId); + + /* hardware will ret failure if verify fails */ + *stat = 1; + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxEcdsaVerify: ret %x, req %lx msg %p (%d), r %p, s%p\n", + ret,
key->asyncDev.nitrox.reqId, m->buf, m->len, r->buf, s->buf); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return ret; +} +#endif /* HAVE_ECC_VERIFY */ +#endif /* HAVE_ECC */ + + +#ifndef NO_AES + +#if defined(HAVE_AES_CBC) || defined(HAVE_AESGCM) + +static int NitroxAesGetType(Aes* aes, AesType* type) +{ + int ret = 0; + switch (aes->keylen) { + case 16: + *type = AES_128_BIT; + break; + case 24: + *type = AES_192_BIT; + break; + case 32: + *type = AES_256_BIT; + break; + default: + ret = BAD_FUNC_ARG; + break; + } + return ret; +} + +static int NitroxAesEncrypt(Aes* aes, int aes_algo, + const byte* key, const byte* iv, + byte* out, const byte* in, word32 length, + word32 aad_len, const byte* aad, byte* tag) +{ + int ret = 0, cav_ret = 0; + int offset = 0; + AesType aes_type; + const int blockMode = CAVIUM_BLOCKING; + + ret = NitroxAesGetType(aes, &aes_type); + if (ret != 0) { + return ret; + } + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&aes->asyncDev); + + while (length > 0) { + word32 slen = length; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + #ifdef HAVE_CAVIUM_V + cav_ret = CspEncryptAes(aes->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + aes->asyncDev.nitrox.contextHandle, FROM_DPTR, FROM_CTX, aes_algo, + aes_type, (byte*)key, (byte*)iv, aad_len, (byte*)aad, (byte*)tag, + (word16)slen, (byte*)in + offset, out + offset, + &aes->asyncDev.nitrox.reqId); + #else + if (aes_type != AES_CBC) { + ret = NOT_COMPILED_IN; + break; + } + + (void)aad_len; + (void)aad; + (void)tag; + + cav_ret = CspEncryptAes(blockMode, aes->asyncDev.nitrox.contextHandle, + CAVIUM_NO_UPDATE, aes_type, + (word16)slen, (byte*)in + offset, out + offset, + (byte*)iv, (byte*)key, + &aes->asyncDev.nitrox.reqId, aes->asyncDev.nitrox.devId); + #endif + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + + length -= slen; + offset 
+= slen; + + XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxAesEncrypt: ret %x (%d), algo %d, in %p, out %p, sz %d, " + "iv %p, aad %p (%d), tag %p\n", + cav_ret, ret, aes_algo, in, out, offset, iv, aad, aad_len, tag); +#endif + + return ret; +} + +#ifdef HAVE_AES_DECRYPT +static int NitroxAesDecrypt(Aes* aes, int aes_algo, + const byte* key, const byte* iv, + byte* out, const byte* in, word32 length, + word32 aad_len, const byte* aad, const byte* tag) +{ + int ret = 0, cav_ret = 0; + int offset = 0; + AesType aes_type; + const int blockMode = CAVIUM_BLOCKING; + + ret = NitroxAesGetType(aes, &aes_type); + if (ret != 0) { + return ret; + } + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&aes->asyncDev); + + while (length > 0) { + word32 slen = length; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + #ifdef HAVE_CAVIUM_V + cav_ret = CspDecryptAes(aes->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + aes->asyncDev.nitrox.contextHandle, FROM_DPTR, FROM_CTX, aes_algo, + aes_type, (byte*)key, (byte*)iv, aad_len, (byte*)aad, (byte*)tag, + (word16)slen, (byte*)in + offset, out + offset, + &aes->asyncDev.nitrox.reqId); + #else + if (aes_type != AES_CBC) { + ret = NOT_COMPILED_IN; + break; + } + + (void)aad_len; + (void)aad; + (void)tag; + + cav_ret = CspDecryptAes(blockMode, aes->asyncDev.nitrox.contextHandle, + CAVIUM_NO_UPDATE, aes_sz_type, (word16)slen, (byte*)in + offset, + out + offset, (byte*)iv, (byte*)key, + &aes->asyncDev.nitrox.reqId, aes->asyncDev.nitrox.devId); + #endif + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + length -= slen; + offset += slen; + + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxAesDecrypt: ret %x (%d), algo %d, in %p, out %p, sz 
%d, " + "iv %p, aad %p (%d), tag %p\n", + cav_ret, ret, aes_algo, in, out, offset, iv, aad, aad_len, tag); +#endif + + return ret; +} +#endif /* HAVE_AES_DECRYPT */ + +#ifdef HAVE_AES_CBC +/* AES-CBC encrypt using the key in aes->devKey and the IV in aes->reg. */ +int NitroxAesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 length) +{ + return NitroxAesEncrypt(aes, AES_CBC, + (byte*)aes->devKey, (byte*)aes->reg, + out, in, length, 0, NULL, NULL); +} + +#ifdef HAVE_AES_DECRYPT +/* AES-CBC decrypt using the key in aes->devKey and the IV in aes->reg. */ +int NitroxAesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 length) +{ + return NitroxAesDecrypt(aes, AES_CBC, + (byte*)aes->devKey, (byte*)aes->reg, + out, in, length, 0, NULL, NULL); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AES_CBC */ + +#ifdef HAVE_AESGCM +/* AES-GCM encrypt. An IV shorter than AES_BLOCK_SIZE is copied into a zero-filled 16-byte local buffer before being handed to the hardware. keySz and authTagSz are unused. */ +int NitroxAesGcmEncrypt(Aes* aes, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + const byte* ivTmp = iv; + byte ivLcl[AES_BLOCK_SIZE]; + + (void)keySz; + (void)authTagSz; + + /* Nitrox HW requires IV buffer to be 16-bytes */ + if (ivSz < AES_BLOCK_SIZE) { + ivTmp = ivLcl; + /* zero the tail so no uninitialized stack bytes are passed to the device */ + XMEMSET(ivLcl, 0, sizeof(ivLcl)); + XMEMCPY(ivLcl, iv, ivSz); + } + + return NitroxAesEncrypt(aes, AES_GCM, key, ivTmp, out, in, sz, + authInSz, authIn, authTag); +} + +#ifdef HAVE_AES_DECRYPT +/* AES-GCM decrypt. An IV shorter than AES_BLOCK_SIZE is copied into a zero-filled 16-byte local buffer before being handed to the hardware. keySz and authTagSz are unused. */ +int NitroxAesGcmDecrypt(Aes* aes, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + const byte* ivTmp = iv; + byte ivLcl[AES_BLOCK_SIZE]; + + (void)keySz; + (void)authTagSz; + + /* Nitrox HW requires IV buffer to be 16-bytes */ + if (ivSz < AES_BLOCK_SIZE) { + ivTmp = ivLcl; + /* zero the tail so no uninitialized stack bytes are passed to the device */ + XMEMSET(ivLcl, 0, sizeof(ivLcl)); + XMEMCPY(ivLcl, iv, ivSz); + } + + return NitroxAesDecrypt(aes, AES_GCM, key, ivTmp, out, in, sz, + authInSz, authIn, authTag); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESGCM */ + +#endif /* HAVE_AES_CBC || HAVE_AESGCM */ +#endif /* !NO_AES */ + + +#if !defined(NO_RC4) && !defined(HAVE_CAVIUM_V) +int
NitroxArc4SetKey(Arc4* arc4, const byte* key, word32 length) +{ + if (CspInitializeRc4(CAVIUM_BLOCKING, arc4->asyncDev.nitrox.contextHandle, + length, (byte*)key, &arc4->asyncDev.nitrox.reqId, arc4->devId) != 0) { + WOLFSSL_MSG("Bad Cavium Arc4 Init"); + return ASYNC_INIT_E; + } + return 0; +} + +int NitroxArc4Process(Arc4* arc4, byte* out, const byte* in, word32 length) +{ + int ret = 0, cav_ret = 0; + int offset = 0; + const int blockMode = CAVIUM_BLOCKING; + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&arc4->asyncDev); + + while (length > 0) { + word32 slen = length; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + cav_ret = CspEncryptRc4(blockMode, + arc4->asyncDev.nitrox.contextHandle, CAVIUM_UPDATE, (word16)slen, + (byte*)in + offset, out + offset, + &arc4->asyncDev.nitrox.reqId, arc4->devId); + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + + length -= slen; + offset += slen; + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxArc4Process: ret %x (%d), in %p, output %p, sz %d\n", + cav_ret, ret, in, out, offset); +#endif + + return ret; +} +#endif /* !NO_RC4 && !HAVE_CAVIUM_V */ + + +#ifndef NO_DES3 +int NitroxDes3CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 length) +{ + int ret = 0, cav_ret = 0; + int offset = 0; + const int blockMode = CAVIUM_BLOCKING; + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&des3->asyncDev); + + while (length > 0) { + word32 slen = length; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + #ifdef HAVE_CAVIUM_V + cav_ret = CspEncrypt3Des(des3->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + des3->asyncDev.nitrox.contextHandle, FROM_DPTR, FROM_CTX, DES3_CBC, + (byte*)des3->devKey, (byte*)des3->reg, (word16)slen, + (byte*)in + offset, out + offset, &des3->asyncDev.nitrox.reqId); + #else + cav_ret = CspEncrypt3Des(blockMode, +
des3->asyncDev.nitrox.contextHandle, CAVIUM_NO_UPDATE, (word16)slen, + (byte*)in + offset, out + offset, (byte*)des3->reg, + (byte*)des3->devKey, &des3->asyncDev.nitrox.reqId, + des3->asyncDev.nitrox.devId); + #endif + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + length -= slen; + offset += slen; + + XMEMCPY(des3->reg, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxDes3CbcEncrypt: ret %x (%d), in %p, out %p, sz %d\n", + cav_ret, ret, in, out, offset); +#endif + + return ret; +} + +int NitroxDes3CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 length) +{ + int ret = 0, cav_ret = 0; + int offset = 0; + const int blockMode = CAVIUM_BLOCKING; + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&des3->asyncDev); + + while (length > 0) { + word32 slen = length; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + + #ifdef HAVE_CAVIUM_V + cav_ret = CspDecrypt3Des(des3->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + des3->asyncDev.nitrox.contextHandle, FROM_DPTR, FROM_CTX, DES3_CBC, + (byte*)des3->devKey, (byte*)des3->reg, (word16)slen, + (byte*)in + offset, out + offset, &des3->asyncDev.nitrox.reqId); + #else + cav_ret = CspDecrypt3Des(blockMode, + des3->asyncDev.nitrox.contextHandle, CAVIUM_NO_UPDATE, (word16)slen, + (byte*)in + offset, out + offset, (byte*)des3->reg, + (byte*)des3->devKey, &des3->asyncDev.nitrox.reqId, + des3->asyncDev.nitrox.devId); + #endif + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + length -= slen; + offset += slen; + + XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE); + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxDes3CbcDecrypt: ret %x (%d), in %p, out %p, sz %d\n", + cav_ret, ret, in, out, offset); +#endif + + return ret; +} +#endif /* !NO_DES3 */ + + +#ifndef NO_HMAC 
+static int NitroxHmacGetType(int type) +{ + int cav_type = -1; + + /* Determine Cavium HashType */ + switch(type) { + #ifndef NO_MD5 + case WC_MD5: + cav_type = MD5_TYPE; + break; + #endif + #ifndef NO_SHA + case WC_SHA: + cav_type = SHA1_TYPE; + break; + #endif + #ifndef NO_SHA256 + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + #ifdef HAVE_CAVIUM_V + cav_type = SHA2_SHA224; + #else + cav_type = SHA224_TYPE; + #endif + break; + #endif /* WOLFSSL_SHA224 */ + case WC_SHA256: + #ifdef HAVE_CAVIUM_V + cav_type = SHA2_SHA256; + #else + cav_type = SHA256_TYPE; + #endif + break; + #endif +#ifdef HAVE_CAVIUM_V + #ifdef WOLFSSL_SHA512 + case WC_SHA512: + #ifdef HAVE_CAVIUM_V + cav_type = SHA2_SHA512; + #else + cav_type = SHA512_TYPE; + #endif + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + #ifdef HAVE_CAVIUM_V + cav_type = SHA2_SHA384; + #else + cav_type = SHA384_TYPE; + #endif + break; + #endif + #ifdef WOLFSSL_SHA3 + case WC_SHA3_224: + cav_type = SHA3_SHA224; + break; + case WC_SHA3_256: + cav_type = SHA3_SHA256; + break; + case WC_SHA3_384: + cav_type = SHA3_SHA384; + break; + case WC_SHA3_512: + cav_type = SHA3_SHA512; + break; + #endif /* WOLFSSL_SHA3 */ +#endif /* HAVE_CAVIUM_V */ + default: + WOLFSSL_MSG("unsupported cavium hmac type"); + cav_type = -1; + break; + } + + return cav_type; +} + +int NitroxHmacUpdate(Hmac* hmac, const byte* msg, word32 length) +{ + int ret; + int cav_type = NitroxHmacGetType(hmac->macType); + const int blockMode = CAVIUM_BLOCKING; + + if (cav_type == -1) { + return NOT_COMPILED_IN; + } + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&hmac->asyncDev); + + if (hmac->innerHashKeyed == 0) { /* starting new */ + #ifdef HAVE_CAVIUM_V + int digest_sz = wc_HmacSizeByType(hmac->macType); + ret = CspHmacStart(hmac->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + hmac->asyncDev.nitrox.contextHandle, cav_type, + hmac->keyLen, (byte*)hmac->ipad, length, (Uint8*)msg, + 
digest_sz, &hmac->asyncDev.nitrox.reqId); + #else + ret = CspHmacStart(blockMode, hmac->asyncDev.nitrox.contextHandle, + cav_type, hmac->keyLen, (byte*)hmac->ipad, length, msg, + &hmac->asyncDev.nitrox.reqId, hmac->asyncDev.nitrox.devId); + #endif + + hmac->innerHashKeyed = 1; + } + else { + /* do update */ + + #ifdef HAVE_CAVIUM_V + ret = CspHmacUpdate(hmac->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + hmac->asyncDev.nitrox.contextHandle, cav_type, + length, (Uint8*)msg, &hmac->asyncDev.nitrox.reqId); + #else + ret = CspHmacUpdate(blockMode, hmac->asyncDev.nitrox.contextHandle, + cav_type, length, msg, + &hmac->asyncDev.nitrox.reqId, hmac->asyncDev.nitrox.devId); + #endif + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxHmacUpdate: ret %x, msg %p, length %d\n", ret, msg, length); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + return 0; +} + +int NitroxHmacFinal(Hmac* hmac, byte* hash, word16 hashLen) +{ + int ret; + int cav_type = NitroxHmacGetType(hmac->macType); + const int blockMode = CAVIUM_BLOCKING; + + if (cav_type == -1) { + return NOT_COMPILED_IN; + } + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&hmac->asyncDev); + +#ifdef HAVE_CAVIUM_V + ret = CspHmacFinish(hmac->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, + hmac->asyncDev.nitrox.contextHandle, cav_type, + 0, NULL, hashLen, hash, + &hmac->asyncDev.nitrox.reqId); +#else + (void)hashLen; + ret = CspHmacFinish(blockMode, hmac->asyncDev.nitrox.contextHandle, + cav_type, 0, NULL, hash, + &hmac->asyncDev.nitrox.reqId, hmac->asyncDev.nitrox.devId); +#endif + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxHmacFinal: ret %x, hash %p, hashLen %d\n", + ret, hash, hashLen); +#endif + + ret = NitroxTranslateResponseCode(ret); + if (ret != 0) { + return ret; + } + + hmac->innerHashKeyed = 0; /* tell update to start over if used again */ + + return ret; +} 
+#endif /* !NO_HMAC */ + +int NitroxRngGenerateBlock(WC_RNG* rng, byte* output, word32 sz) +{ + int ret = 0, cav_ret = 0; + word32 offset = 0; + CavReqId requestId; + const int blockMode = CAVIUM_BLOCKING; + + /* init return codes */ + if (blockMode == CAVIUM_REQ_MODE) + NitroxDevClear(&rng->asyncDev); + + while (sz > 0) { + word32 slen = sz; + if (slen > NITROX_MAX_BUF_LEN) + slen = NITROX_MAX_BUF_LEN; + + #ifdef HAVE_CAVIUM_V + cav_ret = CspTrueRandom(rng->asyncDev.nitrox.devId, blockMode, + DMA_DIRECT_DIRECT, CAVIUM_SSL_GRP, CAVIUM_DPORT, (word16)slen, + output + offset, &requestId); + #else + cav_ret = CspRandom(blockMode, (word16)slen, output + offset, + &requestId, rng->asyncDev.nitrox.devId); + #endif + ret = NitroxTranslateResponseCode(cav_ret); + if (ret != 0) { + break; + } + + sz -= slen; + offset += slen; + } + +#ifdef WOLFSSL_NITROX_DEBUG + printf("NitroxRngGenerateBlock: ret %x (%d), output %p, sz %d\n", + cav_ret, ret, output, offset); +#endif + + return ret; +} + +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#endif /* HAVE_CAVIUM */ diff --git a/wolfcrypt/src/port/intel/README.md b/wolfcrypt/src/port/intel/README.md index 4b5d971ba..4d8aefd52 100644 --- a/wolfcrypt/src/port/intel/README.md +++ b/wolfcrypt/src/port/intel/README.md @@ -1,3 +1,411 @@ # Intel QuickAssist Adapter Asynchronous Support -Please contact wolfSSL at info@wolfssl.com to request an evaluation. +The wolfSSL / wolfCrypt libraries support hardware crypto acceleration using the Intel QuickAssist adapter. This software has been tested using the Intel DH8970 and DH8950 QuickAssist adapters. + +## Overview + +Support has been added for wolfCrypt for RSA public/private (CRT/non-CRT), AES CBC/GCM, ECDH/ECDSA, DH, DES3, SHA, SHA224, SHA256, SHA384, SHA512, MD5 and HMAC. RSA padding is done via software. The wolfCrypt tests and benchmarks have asynchronous support. The wolfCrypt benchmark tool supports multi-threading.
The wolfSSL SSL/TLS async support has been extended to include all PKI, Encryption/Decryption and hashing/HMAC. An async hardware simulator has been added to test the asynchronous support without hardware. + +The Intel QuickAssist port files are located in `wolfcrypt/src/port/intel/quickassist.c` and `wolfssl/wolfcrypt/port/intel/quickassist.h`. The QuickAssist memory handling for NUMA and normal malloc is in `wolfcrypt/src/port/intel/quickassist_mem.c`. + +The asynchronous crypto files are located at `wolfcrypt/src/async.c` and `wolfssl/wolfcrypt/async.h`. These files are not in the public repository. Please contact info@wolfssl.com if interested in our asynchronous support to request an evaluation. + + +## Building + +1. Download Driver: The latest driver for QAT can be found here: https://www.intel.com/content/www/us/en/download/19734 + +2. Notes: + +* If you have the older driver installed you may need to remove it or unload the module and reboot. +* If you are using the QAT hardware hashing, you may need to disable the parameter checking, which doesn't support a last partial with a zero-length source input. Code runs and works, but parameter checking will fail. +Use `./configure --disable-param-check && sudo make install` +* If you want to use legacy algorithms like RSA 1024 bit, then use `./configure --enable-legacy-algorithms` +* Recommend not using `make -j` due to synchronization issues on dependencies. + +3. Set up `QAT` and `wolfssl` next to each other in the same folder. + +4. Build QAT Driver + +Prerequisites Ubuntu: +`sudo apt-get install libudev-dev pciutils-dev g++ pkg-config libssl-dev` +OR +Prerequisites CentOS: +`sudo yum install pciutils libudev-devel kernel-devel-$(uname -r) gcc openssl-devel` + +```sh +mkdir QAT +cd QAT +tar -zxof QAT.L.4.23.0-00001.tar.gz +./configure +sudo make install +...
+There is 3 QAT acceleration device(s) in the system: + qat_dev0 - type: c6xx, inst_id: 0, node_id: 1, bsf: 0000:84:00.0, #accel: 5 #engines: 10 state: up + qat_dev1 - type: c6xx, inst_id: 1, node_id: 1, bsf: 0000:85:00.0, #accel: 5 #engines: 10 state: up + qat_dev2 - type: c6xx, inst_id: 2, node_id: 1, bsf: 0000:86:00.0, #accel: 5 #engines: 10 state: up +``` + +```sh +$ lspci -d 8086: | grep QuickAssist +84:00.0 Co-processor: Intel Corporation C62x Chipset QuickAssist Technology (rev 04) +85:00.0 Co-processor: Intel Corporation C62x Chipset QuickAssist Technology (rev 04) +86:00.0 Co-processor: Intel Corporation C62x Chipset QuickAssist Technology (rev 04) +``` + +5. Build wolfSSL: + +```sh +cd ../wolfssl +./configure --with-intelqa=../QAT --enable-asynccrypt +make +``` + + +## Usage + +Running wolfCrypt test and benchmark must be done with `sudo` to allow hardware access. By default the QuickAssist code uses the "SSL" process name via `QAT_PROCESS_NAME` in quickassist.h to match up to the hardware configuration. + +Note: `sudo make check` will fail since default QAT configuration doesn't allow multiple concurrent processes to use hardware. You can run each of the make check scripts individually with sudo. The hardware configuration can be customized by editing the `QAT/build/dh895xcc_qa_dev0.conf` file to allow multiple processes. + +Here are some build options for tuning your use: + +1. `QAT_USE_POLLING_CHECK`: Enables polling check to ensure only one poll per crypto instance. +2. `WC_ASYNC_THREAD_BIND`: Enables binding of thread to crypto hardware instance. +3. `WOLFSSL_DEBUG_MEMORY_PRINT`: Enables verbose malloc/free printing. This option is used along with `WOLFSSL_DEBUG_MEMORY` and `WOLFSSL_TRACK_MEMORY`. +4. `WC_ASYNC_THRESH_NONE`: Disables the default thresholds for determining if software AES/DES3 is used. Otherwise you can define `WC_ASYNC_THRESH_AES_CBC`, `WC_ASYNC_THRESH_AES_GCM` and `WC_ASYNC_THRESH_DES3_CBC` with your own values. 
The defaults are AES CBC: 1024, AES GCM: 128, DES3: 1024. If the symmetric operation is over this size it will use QAT hardware. Otherwise software is used. +5. `WC_ASYNC_NO_CRYPT`: When defined, disables QAT use for AES/DES3. +6. `WC_ASYNC_NO_HASH`: When defined, disables the QAT for hashing (MD5,SHA,SHA256,SHA512). +7. `WC_ASYNC_NO_RNG`: When defined, disables the QAT DRBG (default for QAT v1.7) +8. `WC_NO_ASYNC_THREADING`: Disables the thread affinity code for optionally linking a thread to a specific QAT instance. To use this feature you must also define `WC_ASYNC_THREAD_BIND`. +9. `WC_ASYNC_BENCH_THREAD_COUNT`: Uses a specific number of threads for benchmarking. +10. `QAT_HASH_ENABLE_PARTIAL`: Enables partial hashing support, which allows sending blocks to hardware prior to final. Otherwise all hash updates are cached. + +The QuickAssist v1.6 driver uses its own memory management system in `quickassist_mem.c`. This can be tuned using the following defines: + +1. `USE_QAE_STATIC_MEM`: Uses a global pool for the list of allocations. This improves performance, but consumes extra up-front memory. The pre-allocation size can be tuned using `QAE_USER_MEM_MAX_COUNT`. +2. `USE_QAE_THREAD_LS`: Uses thread-local storage and removes the mutex. Can improve performance in a multi-threaded environment, but does use extra memory. + +For QuickAssist v1.7 or later the newer usdm memory driver is used directly. + +### Recommended wolfSSL Build Options + +```sh +$ ./configure --with-intelqa=../QAT --enable-asynccrypt \ + --enable-aesni --enable-intelasm \ + --enable-sp --enable-sp-asm \ + CFLAGS="-DWC_ASYNC_NO_HASH" +``` + +* `--with-intelqa=../QAT`: Enables the Intel QuickAssist mode. +* `--enable-asynccrypt`: Enables asynchronous cryptography mode. +* `--enable-aesni`: Enables the Intel AES-NI assembly speedups. +* `--enable-intelasm`: Enables the Intel ASM (AVX/AVX2) speedups. +* `--enable-sp`: Enables Single Precision math to speed up standard key sizes and curves.
+* `--enable-sp-asm`: Enable Single Precision assembly speedups. +* `WC_ASYNC_NO_HASH`: Disable the QAT hashing and use Intel AVX accelerated software hashing. Overhead for using QAT hashing is not yet well tuned. + + +### wolfCrypt Test with QAT +``` +sudo ./wolfcrypt/test/testwolfcrypt +IntelQA: Instances 2 +... +RSA test passed! +``` + +### wolfCrypt Benchmark with QAT 8970 (multi-threaded) + +Multiple concurrent threads will be started based on the number of CPU's available. If you want to exclude the software benchmarks use `./configure CFLAGS="-DNO_SW_BENCH"`. + +``` +Intel QuickAssist DH8950 on Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz: + +Recommended wolfSSL build options when benchmarking. +$ ./configure --enable-sp --enable-sp-asm --enable-aesni --enable-intelasm --enable-intelrand --enable-keygen --enable-sha3 --enable-asynccrypt --with-intelqa=../QAT CFLAGS="-DWC_ASYNC_THRESH_NONE -DQAT_MAX_PENDING=40 -DWC_ASYNC_BENCH_THREAD_COUNT=2" +$ make + +$ sudo ./wolfcrypt/benchmark/benchmark -rsa_sign -base10 -threads 2 -print +------------------------------------------------------------------------------ + wolfSSL version 4.5.0 +------------------------------------------------------------------------------ +IntelQA: Instances 18 +wolfCrypt Benchmark (block bytes 1048576, min 1.0 sec each) +CPUs: 2 +RNG SW 79 mB took 1.030 seconds, 76.388 mB/s Cycles per byte = 32.65 +RNG SW 79 mB took 1.042 seconds, 75.456 mB/s Cycles per byte = 33.05 +AES-128-CBC-enc SW 729 mB took 1.006 seconds, 724.266 mB/s Cycles per byte = 3.44 +AES-128-CBC-enc SW 729 mB took 1.007 seconds, 723.825 mB/s Cycles per byte = 3.45 +AES-128-CBC-dec SW 5185 mB took 1.000 seconds, 5184.260 mB/s Cycles per byte = 0.48 +AES-128-CBC-dec SW 5190 mB took 1.000 seconds, 5189.351 mB/s Cycles per byte = 0.48 +AES-192-CBC-enc SW 608 mB took 1.003 seconds, 606.175 mB/s Cycles per byte = 4.11 +AES-192-CBC-enc SW 608 mB took 1.004 seconds, 605.855 mB/s Cycles per byte = 4.12 +AES-192-CBC-dec SW 4325 mB took 
1.000 seconds, 4325.333 mB/s Cycles per byte = 0.58 +AES-192-CBC-dec SW 4331 mB took 1.001 seconds, 4325.809 mB/s Cycles per byte = 0.58 +AES-256-CBC-enc SW 524 mB took 1.005 seconds, 521.465 mB/s Cycles per byte = 4.78 +AES-256-CBC-enc SW 524 mB took 1.006 seconds, 521.190 mB/s Cycles per byte = 4.79 +AES-256-CBC-dec SW 3707 mB took 1.000 seconds, 3705.767 mB/s Cycles per byte = 0.67 +AES-256-CBC-dec SW 3707 mB took 1.001 seconds, 3703.024 mB/s Cycles per byte = 0.67 +AES-128-CBC-enc HW 2443 mB took 1.000 seconds, 2442.819 mB/s Cycles per byte = 1.02 +AES-128-CBC-enc HW 2443 mB took 1.000 seconds, 2442.770 mB/s Cycles per byte = 1.02 +AES-128-CBC-dec HW 2380 mB took 1.001 seconds, 2378.716 mB/s Cycles per byte = 1.05 +AES-128-CBC-dec HW 2380 mB took 1.001 seconds, 2378.657 mB/s Cycles per byte = 1.05 +AES-192-CBC-enc HW 2365 mB took 1.002 seconds, 2359.520 mB/s Cycles per byte = 1.06 +AES-192-CBC-enc HW 2365 mB took 1.002 seconds, 2359.471 mB/s Cycles per byte = 1.06 +AES-192-CBC-dec HW 2417 mB took 1.002 seconds, 2411.874 mB/s Cycles per byte = 1.03 +AES-192-CBC-dec HW 2417 mB took 1.002 seconds, 2411.831 mB/s Cycles per byte = 1.03 +AES-256-CBC-enc HW 2223 mB took 1.001 seconds, 2221.082 mB/s Cycles per byte = 1.12 +AES-256-CBC-enc HW 2218 mB took 1.001 seconds, 2215.793 mB/s Cycles per byte = 1.13 +AES-256-CBC-dec HW 2113 mB took 1.002 seconds, 2108.506 mB/s Cycles per byte = 1.18 +AES-256-CBC-dec HW 2113 mB took 1.002 seconds, 2108.354 mB/s Cycles per byte = 1.18 +AES-128-GCM-enc SW 1919 mB took 1.001 seconds, 1916.366 mB/s Cycles per byte = 1.30 +AES-128-GCM-enc SW 2595 mB took 1.001 seconds, 2591.465 mB/s Cycles per byte = 0.96 +AES-128-GCM-dec SW 2611 mB took 1.000 seconds, 2610.093 mB/s Cycles per byte = 0.96 +AES-128-GCM-dec SW 2218 mB took 1.002 seconds, 2213.073 mB/s Cycles per byte = 1.13 +AES-192-GCM-enc SW 2317 mB took 1.001 seconds, 2315.896 mB/s Cycles per byte = 1.08 +AES-192-GCM-enc SW 2286 mB took 1.002 seconds, 2281.953 mB/s Cycles per byte = 
1.09 +AES-192-GCM-dec SW 2207 mB took 1.001 seconds, 2206.098 mB/s Cycles per byte = 1.13 +AES-192-GCM-dec SW 1589 mB took 1.002 seconds, 1586.020 mB/s Cycles per byte = 1.57 +AES-256-GCM-enc SW 2071 mB took 1.001 seconds, 2069.342 mB/s Cycles per byte = 1.21 +AES-256-GCM-enc SW 2108 mB took 1.002 seconds, 2103.268 mB/s Cycles per byte = 1.19 +AES-256-GCM-dec SW 2108 mB took 1.001 seconds, 2105.715 mB/s Cycles per byte = 1.18 +AES-256-GCM-dec SW 2108 mB took 1.002 seconds, 2103.563 mB/s Cycles per byte = 1.19 +AES-128-GCM-enc HW 2427 mB took 1.002 seconds, 2422.522 mB/s Cycles per byte = 1.03 +AES-128-GCM-enc HW 2433 mB took 1.002 seconds, 2427.722 mB/s Cycles per byte = 1.03 +AES-128-GCM-dec HW 1861 mB took 1.001 seconds, 1860.039 mB/s Cycles per byte = 1.34 +AES-128-GCM-dec HW 1861 mB took 1.001 seconds, 1860.019 mB/s Cycles per byte = 1.34 +AES-192-GCM-enc HW 2380 mB took 1.000 seconds, 2379.218 mB/s Cycles per byte = 1.05 +AES-192-GCM-enc HW 2386 mB took 1.000 seconds, 2384.418 mB/s Cycles per byte = 1.05 +AES-192-GCM-dec HW 1971 mB took 1.002 seconds, 1966.480 mB/s Cycles per byte = 1.27 +AES-192-GCM-dec HW 1971 mB took 1.002 seconds, 1966.458 mB/s Cycles per byte = 1.27 +AES-256-GCM-enc HW 2254 mB took 1.002 seconds, 2249.535 mB/s Cycles per byte = 1.11 +AES-256-GCM-enc HW 2254 mB took 1.002 seconds, 2249.487 mB/s Cycles per byte = 1.11 +AES-256-GCM-dec HW 1746 mB took 1.001 seconds, 1744.049 mB/s Cycles per byte = 1.43 +AES-256-GCM-dec HW 1746 mB took 1.001 seconds, 1744.018 mB/s Cycles per byte = 1.43 +CHACHA SW 1478 mB took 1.000 seconds, 1478.220 mB/s Cycles per byte = 1.69 +CHACHA SW 1347 mB took 1.003 seconds, 1342.833 mB/s Cycles per byte = 1.86 +CHA-POLY SW 949 mB took 1.002 seconds, 946.915 mB/s Cycles per byte = 2.63 +CHA-POLY SW 949 mB took 1.005 seconds, 944.670 mB/s Cycles per byte = 2.64 +MD5 SW 603 mB took 1.003 seconds, 601.383 mB/s Cycles per byte = 4.15 +MD5 SW 613 mB took 1.005 seconds, 610.413 mB/s Cycles per byte = 4.09 +MD5 HW 409 mB 
took 1.002 seconds, 408.088 mB/s Cycles per byte = 6.11 +MD5 HW 409 mB took 1.003 seconds, 407.845 mB/s Cycles per byte = 6.12 +POLY1305 SW 2621 mB took 1.000 seconds, 2620.709 mB/s Cycles per byte = 0.95 +POLY1305 SW 2616 mB took 1.001 seconds, 2613.824 mB/s Cycles per byte = 0.95 +SHA SW 377 mB took 1.003 seconds, 376.342 mB/s Cycles per byte = 6.63 +SHA SW 383 mB took 1.011 seconds, 378.592 mB/s Cycles per byte = 6.59 +SHA HW 535 mB took 1.005 seconds, 531.941 mB/s Cycles per byte = 4.69 +SHA HW 535 mB took 1.006 seconds, 531.644 mB/s Cycles per byte = 4.69 +SHA-224 SW 351 mB took 1.010 seconds, 347.715 mB/s Cycles per byte = 7.17 +SHA-224 SW 351 mB took 1.014 seconds, 346.285 mB/s Cycles per byte = 7.20 +SHA-224 HW 414 mB took 1.012 seconds, 409.434 mB/s Cycles per byte = 6.09 +SHA-224 HW 419 mB took 1.012 seconds, 414.387 mB/s Cycles per byte = 6.02 +SHA-256 SW 351 mB took 1.011 seconds, 347.292 mB/s Cycles per byte = 7.18 +SHA-256 SW 315 mB took 1.013 seconds, 310.424 mB/s Cycles per byte = 8.03 +SHA-256 HW 419 mB took 1.004 seconds, 417.688 mB/s Cycles per byte = 5.97 +SHA-256 HW 419 mB took 1.005 seconds, 417.427 mB/s Cycles per byte = 5.98 +SHA-384 SW 530 mB took 1.001 seconds, 529.040 mB/s Cycles per byte = 4.71 +SHA-384 SW 530 mB took 1.003 seconds, 528.139 mB/s Cycles per byte = 4.72 +SHA-384 HW 357 mB took 1.001 seconds, 356.156 mB/s Cycles per byte = 7.00 +SHA-384 HW 367 mB took 1.010 seconds, 363.498 mB/s Cycles per byte = 6.86 +SHA-512 SW 530 mB took 1.002 seconds, 528.589 mB/s Cycles per byte = 4.72 +SHA-512 SW 446 mB took 1.009 seconds, 441.540 mB/s Cycles per byte = 5.65 +SHA-512 HW 367 mB took 1.004 seconds, 365.434 mB/s Cycles per byte = 6.83 +SHA-512 HW 367 mB took 1.005 seconds, 365.224 mB/s Cycles per byte = 6.83 +SHA3-224 SW 236 mB took 1.014 seconds, 232.784 mB/s Cycles per byte = 10.71 +SHA3-224 SW 236 mB took 1.018 seconds, 231.794 mB/s Cycles per byte = 10.76 +SHA3-224 HW 220 mB took 1.006 seconds, 218.860 mB/s Cycles per byte = 11.40 
+SHA3-224 HW 236 mB took 1.015 seconds, 232.538 mB/s Cycles per byte = 10.73 +SHA3-256 SW 163 mB took 1.000 seconds, 162.463 mB/s Cycles per byte = 15.35 +SHA3-256 SW 225 mB took 1.023 seconds, 220.278 mB/s Cycles per byte = 11.32 +SHA3-256 HW 692 mB took 1.004 seconds, 689.291 mB/s Cycles per byte = 3.62 +SHA3-256 HW 692 mB took 1.007 seconds, 687.092 mB/s Cycles per byte = 3.63 +SHA3-384 SW 173 mB took 1.022 seconds, 169.214 mB/s Cycles per byte = 14.74 +SHA3-384 SW 173 mB took 1.024 seconds, 168.878 mB/s Cycles per byte = 14.77 +SHA3-384 HW 173 mB took 1.023 seconds, 169.202 mB/s Cycles per byte = 14.74 +SHA3-384 HW 173 mB took 1.024 seconds, 168.948 mB/s Cycles per byte = 14.76 +SHA3-512 SW 121 mB took 1.026 seconds, 117.548 mB/s Cycles per byte = 21.22 +SHA3-512 SW 121 mB took 1.027 seconds, 117.375 mB/s Cycles per byte = 21.25 +SHA3-512 HW 121 mB took 1.026 seconds, 117.585 mB/s Cycles per byte = 21.21 +SHA3-512 HW 121 mB took 1.028 seconds, 117.335 mB/s Cycles per byte = 21.26 +HMAC-MD5 SW 608 mB took 1.000 seconds, 608.096 mB/s Cycles per byte = 4.10 +HMAC-MD5 SW 613 mB took 1.004 seconds, 611.102 mB/s Cycles per byte = 4.08 +HMAC-MD5 HW 414 mB took 1.001 seconds, 413.762 mB/s Cycles per byte = 6.03 +HMAC-MD5 HW 414 mB took 1.004 seconds, 412.554 mB/s Cycles per byte = 6.05 +HMAC-SHA SW 383 mB took 1.011 seconds, 378.446 mB/s Cycles per byte = 6.59 +HMAC-SHA SW 383 mB took 1.013 seconds, 377.729 mB/s Cycles per byte = 6.60 +HMAC-SHA HW 535 mB took 1.008 seconds, 530.760 mB/s Cycles per byte = 4.70 +HMAC-SHA HW 514 mB took 1.009 seconds, 509.292 mB/s Cycles per byte = 4.90 +HMAC-SHA224 SW 267 mB took 1.008 seconds, 265.316 mB/s Cycles per byte = 9.40 +HMAC-SHA224 SW 351 mB took 1.012 seconds, 346.982 mB/s Cycles per byte = 7.19 +HMAC-SHA224 HW 404 mB took 1.003 seconds, 402.579 mB/s Cycles per byte = 6.20 +HMAC-SHA224 HW 393 mB took 1.011 seconds, 388.951 mB/s Cycles per byte = 6.41 +HMAC-SHA256 SW 294 mB took 1.007 seconds, 291.426 mB/s Cycles per byte = 
8.56 +HMAC-SHA256 SW 351 mB took 1.012 seconds, 347.205 mB/s Cycles per byte = 7.18 +HMAC-SHA256 HW 419 mB took 1.004 seconds, 417.677 mB/s Cycles per byte = 5.97 +HMAC-SHA256 HW 419 mB took 1.009 seconds, 415.514 mB/s Cycles per byte = 6.00 +HMAC-SHA384 SW 530 mB took 1.002 seconds, 528.479 mB/s Cycles per byte = 4.72 +HMAC-SHA384 SW 530 mB took 1.007 seconds, 526.093 mB/s Cycles per byte = 4.74 +HMAC-SHA384 HW 367 mB took 1.004 seconds, 365.498 mB/s Cycles per byte = 6.82 +HMAC-SHA384 HW 367 mB took 1.006 seconds, 364.878 mB/s Cycles per byte = 6.84 +HMAC-SHA512 SW 530 mB took 1.002 seconds, 528.616 mB/s Cycles per byte = 4.72 +HMAC-SHA512 SW 530 mB took 1.006 seconds, 526.513 mB/s Cycles per byte = 4.74 +HMAC-SHA512 HW 367 mB took 1.003 seconds, 365.816 mB/s Cycles per byte = 6.82 +HMAC-SHA512 HW 367 mB took 1.007 seconds, 364.560 mB/s Cycles per byte = 6.84 +RSA 1024 key gen SW 40 ops took 1.191 sec, avg 29.780 ms, 33.580 ops/sec +RSA 1024 key gen SW 40 ops took 1.428 sec, avg 35.694 ms, 28.016 ops/sec +RSA 2048 key gen SW 40 ops took 4.154 sec, avg 103.853 ms, 9.629 ops/sec +RSA 2048 key gen SW 40 ops took 5.687 sec, avg 142.172 ms, 7.034 ops/sec +RSA 1024 key gen HW 120 ops took 1.064 sec, avg 8.866 ms, 112.790 ops/sec +RSA 1024 key gen HW 120 ops took 1.072 sec, avg 8.932 ms, 111.953 ops/sec +RSA 2048 key gen HW 40 ops took 1.389 sec, avg 34.717 ms, 28.804 ops/sec +RSA 2048 key gen HW 40 ops took 1.437 sec, avg 35.935 ms, 27.828 ops/sec +RSA 2048 sign SW 1000 ops took 1.046 sec, avg 1.046 ms, 956.197 ops/sec +RSA 2048 sign SW 1000 ops took 1.052 sec, avg 1.052 ms, 950.320 ops/sec +RSA 2048 verify SW 32300 ops took 1.001 sec, avg 0.031 ms, 32271.670 ops/sec +RSA 2048 verify SW 32200 ops took 1.003 sec, avg 0.031 ms, 32117.110 ops/sec +RSA 2048 sign HW 12300 ops took 1.001 sec, avg 0.081 ms, 12288.056 ops/sec +RSA 2048 sign HW 19600 ops took 1.003 sec, avg 0.051 ms, 19537.967 ops/sec +RSA 2048 verify HW 116000 ops took 1.000 sec, avg 0.009 ms, 115971.935 
ops/sec +RSA 2048 verify HW 118000 ops took 1.000 sec, avg 0.008 ms, 117962.707 ops/sec +DH 2048 key gen SW 2080 ops took 1.000 sec, avg 0.481 ms, 2079.830 ops/sec +DH 2048 key gen SW 2120 ops took 1.016 sec, avg 0.479 ms, 2086.548 ops/sec +DH 2048 agree SW 2100 ops took 1.023 sec, avg 0.487 ms, 2053.478 ops/sec +DH 2048 agree SW 2100 ops took 1.026 sec, avg 0.489 ms, 2046.644 ops/sec +DH 2048 key gen HW 43720 ops took 1.000 sec, avg 0.023 ms, 43712.257 ops/sec +DH 2048 key gen HW 43320 ops took 1.000 sec, avg 0.023 ms, 43299.560 ops/sec +DH 2048 agree HW 32500 ops took 1.001 sec, avg 0.031 ms, 32471.874 ops/sec +DH 2048 agree HW 39400 ops took 1.001 sec, avg 0.025 ms, 39351.757 ops/sec +ECC 256 key gen SW 41320 ops took 1.001 sec, avg 0.024 ms, 41298.692 ops/sec +ECC 256 key gen SW 41280 ops took 1.001 sec, avg 0.024 ms, 41258.674 ops/sec +ECC 256 key gen HW 41320 ops took 1.000 sec, avg 0.024 ms, 41309.127 ops/sec +ECC 256 key gen HW 41280 ops took 1.001 sec, avg 0.024 ms, 41244.118 ops/sec +ECDHE 256 agree SW 13400 ops took 1.005 sec, avg 0.075 ms, 13328.731 ops/sec +ECDHE 256 agree SW 13300 ops took 1.006 sec, avg 0.076 ms, 13221.465 ops/sec +ECDSA 256 sign SW 29900 ops took 1.002 sec, avg 0.034 ms, 29841.744 ops/sec +ECDSA 256 sign SW 30000 ops took 1.003 sec, avg 0.033 ms, 29910.091 ops/sec +ECDSA 256 verify SW 10700 ops took 1.006 sec, avg 0.094 ms, 10641.471 ops/sec +ECDSA 256 verify SW 10700 ops took 1.009 sec, avg 0.094 ms, 10604.105 ops/sec +ECDHE 256 agree HW 26600 ops took 1.000 sec, avg 0.038 ms, 26594.522 ops/sec +ECDHE 256 agree HW 19000 ops took 1.002 sec, avg 0.053 ms, 18964.479 ops/sec +ECDSA 256 sign HW 22300 ops took 1.001 sec, avg 0.045 ms, 22286.137 ops/sec +ECDSA 256 sign HW 22000 ops took 1.002 sec, avg 0.046 ms, 21963.146 ops/sec +ECDSA 256 verify HW 12600 ops took 1.002 sec, avg 0.080 ms, 12569.531 ops/sec +ECDSA 256 verify HW 12600 ops took 1.005 sec, avg 0.080 ms, 12542.829 ops/sec +Benchmark complete +RNG SW 151.844 mB/s 
+AES-128-CBC-enc SW 1448.090 mB/s +AES-128-CBC-dec SW 10373.612 mB/s +AES-192-CBC-enc SW 1212.030 mB/s +AES-192-CBC-dec SW 8651.141 mB/s +AES-256-CBC-enc SW 1042.655 mB/s +AES-256-CBC-dec SW 7408.791 mB/s +AES-128-CBC-enc HW 4885.588 mB/s +AES-128-CBC-dec HW 4757.373 mB/s +AES-192-CBC-enc HW 4718.991 mB/s +AES-192-CBC-dec HW 4823.705 mB/s +AES-256-CBC-enc HW 4436.875 mB/s +AES-256-CBC-dec HW 4216.860 mB/s +AES-128-GCM-enc SW 4507.831 mB/s +AES-128-GCM-dec SW 4823.166 mB/s +AES-192-GCM-enc SW 4597.849 mB/s +AES-192-GCM-dec SW 3792.119 mB/s +AES-256-GCM-enc SW 4172.610 mB/s +AES-256-GCM-dec SW 4209.278 mB/s +AES-128-GCM-enc HW 4850.244 mB/s +AES-128-GCM-dec HW 3720.058 mB/s +AES-192-GCM-enc HW 4763.636 mB/s +AES-192-GCM-dec HW 3932.937 mB/s +AES-256-GCM-enc HW 4499.022 mB/s +AES-256-GCM-dec HW 3488.068 mB/s +CHACHA SW 2821.053 mB/s +CHA-POLY SW 1891.585 mB/s +MD5 SW 1211.796 mB/s +MD5 HW 815.933 mB/s +POLY1305 SW 5234.533 mB/s +SHA SW 754.934 mB/s +SHA HW 1063.586 mB/s +SHA-224 SW 694.001 mB/s +SHA-224 HW 823.821 mB/s +SHA-256 SW 657.716 mB/s +SHA-256 HW 835.115 mB/s +SHA-384 SW 1057.178 mB/s +SHA-384 HW 719.655 mB/s +SHA-512 SW 970.129 mB/s +SHA-512 HW 730.657 mB/s +SHA3-224 SW 464.579 mB/s +SHA3-224 HW 451.398 mB/s +SHA3-256 SW 382.741 mB/s +SHA3-256 HW 1376.382 mB/s +SHA3-384 SW 338.092 mB/s +SHA3-384 HW 338.150 mB/s +SHA3-512 SW 234.923 mB/s +SHA3-512 HW 234.921 mB/s +HMAC-MD5 SW 1219.198 mB/s +HMAC-MD5 HW 826.316 mB/s +HMAC-SHA SW 756.175 mB/s +HMAC-SHA HW 1040.052 mB/s +HMAC-SHA224 SW 612.297 mB/s +HMAC-SHA224 HW 791.530 mB/s +HMAC-SHA256 SW 638.631 mB/s +HMAC-SHA256 HW 833.191 mB/s +HMAC-SHA384 SW 1054.571 mB/s +HMAC-SHA384 HW 730.376 mB/s +HMAC-SHA512 SW 1055.130 mB/s +HMAC-SHA512 HW 730.377 mB/s +RSA 1024 key gen SW 61.596 ops/sec +RSA 2048 key gen SW 16.663 ops/sec +RSA 1024 key gen HW 224.743 ops/sec +RSA 2048 key gen HW 56.632 ops/sec +RSA 2048 sign SW 1906.517 ops/sec +RSA 2048 verify SW 64388.780 ops/sec +RSA 2048 sign HW 31826.022 ops/sec +RSA 2048 
verify HW 233934.642 ops/sec +DH 2048 key gen SW 4166.378 ops/sec +DH 2048 agree SW 4100.122 ops/sec +DH 2048 key gen HW 87011.816 ops/sec +DH 2048 agree HW 71823.630 ops/sec +ECC 256 key gen SW 82557.366 ops/sec +ECC 256 key gen HW 82553.245 ops/sec +ECDHE 256 agree SW 26550.196 ops/sec +ECDSA 256 sign SW 59751.835 ops/sec +ECDSA 256 verify SW 21245.576 ops/sec +ECDHE 256 agree HW 45559.001 ops/sec +ECDSA 256 sign HW 44249.283 ops/sec +ECDSA 256 verify HW 25112.360 ops/sec +IntelQA: Stop +``` + +### wolfCrypt Benchmark with QAT (single-threaded) + +To use the benchmark tool against hardware in single threaded mode build the library with `CFLAGS="-DWC_NO_ASYNC_THREADING"`. + +``` +sudo ./wolfcrypt/benchmark/benchmark -rsa_sign -dh -ecc +IntelQA: Instances 2 +wolfCrypt Benchmark (block bytes 1048576, min 1.0 sec each) +RSA 2048 public HW 161000 ops took 1.000 sec, avg 0.006 ms, 160989.829 ops/sec +RSA 2048 private HW 18600 ops took 1.002 sec, avg 0.054 ms, 18566.416 ops/sec +DH 2048 key gen HW 48945 ops took 1.000 sec, avg 0.020 ms, 48931.782 ops/sec +DH 2048 agree HW 43300 ops took 1.001 sec, avg 0.023 ms, 43248.876 ops/sec +ECDHE 256 agree HW 26400 ops took 1.001 sec, avg 0.038 ms, 26382.639 ops/sec +ECDSA 256 sign HW 23900 ops took 1.004 sec, avg 0.042 ms, 23810.849 ops/sec +ECDSA 256 verify HW 13800 ops took 1.000 sec, avg 0.072 ms, 13799.878 ops/sec +IntelQA: Stop +``` + +### wolfSSL Asynchronous Test Mode + +Enable asynccrypt alone to use async simulator. +`./configure --enable-asynccrypt` + + +## Debugging + +To enable debug messages: +`./configure --enable-asynccrypt --with-intelqa=../QAT --enable-debug --disable-shared CFLAGS="-DQAT_DEBUG" && make` + + +## Support + +For questions or issues email us at support@wolfssl.com. 
diff --git a/wolfcrypt/src/port/intel/quickassist.c b/wolfcrypt/src/port/intel/quickassist.c new file mode 100644 index 000000000..751e98563 --- /dev/null +++ b/wolfcrypt/src/port/intel/quickassist.c @@ -0,0 +1,5092 @@ +/* quickassist.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_INTEL_QA + +#ifdef QAT_DEMO_MAIN + #define QAT_USE_POLLING_THREAD +#endif + + +#include +#include +#include +#include +#ifndef NO_RSA + #include +#endif +#ifndef NO_AES + #include +#endif +#ifndef NO_HMAC + #include +#endif +#ifndef NO_DH + #include +#endif + +#include + +#include "icp_sal_user.h" +#include "icp_sal_poll.h" +#ifndef QAT_V2 +#include "icp_sal_drbg_impl.h" +#endif + +#ifdef QAT_HASH_ENABLE_PARTIAL +#ifdef USE_LAC_SESSION_FOR_STRUCT_OFFSET + #include "lac_session.h" +#endif +#endif + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +#include + +/* Async enables (1=non-block, 0=block) */ +#ifndef QAT_RSA_ASYNC +#define QAT_RSA_ASYNC 1 +#endif +#ifndef QAT_EXPTMOD_ASYNC +#define QAT_EXPTMOD_ASYNC 1 +#endif +#ifndef QAT_CIPHER_ASYNC +#define QAT_CIPHER_ASYNC 1 +#endif +#ifndef QAT_ECDSA_ASYNC 
+#define QAT_ECDSA_ASYNC 1 +#endif +#ifndef QAT_ECDHE_ASYNC +#define QAT_ECDHE_ASYNC 1 +#endif +#ifndef QAT_ECMUL_ASYNC +#define QAT_ECMUL_ASYNC 1 +#endif +#ifndef QAT_DH_ASYNC +#define QAT_DH_ASYNC 1 +#endif + +/* Hash and Drbg do not support async in wolfSSL/wolfCrypt */ +#ifndef QAT_HASH_ASYNC +#define QAT_HASH_ASYNC 0 +#endif +#ifndef QAT_DRBG_ASYNC +#define QAT_DRBG_ASYNC 0 +#endif + +#define OS_HOST_TO_NW_32(uData) ByteReverseWord32(uData) + +static CpaInstanceHandle* g_cyInstances = NULL; +static CpaInstanceInfo2* g_cyInstanceInfo = NULL; +static Cpa32U* g_cyInstMap = NULL; +static Cpa16U g_numInstances = 0; +static Cpa16U g_instCounter = 0; +static CpaBoolean g_cyServiceStarted = CPA_FALSE; +#ifdef QAT_USE_POLLING_CHECK + static CpaBoolean* g_cyPolling = NULL; + static pthread_mutex_t* g_PollLock; +#endif +static volatile int g_initCount = 0; +#if defined(HAVE_ECC) && defined(HAVE_ECC_DHE) + static Cpa8U* g_qatEcdhY = NULL; + static Cpa8U* g_qatEcdhCofactor1 = NULL; +#endif +static pthread_mutex_t g_Hwlock = PTHREAD_MUTEX_INITIALIZER; + +typedef struct qatCapabilities { + /* capabilities */ + word32 supPartial:1; +#ifdef QAT_V2 + word32 supSha3:1; +#endif +} qatCapabilities_t; +static qatCapabilities_t g_qatCapabilities = { + 0 + #ifdef QAT_V2 + , 0 + #endif +}; + + +#if defined(QAT_ENABLE_CRYPTO) || defined(QAT_ENABLE_HASH) + static int IntelQaSymClose(WC_ASYNC_DEV* dev, int doFree); +#endif +#if defined(QAT_ENABLE_RNG) +static int IntelQaDrbgClose(WC_ASYNC_DEV* dev); +#endif + +extern Cpa32U osalLogLevelSet(Cpa32U level); + + +/* -------------------------------------------------------------------------- */ +/* Polling */ +/* -------------------------------------------------------------------------- */ + +#ifdef QAT_USE_POLLING_THREAD +static void* IntelQaPollingThread(void* context) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)context; +#ifdef QAT_DEBUG + printf("Polling Thread Start\n"); +#endif + while (dev->qat.pollingCy) { + 
icp_sal_CyPollInstance(dev->qat.handle, QAT_POLL_RESP_QUOTA); + wc_AsyncSleep(10); + } +#ifdef QAT_DEBUG + printf("Polling Thread Exit\n"); +#endif + pthread_exit(NULL); +} + +static CpaStatus IntelQaStartPollingThread(WC_ASYNC_DEV* dev) +{ + if (dev->qat.pollingCy == 0) { + dev->qat.pollingCy = 1; + #ifdef QAT_DEBUG + printf("Polling Thread Created\n"); + #endif + if (pthread_create(&dev->qat.pollingThread, NULL, IntelQaPollingThread, + (void*)dev) != 0) { + printf("Failed create polling thread!\n"); + return CPA_STATUS_FAIL; + } + } + return CPA_STATUS_SUCCESS; +} + +static void IntelQaStopPollingThread(WC_ASYNC_DEV* dev) +{ + dev->qat.pollingCy = 0; + pthread_join(dev->qat.pollingThread, 0); +} +#endif /* QAT_USE_POLLING_THREAD */ + + + +/* -------------------------------------------------------------------------- */ +/* Buffer Helpers */ +/* -------------------------------------------------------------------------- */ +#if defined(HAVE_ECC) || !defined(NO_DH) || !defined(NO_RSA) +static WC_INLINE int IntelQaAllocFlatBuffer(CpaFlatBuffer* buf, int size, + void* heap) +{ + if (buf == NULL || size <= 0) + return BAD_FUNC_ARG; + buf->pData = (byte*)XMALLOC(size, heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (buf->pData == NULL) + return MEMORY_E; + buf->dataLenInBytes = size; + return 0; +} +#if !defined(NO_DH) || defined(WOLFSSL_KEY_GEN) +static WC_INLINE void IntelQaFreeFlatBuffer(CpaFlatBuffer* buf, void* heap) +{ + if (buf && buf->pData) { + XFREE(buf->pData, heap, DYNAMIC_TYPE_ASYNC_NUMA); + buf->pData = NULL; + buf->dataLenInBytes = 0; + } +} +#endif +static WC_INLINE int IntelQaBigIntToFlatBuffer(WC_BIGINT* src, + CpaFlatBuffer* dst) +{ + if (src == NULL || src->buf == NULL || dst == NULL) { + return BAD_FUNC_ARG; + } + + dst->pData = src->buf; + dst->dataLenInBytes = src->len; + + return 0; +} + +static WC_INLINE int IntelQaFlatBufferToBigInt(CpaFlatBuffer* src, + WC_BIGINT* dst) +{ + if (src == NULL || src->pData == NULL || dst == NULL) { + return BAD_FUNC_ARG; + } 
+ + dst->buf = src->pData; + dst->len = src->dataLenInBytes; + + return 0; +} +#endif + + +/* -------------------------------------------------------------------------- */ +/* Device */ +/* -------------------------------------------------------------------------- */ +void IntelQaHardwareStop(void) +{ + int i; + CpaStatus status; + + g_initCount--; /* track de-init count */ + if (g_initCount != 0) { + return; + } + +#if defined(HAVE_ECC) && defined(HAVE_ECC_DHE) + if (g_qatEcdhY) { + XFREE(g_qatEcdhY, NULL, DYNAMIC_TYPE_ASYNC_NUMA); + g_qatEcdhY = NULL; + } + if (g_qatEcdhCofactor1) { + XFREE(g_qatEcdhCofactor1, NULL, DYNAMIC_TYPE_ASYNC_NUMA); + g_qatEcdhCofactor1 = NULL; + } +#endif + + if (g_cyServiceStarted == CPA_TRUE) { + g_cyServiceStarted = CPA_FALSE; + for (i=0; i 1) { + return 0; + } + + status = qaeMemInit(); + if (status != CPA_STATUS_SUCCESS) { + printf("IntelQA: Could not start qae mem for user space (status %d)\n", + status); + printf("\tHas the qaeMemDrv.ko module been loaded?\n"); + return ASYNC_INIT_E; + } + + status = icp_sal_userStartMultiProcess(process_name, + limitDevAccess ? CPA_TRUE : CPA_FALSE); + if (status != CPA_STATUS_SUCCESS) { + printf("IntelQA: Could not start sal for user space! status %d\n", + status); + ret = ASYNC_INIT_E; goto error; + } + +#ifdef QAT_DEBUG + /* optionally enable debugging */ + /* osalLogLevelSet(8); */ +#endif + + status = cpaCyGetNumInstances(&g_numInstances); + if (status != CPA_STATUS_SUCCESS || g_numInstances == 0) { + printf("IntelQA: Failed to get num of instances! 
status %d\n", + status); + ret = INVALID_DEVID; goto error; + } + + /* Get handles / info */ + g_cyInstances = (CpaInstanceHandle*)XMALLOC( + sizeof(CpaInstanceHandle) * g_numInstances, NULL, DYNAMIC_TYPE_ASYNC); + if (g_cyInstances == NULL) { + printf("IntelQA: Failed to allocate instances\n"); + ret = INVALID_DEVID; goto error; + } + +#ifdef QAT_USE_POLLING_CHECK + g_cyPolling = (CpaBoolean*)XMALLOC(sizeof(CpaBoolean) * g_numInstances, + NULL, DYNAMIC_TYPE_ASYNC); + if (g_cyPolling == NULL) { + printf("IntelQA: Failed to allocate polling status\n"); + ret = INVALID_DEVID; goto error; + } + g_PollLock = (pthread_mutex_t*)XMALLOC(sizeof(pthread_mutex_t) * + g_numInstances, NULL, DYNAMIC_TYPE_ASYNC); + if (g_PollLock == NULL) { + printf("IntelQA: Failed to allocate polling locks\n"); + ret = INVALID_DEVID; goto error; + } + for (i=0; i> 8), + (Cpa8U)((g_cyInstanceInfo[i].physInstId.busAddress) + & 0xFF) >> 3, + (Cpa8U)((g_cyInstanceInfo[i].physInstId.busAddress) & 3), + g_cyInstanceInfo[i].isPolled); + #endif + + status = cpaCySetAddressTranslation(g_cyInstances[i], + qaeVirtToPhysNUMA); + if (status != CPA_STATUS_SUCCESS) { + printf("IntelQA: Error setting memory config for inst %d\n", i); + ret = INVALID_DEVID; goto error; + } + + status = cpaCyStartInstance(g_cyInstances[i]); + if (status != CPA_STATUS_SUCCESS) { + printf("IntelQA: Error starting crypto instance %d\n", i); + ret = INVALID_DEVID; goto error; + } + } + +#if defined(HAVE_ECC) && defined(HAVE_ECC_DHE) + g_qatEcdhY = XMALLOC(MAX_ECC_BYTES, NULL, DYNAMIC_TYPE_ASYNC_NUMA); + if (g_qatEcdhY == NULL) { + ret = MEMORY_E; goto error; + } + g_qatEcdhCofactor1 = XMALLOC(MAX_ECC_BYTES, NULL, DYNAMIC_TYPE_ASYNC_NUMA); + if (g_qatEcdhCofactor1 == NULL) { + ret = MEMORY_E; goto error; + } + *((word32*)g_qatEcdhCofactor1) = OS_HOST_TO_NW_32(1); +#endif + + printf("IntelQA: Instances %d\n", g_numInstances); + return ret; + +error: + IntelQaHardwareStop(); + return ret; +} + + +int IntelQaInit(void* threadId) +{ + 
int ret; + int devId; +#if !defined(WC_NO_ASYNC_THREADING) && defined(WC_ASYNC_THREAD_BIND) + pthread_t* thread = (pthread_t*)threadId; +#else + (void)threadId; +#endif + + ret = pthread_mutex_lock(&g_Hwlock); + if (ret != 0) { + printf("IntelQaInit: mutex lock failed! %d\n", ret); + return BAD_MUTEX_E; + } + + ret = IntelQaHardwareStart(QAT_PROCESS_NAME, QAT_LIMIT_DEV_ACCESS); + if (ret != 0) { + pthread_mutex_unlock(&g_Hwlock); + return ret; + } + + if (g_numInstances <= 0) { + pthread_mutex_unlock(&g_Hwlock); + return ASYNC_INIT_E; + } + + /* assign device id */ + devId = (g_instCounter % g_numInstances); + g_instCounter++; + + pthread_mutex_unlock(&g_Hwlock); + +#if !defined(WC_NO_ASYNC_THREADING) && defined(WC_ASYNC_THREAD_BIND) + /* if no thread provided then just return instance and don't bind */ + if (thread) { + ret = wc_AsyncThreadBind(thread, g_cyInstMap[devId]); + if (ret != 0) { + printf("IntelQA: Thread bind failed! %d\n", ret); + } + } +#endif /* !WC_NO_ASYNC_THREADING && !WC_NO_ASYNC_THREAD_BIND */ + + return devId; +} + +int IntelQaNumInstances(void) +{ + return g_numInstances; +} + +int IntelQaOpen(WC_ASYNC_DEV* dev, int devId) +{ + if (dev == NULL) { + return BAD_FUNC_ARG; + } + + (void)devId; + + /* clear device info */ + XMEMSET(&dev->qat, 0, sizeof(IntelQaDev)); + + if (g_cyInstances == NULL) { + printf("IntelQA not initialized\n"); + return ASYNC_INIT_E; + } + + if (devId >= g_numInstances) { + fprintf(stderr, "IntelQA: devId %d exceeds number of instances %u\n", + devId, g_numInstances); + return NO_VALID_DEVID; + } + + dev->qat.devId = devId; + dev->qat.handle = g_cyInstances[devId]; + +#ifdef QAT_DEBUG + printf("IntelQaOpen %p\n", dev); +#endif + +#ifdef QAT_USE_POLLING_THREAD + /* start polling thread */ + IntelQaStartPollingThread(dev); +#endif + + return 0; +} + +#if defined(QAT_ENABLE_CRYPTO) || defined(QAT_ENABLE_HASH) +static int IntelQaDevIsHash(WC_ASYNC_DEV* dev) +{ + int isHash = 0; + + switch (dev->marker) { + case 
WOLFSSL_ASYNC_MARKER_ARC4: + case WOLFSSL_ASYNC_MARKER_AES: + case WOLFSSL_ASYNC_MARKER_3DES: + case WOLFSSL_ASYNC_MARKER_RNG: + case WOLFSSL_ASYNC_MARKER_RSA: + case WOLFSSL_ASYNC_MARKER_ECC: + case WOLFSSL_ASYNC_MARKER_DH: + isHash = 0; + break; + case WOLFSSL_ASYNC_MARKER_HMAC: + case WOLFSSL_ASYNC_MARKER_SHA512: + case WOLFSSL_ASYNC_MARKER_SHA384: + case WOLFSSL_ASYNC_MARKER_SHA256: + case WOLFSSL_ASYNC_MARKER_SHA224: + case WOLFSSL_ASYNC_MARKER_SHA: + case WOLFSSL_ASYNC_MARKER_MD5: + case WOLFSSL_ASYNC_MARKER_SHA3: + isHash = 1; + break; + } + + return isHash; +} + +static IntelQaSymCtx* IntelQaGetSymCtx(WC_ASYNC_DEV* dev) +{ +#if defined(QAT_ENABLE_CRYPTO) && defined(QAT_ENABLE_HASH) + return IntelQaDevIsHash(dev) ? &dev->qat.op.hash.ctx : + &dev->qat.op.cipher.ctx; +#elif defined(QAT_ENABLE_CRYPTO) + return IntelQaDevIsHash(dev) ? NULL : &dev->qat.op.cipher.ctx; +#elif defined(QAT_ENABLE_HASH) + return IntelQaDevIsHash(dev) ? &dev->qat.op.hash.ctx : NULL; +#else + return NULL; +#endif +} + +static int IntelQaDevIsSym(WC_ASYNC_DEV* dev) +{ + int isSym = 0; + + switch (dev->marker) { + case WOLFSSL_ASYNC_MARKER_RNG: + case WOLFSSL_ASYNC_MARKER_RSA: + case WOLFSSL_ASYNC_MARKER_ECC: + case WOLFSSL_ASYNC_MARKER_DH: + isSym = 0; + break; + case WOLFSSL_ASYNC_MARKER_ARC4: + case WOLFSSL_ASYNC_MARKER_AES: + case WOLFSSL_ASYNC_MARKER_3DES: + case WOLFSSL_ASYNC_MARKER_HMAC: + case WOLFSSL_ASYNC_MARKER_SHA512: + case WOLFSSL_ASYNC_MARKER_SHA384: + case WOLFSSL_ASYNC_MARKER_SHA256: + case WOLFSSL_ASYNC_MARKER_SHA224: + case WOLFSSL_ASYNC_MARKER_SHA: + case WOLFSSL_ASYNC_MARKER_MD5: + case WOLFSSL_ASYNC_MARKER_SHA3: + isSym = 1; + break; + } + + return isSym; +} +#endif + +void IntelQaClose(WC_ASYNC_DEV* dev) +{ + if (dev) { + #ifdef QAT_DEBUG + printf("IntelQaClose %p\n", dev); + #endif + + #if defined(QAT_ENABLE_CRYPTO) || defined(QAT_ENABLE_HASH) + if (IntelQaDevIsSym(dev)) { + /* close any active session */ + IntelQaSymClose(dev, 1); + } + #endif + #if 
defined(QAT_ENABLE_RNG) + if (dev->marker == WOLFSSL_ASYNC_MARKER_RNG) { + IntelQaDrbgClose(dev); + } + #endif + + #ifdef QAT_USE_POLLING_THREAD + IntelQaStopPollingThread(dev); + #endif + + dev->qat.handle = NULL; + } +} + +void IntelQaDeInit(int devId) +{ + (void)devId; + + if (pthread_mutex_lock(&g_Hwlock) == 0) { + IntelQaHardwareStop(); + pthread_mutex_unlock(&g_Hwlock); + } +} + +int IntelQaDevCopy(WC_ASYNC_DEV* src, WC_ASYNC_DEV* dst) +{ + int ret = 0; +#if defined(QAT_ENABLE_HASH) || defined(QAT_ENABLE_CRYPTO) + IntelQaSymCtx *ctxSrc, *ctxDst; +#ifdef QAT_ENABLE_HASH + int isHash; +#endif +#endif + + if (src == NULL || dst == NULL) + return BAD_FUNC_ARG; + +#if defined(QAT_ENABLE_HASH) || defined(QAT_ENABLE_CRYPTO) + ctxDst = IntelQaGetSymCtx(dst); + ctxSrc = IntelQaGetSymCtx(src); + + if (ctxDst == NULL || ctxSrc == NULL) { + return ret; + } + +#ifdef QAT_DEBUG + printf("IntelQaDevCopy: dev %p->%p, symCtx %p (src %p), symCtxSize %d\n", + src, dst, ctxSrc->symCtx, ctxSrc->symCtxSrc, ctxSrc->symCtxSize); +#endif + + ctxDst->isCopy = 1; + /* force alloc/init on open for copy */ + ctxDst->symCtx = NULL; + ctxDst->isOpen = 0; + /* if src is not open, then don't set source ctx */ + if (!ctxSrc->isOpen) + ctxDst->symCtxSrc = NULL; + +#ifdef QAT_ENABLE_HASH + isHash = IntelQaDevIsHash(src); + if (isHash) { + /* need to duplicate tmpIn */ + if (src->qat.op.hash.tmpIn) { + dst->qat.op.hash.tmpIn = XMALLOC(src->qat.op.hash.tmpInBufSz, + src->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (dst->qat.op.hash.tmpIn == NULL) { + return MEMORY_E; + } + XMEMCPY(dst->qat.op.hash.tmpIn, src->qat.op.hash.tmpIn, + src->qat.op.hash.tmpInSz); + dst->qat.op.hash.tmpInSz = src->qat.op.hash.tmpInSz; + dst->qat.op.hash.tmpInBufSz = src->qat.op.hash.tmpInBufSz; + } + } +#endif /* QAT_ENABLE_HASH */ +#endif /* QAT_ENABLE_HASH || QAT_ENABLE_CRYPTO */ + + return ret; +} + +int IntelQaPoll(WC_ASYNC_DEV* dev) +{ + int ret = 0; + +#ifndef QAT_USE_POLLING_THREAD + CpaStatus status; + WOLF_EVENT* event 
= &dev->event; + +#ifdef QAT_USE_POLLING_CHECK + pthread_mutex_t* lock = &g_PollLock[dev->qat.devId]; + if (pthread_mutex_lock(lock) == 0) { + /* test if any other threads are polling */ + if (g_cyPolling[dev->qat.devId]) { + pthread_mutex_unlock(lock); + + /* return success even though its busy, caller will treat as + * WC_PENDING_E */ + return 0; + } + + g_cyPolling[dev->qat.devId] = 1; + pthread_mutex_unlock(lock); + } +#endif + + status = icp_sal_CyPollInstance(dev->qat.handle, QAT_POLL_RESP_QUOTA); + if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_RETRY) { + printf("IntelQa: Poll failure %d\n", status); + ret = -1; + } + +#ifndef WC_NO_ASYNC_THREADING + if (event->threadId == 0 || event->threadId == wc_AsyncThreadId()) +#endif + { + /* if event is done */ + if (dev->qat.ret != WC_PENDING_E) { + /* perform cleanup */ + IntelQaFreeFunc freeFunc = dev->qat.freeFunc; + #ifdef QAT_DEBUG + printf("IntelQaOpFree: Dev %p, FreeFunc %p\n", dev, freeFunc); + #endif + if (freeFunc) { + dev->qat.freeFunc = NULL; + freeFunc(dev); + } + + /* return response code */ + event->ret = dev->qat.ret; + } + } + +#ifdef QAT_USE_POLLING_CHECK + /* indicate we are done polling */ + if (pthread_mutex_lock(lock) == 0) { + g_cyPolling[dev->qat.devId] = 0; + pthread_mutex_unlock(lock); + } +#endif + +#else + (void)dev; +#endif + + return ret; +} + +static int IntelQaPollBlockRet(WC_ASYNC_DEV* dev, int ret_wait) +{ + int ret; + + do { + ret = IntelQaPoll(dev); + (void)ret; /* not used */ + + if (dev->qat.ret != ret_wait) { + break; + } + #ifndef WC_NO_ASYNC_THREADING + wc_AsyncThreadYield(); + #endif + } while (1); + ret = dev->qat.ret; + + return ret; +} + +int IntelQaGetCyInstanceCount(void) +{ + return g_numInstances; +} + +static WC_INLINE int IntelQaHandleCpaStatus(WC_ASYNC_DEV* dev, CpaStatus status, + int* ret, byte isAsync, void* callback, int* retryCount) +{ + int retry = 0; + + if (status == CPA_STATUS_SUCCESS) { + if (isAsync && callback) { + *ret = WC_PENDING_E; + } + 
else { + *ret = IntelQaPollBlockRet(dev, WC_PENDING_E); + } + } + else if (status == CPA_STATUS_RETRY) { + (*retryCount)++; + if ((*retryCount % (QAT_RETRY_LIMIT + 1)) == QAT_RETRY_LIMIT) { + #ifndef WC_NO_ASYNC_THREADING + wc_AsyncThreadYield(); + #else + wc_AsyncSleep(10); + #endif + } + retry = 1; + } + else { + *ret = ASYNC_OP_E; + } + + return retry; +} + +static WC_INLINE void IntelQaOpInit(WC_ASYNC_DEV* dev, IntelQaFreeFunc freeFunc) +{ + dev->qat.ret = WC_PENDING_E; + dev->qat.freeFunc = freeFunc; +} + + +/* -------------------------------------------------------------------------- */ +/* RSA Algo */ +/* -------------------------------------------------------------------------- */ + +#ifndef NO_RSA + +#ifdef WOLFSSL_KEY_GEN +static void IntelQaGenPrimeFree(WC_ASYNC_DEV* dev) +{ + CpaCyPrimeTestOpData* opData = dev->qat.op.prime_gen.opData; + CpaFlatBuffer* primeCandidates = dev->qat.op.prime_gen.primeCandidates; + byte* pMillerRabinData = dev->qat.op.prime_gen.pMillerRabinData; + + if (opData) { + XFREE(opData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.opData = NULL; + } + if (primeCandidates) { + int i; + for (i = 0; i < QAT_PRIME_GEN_TRIES; i++) { + if (primeCandidates[i].pData) { + XFREE(primeCandidates[i].pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + primeCandidates[i].pData = NULL; + primeCandidates[i].dataLenInBytes = 0; + } + } + XFREE(primeCandidates, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.primeCandidates = NULL; + } + if (pMillerRabinData) { + XFREE(pMillerRabinData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.pMillerRabinData = NULL; + } +} + +static void IntelQaGenPrimeCallback(void *pCallbackTag, + CpaStatus status, void *pOpData, CpaBoolean testPassed) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyPrimeTestOpData* opData = (CpaCyPrimeTestOpData*)pOpData; + int opIndex = 0; + int testStatus = QAT_PRIME_CHK_STATUS_FAILED; + + /* calculate index based on opDate pointer 
offset */ + if (dev->qat.op.prime_gen.opData && opData) { + byte* srcop = (byte*)dev->qat.op.prime_gen.opData; + byte* curop = (byte*)opData; + size_t offset; + if (srcop <= curop) { + offset = (size_t)curop - (size_t)srcop; + offset /= sizeof(CpaCyPrimeTestOpData); + if (offset < QAT_PRIME_GEN_TRIES) + opIndex = (int)offset; + } + } + +#ifdef QAT_DEBUG + printf("IntelQaGenPrimeCallback: dev %p, opIndex %d, status %d, " + "testPassed %d\n", dev, opIndex, status, testPassed); +#endif + + if (status == CPA_STATUS_SUCCESS) { + testStatus = (testPassed == CPA_TRUE) ? + QAT_PRIME_CHK_STATUS_PASSED : + QAT_PRIME_CHK_STATUS_FAILED; + } + + dev->qat.op.prime_gen.testStatus[opIndex] = testStatus; +} + +#ifndef QAT_PRIME_CHECK_TIMEOUT + /* times to wait in retry for operations */ + #define QAT_PRIME_CHECK_TIMEOUT 100000 +#endif +int IntelQaGenPrime(WC_ASYNC_DEV* dev, WC_RNG* rng, byte* primeBuf, + word32 primeSz) +{ + int ret = 0, retryCount = 0, i, attempt; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyPrimeTestOpData* opData = NULL; + CpaFlatBuffer* primeCandidates = NULL; + byte* pMillerRabinData = NULL; + CpaFlatBuffer millerRabins; + CpaCyPrimeTestCbFunc callback = IntelQaGenPrimeCallback; + CpaBoolean testPassed = CPA_FALSE; + + if (dev == NULL || rng == NULL || primeBuf == NULL || primeSz < 64) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaGenPrime: dev %p, sz %d\n", dev, primeSz); +#endif + + /* generate operation data and prime candidates */ + opData = (CpaCyPrimeTestOpData*)XMALLOC( + sizeof(CpaCyPrimeTestOpData) * QAT_PRIME_GEN_TRIES, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.opData = opData; + primeCandidates = (CpaFlatBuffer*)XMALLOC( + sizeof(CpaFlatBuffer) * QAT_PRIME_GEN_TRIES, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.primeCandidates = primeCandidates; + if (opData == NULL || primeCandidates == NULL) { + ret = MEMORY_E; goto exit; + } + XMEMSET(opData, 0, sizeof(CpaCyPrimeTestOpData) * 
QAT_PRIME_GEN_TRIES); + XMEMSET(primeCandidates, 0, sizeof(CpaFlatBuffer) * QAT_PRIME_GEN_TRIES); + for (i = 0; i < QAT_PRIME_GEN_TRIES; i++) { + primeCandidates[i].pData = (byte*)XMALLOC(primeSz, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + if (primeCandidates[i].pData == NULL) { + ret = MEMORY_E; goto exit; + } + primeCandidates[i].dataLenInBytes = primeSz; + } + + /* generate miller rabbin data */ + pMillerRabinData = (byte*)XMALLOC(primeSz * QAT_PRIME_GEN_MR_ROUNDS, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.prime_gen.pMillerRabinData = pMillerRabinData; + if (pMillerRabinData == NULL) { + ret = MEMORY_E; goto exit; + } + + ret = wc_RNG_GenerateBlock(rng, pMillerRabinData, + primeSz * QAT_PRIME_GEN_MR_ROUNDS); + if (ret != 0) + goto exit; + + /* make sure each miller rabbin number is greater than 1 */ + for (i = 0; i < QAT_PRIME_GEN_MR_ROUNDS; i++) { + word32 byteCheck = primeSz - 1; + byte* round = &pMillerRabinData[i * primeSz]; + if (round[byteCheck] <= 1) { + ret = wc_RNG_GenerateBlock(rng, &round[byteCheck], 1); + if (ret != 0) + goto exit; + if (round[byteCheck] <= 1) + round[byteCheck] += 2; + } + } + millerRabins.pData = pMillerRabinData; + millerRabins.dataLenInBytes = primeSz * QAT_PRIME_GEN_MR_ROUNDS; + + /* populate operation data */ + for (i = 0; i < QAT_PRIME_GEN_TRIES; i++) { + opData[i].primeCandidate = primeCandidates[i]; + opData[i].performGcdTest = CPA_TRUE; + opData[i].performFermatTest = CPA_TRUE; + opData[i].numMillerRabinRounds = QAT_PRIME_GEN_MR_ROUNDS; + opData[i].millerRabinRandomInput = millerRabins; + opData[i].performLucasTest = CPA_TRUE; + } + + /* store info needed for output */ + dev->qat.out = primeBuf; + dev->qat.outLen = primeSz; + IntelQaOpInit(dev, IntelQaGenPrimeFree); + + for (attempt = 0; attempt < QAT_PRIME_GEN_RETRIES; attempt++) { + int expectedDone, doneCount, primePassIndex, errorCount; + byte* primeData = primeCandidates[0].pData; + /* Generate primeCandidates */ + ret = wc_RNG_GenerateBlock(rng, primeData, 
primeSz); + if (ret != 0) + goto exit; + /* prime lower bound has the MSB set, set it in candidate */ + primeData[0] |= 0x80; + /* make candidate odd */ + primeData[primeSz-1] |= 0x01; + + /* create candidates that are incremented by two */ + for (i = 1; i < QAT_PRIME_GEN_TRIES; i++) { + word32 byteCheck = primeSz - 1; + primeData = primeCandidates[i].pData; + XMEMCPY(primeData, + primeCandidates[i-1].pData, + primeCandidates[i-1].dataLenInBytes); + + if (primeData[byteCheck] != 0xFF) { + primeData[byteCheck] += 2; + } + else { + /* if rollover occurred increment high order bytes */ + /* increment by 1 does not affect odd/even */ + int j; + for (j = primeSz - 2; j >= 0; j--) { + if (primeData[i] != 0xFF) { + primeData[i] += 1; + break; + } + else { + primeData[i] = 0; + } + } + } + } + + /* make sure miller rabbin must be less than prime candidate */ + for (i = 0; i < QAT_PRIME_GEN_MR_ROUNDS; i++) { + byte* mrData = pMillerRabinData + (i * primeSz); + int j; + for (j = 0; j < (int)primeSz; j++) { + /* if primeData is less then mrData, and primeData is not 0, + * then make mrData to be smaller than primeData, + * and we are done */ + if ((primeData[j] <= mrData[j]) && primeData[j] != 0) { + mrData[j] = primeData[j] - 1; + break; + } + /* if primeData is 0 then mrData needs to be zero and we check + * the next index */ + else if (primeData[j] == 0) { + mrData[j] = 0; + } + /* primeData is smaller than mrData so we are done */ + else { + break; + } + } + } + + /* setup and run prime tests */ + XMEMSET(dev->qat.op.prime_gen.testStatus, 0, + sizeof(dev->qat.op.prime_gen.testStatus)); + retryCount = 0; + expectedDone = 0; + errorCount = 0; + for (i = 0; i < QAT_PRIME_GEN_TRIES; i++) { + /* perform prime test */ + do { + status = cpaCyPrimeTest(dev->qat.handle, + callback, + dev, + &opData[i], + &testPassed); + if (status == CPA_STATUS_RETRY) { + IntelQaPoll(dev); + } + } while (status == CPA_STATUS_RETRY && + retryCount++ < QAT_PRIME_CHECK_TIMEOUT); + + /* handle error 
*/ + if (status != CPA_STATUS_SUCCESS) { + errorCount++; + break; + } + expectedDone++; + } + + /* use blocking polling, till all have completed */ + retryCount = 0; + primePassIndex = -1; + do { + IntelQaPoll(dev); + + /* tally results */ + doneCount = 0; + for (i = 0; i < expectedDone; i++) { + byte* testStatus = &dev->qat.op.prime_gen.testStatus[i]; + if (*testStatus != QAT_PRIME_CHK_STATUS_INIT) { + doneCount++; + /* Track index of first passed operation */ + if (primePassIndex == -1 && + *testStatus == QAT_PRIME_CHK_STATUS_PASSED) + primePassIndex = i; + else if (*testStatus == QAT_PRIME_CHK_STATUS_ERROR) + errorCount++; + } + } + + /* determine if all prime tests are done */ + if (doneCount == expectedDone) { + break; + } + + #ifndef WC_NO_ASYNC_THREADING + wc_AsyncThreadYield(); + #endif + } while (retryCount++ < QAT_PRIME_CHECK_TIMEOUT); + if (retryCount == QAT_PRIME_CHECK_TIMEOUT) { + #ifdef QAT_DEBUG + printf("cpaCyPrimeTest wait timeout! dev %p\n", dev); + #endif + errorCount++; + } + + /* check if we found a prime */ + if (primePassIndex != -1 && primePassIndex < QAT_PRIME_GEN_TRIES) { + ret = 0; + XMEMCPY(primeBuf, primeCandidates[primePassIndex].pData, primeSz); + break; /* done with success */ + } + + /* handle failure */ + if (errorCount != 0) { + ret = ASYNC_OP_E; + break; /* done with failure */ + } + + #ifdef QAT_DEBUG + printf("cpaCyPrimeTest attempt %d\n", attempt); + #endif + } /* for (attempt) */ + +exit: + + if (ret != 0) { + printf("cpaCyPrimeTest failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + IntelQaGenPrimeFree(dev); + + return ret; +} + + +static void IntelQaRsaKeyGenFree(WC_ASYNC_DEV* dev) +{ + CpaCyRsaPrivateKey* privateKey = &dev->qat.op.rsa_keygen.privateKey; + + /* This one is not owned by RsaKey */ + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep1.modulusN, dev->heap); + + /* free remaining on failures only */ + /* ownership of these buffers goes to RsaKey */ + if (dev->qat.ret != 0) { + CpaCyRsaKeyGenOpData* opData = &dev->qat.op.rsa_keygen.opData; + CpaCyRsaPublicKey* publicKey = &dev->qat.op.rsa_keygen.publicKey; + + IntelQaFreeFlatBuffer(&publicKey->modulusN, dev->heap); + IntelQaFreeFlatBuffer(&publicKey->publicExponentE, dev->heap); + + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep1.privateExponentD, + dev->heap); + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep2.prime1P, dev->heap); + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep2.prime2Q, dev->heap); + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep2.exponent1Dp, + dev->heap); + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep2.exponent2Dq, + dev->heap); + IntelQaFreeFlatBuffer(&privateKey->privateKeyRep2.coefficientQInv, + dev->heap); + + (void)opData; + } +} + +static void IntelQaRsaKeyGenCallback(void *pCallbackTag, + CpaStatus status, void *pKeyGenOpData, CpaCyRsaPrivateKey *pPrivateKey, + CpaCyRsaPublicKey *pPublicKey) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyRsaKeyGenOpData* opData = (CpaCyRsaKeyGenOpData*)pKeyGenOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaRsaKeyGenCallback: dev %p, status %d\n", dev, status); +#endif + + if (status == CPA_STATUS_SUCCESS) { + RsaKey* key = dev->qat.op.rsa_keygen.rsakey; + if (key) { + /* Populate RsaKey Parameters */ + /* raw BigInt buffer ownership is transferred to RsaKey */ + /* cleanup is handled in wc_FreeRsaKey */ + + /* modulusN */ + ret = IntelQaFlatBufferToBigInt( + &pPublicKey->modulusN, &key->n.raw); + if (ret == 0) + ret = 
mp_read_unsigned_bin(&key->n, + key->n.raw.buf, key->n.raw.len); + + /* publicExponentE */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPublicKey->publicExponentE, &key->e.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->e, + key->e.raw.buf, key->e.raw.len); + + /* privateExponentD */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep1.privateExponentD, &key->d.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->d, + key->d.raw.buf, key->d.raw.len); + + /* prime1P */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep2.prime1P, &key->p.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->p, + key->p.raw.buf, key->p.raw.len); + + /* prime2Q */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep2.prime2Q, &key->q.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->q, + key->q.raw.buf, key->q.raw.len); + + /* exponent1Dp */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep2.exponent1Dp, &key->dP.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->dP, + key->dP.raw.buf, key->dP.raw.len); + + /* exponent2Dq */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep2.exponent2Dq, &key->dQ.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->dQ, + key->dQ.raw.buf, key->dQ.raw.len); + + /* coefficientQInv */ + if (ret == 0) + ret = IntelQaFlatBufferToBigInt( + &pPrivateKey->privateKeyRep2.coefficientQInv, &key->u.raw); + if (ret == 0) + ret = mp_read_unsigned_bin(&key->u, + key->u.raw.buf, key->u.raw.len); + + /* mark as private key */ + if (ret == 0) + key->type = RSA_PRIVATE; + } + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaRsaKeyGen(WC_ASYNC_DEV* dev, RsaKey* key, int keyBits, long e, + WC_RNG* rng) +{ + int ret = 0, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaFlatBuffer prime1P; + CpaFlatBuffer prime2Q; + 
CpaCyRsaKeyGenOpData* opData = NULL; + CpaCyRsaPrivateKey* privateKey = NULL; + CpaCyRsaPublicKey* publicKey = NULL; + CpaCyRsaKeyGenCbFunc callback = IntelQaRsaKeyGenCallback; + int keySz = keyBits/8; + int primeSz = keySz/2; /* P & Q */ + + if (dev == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaRsaKeyGen: dev %p, keyBits %d\n", dev, keyBits); +#endif + + /* allocate and generate 2 primes (P/Q) */ + XMEMSET(&prime1P, 0, sizeof(prime1P)); + XMEMSET(&prime2Q, 0, sizeof(prime2Q)); + ret = IntelQaAllocFlatBuffer(&prime1P, primeSz, dev->heap); + if (ret == 0) + ret = IntelQaGenPrime(dev, rng, prime1P.pData, prime1P.dataLenInBytes); + if (ret == 0) + ret = IntelQaAllocFlatBuffer(&prime2Q, primeSz, dev->heap); + if (ret == 0) + ret = IntelQaGenPrime(dev, rng, prime2Q.pData, prime2Q.dataLenInBytes); + if (ret != 0) { + IntelQaFreeFlatBuffer(&prime1P, dev->heap); + IntelQaFreeFlatBuffer(&prime2Q, dev->heap); + return ret; + } + + /* setup key generation operation */ + opData = &dev->qat.op.rsa_keygen.opData; + publicKey = &dev->qat.op.rsa_keygen.publicKey; + privateKey = &dev->qat.op.rsa_keygen.privateKey; + + /* init variables; size each clear from the object being cleared (the op + * data here is CpaCyRsaKeyGenOpData, not CpaCyRsaDecryptOpData) */ + XMEMSET(opData, 0, sizeof(*opData)); + XMEMSET(publicKey, 0, sizeof(*publicKey)); + XMEMSET(privateKey, 0, sizeof(*privateKey)); + + /* mark the operation pending and hand P and Q to the private key before + * any further allocation, so that a failure below releases them through + * IntelQaRsaKeyGenFree() (which frees them while dev->qat.ret is + * non-zero) instead of leaking them */ + IntelQaOpInit(dev, IntelQaRsaKeyGenFree); + privateKey->privateKeyRep2.prime1P = prime1P; + privateKey->privateKeyRep2.prime2Q = prime2Q; + + /* setup private key */ + privateKey->version = CPA_CY_RSA_VERSION_TWO_PRIME; + privateKey->privateKeyRepType = CPA_CY_RSA_PRIVATE_KEY_REP_TYPE_2; + ret = IntelQaAllocFlatBuffer(&privateKey->privateKeyRep1.modulusN, + keySz, dev->heap); + ret += IntelQaAllocFlatBuffer(&privateKey->privateKeyRep1.privateExponentD, + keySz, dev->heap); + ret += IntelQaAllocFlatBuffer(&privateKey->privateKeyRep2.exponent1Dp, + primeSz, dev->heap); + ret += IntelQaAllocFlatBuffer(&privateKey->privateKeyRep2.exponent2Dq, + primeSz, dev->heap); + ret += IntelQaAllocFlatBuffer(&privateKey->privateKeyRep2.coefficientQInv, + primeSz, dev->heap); + if (ret != 0) { + ret
= MEMORY_E; goto exit; + } + + /* setup public key */ + ret = IntelQaAllocFlatBuffer(&publicKey->modulusN, keySz, dev->heap); + ret += IntelQaAllocFlatBuffer(&publicKey->publicExponentE, sizeof(long), + dev->heap); + if (ret != 0) { + ret = MEMORY_E; goto exit; + } + + /* populate exponent: the buffer is later parsed most significant byte + * first (mp_read_unsigned_bin in the keygen callback), so store it + * big-endian using the minimal length; e == 0 still yields length 0 */ + { + unsigned long eVal = (unsigned long)e; + unsigned long eTmp = eVal; + word32 eLen = 0, eIdx; + while (eTmp != 0) { + eLen++; + eTmp >>= 8; + } + /* eLen <= sizeof(long), the size allocated for publicExponentE */ + for (eIdx = 0; eIdx < eLen; eIdx++) { + publicKey->publicExponentE.pData[eLen - 1 - eIdx] = + (Cpa8U)((eVal >> (8 * eIdx)) & 0xFF); + } + publicKey->publicExponentE.dataLenInBytes = eLen; + } + + /* setup operation data */ + opData->version = CPA_CY_RSA_VERSION_TWO_PRIME; + opData->privateKeyRepType = CPA_CY_RSA_PRIVATE_KEY_REP_TYPE_2; + opData->modulusLenInBytes = keySz; + opData->prime1P = privateKey->privateKeyRep2.prime1P; + opData->prime2Q = privateKey->privateKeyRep2.prime2Q; + opData->publicExponentE = publicKey->publicExponentE; + + /* parameters required for output callback */ + dev->qat.op.rsa_keygen.rsakey = key; + + /* perform RSA key generation */ + do { + status = cpaCyRsaGenKey(dev->qat.handle, + callback, + dev, + opData, + privateKey, + publicKey); + } while (IntelQaHandleCpaStatus(dev, status, &ret, 0, + callback, &retryCount)); + +exit: + + if (ret != 0) { + printf("cpaCyRsaGenKey failed!
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + IntelQaRsaKeyGenFree(dev); + + return ret; +} +#endif /* WOLFSSL_KEY_GEN */ + +static void IntelQaRsaPrivateFree(WC_ASYNC_DEV* dev) +{ + CpaCyRsaDecryptOpData* opData = &dev->qat.op.rsa_priv.opData; + CpaFlatBuffer *outBuf = &dev->qat.op.rsa_priv.outBuf; + + if (opData) { + if (opData->inputData.pData) { + XFREE(opData->inputData.pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->inputData.pData = NULL; + } + if (opData->pRecipientPrivateKey) { + XMEMSET(opData->pRecipientPrivateKey, 0, + sizeof(CpaCyRsaPrivateKey)); + } + XMEMSET(opData, 0, sizeof(CpaCyRsaDecryptOpData)); + } + if (outBuf) { + if (outBuf->pData) { + XFREE(outBuf->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + outBuf->pData = NULL; + } + XMEMSET(outBuf, 0, sizeof(CpaFlatBuffer)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaRsaPrivateCallback(void *pCallbackTag, + CpaStatus status, void *pOpdata, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyRsaDecryptOpData* opData = (CpaCyRsaDecryptOpData*)pOpdata; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaRsaPrivateCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + + if (dev->qat.outLenPtr) { + if (pOut->dataLenInBytes > *dev->qat.outLenPtr) { + pOut->dataLenInBytes = *dev->qat.outLenPtr; + } + *dev->qat.outLenPtr = pOut->dataLenInBytes; + } + + /* return data */ + if (dev->qat.out && dev->qat.out != pOut->pData) { + XMEMCPY(dev->qat.out, pOut->pData, pOut->dataLenInBytes); + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaRsaPrivate(WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + WC_BIGINT* d, WC_BIGINT* n, + byte* out, word32* outLen) +{ + int ret = 0, 
retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyRsaPrivateKey* privateKey = NULL; + CpaCyRsaDecryptOpData* opData = NULL; + CpaFlatBuffer* outBuf = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaRsaPrivateCallback; + + if (dev == NULL || in == NULL || inLen == 0 || out == NULL || + outLen == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaRsaPrivate: dev %p, in %p (%d), out %p\n", + dev, in, inLen, out); +#endif + + /* setup operation */ + opData = &dev->qat.op.rsa_priv.opData; + outBuf = &dev->qat.op.rsa_priv.outBuf; + privateKey = &dev->qat.op.rsa_priv.privateKey; + + /* init variables */ + XMEMSET(opData, 0, sizeof(CpaCyRsaDecryptOpData)); + XMEMSET(outBuf, 0, sizeof(CpaFlatBuffer)); + XMEMSET(privateKey, 0, sizeof(CpaCyRsaPrivateKey)); + + /* assign buffers */ + ret = IntelQaBigIntToFlatBuffer(d, + &privateKey->privateKeyRep1.privateExponentD); + ret += IntelQaBigIntToFlatBuffer(n, &privateKey->privateKeyRep1.modulusN); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* make sure output length is at least modulus len */ + if (*outLen < n->len) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* make sure outLen is not more than inLen */ + if (*outLen > inLen) { + *outLen = inLen; + } + + opData->inputData.dataLenInBytes = inLen; + opData->inputData.pData = XREALLOC((byte*)in, inLen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + outBuf->dataLenInBytes = *outLen; + outBuf->pData = XREALLOC(out, *outLen, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + + /* check allocations */ + if (opData->inputData.pData == NULL || outBuf->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* setup private key */ + privateKey->version = CPA_CY_RSA_VERSION_TWO_PRIME; + privateKey->privateKeyRepType = CPA_CY_RSA_PRIVATE_KEY_REP_TYPE_1; + + /* assign private key to private op data */ + opData->pRecipientPrivateKey = privateKey; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLenPtr = outLen; + IntelQaOpInit(dev, 
IntelQaRsaPrivateFree); + + /* perform RSA decrypt */ + do { + status = cpaCyRsaDecrypt(dev->qat.handle, + callback, + dev, + opData, + outBuf); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_RSA_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyRsaDecrypt failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaRsaPrivateFree(dev); + + return ret; +} + +int IntelQaRsaCrtPrivate(WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + WC_BIGINT* p, WC_BIGINT* q, + WC_BIGINT* dP, WC_BIGINT* dQ, + WC_BIGINT* qInv, + byte* out, word32* outLen) +{ + int ret = 0, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyRsaPrivateKey* privateKey = NULL; + CpaCyRsaDecryptOpData* opData = NULL; + CpaFlatBuffer* outBuf = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaRsaPrivateCallback; + + if (dev == NULL || in == NULL || inLen == 0 || out == NULL || + outLen == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaRsaCrtPrivate: dev %p, in %p (%d), out %p\n", + dev, in, inLen, out); +#endif + + /* setup operation */ + opData = &dev->qat.op.rsa_priv.opData; + outBuf = &dev->qat.op.rsa_priv.outBuf; + privateKey = &dev->qat.op.rsa_priv.privateKey; + + /* init variables */ + XMEMSET(opData, 0, sizeof(CpaCyRsaDecryptOpData)); + XMEMSET(outBuf, 0, sizeof(CpaFlatBuffer)); + XMEMSET(privateKey, 0, sizeof(CpaCyRsaPrivateKey)); + + /* assign buffers */ + ret = IntelQaBigIntToFlatBuffer(p, &privateKey->privateKeyRep2.prime1P); + ret += IntelQaBigIntToFlatBuffer(q, &privateKey->privateKeyRep2.prime2Q); + ret += IntelQaBigIntToFlatBuffer(dP, + &privateKey->privateKeyRep2.exponent1Dp); + ret += IntelQaBigIntToFlatBuffer(dQ, + &privateKey->privateKeyRep2.exponent2Dq); + ret += IntelQaBigIntToFlatBuffer(qInv, + &privateKey->privateKeyRep2.coefficientQInv); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* make sure output length is at 
least p len */ + if (*outLen < p->len) + return BAD_FUNC_ARG; + + /* make sure outLen is not more than inLen */ + if (*outLen > inLen) + *outLen = inLen; + + opData->inputData.dataLenInBytes = inLen; + opData->inputData.pData = XREALLOC((byte*)in, inLen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + outBuf->dataLenInBytes = *outLen; + outBuf->pData = XREALLOC(out, *outLen, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + + /* check allocations */ + if (opData->inputData.pData == NULL || outBuf->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* setup private key */ + privateKey->version = CPA_CY_RSA_VERSION_TWO_PRIME; + privateKey->privateKeyRepType = CPA_CY_RSA_PRIVATE_KEY_REP_TYPE_2; + + /* assign private key to private op data */ + opData->pRecipientPrivateKey = privateKey; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLenPtr = outLen; + IntelQaOpInit(dev, IntelQaRsaPrivateFree); + + /* perform RSA CRT decrypt */ + do { + status = cpaCyRsaDecrypt(dev->qat.handle, + callback, + dev, + opData, + outBuf); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_RSA_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyRsaDecrypt CRT failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaRsaPrivateFree(dev); + + return ret; +} + +static void IntelQaRsaPublicFree(WC_ASYNC_DEV* dev) +{ + CpaCyRsaEncryptOpData* opData = &dev->qat.op.rsa_pub.opData; + CpaFlatBuffer* outBuf = &dev->qat.op.rsa_pub.outBuf; + + if (opData) { + if (opData->inputData.pData) { + XFREE(opData->inputData.pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->inputData.pData = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCyRsaEncryptOpData)); + } + if (outBuf) { + if (outBuf->pData) { + XFREE(outBuf->pData, dev, DYNAMIC_TYPE_ASYNC_NUMA64); + outBuf->pData = NULL; + } + XMEMSET(outBuf, 0, sizeof(CpaFlatBuffer)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaRsaPublicCallback(void *pCallbackTag, + CpaStatus status, void *pOpdata, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyRsaEncryptOpData* opData = (CpaCyRsaEncryptOpData*)pOpdata; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaRsaPublicCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + if (dev->qat.outLenPtr) { + if (pOut->dataLenInBytes > *dev->qat.outLenPtr) { + pOut->dataLenInBytes = *dev->qat.outLenPtr; + } + *dev->qat.outLenPtr = pOut->dataLenInBytes; + } + + /* return data */ + if (dev->qat.out && dev->qat.out != pOut->pData) { + XMEMCPY(dev->qat.out, pOut->pData, pOut->dataLenInBytes); + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaRsaPublic(WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + WC_BIGINT* e, WC_BIGINT* n, + byte* out, word32* outLen) +{ + int ret = 0, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyRsaPublicKey* publicKey = NULL; + CpaCyRsaEncryptOpData* opData = NULL; + 
CpaFlatBuffer* outBuf = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaRsaPublicCallback; + + if (dev == NULL || in == NULL || inLen == 0 || out == NULL || + outLen == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaRsaPublic: dev %p, in %p (%d), out %p\n", + dev, in, inLen, out); +#endif + + /* setup operation */ + opData = &dev->qat.op.rsa_pub.opData; + outBuf = &dev->qat.op.rsa_pub.outBuf; + publicKey = &dev->qat.op.rsa_pub.publicKey; + + /* init variables */ + XMEMSET(opData, 0, sizeof(CpaCyRsaEncryptOpData)); + XMEMSET(outBuf, 0, sizeof(CpaFlatBuffer)); + XMEMSET(publicKey, 0, sizeof(CpaCyRsaPublicKey)); + + /* assign buffers */ + ret = IntelQaBigIntToFlatBuffer(e, &publicKey->publicExponentE); + ret += IntelQaBigIntToFlatBuffer(n, &publicKey->modulusN); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* make sure output length is at least modulus len */ + if (*outLen < n->len) + return BAD_FUNC_ARG; + + /* make sure output len is set to modulus size */ + *outLen = n->len; + + opData->inputData.dataLenInBytes = inLen; + opData->inputData.pData = XREALLOC((byte*)in, inLen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + outBuf->dataLenInBytes = *outLen; + outBuf->pData = XREALLOC(out, *outLen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA64); + + /* check allocations */ + if (opData->inputData.pData == NULL || outBuf->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* assign public key to public op data */ + opData->pPublicKey = publicKey; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLenPtr = outLen; + IntelQaOpInit(dev, IntelQaRsaPublicFree); + + /* perform RSA encrypt */ + do { + status = cpaCyRsaEncrypt(dev->qat.handle, + callback, + dev, + opData, + outBuf); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_RSA_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyRsaEncrypt failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaRsaPublicFree(dev); + + return ret; +} + +static void IntelQaRsaModExpFree(WC_ASYNC_DEV* dev) +{ + CpaCyLnModExpOpData* opData = &dev->qat.op.rsa_modexp.opData; + CpaFlatBuffer* target = &dev->qat.op.rsa_modexp.target; + + if (opData) { + if (opData->base.pData) { + XFREE(opData->base.pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->base.pData = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCyLnModExpOpData)); + } + if (target) { + if (target->pData) + XFREE(target->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + XMEMSET(target, 0, sizeof(CpaFlatBuffer)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaRsaModExpCallback(void *pCallbackTag, + CpaStatus status, void *pOpdata, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyLnModExpOpData* opData = (CpaCyLnModExpOpData*)pOpdata; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaRsaModExpCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + if (dev->qat.outLenPtr) { + if (pOut->dataLenInBytes > *dev->qat.outLenPtr) { + pOut->dataLenInBytes = *dev->qat.outLenPtr; + } + *dev->qat.outLenPtr = pOut->dataLenInBytes; + } + + /* return data */ + if (dev->qat.out && dev->qat.out != pOut->pData) { + XMEMCPY(dev->qat.out, pOut->pData, pOut->dataLenInBytes); + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaRsaExptMod(WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + WC_BIGINT* e, WC_BIGINT* n, + byte* out, word32* outLen) +{ + int ret = 0, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyLnModExpOpData* opData = NULL; + CpaFlatBuffer* target = NULL; + CpaCyGenFlatBufCbFunc callback = 
IntelQaRsaModExpCallback; + + if (dev == NULL || in == NULL || inLen == 0 || out == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaRsaExptMod: dev %p, in %p (%d), out %p\n", + dev, in, inLen, out); +#endif + + /* setup operation */ + opData = &dev->qat.op.rsa_modexp.opData; + target = &dev->qat.op.rsa_modexp.target; + + /* init variables */ + XMEMSET(opData, 0, sizeof(CpaCyLnModExpOpData)); + XMEMSET(target, 0, sizeof(CpaFlatBuffer)); + + /* assign buffers */ + ret = IntelQaBigIntToFlatBuffer(e, &opData->exponent); + ret += IntelQaBigIntToFlatBuffer(n, &opData->modulus); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + opData->base.dataLenInBytes = inLen; + opData->base.pData = XREALLOC((byte*)in, inLen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + target->dataLenInBytes = *outLen; + target->pData = XREALLOC(out, *outLen, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + + /* check allocations */ + if (opData->base.pData == NULL || target->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLenPtr = outLen; + IntelQaOpInit(dev, IntelQaRsaModExpFree); + + /* make modexp call async */ + do { + status = cpaCyLnModExp(dev->qat.handle, + callback, + dev, + opData, + target); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_EXPTMOD_ASYNC, + callback, &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyLnModExp failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaRsaModExpFree(dev); + + return ret; +} +#endif /* !NO_RSA */ + + +/* -------------------------------------------------------------------------- */ +/* Symmetric Algos */ +/* -------------------------------------------------------------------------- */ + +#if defined(QAT_ENABLE_CRYPTO) || defined(QAT_ENABLE_HASH) + +static int IntelQaSymOpen(WC_ASYNC_DEV* dev, CpaCySymSessionSetupData* setup, + CpaCySymCbFunc callback) +{ + int ret = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U sessionCtxSize = 0; + IntelQaSymCtx* ctx; + + /* arg check */ + if (dev == NULL || setup == NULL) { + return BAD_FUNC_ARG; + } + + ctx = IntelQaGetSymCtx(dev); + + /* Determine size of session context to allocate - use max size */ + status = cpaCySymSessionCtxGetSize(dev->qat.handle, setup, &sessionCtxSize); + + if (status != CPA_STATUS_SUCCESS || (ctx->symCtxSize > 0 && + ctx->symCtxSize > sessionCtxSize)) { + printf("Symmetric context size error %d! Buf %d, Exp %d\n", + status, ctx->symCtxSize, sessionCtxSize); + return ASYNC_OP_E; + } + + /* make sure session context is allocated */ + if (ctx->symCtx == NULL) { + /* Allocate session context */ + ctx->symCtx = XMALLOC(sessionCtxSize, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA64); + if (ctx->symCtx == NULL) { + return MEMORY_E; + } + } + ctx->symCtxSize = sessionCtxSize; + + if (!ctx->isOpen) { + ctx->isOpen = 1; + + #ifdef QAT_DEBUG + printf("IntelQaSymOpen: InitSession dev %p, symCtx %p\n", + dev, ctx->symCtx); + #endif + + /* open symmetric session */ + status = cpaCySymInitSession(dev->qat.handle, callback, setup, + ctx->symCtx); + if (status != CPA_STATUS_SUCCESS) { + printf("cpaCySymInitSession failed! 
dev %p, status %d\n", + dev, status); + XFREE(ctx->symCtx, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + ctx->symCtx = NULL; + return ASYNC_INIT_E; + } + } + + if (ctx->symCtxSrc == NULL) { + ctx->symCtxSrc = ctx->symCtx; + } + +#ifdef QAT_DEBUG + printf("IntelQaSymOpen: dev %p, symCtx %p (src %p), symCtxSize %d, " + "isCopy %d, isOpen %d\n", + dev, ctx->symCtx, ctx->symCtxSrc, ctx->symCtxSize, ctx->isCopy, + ctx->isOpen); +#endif + + return ret; +} + +static int IntelQaSymClose(WC_ASYNC_DEV* dev, int doFree) +{ + int ret = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + IntelQaSymCtx* ctx; +#ifdef QAT_ENABLE_HASH + int isHash; +#endif + + if (dev == NULL) { + return BAD_FUNC_ARG; + } + + ctx = IntelQaGetSymCtx(dev); + +#ifdef QAT_ENABLE_HASH + isHash = IntelQaDevIsHash(dev); +#endif + +#ifdef QAT_DEBUG + printf("IntelQaSymClose: dev %p, ctx %p, symCtx %p (src %p), " + "symCtxSize %d, isCopy %d, isOpen %d, doFree %d\n", + dev, ctx, ctx->symCtx, ctx->symCtxSrc, ctx->symCtxSize, ctx->isCopy, + ctx->isOpen, doFree); +#endif + + if (ctx->symCtx == ctx->symCtxSrc && ctx->symCtx != NULL) { + if (ctx->isOpen) { + ctx->isOpen = 0; + #ifdef QAT_DEBUG + printf("IntelQaSymClose: RemoveSession dev %p, symCtx %p\n", + dev, ctx->symCtx); + #endif + status = cpaCySymRemoveSession(dev->qat.handle, ctx->symCtx); + if (status == CPA_STATUS_RETRY) { + printf("cpaCySymRemoveSession retry!\n"); + /* treat this as error, since session should not be active */ + ret = ASYNC_OP_E; + } + else if (status != CPA_STATUS_SUCCESS) { + printf("cpaCySymRemoveSession failed! 
status %d\n", status); + ret = ASYNC_OP_E; + } + } + } + + if (doFree) { + XFREE(ctx->symCtx, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + ctx->symCtx = NULL; + ctx->symCtxSrc = NULL; + ctx->symCtxSize = 0; + } + +#ifdef QAT_ENABLE_HASH + /* make sure hash temp buffer is cleared */ + + if (isHash) { + if (dev->qat.op.hash.tmpIn) { + XFREE(dev->qat.op.hash.tmpIn, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + } +#endif + + return ret; +} + +#endif /* QAT_ENABLE_CRYPTO || QAT_ENABLE_HASH */ + + +/* -------------------------------------------------------------------------- */ +/* AES/DES Algo */ +/* -------------------------------------------------------------------------- */ + +#ifdef QAT_ENABLE_CRYPTO +static void IntelQaSymCipherFree(WC_ASYNC_DEV* dev) +{ + IntelQaSymCtx* ctx = &dev->qat.op.cipher.ctx; + CpaCySymOpData* opData = &ctx->opData; + CpaBufferList* pDstBuffer = &dev->qat.op.cipher.bufferList; + + if (opData) { + if (opData->pAdditionalAuthData) { + XFREE(opData->pAdditionalAuthData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + opData->pAdditionalAuthData = NULL; + } + if (opData->pIv) { + XFREE(opData->pIv, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->pIv = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + } + if (pDstBuffer) { + if (pDstBuffer->pBuffers) { + if (pDstBuffer->pBuffers->pData) { + XFREE(pDstBuffer->pBuffers->pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pDstBuffer->pBuffers->pData = NULL; + } + XMEMSET(pDstBuffer->pBuffers, 0, sizeof(CpaFlatBuffer)); + } + if (pDstBuffer->pPrivateMetaData) { + XFREE(pDstBuffer->pPrivateMetaData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pDstBuffer->pPrivateMetaData = NULL; + } + XMEMSET(pDstBuffer, 0, sizeof(CpaBufferList)); + } + + /* close and free sym context */ + IntelQaSymClose(dev, 1); + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLen = 0; +#ifndef NO_AES + dev->qat.op.cipher.authTag = NULL; + dev->qat.op.cipher.authTagSz = 0; +#endif +} + +static void 
IntelQaSymCipherCallback(void *pCallbackTag, CpaStatus status, + const CpaCySymOp operationType, void *pOpData, CpaBufferList *pDstBuffer, + CpaBoolean verifyResult) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCySymOpData* opData = (CpaCySymOpData*)pOpData; + int ret = ASYNC_OP_E; + + (void)opData; + (void)verifyResult; + (void)pDstBuffer; + (void)operationType; + +#ifdef QAT_DEBUG + printf("IntelQaSymCipherCallback: dev %p, type %d, status %d, " + "verifyResult %d, num %d\n", + dev, operationType, status, verifyResult, pDstBuffer->numBuffers); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + if (pDstBuffer && pDstBuffer->numBuffers >= 1) { + /* check length */ + word32 outLen = pDstBuffer->pBuffers->dataLenInBytes; + + if (outLen > dev->qat.outLen) { + outLen = dev->qat.outLen; + } + + /* return data */ + if (dev->qat.out && dev->qat.out != pDstBuffer->pBuffers->pData) { + XMEMCPY(dev->qat.out, pDstBuffer->pBuffers->pData, outLen); + } + + /* capture IV for next call */ + if (dev->qat.op.cipher.iv && dev->qat.op.cipher.ivSz > 0) { + word32 ivSz = dev->qat.op.cipher.ivSz; + if (ivSz > outLen) + ivSz = outLen; + /* copy last block */ + XMEMCPY(dev->qat.op.cipher.iv, + &pDstBuffer->pBuffers->pData[outLen - ivSz], + ivSz); + } + + #ifndef NO_AES + /* return authTag */ + if (dev->qat.op.cipher.authTag && + dev->qat.op.cipher.authTagSz > 0) { + word32 authTagLen = dev->qat.op.cipher.authTagSz; + + /* check authtag length */ + if (authTagLen + outLen > pDstBuffer->pBuffers->dataLenInBytes) + authTagLen = pDstBuffer->pBuffers->dataLenInBytes - outLen; + + XMEMCPY(dev->qat.op.cipher.authTag, + pDstBuffer->pBuffers->pData + outLen, authTagLen); + } + #endif + + /* return length */ + dev->qat.outLen = outLen; + + /* mark event result */ + ret = 0; /* success */ + } + } + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +static int IntelQaSymCipher(WC_ASYNC_DEV* dev, byte* out, const byte* in, + word32 
inOutSz, const byte* key, word32 keySz, byte* iv, word32 ivSz, + CpaCySymOp symOperation, CpaCySymCipherAlgorithm cipherAlgorithm, + CpaCySymCipherDirection cipherDirection, + + /* for auth ciphers (CCM or GCM) */ + CpaCySymHashAlgorithm hashAlgorithm, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCySymOpData* opData = NULL; + CpaCySymSessionSetupData setup; + const Cpa32U numBuffers = 1; + CpaBufferList* bufferList = NULL; + CpaFlatBuffer* flatBuffer = NULL; + CpaCySymCbFunc callback = IntelQaSymCipherCallback; + Cpa8U* ivBuf = NULL; + Cpa8U* dataBuf = NULL; + Cpa32U dataLen = inOutSz; + Cpa8U* metaBuf = NULL; + Cpa32U metaSize = 0; + Cpa8U* authInBuf = NULL; + Cpa32U authInSzAligned = authInSz; + IntelQaSymCtx* ctx; + +#ifdef QAT_DEBUG + printf("IntelQaSymCipher: dev %p, out %p, in %p, inOutSz %d, op %d, " + "algo %d, dir %d, hash %d\n", + dev, out, in, inOutSz, symOperation, cipherAlgorithm, cipherDirection, + hashAlgorithm); +#endif + + /* check args */ + if (out == NULL || in == NULL || inOutSz == 0 || + key == NULL || keySz == 0 || iv == NULL || ivSz == 0) { + return BAD_FUNC_ARG; + } + if (hashAlgorithm != CPA_CY_SYM_HASH_NONE && + (authTag == NULL || authTagSz == 0)) { + return BAD_FUNC_ARG; + } + + /* get meta size */ + status = cpaCyBufferListGetMetaSize(dev->qat.handle, numBuffers, &metaSize); + if (status != CPA_STATUS_SUCCESS && metaSize <= 0) { + ret = BUFFER_E; goto exit; + } + + /* if authtag provided then it will be appended to end of input */ + if (authTag && authTagSz > 0) { + dataLen += authTagSz; + } + + /* allocate buffers */ + ctx = &dev->qat.op.cipher.ctx; + opData = &ctx->opData; + bufferList = &dev->qat.op.cipher.bufferList; + flatBuffer = &dev->qat.op.cipher.flatBuffer; + metaBuf = XMALLOC(metaSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dataBuf = XREALLOC((byte*)in, dataLen, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + ivBuf = 
XREALLOC((byte*)iv, AES_BLOCK_SIZE, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + /* check allocations */ + if (ivBuf == NULL || metaBuf == NULL || dataBuf == NULL) { + ret = MEMORY_E; goto exit; + } + + /* AAD */ + if (authIn && authInSz > 0) { + /* make sure AAD is block aligned */ + if (authInSzAligned % AES_BLOCK_SIZE) { + authInSzAligned += AES_BLOCK_SIZE - + (authInSzAligned % AES_BLOCK_SIZE); + } + + authInBuf = XREALLOC((byte*)authIn, authInSzAligned, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + if (authInBuf == NULL) { + ret = MEMORY_E; goto exit; + } + /* clear remainder */ + XMEMSET(authInBuf + authInSz, 0, authInSzAligned - authInSz); + } + + /* init buffers */ + XMEMSET(&setup, 0, sizeof(CpaCySymSessionSetupData)); + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + XMEMSET(bufferList, 0, sizeof(CpaBufferList)); + XMEMSET(flatBuffer, 0, sizeof(CpaFlatBuffer)); + XMEMSET(metaBuf, 0, metaSize); + + bufferList->pBuffers = flatBuffer; + bufferList->numBuffers = numBuffers; + bufferList->pPrivateMetaData = metaBuf; + flatBuffer->dataLenInBytes = dataLen; + flatBuffer->pData = dataBuf; + + /* setup */ + setup.sessionPriority = CPA_CY_PRIORITY_NORMAL; + setup.symOperation = symOperation; + setup.cipherSetupData.cipherAlgorithm = cipherAlgorithm; + setup.cipherSetupData.cipherKeyLenInBytes = keySz; + setup.cipherSetupData.pCipherKey = (byte*)key; + setup.cipherSetupData.cipherDirection = cipherDirection; + + /* setup auth ciphers */ + if (hashAlgorithm != CPA_CY_SYM_HASH_NONE) { + setup.algChainOrder = + (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) ? 
+ CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH : + CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER; + + setup.hashSetupData.hashAlgorithm = hashAlgorithm; + setup.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; + setup.hashSetupData.digestResultLenInBytes = authTagSz; + setup.hashSetupData.authModeSetupData.aadLenInBytes = authInSz; + + setup.digestIsAppended = CPA_TRUE; + } + + /* open session */ + ret = IntelQaSymOpen(dev, &setup, callback); + if (ret != 0) { + goto exit; + } + + /* operation data */ + opData->sessionCtx = ctx->symCtx; + opData->packetType = CPA_CY_SYM_PACKET_TYPE_FULL; + opData->pIv = ivBuf; + opData->ivLenInBytes = ivSz; + opData->cryptoStartSrcOffsetInBytes = 0; + opData->messageLenToCipherInBytes = inOutSz; + if (authIn && authInSz > 0) { + opData->pAdditionalAuthData = authInBuf; + } + if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT) { + if (authTag && authTagSz > 0) { + /* append digest to end of data buffer */ + XMEMCPY(flatBuffer->pData + inOutSz, authTag, authTagSz); + } + } + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLen = inOutSz; + /* optional return of next IV */ + if (cipherAlgorithm != CPA_CY_SYM_CIPHER_AES_GCM && iv) { + if (ivSz > inOutSz) + ivSz = inOutSz; + if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) { + /* capture this on the callback */ + dev->qat.op.cipher.iv = iv; + dev->qat.op.cipher.ivSz = ivSz; + } + else { + /* capture last block of input as next IV */ + XMEMCPY(iv, &in[inOutSz - ivSz], ivSz); + } + } + if (cipherDirection == CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT) { + dev->qat.op.cipher.authTag = authTag; + dev->qat.op.cipher.authTagSz = authTagSz; + } + else { + dev->qat.op.cipher.authTag = NULL; + dev->qat.op.cipher.authTagSz = 0; + } + IntelQaOpInit(dev, IntelQaSymCipherFree); + + /* perform symmetric AES operation async */ + /* use same buffer list for in-place operation */ + do { + status = cpaCySymPerformOp(dev->qat.handle, + dev, + opData, + bufferList, + 
bufferList, + NULL); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_CIPHER_ASYNC, + callback, &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCySymPerformOp Cipher failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaSymCipherFree(dev); + + return ret; +} + +#ifdef HAVE_AES_CBC +int IntelQaSymAesCbcEncrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, iv, ivSz, + CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_AES_CBC, + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT, + CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0); +} + +#ifdef HAVE_AES_DECRYPT +int IntelQaSymAesCbcDecrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, iv, ivSz, + CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_AES_CBC, + CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT, + CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AES_CBC */ + + +#ifdef HAVE_AESGCM +int IntelQaSymAesGcmEncrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, (byte*)iv, ivSz, + CPA_CY_SYM_OP_ALGORITHM_CHAINING, CPA_CY_SYM_CIPHER_AES_GCM, + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT, + CPA_CY_SYM_HASH_AES_GCM, authTag, authTagSz, authIn, authInSz); +} +#ifdef HAVE_AES_DECRYPT +int IntelQaSymAesGcmDecrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, 
(byte*)iv, ivSz, + CPA_CY_SYM_OP_ALGORITHM_CHAINING, CPA_CY_SYM_CIPHER_AES_GCM, + CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT, + CPA_CY_SYM_HASH_AES_GCM, (byte*)authTag, authTagSz, authIn, authInSz); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AESGCM */ + +#ifndef NO_DES3 +int IntelQaSymDes3CbcEncrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, iv, ivSz, + CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_3DES_CBC, + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT, + CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0); +} + +int IntelQaSymDes3CbcDecrypt(WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz) +{ + return IntelQaSymCipher(dev, out, in, sz, + key, keySz, iv, ivSz, + CPA_CY_SYM_OP_CIPHER, CPA_CY_SYM_CIPHER_3DES_CBC, + CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT, + CPA_CY_SYM_HASH_NONE, NULL, 0, NULL, 0); +} +#endif /* !NO_DES3 */ + +#endif /* QAT_ENABLE_CRYPTO */ + + +/* -------------------------------------------------------------------------- */ +/* Hashing Algo */ +/* -------------------------------------------------------------------------- */ + +#ifdef QAT_ENABLE_HASH +static int IntelQaSymHashGetInfo(CpaCySymHashAlgorithm hashAlgorithm, + Cpa32U* pBlockSize, Cpa32U* pDigestSize) +{ + Cpa32U blockSize = 0; + Cpa32U digestSize = 0; + + switch(hashAlgorithm) { + case CPA_CY_SYM_HASH_MD5: + #ifndef NO_MD5 + blockSize = WC_MD5_BLOCK_SIZE; + digestSize = WC_MD5_DIGEST_SIZE; + #endif + break; + case CPA_CY_SYM_HASH_SHA1: + #ifndef NO_SHA + blockSize = WC_SHA_BLOCK_SIZE; + digestSize = WC_SHA_DIGEST_SIZE; + #endif + break; + case CPA_CY_SYM_HASH_SHA224: + #ifdef WOLFSSL_SHA224 + blockSize = WC_SHA224_BLOCK_SIZE; + digestSize = WC_SHA224_DIGEST_SIZE; + #endif + break; + case CPA_CY_SYM_HASH_SHA256: + #ifndef NO_SHA256 + blockSize = WC_SHA256_BLOCK_SIZE; + digestSize = WC_SHA256_DIGEST_SIZE; + #endif + 
break; + case CPA_CY_SYM_HASH_SHA384: + #if defined(WOLFSSL_SHA512) && defined(WOLFSSL_SHA384) + blockSize = WC_SHA384_BLOCK_SIZE; + digestSize = WC_SHA384_DIGEST_SIZE; + #endif + break; + case CPA_CY_SYM_HASH_SHA512: + #ifdef WOLFSSL_SHA512 + blockSize = WC_SHA512_BLOCK_SIZE; + digestSize = WC_SHA512_DIGEST_SIZE; + #endif + break; + #ifdef QAT_V2 + case CPA_CY_SYM_HASH_SHA3_256: + #ifdef WOLFSSL_SHA3 + blockSize = WC_SHA3_256_BLOCK_SIZE; + digestSize = WC_SHA3_256_DIGEST_SIZE; + #endif + break; + #endif + + /* not supported */ + case CPA_CY_SYM_HASH_NONE: + case CPA_CY_SYM_HASH_AES_XCBC: + case CPA_CY_SYM_HASH_AES_CCM: + case CPA_CY_SYM_HASH_AES_GCM: + case CPA_CY_SYM_HASH_KASUMI_F9: + case CPA_CY_SYM_HASH_SNOW3G_UIA2: + case CPA_CY_SYM_HASH_AES_CMAC: + case CPA_CY_SYM_HASH_AES_GMAC: + case CPA_CY_SYM_HASH_AES_CBC_MAC: + #ifdef QAT_V2 + case CPA_CY_SYM_HASH_ZUC_EIA3: + #ifdef QAT_V2_4_PLUS + case CPA_CY_SYM_HASH_SHA3_224: + case CPA_CY_SYM_HASH_SHA3_384: + case CPA_CY_SYM_HASH_SHA3_512: + case CPA_CY_SYM_HASH_SHAKE_128: + case CPA_CY_SYM_HASH_SHAKE_256: + case CPA_CY_SYM_HASH_POLY: + case CPA_CY_SYM_HASH_SM3: + #endif /* QAT_V2_4_PLUS */ + #endif /* QAT_V2 */ + default: + return -1; + } + + if (pBlockSize) + *pBlockSize = blockSize; + if (pDigestSize) + *pDigestSize = digestSize; + + return 0; +} + +static void IntelQaSymHashFree(WC_ASYNC_DEV* dev) +{ + IntelQaSymCtx* ctx = &dev->qat.op.hash.ctx; + CpaCySymOpData* opData = &ctx->opData; + CpaBufferList* pDstBuffer = dev->qat.op.hash.srcList; + int idx; + + if (opData) { + if (opData->pDigestResult) { + XFREE(opData->pDigestResult, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->pDigestResult = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + } + + if (pDstBuffer) { + idx = pDstBuffer->numBuffers; + while (--idx >= 0) { + if (pDstBuffer->pBuffers[idx].pData) { + XFREE(pDstBuffer->pBuffers[idx].pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pDstBuffer->pBuffers[idx].pData = NULL; + } + } + + 
XFREE(pDstBuffer, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + + /* if final */ + if (dev->qat.out) { + int doFree = 0; + + /* free any tmp input */ + if (dev->qat.op.hash.tmpIn) { + XFREE(dev->qat.op.hash.tmpIn, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + dev->qat.op.hash.tmpIn = NULL; + dev->qat.op.hash.tmpInSz = 0; + dev->qat.op.hash.tmpInBufSz = 0; + + if (ctx->isCopy || ctx->symCtx != ctx->symCtxSrc) { + doFree = 1; + } + + #ifdef QAT_DEBUG + printf("IntelQaSymHashFree: dev %p, doFree %d\n", dev, doFree); + #endif + + /* close session */ + IntelQaSymClose(dev, doFree); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLen = 0; +} + +static void IntelQaSymHashCallback(void *pCallbackTag, CpaStatus status, + const CpaCySymOp operationType, void *pOpData, CpaBufferList *pDstBuffer, + CpaBoolean verifyResult) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCySymOpData* opData = (CpaCySymOpData*)pOpData; + int ret = ASYNC_OP_E; + + (void)opData; + (void)verifyResult; + (void)pDstBuffer; + (void)operationType; + +#ifdef QAT_DEBUG + printf("IntelQaSymHashCallback: dev %p, type %d, status %d, " + "verifyResult %d, num %d\n", + dev, operationType, status, verifyResult, pDstBuffer->numBuffers); +#endif + + if (status == CPA_STATUS_SUCCESS) { + if (dev->qat.out) { + /* is final */ + + /* return digest */ + if (dev->qat.outLen > 0 && dev->qat.out != opData->pDigestResult) { + XMEMCPY(dev->qat.out, opData->pDigestResult, dev->qat.outLen); + } + } + + /* mark event result */ + ret = 0; /* success */ + } + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +/* For hash update call with out == NULL */ +/* For hash final call with out != NULL */ +/* All input is cached in memory or only sent to hardware on final */ +#ifndef QAT_HASH_ALLOC_BLOCK_SZ + #define QAT_HASH_ALLOC_BLOCK_SZ 1024 +#endif +static int IntelQaSymHashCache(WC_ASYNC_DEV* dev, byte* out, const byte* in, + word32 inOutSz, CpaCySymHashMode hashMode, + 
CpaCySymHashAlgorithm hashAlgorithm, + + /* For HMAC auth mode only */ + Cpa8U* authKey, Cpa32U authKeyLenInBytes) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCySymOpData* opData = NULL; + CpaCySymCbFunc callback = IntelQaSymHashCallback; + CpaBufferList* srcList = NULL; + Cpa32U bufferListSize = 0; + Cpa8U* digestBuf = NULL; + Cpa32U metaSize = 0; + Cpa32U totalMsgSz = 0; + Cpa32U blockSize; + Cpa32U digestSize; + CpaCySymPacketType packetType; + IntelQaSymCtx* ctx; + CpaCySymSessionSetupData setup; + const int bufferCount = 1; + + ret = IntelQaSymHashGetInfo(hashAlgorithm, &blockSize, &digestSize); + if (ret != 0) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaSymHashCache: dev %p, out %p, in %p, inOutSz %d, mode %d" + ", algo %d, digSz %d, blkSz %d\n", + dev, out, in, inOutSz, hashMode, hashAlgorithm, digestSize, blockSize); +#endif + + ctx = &dev->qat.op.hash.ctx; + + /* handle input processing */ + if (in) { + if (dev->qat.op.hash.tmpIn == NULL) { + dev->qat.op.hash.tmpInSz = 0; + dev->qat.op.hash.tmpInBufSz = + (inOutSz + QAT_HASH_ALLOC_BLOCK_SZ - 1) + & ~(QAT_HASH_ALLOC_BLOCK_SZ - 1); + if (dev->qat.op.hash.tmpInBufSz == 0) + dev->qat.op.hash.tmpInBufSz = QAT_HASH_ALLOC_BLOCK_SZ; + dev->qat.op.hash.tmpIn = (byte*)XMALLOC(dev->qat.op.hash.tmpInBufSz, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (dev->qat.op.hash.tmpIn == NULL) { + ret = MEMORY_E; goto exit; + } + } + /* determine if we need to grow buffer */ + else if ((dev->qat.op.hash.tmpInSz + inOutSz) > + dev->qat.op.hash.tmpInBufSz) { + /* allocate the larger buffer first and only commit it on success, + * so a failed allocation neither leaks the old buffer nor drops + * the input that is already cached in it */ + word32 newBufSz = (dev->qat.op.hash.tmpInSz + inOutSz + + QAT_HASH_ALLOC_BLOCK_SZ - 1) & ~(QAT_HASH_ALLOC_BLOCK_SZ - 1); + byte* newIn = (byte*)XMALLOC(newBufSz, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (newIn == NULL) { + ret = MEMORY_E; goto exit; + } + XMEMCPY(newIn, dev->qat.op.hash.tmpIn, dev->qat.op.hash.tmpInSz); + XFREE(dev->qat.op.hash.tmpIn, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + dev->qat.op.hash.tmpIn = newIn; + dev->qat.op.hash.tmpInBufSz = newBufSz; + } + 
+ /* copy input to new buffer */ + XMEMCPY(&dev->qat.op.hash.tmpIn[dev->qat.op.hash.tmpInSz], in, inOutSz); + dev->qat.op.hash.tmpInSz += inOutSz; + + ret = 0; /* success */ + goto exit; + } + + /* handle output processing */ + packetType = CPA_CY_SYM_PACKET_TYPE_FULL; + + /* get meta size */ + status = cpaCyBufferListGetMetaSize(dev->qat.handle, bufferCount, + &metaSize); + if (status != CPA_STATUS_SUCCESS && metaSize <= 0) { + ret = BUFFER_E; goto exit; + } + + /* allocate buffer list */ + bufferListSize = sizeof(CpaBufferList) + + (bufferCount * sizeof(CpaFlatBuffer)) + metaSize; + srcList = XMALLOC(bufferListSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (srcList == NULL) { + ret = MEMORY_E; goto exit; + } + dev->qat.op.hash.srcList = srcList; + XMEMSET(srcList, 0, bufferListSize); + srcList->pBuffers = (CpaFlatBuffer*)( + (byte*)srcList + sizeof(CpaBufferList)); + srcList->pPrivateMetaData = (byte*)srcList + sizeof(CpaBufferList) + + (bufferCount * sizeof(CpaFlatBuffer)); + + srcList->numBuffers = bufferCount; + srcList->pBuffers[0].dataLenInBytes = dev->qat.op.hash.tmpInSz; + srcList->pBuffers[0].pData = dev->qat.op.hash.tmpIn; + totalMsgSz = dev->qat.op.hash.tmpInSz; + + dev->qat.op.hash.tmpInSz = 0; + dev->qat.op.hash.tmpInBufSz = 0; + dev->qat.op.hash.tmpIn = NULL; + + /* build output */ + if (out) { + /* use blockSize for alloc, but we are only returning digestSize */ + digestBuf = XMALLOC(blockSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (digestBuf == NULL) { + ret = MEMORY_E; goto exit; + } + } + + /* setup */ + XMEMSET(&setup, 0, sizeof(CpaCySymSessionSetupData)); + setup.sessionPriority = CPA_CY_PRIORITY_NORMAL; + setup.symOperation = CPA_CY_SYM_OP_HASH; + setup.partialsNotRequired = CPA_TRUE; + setup.hashSetupData.hashMode = hashMode; + setup.hashSetupData.hashAlgorithm = hashAlgorithm; + setup.hashSetupData.digestResultLenInBytes = digestSize; + 
setup.hashSetupData.authModeSetupData.authKey = authKey; + setup.hashSetupData.authModeSetupData.authKeyLenInBytes = authKeyLenInBytes; + + /* open session */ + ret = IntelQaSymOpen(dev, &setup, callback); + if (ret != 0) { + goto exit; + } + + /* operation data */ + opData = &ctx->opData; + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + opData->sessionCtx = ctx->symCtx; + opData->packetType = packetType; + opData->messageLenToHashInBytes = totalMsgSz; + opData->pDigestResult = digestBuf; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLen = inOutSz; + IntelQaOpInit(dev, IntelQaSymHashFree); + + /* perform symmetric hash operation async */ + /* use same buffer list for in-place operation */ + do { + status = cpaCySymPerformOp(dev->qat.handle, + dev, + opData, + srcList, + srcList, + NULL); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_HASH_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCySymPerformOp Hash failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + + /* handle cleanup */ + IntelQaSymHashFree(dev); + } + + return ret; +} + +#ifdef QAT_HASH_ENABLE_PARTIAL + +/* For hash update call with out == NULL */ +/* For hash final call with out != NULL */ +static int IntelQaSymHashPartial(WC_ASYNC_DEV* dev, byte* out, const byte* in, + word32 inOutSz, CpaCySymHashMode hashMode, + CpaCySymHashAlgorithm hashAlgorithm, + + /* For HMAC auth mode only */ + Cpa8U* authKey, Cpa32U authKeyLenInBytes) +{ + int ret, retryCount = 0, i; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCySymOpData* opData = NULL; + CpaCySymCbFunc callback = IntelQaSymHashCallback; + CpaBufferList* srcList = NULL; + Cpa32U bufferListSize = 0; + Cpa8U* digestBuf = NULL; + Cpa32U metaSize = 0; + Cpa32U totalMsgSz = 0; + Cpa32U blockSize; + Cpa32U digestSize; + CpaCySymPacketType packetType; + IntelQaSymCtx* ctx; + CpaCySymSessionSetupData setup; + + int* bufferCount; + byte** buffers; + word32* buffersSz; + + ret = IntelQaSymHashGetInfo(hashAlgorithm, &blockSize, &digestSize); + if (ret != 0) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaSymHashPartial: dev %p, out %p, in %p, inOutSz %d, mode %d, " + "algo %d, digSz %d, blkSz %d\n", + dev, out, in, inOutSz, hashMode, hashAlgorithm, digestSize, blockSize); +#endif + + ctx = &dev->qat.op.hash.ctx; + + bufferCount = &dev->qat.op.hash.bufferCount; + buffers = dev->qat.op.hash.buffers; + buffersSz = dev->qat.op.hash.buffersSz; + + /* handle input processing */ + if (in) { + /* if tmp has data or input is not block aligned */ + if (dev->qat.op.hash.tmpInSz > 0 || inOutSz == 0 || + (inOutSz % blockSize) != 0) { + /* need to handle unaligned hashing, using local tmp */ + + /* make sure we have tmpIn allocated */ + if (dev->qat.op.hash.tmpIn == NULL) { + dev->qat.op.hash.tmpInSz = 0; + dev->qat.op.hash.tmpIn = XMALLOC(blockSize, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + if (dev->qat.op.hash.tmpIn == NULL) { + ret = MEMORY_E; goto exit; + } + 
dev->qat.op.hash.tmpInBufSz = blockSize; + } + + /* setup processing for block aligned part of input or use tmpIn */ + if (dev->qat.op.hash.tmpInSz > 0) { + word32 remainSz = blockSize - dev->qat.op.hash.tmpInSz; + + /* attempt to fill tmpIn and process block */ + if (inOutSz < remainSz) { + /* not enough to fill buffer */ + XMEMCPY(&dev->qat.op.hash.tmpIn[dev->qat.op.hash.tmpInSz], + in, inOutSz); + dev->qat.op.hash.tmpInSz += inOutSz; + } + else { + /* fill tmp buffer and add */ + XMEMCPY(&dev->qat.op.hash.tmpIn[dev->qat.op.hash.tmpInSz], + in, remainSz); + dev->qat.op.hash.tmpInSz += remainSz; + buffers[*bufferCount] = dev->qat.op.hash.tmpIn; + buffersSz[*bufferCount] = dev->qat.op.hash.tmpInSz; + (*bufferCount)++; + inOutSz -= remainSz; + in += remainSz; + dev->qat.op.hash.tmpIn = NULL; + dev->qat.op.hash.tmpInSz = 0; + + /* use remainder of block aligned */ + if (inOutSz >= blockSize) { + word32 unalignedSz = (inOutSz % blockSize); + word32 inSz = inOutSz - unalignedSz; + + buffersSz[*bufferCount] = inSz; + buffers[*bufferCount] = (byte*)XMALLOC(inSz, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (buffers[*bufferCount] == NULL) { + ret = MEMORY_E; goto exit; + } + XMEMCPY(buffers[*bufferCount], (byte*)in, inSz); + + (*bufferCount)++; + inOutSz -= inSz; + in += inSz; + } + + /* save remainder to tmpIn */ + if (inOutSz > 0) { + dev->qat.op.hash.tmpInSz = 0; + dev->qat.op.hash.tmpIn = XMALLOC(blockSize, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + if (dev->qat.op.hash.tmpIn == NULL) { + ret = MEMORY_E; goto exit; + } + dev->qat.op.hash.tmpInBufSz = blockSize; + + XMEMCPY(dev->qat.op.hash.tmpIn, in, inOutSz); + dev->qat.op.hash.tmpInSz = inOutSz; + } + } + } + else { + /* if not enough to fit into blockSize store into tmpIn */ + if (inOutSz < blockSize) { + dev->qat.op.hash.tmpInSz = inOutSz; + XMEMCPY(dev->qat.op.hash.tmpIn, in, inOutSz); + } + else { + word32 unalignedSz = (inOutSz % blockSize); + word32 inSz = inOutSz - unalignedSz; + + buffersSz[*bufferCount] = 
inSz; + buffers[*bufferCount] = (byte*)XREALLOC((byte*)in, + inSz, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (buffers[*bufferCount] == NULL) { + ret = MEMORY_E; goto exit; + } + (*bufferCount)++; + + /* store remainder */ + dev->qat.op.hash.tmpInSz = unalignedSz; + XMEMCPY(dev->qat.op.hash.tmpIn, &in[inSz], unalignedSz); + } + } + + } + else { + /* use input directly */ + buffersSz[*bufferCount] = inOutSz; + buffers[*bufferCount] = (byte*)XREALLOC((byte*)in, + buffersSz[*bufferCount], dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (buffers[*bufferCount] == NULL) { + ret = MEMORY_E; goto exit; + } + (*bufferCount)++; + } + } + + /* determine if early exit is okay */ + if (out == NULL) { + /* if not final and no in buffers then exit with success */ + if (*bufferCount == 0) { + ret = 0; /* return success */ + goto exit; + } + + /* for auth must pass in buffer, so leave one in buffer cache */ + else if (hashMode == CPA_CY_SYM_HASH_MODE_AUTH && *bufferCount <= 1) { + ret = 0; /* return success */ + goto exit; + } + } + + /* determine packet type and add any remainder to input processing */ + packetType = CPA_CY_SYM_PACKET_TYPE_PARTIAL; + if (out) { + /* if remainder then add it */ + if (dev->qat.op.hash.tmpIn) { + /* add buffer and use final hash type */ + buffers[*bufferCount] = dev->qat.op.hash.tmpIn; + buffersSz[*bufferCount] = dev->qat.op.hash.tmpInSz; + (*bufferCount)++; + dev->qat.op.hash.tmpIn = NULL; + dev->qat.op.hash.tmpInSz = 0; + } + + /* determine if this is full or partial */ + if (ctx->symCtxSrc == NULL || (!ctx->isOpen && !ctx->isCopy)) { + packetType = CPA_CY_SYM_PACKET_TYPE_FULL; + } + else { + packetType = CPA_CY_SYM_PACKET_TYPE_LAST_PARTIAL; + } + } + + /* get meta size */ + status = cpaCyBufferListGetMetaSize(dev->qat.handle, *bufferCount, + &metaSize); + if (status != CPA_STATUS_SUCCESS && metaSize <= 0) { + ret = BUFFER_E; goto exit; + } + + /* allocate buffer list */ + bufferListSize = sizeof(CpaBufferList) + + (*bufferCount * sizeof(CpaFlatBuffer)) + 
metaSize; + srcList = XMALLOC(bufferListSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (srcList == NULL) { + ret = MEMORY_E; goto exit; + } + dev->qat.op.hash.srcList = srcList; + XMEMSET(srcList, 0, bufferListSize); + srcList->pBuffers = (CpaFlatBuffer*)( + (byte*)srcList + sizeof(CpaBufferList)); + srcList->pPrivateMetaData = (byte*)srcList + sizeof(CpaBufferList) + + (*bufferCount * sizeof(CpaFlatBuffer)); + for (i = 0; i < *bufferCount; i++) { + srcList->pBuffers[i].dataLenInBytes = buffersSz[i]; + srcList->pBuffers[i].pData = buffers[i]; + totalMsgSz += buffersSz[i]; + } + srcList->numBuffers = *bufferCount; + + /* clear buffer cache */ + /* NOTE(review): this loop header was restored from a garbled span in + * which the comparison, bound, increment and opening brace were lost; + * the bound is presumed to be MAX_QAT_HASH_BUFFERS - confirm against + * the declaration of hash.buffers in quickassist.h */ + dev->qat.op.hash.bufferCount = 0; + for (i = 0; i < MAX_QAT_HASH_BUFFERS; i++) { + dev->qat.op.hash.buffers[i] = NULL; + dev->qat.op.hash.buffersSz[i] = 0; + } + + /* build output */ + if (out) { + /* use blockSize for alloc, but we are only returning digestSize */ + digestBuf = XMALLOC(blockSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (digestBuf == NULL) { + ret = MEMORY_E; goto exit; + } + } + + /* setup */ + XMEMSET(&setup, 0, sizeof(CpaCySymSessionSetupData)); + setup.sessionPriority = CPA_CY_PRIORITY_NORMAL; + setup.symOperation = CPA_CY_SYM_OP_HASH; + setup.partialsNotRequired = (packetType == CPA_CY_SYM_PACKET_TYPE_FULL) ? 
+ CPA_TRUE : CPA_FALSE; + setup.hashSetupData.hashMode = hashMode; + setup.hashSetupData.hashAlgorithm = hashAlgorithm; + setup.hashSetupData.digestResultLenInBytes = digestSize; + setup.hashSetupData.authModeSetupData.authKey = authKey; + setup.hashSetupData.authModeSetupData.authKeyLenInBytes = authKeyLenInBytes; + + /* open session */ + ret = IntelQaSymOpen(dev, &setup, callback); + if (ret != 0) { + goto exit; + } + + /* workarounds for handling symmetric context copies */ + if (packetType == CPA_CY_SYM_PACKET_TYPE_LAST_PARTIAL) { + /* set the partialState for partial */ + #ifdef USE_LAC_SESSION_FOR_STRUCT_OFFSET + word32 parStaOffset = (word32)offsetof(lac_session_desc_t, + partialState); + #else + word32 parStaOffset = (28 * 16); + #endif + + /* make sure partialState is partial, try + 16 alignments as well */ + for (i = 0; i < 4; i++) { + word32* priorVal = (word32*)((byte*)ctx->symCtx + parStaOffset + + (i * 16)); + if (*priorVal == CPA_CY_SYM_PACKET_TYPE_FULL) { + *priorVal = CPA_CY_SYM_PACKET_TYPE_PARTIAL; + break; + } + } + } + if (ctx->symCtx != ctx->symCtxSrc) { + /* copy hash state (digest into new symmetric context) */ + byte* symCtxDst = (byte*)ctx->symCtx; + byte* symCtxSrc = (byte*)ctx->symCtxSrc; + /* copy from hashStatePrefixBuffer to end */ + #ifdef USE_LAC_SESSION_FOR_STRUCT_OFFSET + const word32 copyRegion = (word32)offsetof(lac_session_desc_t, + hashStatePrefixBuffer); + #else + const word32 copyRegion = (41 * 16); + #endif + XMEMCPY(&symCtxDst[copyRegion], &symCtxSrc[copyRegion], + ctx->symCtxSize - copyRegion); + } + + /* operation data */ + opData = &ctx->opData; + XMEMSET(opData, 0, sizeof(CpaCySymOpData)); + opData->sessionCtx = ctx->symCtx; + opData->packetType = packetType; + opData->messageLenToHashInBytes = totalMsgSz; + opData->pDigestResult = digestBuf; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLen = inOutSz; + IntelQaOpInit(dev, IntelQaSymHashFree); + + /* perform symmetric hash operation async */ 
+ /* use same buffer list for in-place operation */ + do { + status = cpaCySymPerformOp(dev->qat.handle, + dev, + opData, + srcList, + srcList, + NULL); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_HASH_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCySymPerformOp Hash partial failed! dev %p, status %d, " + "ret %d\n", dev, status, ret); + + /* handle cleanup */ + IntelQaSymHashFree(dev); + } + + return ret; +} +#endif /* QAT_HASH_ENABLE_PARTIAL */ + + +/* For hash update call with out == NULL */ +/* For hash final call with out != NULL */ +static int IntelQaSymHash(WC_ASYNC_DEV* dev, byte* out, const byte* in, + word32 inOutSz, CpaCySymHashMode hashMode, + CpaCySymHashAlgorithm hashAlgorithm, + + /* For HMAC auth mode only */ + Cpa8U* authKey, Cpa32U authKeyLenInBytes) +{ + /* check args */ + if (dev == NULL || (out == NULL && in == NULL) || + hashAlgorithm == CPA_CY_SYM_HASH_NONE) { + return BAD_FUNC_ARG; + } + + /* trap call with both in and out set */ + if (in != NULL && out != NULL) { + printf("IntelQaSymHash: Cannot call with in and out both set\n"); + return BAD_FUNC_ARG; + } + + if (inOutSz == 0) { + return 0; /* nothing to do, return success */ + } + +#ifdef QAT_HASH_ENABLE_PARTIAL + if (g_qatCapabilities.supPartial + #ifdef QAT_V2 + && hashAlgorithm != CPA_CY_SYM_HASH_SHA3_256 + #endif + ) { + return IntelQaSymHashPartial(dev, out, in, inOutSz, hashMode, + hashAlgorithm, authKey, authKeyLenInBytes); + } + else +#endif + return IntelQaSymHashCache(dev, out, in, inOutSz, hashMode, + hashAlgorithm, authKey, authKeyLenInBytes); +} + +#ifdef WOLFSSL_SHA512 +int IntelQaSymSha512(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA512, NULL, 0); +} + +#ifdef WOLFSSL_SHA384 +int IntelQaSymSha384(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, 
out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA384, NULL, 0); +} +#endif /* WOLFSSL_SHA384 */ +#endif /* WOLFSSL_SHA512 */ + +#ifndef NO_SHA256 +int IntelQaSymSha256(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA256, NULL, 0); +} +#ifdef WOLFSSL_SHA224 +int IntelQaSymSha224(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA224, NULL, 0); +} +#endif /* WOLFSSL_SHA224 */ +#endif /* !NO_SHA256 */ + +#ifndef NO_SHA +int IntelQaSymSha(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA1, NULL, 0); +} +#endif /* !NO_SHA */ + +#ifndef NO_MD5 +int IntelQaSymMd5(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_MD5, NULL, 0); +} +#endif /* !NO_MD5 */ + +#if defined(WOLFSSL_SHA3) && defined(QAT_V2) +int IntelQaSymSha3(WC_ASYNC_DEV* dev, byte* out, const byte* in, word32 sz) +{ + if (g_qatCapabilities.supSha3) { + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_PLAIN, CPA_CY_SYM_HASH_SHA3_256, NULL, 0); + } + return NOT_COMPILED_IN; +} +#endif + +#ifndef NO_HMAC + int IntelQaHmacGetType(int macType, word32* hashAlgorithm) + { + int ret = NOT_COMPILED_IN; + + switch (macType) { + #ifndef NO_MD5 + case WC_MD5: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_MD5; + ret = 0; + break; + #endif + #ifndef NO_SHA + case WC_SHA: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_SHA1; + ret = 0; + break; + #endif + #ifdef WOLFSSL_SHA224 + case WC_SHA224: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_SHA224; + ret = 0; + break; + #endif + #ifndef NO_SHA256 + case WC_SHA256: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_SHA256; + ret = 
0; + break; + #endif + #ifdef WOLFSSL_SHA512 + #ifdef WOLFSSL_SHA384 + case WC_SHA384: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_SHA384; + ret = 0; + break; + #endif + case WC_SHA512: + if (hashAlgorithm) *hashAlgorithm = CPA_CY_SYM_HASH_SHA512; + ret = 0; + break; + #endif + #if defined(WOLFSSL_SHA3) && defined(QAT_V2) + case WC_SHA3_256: + if (g_qatCapabilities.supSha3) { + if (hashAlgorithm) + *hashAlgorithm = CPA_CY_SYM_HASH_SHA3_256; + ret = 0; + } + break; + #endif + #ifdef HAVE_BLAKE2 + case BLAKE2B_ID: + #endif + #ifdef WOLFSSL_SHA3 + case WC_SHA3_224: + case WC_SHA3_384: + case WC_SHA3_512: + #endif + default: + ret = NOT_COMPILED_IN; + } + return ret; + } + + int IntelQaHmac(struct WC_ASYNC_DEV* dev, + int macType, byte* keyRaw, word16 keyLen, + byte* out, const byte* in, word32 sz) + { + int ret; + CpaCySymHashAlgorithm hashAlgorithm; + + ret = IntelQaHmacGetType(macType, &hashAlgorithm); + if (ret != 0) + return ret; + + return IntelQaSymHash(dev, out, in, sz, + CPA_CY_SYM_HASH_MODE_AUTH, hashAlgorithm, keyRaw, keyLen); + } +#endif /* !NO_HMAC */ + +#endif /* QAT_ENABLE_HASH */ + + + +/* -------------------------------------------------------------------------- */ +/* ECC Algo */ +/* -------------------------------------------------------------------------- */ + +#ifdef HAVE_ECC + +#ifdef HAVE_ECC_DHE + +/* ECC Point Multiple Used for Public Key computation Key Gen */ +static void IntelQaEccPointMulFree(WC_ASYNC_DEV* dev) +{ + CpaCyEcPointMultiplyOpData* opData = &dev->qat.op.ecc_mul.opData; + CpaFlatBuffer* pXk = &dev->qat.op.ecc_mul.pXk; + CpaFlatBuffer* pYk = &dev->qat.op.ecc_mul.pYk; + + if (pXk) { + if (pXk->pData != NULL) { + XFREE(pXk->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + XMEMSET(pXk, 0, sizeof(CpaFlatBuffer)); + } + if (pYk) { + if (pYk->pData != NULL) { + XFREE(pYk->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + XMEMSET(pYk, 0, sizeof(CpaFlatBuffer)); + } + + if (opData) { + if (opData->h.pData) { + if 
(opData->h.pData != g_qatEcdhCofactor1) { + XFREE(opData->h.pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + opData->h.pData = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCyEcPointMultiplyOpData)); + } + + /* clear temp pointers */ + dev->qat.op.ecc_mul.pubX = NULL; + dev->qat.op.ecc_mul.pubY = NULL; + dev->qat.op.ecc_mul.pubZ = NULL; +} + +static void IntelQaEccPointMulCallback(void *pCallbackTag, CpaStatus status, + void* pOpData, CpaBoolean multiplyStatus, CpaFlatBuffer* pXk, + CpaFlatBuffer* pYk) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyEcPointMultiplyOpData* opData = (CpaCyEcPointMultiplyOpData*)pOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaEccPointMulCallback: dev %p, status %d, multiplyStatus %d, " + "xLen %d, yLen %d\n", + dev, status, multiplyStatus, pXk->dataLenInBytes, pYk->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* check multiply status */ + if (multiplyStatus == 0) { + /* fail */ + WOLFSSL_MSG("IntelQaEccPointMulCallback: multiply failed"); + ret = ECC_CURVE_OID_E; + } + else { + ret = mp_read_unsigned_bin(dev->qat.op.ecc_mul.pubX, + pXk->pData, pXk->dataLenInBytes); + if (ret == 0) + ret = mp_read_unsigned_bin(dev->qat.op.ecc_mul.pubY, + pYk->pData, pYk->dataLenInBytes); + if (ret == 0) + ret = mp_set(dev->qat.op.ecc_mul.pubZ, 1); /* always 1 */ + } + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaEccPointMul(WC_ASYNC_DEV* dev, WC_BIGINT* k, + MATH_INT_T* pubX, MATH_INT_T* pubY, MATH_INT_T* pubZ, + WC_BIGINT* xG, WC_BIGINT* yG, WC_BIGINT* a, WC_BIGINT* b, WC_BIGINT* q, + word32 cofactor) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyEcPointMultiplyOpData* opData = NULL; + CpaFlatBuffer* pXk = NULL; + CpaFlatBuffer* pYk = NULL; + CpaCyEcPointMultiplyCbFunc callback = IntelQaEccPointMulCallback; + CpaBoolean* multiplyStatus; + + /* check arguments */ + if (dev == NULL) { + return BAD_FUNC_ARG; + } + 
+#ifdef QAT_DEBUG + printf("IntelQaEccPointMul dev %p\n", dev); +#endif + + /* setup operation */ + opData = &dev->qat.op.ecc_mul.opData; + pXk = &dev->qat.op.ecc_mul.pXk; + pYk = &dev->qat.op.ecc_mul.pYk; + multiplyStatus = &dev->qat.op.ecc_mul.multiplyStatus; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyEcPointMultiplyOpData)); + XMEMSET(pXk, 0, sizeof(CpaFlatBuffer)); + XMEMSET(pYk, 0, sizeof(CpaFlatBuffer)); + XMEMSET(multiplyStatus, 0, sizeof(CpaBoolean)); + + /* setup operation data */ + opData->fieldType = CPA_CY_EC_FIELD_TYPE_PRIME; + ret = IntelQaBigIntToFlatBuffer(k, &opData->k); + ret += IntelQaBigIntToFlatBuffer(xG, &opData->xg); + ret += IntelQaBigIntToFlatBuffer(yG, &opData->yg); + if (a != NULL && a->buf == NULL) { + /* The Koblitz curves can have a zero param "a" */ + /* the zero buffer is sized from k, so k must be valid and the + * allocation must have succeeded before the buffer is cleared */ + if (k == NULL) { + ret = BAD_FUNC_ARG; goto exit; + } + if (IntelQaAllocFlatBuffer(&opData->a, k->len, dev->heap) != 0 || + opData->a.pData == NULL) { + ret = MEMORY_E; goto exit; + } + XMEMSET(opData->a.pData, 0, k->len); + } + else { + ret += IntelQaBigIntToFlatBuffer(a, &opData->a); + } + ret += IntelQaBigIntToFlatBuffer(b, &opData->b); + ret += IntelQaBigIntToFlatBuffer(q, &opData->q); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* setup cofactor */ + /* for this point multiply the cofactor should not be used, + * so always pass 1 */ + /* if using default value 1 then use shared global */ + opData->h.dataLenInBytes = 4; + opData->h.pData = g_qatEcdhCofactor1; + (void)cofactor; + + ret = IntelQaAllocFlatBuffer(pXk, q->len, dev->heap); + ret += IntelQaAllocFlatBuffer(pYk, q->len, dev->heap); + if (ret != 0) { + ret = MEMORY_E; goto exit; + } + + /* store info needed for output */ + dev->qat.op.ecc_mul.pubX = pubX; + dev->qat.op.ecc_mul.pubY = pubY; + dev->qat.op.ecc_mul.pubZ = pubZ; + IntelQaOpInit(dev, IntelQaEccPointMulFree); + + /* perform point multiply */ + do { + status = cpaCyEcPointMultiply(dev->qat.handle, + callback, + dev, + opData, + multiplyStatus, + pXk, + pYk); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_ECMUL_ASYNC, + callback, &retryCount)); + + if 
(ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyEcPointMultiply failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaEccPointMulFree(dev); + + return ret; +} + +static void IntelQaEcdhFree(WC_ASYNC_DEV* dev) +{ + CpaCyEcdhPointMultiplyOpData* opData = &dev->qat.op.ecc_ecdh.opData; + CpaFlatBuffer* resultX = &dev->qat.op.ecc_ecdh.pXk; + CpaFlatBuffer* resultY = &dev->qat.op.ecc_ecdh.pYk; + + if (resultX) { + if (resultX->pData) { + XFREE(resultX->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + resultX->pData = NULL; + } + if (resultY->pData) { + /* Don't free, since isn't used, persist global */ + /* XFREE(resultY->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); */ + resultY->pData = NULL; + } + XMEMSET(resultX, 0, sizeof(CpaFlatBuffer)); + XMEMSET(resultY, 0, sizeof(CpaFlatBuffer)); + } + + if (opData) { + if (opData->h.pData) { + if (opData->h.pData != g_qatEcdhCofactor1) { + XFREE(opData->h.pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + } + opData->h.pData = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCyEcdhPointMultiplyOpData)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaEcdhCallback(void *pCallbackTag, CpaStatus status, + void* pOpData, CpaBoolean multiplyStatus, CpaFlatBuffer* pXk, + CpaFlatBuffer* pYk) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyEcdhPointMultiplyOpData* opData = + (CpaCyEcdhPointMultiplyOpData*)pOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaEcdhCallback: dev %p, status %d, multiplyStatus %d, " + "xLen %d, yLen %d\n", + dev, status, multiplyStatus, pXk->dataLenInBytes, pYk->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + if (dev->qat.outLenPtr) { + if (pXk->dataLenInBytes > *dev->qat.outLenPtr) { + pXk->dataLenInBytes = *dev->qat.outLenPtr; + } + *dev->qat.outLenPtr = pXk->dataLenInBytes; + } + + /* return data */ + if 
(dev->qat.out && dev->qat.out != pXk->pData) { + XMEMCPY(dev->qat.out, pXk->pData, pXk->dataLenInBytes); + } + + /* check multiply status */ + if (multiplyStatus == 0) { + /* fail */ + WOLFSSL_MSG("IntelQaEcdhCallback: multiply failed"); + ret = ECC_CURVE_OID_E; + } + else { + /* mark event result */ + ret = 0; /* success */ + } + } + (void)opData; + (void)pYk; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaEcdh(WC_ASYNC_DEV* dev, WC_BIGINT* k, WC_BIGINT* xG, + WC_BIGINT* yG, byte* out, word32* outlen, + WC_BIGINT* a, WC_BIGINT* b, WC_BIGINT* q, + word32 cofactor) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyEcdhPointMultiplyOpData* opData = NULL; + CpaFlatBuffer* pXk = NULL; + CpaFlatBuffer* pYk = NULL; + CpaCyEcdhPointMultiplyCbFunc callback = IntelQaEcdhCallback; + CpaBoolean* multiplyStatus; + + /* check arguments */ + if (dev == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaEcdh dev %p\n", dev); +#endif + + /* setup operation */ + opData = &dev->qat.op.ecc_ecdh.opData; + pXk = &dev->qat.op.ecc_ecdh.pXk; + pYk = &dev->qat.op.ecc_ecdh.pYk; + multiplyStatus = &dev->qat.op.ecc_ecdh.multiplyStatus; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyEcdhPointMultiplyOpData)); + XMEMSET(pXk, 0, sizeof(CpaFlatBuffer)); + XMEMSET(pYk, 0, sizeof(CpaFlatBuffer)); + XMEMSET(multiplyStatus, 0, sizeof(CpaBoolean)); + + /* setup operation data */ + opData->fieldType = CPA_CY_EC_FIELD_TYPE_PRIME; + ret = IntelQaBigIntToFlatBuffer(k, &opData->k); + ret += IntelQaBigIntToFlatBuffer(xG, &opData->xg); + ret += IntelQaBigIntToFlatBuffer(yG, &opData->yg); + if (a != NULL && a->buf == NULL) { + /* The Koblitz curves can have a zero param "a" */ + /* the zero buffer is sized from k, so k must be valid and the + * allocation must have succeeded before the buffer is cleared */ + if (k == NULL) { + ret = BAD_FUNC_ARG; goto exit; + } + if (IntelQaAllocFlatBuffer(&opData->a, k->len, dev->heap) != 0 || + opData->a.pData == NULL) { + ret = MEMORY_E; goto exit; + } + XMEMSET(opData->a.pData, 0, k->len); + } + else { + ret += IntelQaBigIntToFlatBuffer(a, &opData->a); + } + ret += IntelQaBigIntToFlatBuffer(b, &opData->b); + ret += 
IntelQaBigIntToFlatBuffer(q, &opData->q); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* setup cofactor */ + /* for this point multiply the cofactor should not be used, + * so always pass 1 */ + /* if using default value 1 then use shared global */ + opData->h.dataLenInBytes = 4; + opData->h.pData = g_qatEcdhCofactor1; + (void)cofactor; + + pXk->dataLenInBytes = q->len; /* bytes key size / 8 (aligned) */ + pXk->pData = XREALLOC(out, pXk->dataLenInBytes, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + /* this buffer is passed to the point multiply call and copied from + * in the callback, so fail here rather than submit a NULL buffer */ + if (pXk->pData == NULL) { + ret = MEMORY_E; goto exit; + } + pYk->dataLenInBytes = q->len; + pYk->pData = g_qatEcdhY; + + /* store info needed for output */ + dev->qat.out = out; + dev->qat.outLenPtr = outlen; + IntelQaOpInit(dev, IntelQaEcdhFree); + + /* perform point multiply */ + do { + status = cpaCyEcdhPointMultiply(dev->qat.handle, + callback, + dev, + opData, + multiplyStatus, + pXk, + pYk); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_ECDHE_ASYNC, + callback, &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyEcdhPointMultiply failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaEcdhFree(dev); + + return ret; +} +#endif /* HAVE_ECC_DHE */ + + +#ifdef HAVE_ECC_SIGN + +static void IntelQaEcdsaSignFree(WC_ASYNC_DEV* dev) +{ + CpaCyEcdsaSignRSOpData* opData = &dev->qat.op.ecc_sign.opData; + CpaFlatBuffer *pR = &dev->qat.op.ecc_sign.R; + CpaFlatBuffer *pS = &dev->qat.op.ecc_sign.S; + + if (opData) { + XMEMSET(opData, 0, sizeof(CpaCyEcdsaSignRSOpData)); + } + + if (pR) { + if (pR->pData) + XFREE(pR->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + XMEMSET(pR, 0, sizeof(CpaFlatBuffer)); + } + if (pS) { + if (pS->pData) + XFREE(pS->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + XMEMSET(pS, 0, sizeof(CpaFlatBuffer)); + } + + /* clear temp pointers */ + dev->qat.op.ecc_sign.pR = NULL; + dev->qat.op.ecc_sign.pS = NULL; +} + +static void IntelQaEcdsaSignCallback(void *pCallbackTag, + CpaStatus status, void *pOpData, CpaBoolean signStatus, + CpaFlatBuffer *pR, CpaFlatBuffer *pS) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyEcdsaSignRSOpData* opData = (CpaCyEcdsaSignRSOpData*)pOpData; + int ret = ASYNC_OP_E; + + (void)signStatus; + +#ifdef QAT_DEBUG + printf("IntelQaEcdsaSignCallback: dev %p, status %d, signStatus %d, " + "rLen %d, sLen %d\n", + dev, status, signStatus, pR->dataLenInBytes, pS->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* check sign status */ + if (signStatus == 0) { + /* fail */ + WOLFSSL_MSG("IntelQaEcdsaSignCallback: sign failed"); + ret = ECC_CURVE_OID_E; + } + else { + /* success - populate result */ + ret = IntelQaFlatBufferToBigInt(pR, dev->qat.op.ecc_sign.pR); + if (ret == 0) { + ret = IntelQaFlatBufferToBigInt(pS, dev->qat.op.ecc_sign.pS); + } + } + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaEcdsaSign(WC_ASYNC_DEV* dev, + WC_BIGINT* m, WC_BIGINT* d, + WC_BIGINT* k, + WC_BIGINT* r, WC_BIGINT* s, + WC_BIGINT* a, WC_BIGINT* b, + WC_BIGINT* q, 
WC_BIGINT* n, + WC_BIGINT* xg, WC_BIGINT* yg) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyEcdsaSignRSOpData* opData = NULL; + CpaCyEcdsaSignRSCbFunc callback = IntelQaEcdsaSignCallback; + CpaBoolean* signStatus; + CpaFlatBuffer* pR = NULL; + CpaFlatBuffer* pS = NULL; + + if (dev == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaEcdsaSign dev %p\n", dev); +#endif + + /* setup operation */ + opData = &dev->qat.op.ecc_sign.opData; + pR = &dev->qat.op.ecc_sign.R; + pS = &dev->qat.op.ecc_sign.S; + signStatus = &dev->qat.op.ecc_sign.signStatus; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyEcdsaSignRSOpData)); + XMEMSET(pR, 0, sizeof(CpaFlatBuffer)); + XMEMSET(pS, 0, sizeof(CpaFlatBuffer)); + XMEMSET(signStatus, 0, sizeof(CpaBoolean)); + + /* setup operation data */ + opData->fieldType = CPA_CY_EC_FIELD_TYPE_PRIME; + ret = IntelQaBigIntToFlatBuffer(m, &opData->m); + ret += IntelQaBigIntToFlatBuffer(d, &opData->d); + ret += IntelQaBigIntToFlatBuffer(k, &opData->k); + ret += IntelQaBigIntToFlatBuffer(a, &opData->a); + ret += IntelQaBigIntToFlatBuffer(b, &opData->b); + ret += IntelQaBigIntToFlatBuffer(q, &opData->q); + ret += IntelQaBigIntToFlatBuffer(n, &opData->n); + ret += IntelQaBigIntToFlatBuffer(xg, &opData->xg); + ret += IntelQaBigIntToFlatBuffer(yg, &opData->yg); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + pR->dataLenInBytes = n->len; /* bytes key size / 8 (aligned) */ + pS->dataLenInBytes = n->len; + pR->pData = XREALLOC(r->buf, pR->dataLenInBytes, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pS->pData = XREALLOC(s->buf, pS->dataLenInBytes, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + + if (pR->pData == NULL || pS->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* store info needed for output */ + dev->qat.op.ecc_sign.pR = r; + dev->qat.op.ecc_sign.pS = s; + IntelQaOpInit(dev, IntelQaEcdsaSignFree); + + /* Perform ECDSA sign */ + do { + status = cpaCyEcdsaSignRS(dev->qat.handle, + 
callback, + dev, + opData, + signStatus, + pR, + pS); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_ECDSA_ASYNC, + callback, &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyEcdsaSignRS failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaEcdsaSignFree(dev); + + return ret; +} + +#endif /* HAVE_ECC_SIGN */ + + +#ifdef HAVE_ECC_VERIFY +static void IntelQaEcdsaVerifyFree(WC_ASYNC_DEV* dev) +{ + CpaCyEcdsaVerifyOpData* opData = &dev->qat.op.ecc_verify.opData; + + if (opData) { + XMEMSET(opData, 0, sizeof(CpaCyEcdsaVerifyOpData)); + } + + /* clear temp pointers */ + dev->qat.op.ecc_verify.stat = NULL; +} + +static void IntelQaEcdsaVerifyCallback(void *pCallbackTag, + CpaStatus status, void *pOpData, CpaBoolean verifyStatus) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyEcdsaVerifyOpData* opData = (CpaCyEcdsaVerifyOpData*)pOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaEcdsaVerifyCallback: dev %p, status %d, verifyStatus %d\n", + dev, status, verifyStatus); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* populate result */ + *dev->qat.op.ecc_verify.stat = verifyStatus; + + /* check verify status */ + if (verifyStatus == 0) { + /* fail */ + WOLFSSL_MSG("IntelQaEcdsaVerifyCallback: verify failed"); + ret = ECC_CURVE_OID_E; + } + else { + /* mark event result */ + ret = 0; /* success */ + } + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaEcdsaVerify(WC_ASYNC_DEV* dev, WC_BIGINT* m, + WC_BIGINT* xp, WC_BIGINT* yp, + WC_BIGINT* r, WC_BIGINT* s, + WC_BIGINT* a, WC_BIGINT* b, + WC_BIGINT* q, WC_BIGINT* n, + WC_BIGINT* xg, WC_BIGINT* yg, int* pVerifyStatus) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyEcdsaVerifyOpData* opData = NULL; + CpaCyEcdsaVerifyCbFunc callback = IntelQaEcdsaVerifyCallback; + CpaBoolean* verifyStatus; + + if (dev == NULL) 
{ + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaEcdsaVerify dev %p\n", dev); +#endif + + /* setup operation */ + opData = &dev->qat.op.ecc_verify.opData; + verifyStatus = &dev->qat.op.ecc_verify.verifyStatus; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyEcdsaVerifyOpData)); + XMEMSET(verifyStatus, 0, sizeof(CpaBoolean)); + + /* setup operation data */ + opData->fieldType = CPA_CY_EC_FIELD_TYPE_PRIME; + ret = IntelQaBigIntToFlatBuffer(m, &opData->m); + ret += IntelQaBigIntToFlatBuffer(r, &opData->r); + ret += IntelQaBigIntToFlatBuffer(s, &opData->s); + ret += IntelQaBigIntToFlatBuffer(xp, &opData->xp); + ret += IntelQaBigIntToFlatBuffer(yp, &opData->yp); + ret += IntelQaBigIntToFlatBuffer(a, &opData->a); + ret += IntelQaBigIntToFlatBuffer(b, &opData->b); + ret += IntelQaBigIntToFlatBuffer(q, &opData->q); + ret += IntelQaBigIntToFlatBuffer(n, &opData->n); + ret += IntelQaBigIntToFlatBuffer(xg, &opData->xg); + ret += IntelQaBigIntToFlatBuffer(yg, &opData->yg); + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + + /* store info needed for output */ + dev->qat.op.ecc_verify.stat = pVerifyStatus; + IntelQaOpInit(dev, IntelQaEcdsaVerifyFree); + + /* Perform ECDSA verify */ + do { + status = cpaCyEcdsaVerify(dev->qat.handle, + callback, + dev, + opData, + verifyStatus); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_ECDSA_ASYNC, + callback, &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyEcdsaVerify failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaEcdsaVerifyFree(dev); + + return ret; +} +#endif /* HAVE_ECC_VERIFY */ + +#endif /* HAVE_ECC */ + + +#ifndef NO_DH + +static void IntelQaDhKeyGenFree(WC_ASYNC_DEV* dev) +{ + CpaCyDhPhase1KeyGenOpData* opData = &dev->qat.op.dh_gen.opData; + CpaFlatBuffer* pOut = &dev->qat.op.dh_gen.pOut; + + if (opData) { + IntelQaFreeFlatBuffer(&opData->privateValueX, dev->heap); + + XMEMSET(opData, 0, sizeof(CpaCyDhPhase1KeyGenOpData)); + } + + if (pOut) { + if (pOut->pData) { + XFREE(pOut->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + pOut->pData = NULL; + } + XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaDhKeyGenCallback(void *pCallbackTag, CpaStatus status, + void *pOpData, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyDhPhase1KeyGenOpData* opData = (CpaCyDhPhase1KeyGenOpData*)pOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaDhKeyGenCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* validate returned output */ + if (dev->qat.outLenPtr) { + if (pOut->dataLenInBytes > *dev->qat.outLenPtr) { + pOut->dataLenInBytes = *dev->qat.outLenPtr; + } + *dev->qat.outLenPtr = pOut->dataLenInBytes; + } + + /* return data */ + if (dev->qat.out && dev->qat.out != pOut->pData) { + XMEMCPY(dev->qat.out, pOut->pData, pOut->dataLenInBytes); + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaDhKeyGen(WC_ASYNC_DEV* dev, WC_BIGINT* p, WC_BIGINT* g, + WC_BIGINT* x, byte* pub, word32* pubSz) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyDhPhase1KeyGenOpData* opData = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaDhKeyGenCallback; + 
CpaFlatBuffer* pOut = NULL; + + if (dev == NULL || p == NULL || p->buf == NULL || g == NULL || x == NULL || + pub == NULL || pubSz == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaDhKeyGen dev %p\n", dev); +#endif + + /* setup operation */ + opData = &dev->qat.op.dh_gen.opData; + pOut = &dev->qat.op.dh_gen.pOut; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyDhPhase1KeyGenOpData)); + XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + + /* setup operation data */ + ret = IntelQaBigIntToFlatBuffer(p, &opData->primeP); + ret += IntelQaBigIntToFlatBuffer(g, &opData->baseG); + /* transfer control of big int buffer to opData structure */ + ret += IntelQaBigIntToFlatBuffer(x, &opData->privateValueX); + /* don't let caller free x, do it in IntelQaDhKeyGenFree */ + x->buf = NULL; + x->len = 0; + if (ret != 0) { + ret = BAD_FUNC_ARG; goto exit; + } + pOut->dataLenInBytes = p->len; + pOut->pData = XREALLOC(pub, p->len, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + if (pOut->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* store info needed for output */ + *pubSz = p->len; + dev->qat.out = pub; + dev->qat.outLenPtr = pubSz; + IntelQaOpInit(dev, IntelQaDhKeyGenFree); + + /* Perform DhKeyGen */ + do { + status = cpaCyDhKeyGenPhase1(dev->qat.handle, + callback, + dev, + opData, + pOut); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_DH_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyDhKeyGenPhase1 failed! 
dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaDhKeyGenFree(dev); + + return ret; +} + +static void IntelQaDhAgreeFree(WC_ASYNC_DEV* dev) +{ + CpaCyDhPhase2SecretKeyGenOpData* opData = &dev->qat.op.dh_agree.opData; + CpaFlatBuffer* pOut = &dev->qat.op.dh_agree.pOut; + + if (pOut) { + if (pOut->pData) { + XFREE(pOut->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + pOut->pData = NULL; + } + XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + } + if (opData) { + if (opData->remoteOctetStringPV.pData) { + XFREE(opData->remoteOctetStringPV.pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + opData->remoteOctetStringPV.pData = NULL; + } + if (opData->privateValueX.pData) { + XFREE(opData->privateValueX.pData, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + opData->privateValueX.pData = NULL; + } + XMEMSET(opData, 0, sizeof(CpaCyDhPhase2SecretKeyGenOpData)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; + dev->qat.outLenPtr = NULL; +} + +static void IntelQaDhAgreeCallback(void *pCallbackTag, CpaStatus status, + void *pOpData, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyDhPhase2SecretKeyGenOpData* opData = + (CpaCyDhPhase2SecretKeyGenOpData*)pOpData; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaDhAgreeCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + word32 idxTrim = 0; + byte* out = (byte*)pOut->pData; + + /* check output size */ + if (dev->qat.outLenPtr) { + if (pOut->dataLenInBytes > *dev->qat.outLenPtr) { + pOut->dataLenInBytes = *dev->qat.outLenPtr; + } + } + + /* count leading zeros */ + while (out[idxTrim] == 0 && idxTrim < pOut->dataLenInBytes) { + idxTrim++; + } + pOut->dataLenInBytes -= idxTrim; + + /* return data and trim leading zeros */ + if (dev->qat.out && (dev->qat.out != pOut->pData || idxTrim > 0)) { + XMEMMOVE(dev->qat.out, &out[idxTrim], pOut->dataLenInBytes); + } + + /* return final length */ 
+ if (dev->qat.outLenPtr) { + *dev->qat.outLenPtr = pOut->dataLenInBytes; + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaDhAgree(WC_ASYNC_DEV* dev, WC_BIGINT* p, + byte* agree, word32* agreeSz, const byte* priv, word32 privSz, + const byte* otherPub, word32 pubSz) +{ + int ret, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyDhPhase2SecretKeyGenOpData* opData = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaDhAgreeCallback; + CpaFlatBuffer* pOut = NULL; + + if (dev == NULL || agree == NULL || agreeSz == NULL || + priv == NULL || privSz == 0 || otherPub == NULL || pubSz == 0) { + return BAD_FUNC_ARG; + } + +#ifdef QAT_DEBUG + printf("IntelQaDhAgree dev %p, agreeSz %d\n", dev, *agreeSz); +#endif + + /* setup operation */ + opData = &dev->qat.op.dh_agree.opData; + pOut = &dev->qat.op.dh_agree.pOut; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyDhPhase2SecretKeyGenOpData)); + XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + + /* setup operation data */ + ret = IntelQaBigIntToFlatBuffer(p, &opData->primeP); + if (ret != 0) { + goto exit; + } + + opData->remoteOctetStringPV.dataLenInBytes = pubSz; + opData->remoteOctetStringPV.pData = XREALLOC((byte*)otherPub, pubSz, + dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + opData->privateValueX.dataLenInBytes = privSz; + opData->privateValueX.pData = XREALLOC((byte*)priv, privSz, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + pOut->dataLenInBytes = p->len; + pOut->pData = XREALLOC(agree, p->len, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + + if (opData->remoteOctetStringPV.pData == NULL || + opData->privateValueX.pData == NULL || pOut->pData == NULL) { + ret = MEMORY_E; goto exit; + } + + /* store info needed for output */ + dev->qat.out = agree; + dev->qat.outLenPtr = agreeSz; + IntelQaOpInit(dev, IntelQaDhAgreeFree); + + /* Perform DhKeyGen */ + do { + status = cpaCyDhKeyGenPhase2Secret(dev->qat.handle, + callback, 
+ dev, + opData, + pOut); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_DH_ASYNC, callback, + &retryCount)); + + if (ret == WC_PENDING_E) + return ret; + +exit: + + if (ret != 0) { + printf("cpaCyDhKeyGenPhase2Secret failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaDhAgreeFree(dev); + + return ret; +} + +#endif /* !NO_DH */ + + +#if defined(QAT_ENABLE_RNG) +/* -------------------------------------------------------------------------- */ +/* Random NRBG/DRBG */ +/* -------------------------------------------------------------------------- */ +int IntelQaNrbg(CpaFlatBuffer* pBuffer, Cpa32U length) +{ + CpaStatus status; + CpaCyNrbgOpData opData; + CpaInstanceHandle instanceHandle = CPA_INSTANCE_HANDLE_SINGLE; + + if (pBuffer == NULL || length == 0) { + return BAD_FUNC_ARG; + } + + if (pBuffer->dataLenInBytes < length) { + return BAD_FUNC_ARG; + } + + /* For now use the first crypto instance - assumed to be started already */ + status = cpaCyGetInstances(1, &instanceHandle); + if (instanceHandle == NULL || status != CPA_STATUS_SUCCESS) { + return ASYNC_INIT_E; + } + + /* init buffers */ + XMEMSET(&opData, 0, sizeof(CpaCyNrbgOpData)); + opData.lengthInBytes = length; + + /* Perform NRBG generation */ + status = cpaCyNrbgGetEntropy(instanceHandle, NULL, NULL, &opData, pBuffer); + if (status != CPA_STATUS_SUCCESS) { + printf("cpaCyNrbgGetEntropy failed! 
status %d\n", status); + } + + return status; +} + +static CpaStatus IntelQaGetEntropyInputFunc( + IcpSalDrbgGetEntropyInputCbFunc pCb, + void* pCallbackTag, + icp_sal_drbg_get_entropy_op_data_t *pOpData, + CpaFlatBuffer *pBuffer, + Cpa32U *pLengthReturned) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + + *pLengthReturned = pOpData->maxLength; + + status = IntelQaNrbg(pBuffer, pOpData->maxLength); + if (status != CPA_STATUS_SUCCESS) { + return CPA_STATUS_FAIL; + } + + if (pCb != NULL) { + pCb(pCallbackTag, CPA_STATUS_SUCCESS, pOpData, + pOpData->maxLength, pBuffer); + } + + return CPA_STATUS_SUCCESS; +} + +static CpaStatus IntelQaGetNonceFunc( + icp_sal_drbg_get_entropy_op_data_t *pOpData, + CpaFlatBuffer *pBuffer, + Cpa32U *pLengthReturned) +{ + + CpaStatus status = CPA_STATUS_SUCCESS; + + status = IntelQaNrbg(pBuffer, pOpData->maxLength); + if (status != CPA_STATUS_SUCCESS) { + return CPA_STATUS_FAIL; + } + *pLengthReturned = pOpData->maxLength; + + return CPA_STATUS_SUCCESS; +} + +static CpaBoolean IntelQaNotDFRequired(void) +{ + return CPA_FALSE; +} + +static int IntelQaDrbgClose(WC_ASYNC_DEV* dev) +{ + CpaStatus status; + + if (dev == NULL) + return BAD_FUNC_ARG; + +#ifdef QAT_DEBUG + printf("cpaCyDrbgRemoveSession dev %p\n", dev); +#endif + + if (dev->qat.op.drbg.handle) { + CpaCyDrbgSessionHandle handle = dev->qat.op.drbg.handle; + dev->qat.op.drbg.handle = NULL; + + status = cpaCyDrbgRemoveSession(dev->qat.handle, handle); + if (status != CPA_STATUS_SUCCESS) { + printf("cpaCyDrbgRemoveSession failed! 
status %d\n", status); + } + + XFREE(handle, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + } + + return 0; +} + +static void IntelQaDrbgFree(WC_ASYNC_DEV* dev) +{ + CpaCyDrbgGenOpData* opData = &dev->qat.op.drbg.opData; + CpaFlatBuffer* pOut = &dev->qat.op.drbg.pOut; + + if (pOut) { + if (pOut->pData) { + XFREE(pOut->pData, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA); + pOut->pData = NULL; + } + XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + } + + if (opData) { + XMEMSET(opData, 0, sizeof(CpaCyDrbgGenOpData)); + } + + /* clear temp pointers */ + dev->qat.out = NULL; +} + +static void IntelQaDrbgCallback(void *pCallbackTag, CpaStatus status, + void *pOpdata, CpaFlatBuffer *pOut) +{ + WC_ASYNC_DEV* dev = (WC_ASYNC_DEV*)pCallbackTag; + CpaCyDrbgGenOpData* opData = (CpaCyDrbgGenOpData*)pOpdata; + int ret = ASYNC_OP_E; + +#ifdef QAT_DEBUG + printf("IntelQaDrbgCallback: dev %p, status %d, len %d\n", + dev, status, pOut->dataLenInBytes); +#endif + + if (status == CPA_STATUS_SUCCESS) { + /* return data */ + if (dev->qat.out && dev->qat.out != pOut->pData) { + XMEMCPY(dev->qat.out, pOut->pData, pOut->dataLenInBytes); + } + + /* mark event result */ + ret = 0; /* success */ + } + (void)opData; + + /* set return code to mark complete */ + dev->qat.ret = ret; +} + +int IntelQaDrbg(WC_ASYNC_DEV* dev, byte* rngBuf, word32 rngSz) +{ + int ret = 0, retryCount = 0; + CpaStatus status = CPA_STATUS_SUCCESS; + CpaCyDrbgGenOpData* opData = NULL; + CpaCyGenFlatBufCbFunc callback = IntelQaDrbgCallback; + CpaFlatBuffer* pOut = NULL; + word32 idx = 0, gen = 0; + + if (dev == NULL || rngBuf == NULL) { + return BAD_FUNC_ARG; + } + + /* This function can be called with rngSz == 0 */ + if (rngSz == 0) { + return 0; /* no data to get */ + } + +#ifdef QAT_DEBUG + printf("IntelQaDrbg: dev %p, buf %p, sz %d\n", dev, rngBuf, rngSz); +#endif + + /* setup operation */ + opData = &dev->qat.op.drbg.opData; + pOut = &dev->qat.op.drbg.pOut; + + /* init buffers */ + XMEMSET(opData, 0, sizeof(CpaCyDrbgGenOpData)); + 
XMEMSET(pOut, 0, sizeof(CpaFlatBuffer)); + + if (dev->qat.op.drbg.handle == NULL) { + CpaCyDrbgSessionSetupData setup; + Cpa32U seedLen = 0; + Cpa32U handleSize; + + #ifdef QAT_DEBUG + printf("cpaCyDrbgInitSession dev %p\n", dev); + #endif + + /* register required DRBG callback functions */ + icp_sal_drbgIsDFReqFuncRegister(IntelQaNotDFRequired); + icp_sal_drbgGetEntropyInputFuncRegister(IntelQaGetEntropyInputFunc); + icp_sal_drbgGetNonceFuncRegister(IntelQaGetNonceFunc); + + setup.predictionResistanceRequired = CPA_FALSE; + setup.secStrength = CPA_CY_RBG_SEC_STRENGTH_128; + setup.personalizationString.dataLenInBytes = 0; + setup.personalizationString.pData = NULL; + + status = cpaCyDrbgSessionGetSize(dev->qat.handle, &setup, &handleSize); + if (status != CPA_STATUS_SUCCESS) { + ret = ASYNC_INIT_E; goto exit; + } + + dev->qat.op.drbg.handle = (CpaCyDrbgSessionHandle)XMALLOC( + handleSize, dev->heap, DYNAMIC_TYPE_ASYNC_NUMA64); + if (dev->qat.op.drbg.handle == NULL) { + ret = MEMORY_E; goto exit; + } + + status = cpaCyDrbgInitSession(dev->qat.handle, + callback, /* callback function for generate */ + NULL, /* callback function for reseed */ + &setup, /* session setup data */ + dev->qat.op.drbg.handle, + &seedLen); + } + + /* chunk into LAC_DRBG_MAX_NUM_OF_BYTES (0xFFFF) */ + while (ret == 0 && idx < rngSz) { + /* setup operation data */ + gen = rngSz - gen; + if (gen > 0xFFFF) + gen = 0xFFFF; + + pOut->dataLenInBytes = gen; + if (idx == 0 && pOut->pData == NULL) { + pOut->pData = XREALLOC(rngBuf, gen, dev->heap, + DYNAMIC_TYPE_ASYNC_NUMA); + if (pOut->pData == NULL) { + ret = MEMORY_E; goto exit; + } + } + else { + XMEMCPY(pOut->pData, &rngBuf[idx], gen); + } + + opData->sessionHandle = dev->qat.op.drbg.handle; + opData->lengthInBytes = gen; + opData->secStrength = CPA_CY_RBG_SEC_STRENGTH_128; + opData->predictionResistanceRequired = CPA_FALSE; + opData->additionalInput.dataLenInBytes = 0; + opData->additionalInput.pData = NULL; + + /* store info needed for output 
*/ + dev->qat.out = &rngBuf[idx]; + IntelQaOpInit(dev, IntelQaDrbgFree); + + /* Perform DRBG generation */ + do { + status = cpaCyDrbgGen(dev->qat.handle, + dev, + opData, + pOut); + } while (IntelQaHandleCpaStatus(dev, status, &ret, QAT_DRBG_ASYNC, + callback, &retryCount)); + + idx += gen; + }; + +exit: + + if (ret != 0) { + printf("cpaCyDrbgGen failed! dev %p, status %d, ret %d\n", + dev, status, ret); + } + + /* handle cleanup */ + IntelQaDrbgFree(dev); + + return ret; +} +#endif /* QAT_ENABLE_RNG */ + +#ifdef QAT_DEMO_MAIN + + /* RSA */ +static const byte rsa_in[256] = { + 0x7e, 0xf5, 0x69, 0x11, 0x6f, 0x67, 0x81, 0x71, 0xa2, 0x3e, 0xe7, 0x0e, + 0xad, 0xb9, 0x5f, 0x20, 0xc8, 0x2d, 0x8b, 0xd3, 0xb1, 0x65, 0x27, 0x34, + 0x7a, 0x10, 0x2e, 0xf4, 0xe9, 0x6a, 0x69, 0x93, 0xc0, 0x3e, 0xad, 0xbe, + 0x2e, 0x35, 0x34, 0xeb, 0x64, 0x45, 0x09, 0xf4, 0x07, 0x33, 0x6f, 0xac, + 0x2f, 0xc8, 0x59, 0xca, 0x72, 0x99, 0x0b, 0x99, 0xb1, 0xf3, 0xda, 0x42, + 0xdb, 0x7b, 0xed, 0x4c, 0x22, 0x48, 0x08, 0x8a, 0x30, 0xd7, 0xdc, 0x99, + 0x0b, 0xb9, 0x1a, 0xc5, 0x40, 0xe5, 0x7d, 0xe9, 0xbf, 0x0a, 0x05, 0xea, + 0x07, 0x24, 0x7a, 0x1f, 0x54, 0xbf, 0x77, 0x71, 0x09, 0xec, 0x6d, 0xdf, + 0x87, 0xc2, 0x11, 0xda, 0x8c, 0x66, 0x46, 0x1d, 0x5a, 0x45, 0x23, 0x35, + 0x96, 0x48, 0xa7, 0x0e, 0x03, 0xe1, 0x02, 0x43, 0x76, 0x56, 0xae, 0xc3, + 0x6e, 0x61, 0x73, 0xba, 0x48, 0x6e, 0x8a, 0x58, 0x60, 0xdd, 0x0a, 0x81, + 0x46, 0xe4, 0xb4, 0x03, 0xf1, 0x63, 0xf4, 0xc1, 0xad, 0xd5, 0x4a, 0xda, + 0x25, 0xd9, 0x9d, 0x56, 0x1f, 0xb4, 0x7b, 0x2b, 0xdd, 0x90, 0x4e, 0xfd, + 0xa1, 0xd4, 0x5b, 0xd9, 0x17, 0x1a, 0x68, 0xd0, 0x3c, 0x95, 0x94, 0x64, + 0x6a, 0x4a, 0xad, 0x39, 0xe5, 0x5f, 0xd1, 0xe2, 0xb1, 0x1b, 0xad, 0x1d, + 0x2a, 0xc2, 0x12, 0xed, 0x47, 0xa1, 0xac, 0x0f, 0x3e, 0x3b, 0x44, 0x2f, + 0x61, 0xa5, 0xab, 0xa1, 0x03, 0xe9, 0x40, 0x62, 0x82, 0xc6, 0x33, 0xcf, + 0x12, 0xeb, 0x76, 0x73, 0x13, 0x61, 0xe5, 0x3b, 0xf9, 0x38, 0x24, 0xc0, + 0x24, 0xc7, 0x88, 0x2b, 0x4a, 0x3c, 0x42, 0x26, 0xd0, 0xe6, 0x4d, 0xc8, + 0x41, 
0x58, 0x94, 0x77, 0x91, 0x1d, 0xfa, 0xbb, 0x9f, 0xa8, 0x43, 0xe0, + 0x33, 0x46, 0x7e, 0x8e, 0xcf, 0xfc, 0x3e, 0xd4, 0x72, 0x7b, 0xf9, 0xee, + 0xca, 0xfd, 0x96, 0xd4, +}; +static const byte rsa_d[256] = { + 0xa2, 0xe6, 0xd8, 0x5f, 0x10, 0x71, 0x64, 0x08, 0x9e, 0x2e, 0x6d, 0xd1, + 0x6d, 0x1e, 0x85, 0xd2, 0x0a, 0xb1, 0x8c, 0x47, 0xce, 0x2c, 0x51, 0x6a, + 0xa0, 0x12, 0x9e, 0x53, 0xde, 0x91, 0x4c, 0x1d, 0x6d, 0xea, 0x59, 0x7b, + 0xf2, 0x77, 0xaa, 0xd9, 0xc6, 0xd9, 0x8a, 0xab, 0xd8, 0xe1, 0x16, 0xe4, + 0x63, 0x26, 0xff, 0xb5, 0x6c, 0x13, 0x59, 0xb8, 0xe3, 0xa5, 0xc8, 0x72, + 0x17, 0x2e, 0x0c, 0x9f, 0x6f, 0xe5, 0x59, 0x3f, 0x76, 0x6f, 0x49, 0xb1, + 0x11, 0xc2, 0x5a, 0x2e, 0x16, 0x29, 0x0d, 0xde, 0xb7, 0x8e, 0xdc, 0x40, + 0xd5, 0xa2, 0xee, 0xe0, 0x1e, 0xa1, 0xf4, 0xbe, 0x97, 0xdb, 0x86, 0x63, + 0x96, 0x14, 0xcd, 0x98, 0x09, 0x60, 0x2d, 0x30, 0x76, 0x9c, 0x3c, 0xcd, + 0xe6, 0x88, 0xee, 0x47, 0x92, 0x79, 0x0b, 0x5a, 0x00, 0xe2, 0x5e, 0x5f, + 0x11, 0x7c, 0x7d, 0xf9, 0x08, 0xb7, 0x20, 0x06, 0x89, 0x2a, 0x5d, 0xfd, + 0x00, 0xab, 0x22, 0xe1, 0xf0, 0xb3, 0xbc, 0x24, 0xa9, 0x5e, 0x26, 0x0e, + 0x1f, 0x00, 0x2d, 0xfe, 0x21, 0x9a, 0x53, 0x5b, 0x6d, 0xd3, 0x2b, 0xab, + 0x94, 0x82, 0x68, 0x43, 0x36, 0xd8, 0xf6, 0x2f, 0xc6, 0x22, 0xfc, 0xb5, + 0x41, 0x5d, 0x0d, 0x33, 0x60, 0xea, 0xa4, 0x7d, 0x7e, 0xe8, 0x4b, 0x55, + 0x91, 0x56, 0xd3, 0x5c, 0x57, 0x8f, 0x1f, 0x94, 0x17, 0x2f, 0xaa, 0xde, + 0xe9, 0x9e, 0xa8, 0xf4, 0xcf, 0x8a, 0x4c, 0x8e, 0xa0, 0xe4, 0x56, 0x73, + 0xb2, 0xcf, 0x4f, 0x86, 0xc5, 0x69, 0x3c, 0xf3, 0x24, 0x20, 0x8b, 0x5c, + 0x96, 0x0c, 0xfa, 0x6b, 0x12, 0x3b, 0x9a, 0x67, 0xc1, 0xdf, 0xc6, 0x96, + 0xb2, 0xa5, 0xd5, 0x92, 0x0d, 0x9b, 0x09, 0x42, 0x68, 0x24, 0x10, 0x45, + 0xd4, 0x50, 0xe4, 0x17, 0x39, 0x48, 0xd0, 0x35, 0x8b, 0x94, 0x6d, 0x11, + 0xde, 0x8f, 0xca, 0x59, +}; +static const byte rsa_n[256] = { + 0xc3, 0x03, 0xd1, 0x2b, 0xfe, 0x39, 0xa4, 0x32, 0x45, 0x3b, 0x53, 0xc8, + 0x84, 0x2b, 0x2a, 0x7c, 0x74, 0x9a, 0xbd, 0xaa, 0x2a, 0x52, 0x07, 0x47, + 0xd6, 0xa6, 0x36, 0xb2, 
0x07, 0x32, 0x8e, 0xd0, 0xba, 0x69, 0x7b, 0xc6, + 0xc3, 0x44, 0x9e, 0xd4, 0x81, 0x48, 0xfd, 0x2d, 0x68, 0xa2, 0x8b, 0x67, + 0xbb, 0xa1, 0x75, 0xc8, 0x36, 0x2c, 0x4a, 0xd2, 0x1b, 0xf7, 0x8b, 0xba, + 0xcf, 0x0d, 0xf9, 0xef, 0xec, 0xf1, 0x81, 0x1e, 0x7b, 0x9b, 0x03, 0x47, + 0x9a, 0xbf, 0x65, 0xcc, 0x7f, 0x65, 0x24, 0x69, 0xa6, 0xe8, 0x14, 0x89, + 0x5b, 0xe4, 0x34, 0xf7, 0xc5, 0xb0, 0x14, 0x93, 0xf5, 0x67, 0x7b, 0x3a, + 0x7a, 0x78, 0xe1, 0x01, 0x56, 0x56, 0x91, 0xa6, 0x13, 0x42, 0x8d, 0xd2, + 0x3c, 0x40, 0x9c, 0x4c, 0xef, 0xd1, 0x86, 0xdf, 0x37, 0x51, 0x1b, 0x0c, + 0xa1, 0x3b, 0xf5, 0xf1, 0xa3, 0x4a, 0x35, 0xe4, 0xe1, 0xce, 0x96, 0xdf, + 0x1b, 0x7e, 0xbf, 0x4e, 0x97, 0xd0, 0x10, 0xe8, 0xa8, 0x08, 0x30, 0x81, + 0xaf, 0x20, 0x0b, 0x43, 0x14, 0xc5, 0x74, 0x67, 0xb4, 0x32, 0x82, 0x6f, + 0x8d, 0x86, 0xc2, 0x88, 0x40, 0x99, 0x36, 0x83, 0xba, 0x1e, 0x40, 0x72, + 0x22, 0x17, 0xd7, 0x52, 0x65, 0x24, 0x73, 0xb0, 0xce, 0xef, 0x19, 0xcd, + 0xae, 0xff, 0x78, 0x6c, 0x7b, 0xc0, 0x12, 0x03, 0xd4, 0x4e, 0x72, 0x0d, + 0x50, 0x6d, 0x3b, 0xa3, 0x3b, 0xa3, 0x99, 0x5e, 0x9d, 0xc8, 0xd9, 0x0c, + 0x85, 0xb3, 0xd9, 0x8a, 0xd9, 0x54, 0x26, 0xdb, 0x6d, 0xfa, 0xac, 0xbb, + 0xff, 0x25, 0x4c, 0xc4, 0xd1, 0x79, 0xf4, 0x71, 0xd3, 0x86, 0x40, 0x18, + 0x13, 0xb0, 0x63, 0xb5, 0x72, 0x4e, 0x30, 0xc4, 0x97, 0x84, 0x86, 0x2d, + 0x56, 0x2f, 0xd7, 0x15, 0xf7, 0x7f, 0xc0, 0xae, 0xf5, 0xfc, 0x5b, 0xe5, + 0xfb, 0xa1, 0xba, 0xd3, +}; + + +/* AES GCM */ +static const byte aesgcm_k[] = { + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, + 0x99, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, + 0x77, 0x88, 0x99, 0x00, 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, 0x77, 0x88, 0x99, 0x00, 0x11, 0x22 +}; + +static const byte aesgcm_iv[] = { + 0xca, 0xfe, 0xca, 0xfe, 0xca, 0xfe, 0xca, 0xfe, + 0xca, 0xfe, 0xca, 0xfe +}; + +static const byte aesgcm_a[] = { + 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, + 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, 0xde, 0xad, + 0xde, 0xad, 0xde, 0xad +}; + +static const byte aesgcm_p[] = { + 0x79, 0x84, 
0x86, 0x44, 0x68, 0x45, 0x15, 0x61, + 0x86, 0x54, 0x66, 0x56, 0x54, 0x54, 0x31, 0x54, + 0x64, 0x64, 0x68, 0x45, 0x15, 0x15, 0x61, 0x61, + 0x51, 0x51, 0x51, 0x51, 0x51, 0x56, 0x14, 0x11, + 0x72, 0x13, 0x51, 0x82, 0x84, 0x56, 0x74, 0x53, + 0x45, 0x34, 0x65, 0x15, 0x46, 0x14, 0x67, 0x55, + 0x16, 0x14, 0x67, 0x54, 0x65, 0x47, 0x14, 0x67, + 0x46, 0x74, 0x65, 0x46 +}; + +static const byte aesgcm_c[] = { + 0x59, 0x85, 0x02, 0x97, 0xE0, 0x4D, 0xFC, 0x5C, + 0x03, 0xCC, 0x83, 0x64, 0xCE, 0x28, 0x0B, 0x95, + 0x78, 0xEC, 0x93, 0x40, 0xA1, 0x8D, 0x21, 0xC5, + 0x48, 0x6A, 0x39, 0xBA, 0x4F, 0x4B, 0x8C, 0x95, + 0x6F, 0x8C, 0xF6, 0x9C, 0xD0, 0xA5, 0x8D, 0x67, + 0xA1, 0x32, 0x11, 0xE7, 0x2E, 0xF6, 0x63, 0xAF, + 0xDE, 0xD4, 0x7D, 0xEC, 0x15, 0x01, 0x58, 0xCB, + 0xE3, 0x7B, 0xC6, 0x94, +}; + +static byte aesgcm_t[] = { + 0x5D, 0x10, 0x3F, 0xC7, 0x22, 0xC7, 0x21, 0x29 +}; + + +/* ecc curve */ +static byte ecc_a[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc +}; +static byte ecc_b[] = { + 0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, + 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, + 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, + 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b +}; +static byte ecc_q[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +/* private key */ +static byte ecc_k[] = { + 0x52, 0x2f, 0x27, 0xe3, 0x44, 0x3c, 0xa7, 0x92, + 0x9b, 0xdc, 0xe3, 0x00, 0x8a, 0x47, 0x0f, 0x28, + 0x5c, 0x0e, 0x2d, 0x87, 0xfd, 0x89, 0x56, 0xdd, + 0x83, 0x94, 0x6c, 0x48, 0x6c, 0x15, 0x59, 0xb7, + 0xf1, 0xc8, 0x13, 0x27, 0xe5, 0x80, 0xbd, 0x9c +}; +/* public key */ +static byte ecc_xg[] = { + 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, + 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 
0x40, 0xf2, + 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, + 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96 +}; +static byte ecc_yg[] = { + 0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, + 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, + 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, + 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5 +}; + + +/* DH */ +static byte dh_priv1[] = { + 0xbd, 0x64, 0xf6, 0xd2, 0xe9, 0xca, 0xd0, 0xda, + 0x41, 0x48, 0x95, 0x5d, 0xd3, 0xa7, 0x36, 0x47, + 0xb6, 0x28, 0xdf, 0x05, 0x7b, 0x9c, 0xcd, 0x34, + 0x79, 0x09, 0x7a, 0x06, 0x43, +}; + +static byte dh_pub1[] = { + 0xaa, 0x43, 0x2e, 0xfd, 0xc6, 0xbe, 0x40, 0xdc, 0xac, 0x64, 0xf2, 0x65, + 0x91, 0xae, 0x88, 0xa0, 0x7b, 0x71, 0x3d, 0x9f, 0xa7, 0x00, 0xbe, 0x82, + 0xbb, 0xb5, 0x27, 0x2a, 0x58, 0xce, 0xb5, 0xf9, 0x18, 0x6e, 0x0b, 0xaa, + 0x75, 0x91, 0x59, 0x30, 0x2b, 0x1e, 0xf3, 0x26, 0xa5, 0x6a, 0x22, 0x91, + 0x65, 0xad, 0x5f, 0xef, 0x53, 0x57, 0x76, 0x53, 0xe8, 0xc2, 0x93, 0x9d, + 0x21, 0x7e, 0x91, 0x27, 0x79, 0xe4, 0xa5, 0xa1, 0x8b, 0x20, 0x52, 0xa2, + 0xd6, 0x22, 0xef, 0x15, 0x2c, 0xa7, 0xf3, 0xfc, 0xce, 0xc7, 0x1b, 0x90, + 0xaa, 0x9b, 0xb3, 0x83, 0xff, 0x21, 0xa0, 0x20, 0xc7, 0x21, 0x93, 0xbd, + 0x1a, 0xf3, 0xae, 0xd9, 0x16, 0x02, 0xf0, 0x62, 0x07, 0x68, 0xea, 0x1a, + 0xe7, 0xa6, 0xb9, 0xa6, 0x3b, 0x9a, 0x23, 0x4c, 0x21, 0xec, 0xa1, 0xe0, + 0x8f, 0x16, 0x2a, 0x99, 0x36, 0xbf, 0x57, 0x89, 0xf0, 0x3d, 0x84, 0xca, + 0x99, 0xe8, 0xea, 0x79, 0x24, 0xc0, 0x93, 0x96, 0x70, 0x9a, 0xbb, 0x16, + 0xa3, 0xe9, 0x06, 0x59, 0xb4, 0x6c, 0xe7, 0x48, 0x59, 0xde, 0x75, 0x83, + 0xbb, 0xc2, 0xa7, 0xd7, 0x84, 0x1d, 0xf4, 0x27, 0xf1, 0x72, 0x04, 0x64, + 0x01, 0x6b, 0x7b, 0xac, 0xf2, 0xaf, 0x12, 0x4c, 0x22, 0x83, 0xae, 0x8f, + 0x6d, 0x50, 0xe8, 0x16, 0xdc, 0x4c, 0x25, 0xe4, 0x54, 0x5a, 0xf0, 0xb7, + 0x82, 0x4f, 0xdc, 0x2e, 0xb5, 0xfd, 0x24, 0x26, 0x22, 0x26, 0x4f, 0x20, + 0x76, 0xb4, 0x36, 0x9e, 0x62, 0xb8, 0xb9, 0x2c, 0x52, 0xaf, 0x58, 0xa8, + 0x90, 0xcd, 0x62, 0x06, 0x30, 0xcc, 0x93, 0x8b, 0x3d, 0xd4, 
0xd1, 0x5f, + 0x60, 0x3b, 0x28, 0x15, 0xcc, 0x92, 0xc1, 0x70, 0xb7, 0x39, 0x8c, 0x73, + 0x01, 0x65, 0x2f, 0x19, 0xeb, 0xd0, 0xce, 0x3f, 0x84, 0x36, 0xea, 0x11, + 0x34, 0x0e, 0xce, 0x0b, +}; + +static byte dh_priv2[] = { + 0x5e, 0x49, 0x52, 0xb3, 0xc4, 0x8f, 0x3f, 0xde, 0x55, 0x9d, 0x87, 0xb3, + 0x21, 0xb8, 0x24, 0xb1, 0xb0, 0x35, 0x5e, 0xc7, 0xbb, 0x5a, 0x86, 0x9e, + 0xfb, 0xd3, 0x8f, 0x5b, 0x7e, +}; + +static byte dh_pub2[] = { + 0x9b, 0xc4, 0xdb, 0x33, 0xc4, 0x96, 0xf4, 0x43, 0xa0, 0x3b, 0x9d, 0x7c, + 0x7d, 0x81, 0x97, 0xf6, 0xb9, 0x94, 0x0f, 0x0f, 0x2e, 0xc1, 0x16, 0xdc, + 0xf6, 0xe3, 0xaf, 0xa1, 0xcd, 0x32, 0xdf, 0xd5, 0xdc, 0x12, 0x93, 0x99, + 0x1d, 0xfb, 0xff, 0x54, 0xdf, 0xf6, 0x24, 0x6a, 0xc2, 0x9e, 0xd0, 0x41, + 0xed, 0x28, 0x23, 0x8d, 0x68, 0x06, 0x57, 0xd6, 0xb6, 0xf1, 0x9a, 0x5d, + 0x41, 0xc7, 0x96, 0xf8, 0xc4, 0x7f, 0xd6, 0x92, 0x97, 0x56, 0x05, 0xd9, + 0x17, 0x46, 0x07, 0x19, 0x0b, 0x08, 0xd5, 0xba, 0x90, 0xd8, 0x40, 0x94, + 0x2d, 0x90, 0x75, 0x01, 0x77, 0xa7, 0x12, 0x82, 0x5b, 0x82, 0x9e, 0x7b, + 0x75, 0x46, 0xce, 0x07, 0x40, 0x9b, 0xbb, 0x10, 0x3d, 0xf7, 0x80, 0xaa, + 0x39, 0xa3, 0x67, 0xfa, 0xd8, 0x07, 0xda, 0x09, 0x92, 0x68, 0x6d, 0xa4, + 0xe2, 0xda, 0xde, 0x6e, 0x98, 0xcd, 0x1e, 0x6d, 0x68, 0x72, 0x0e, 0x68, + 0x1e, 0xaa, 0x72, 0x12, 0x92, 0xe6, 0x96, 0x3d, 0x6c, 0x57, 0xb8, 0x77, + 0x61, 0x6d, 0xb8, 0x6f, 0x1e, 0xbe, 0xd8, 0x2c, 0xdd, 0xc4, 0xe9, 0x38, + 0x77, 0xde, 0x5f, 0x2f, 0xb6, 0x40, 0xf0, 0x30, 0x5b, 0x33, 0x16, 0xd4, + 0xef, 0x74, 0x9f, 0x38, 0xbc, 0x4d, 0x2d, 0xf3, 0x14, 0x8f, 0x38, 0xcc, + 0x6c, 0x8b, 0xad, 0xef, 0x30, 0xee, 0xc0, 0x36, 0x31, 0x6b, 0xc8, 0xb0, + 0x55, 0x44, 0x62, 0xb0, 0x24, 0x70, 0x9f, 0x64, 0x5c, 0xb1, 0x70, 0x19, + 0xfa, 0xd4, 0x8d, 0x23, 0xa8, 0x24, 0x72, 0x49, 0xfd, 0x23, 0x90, 0x18, + 0x99, 0xc1, 0xd0, 0x96, 0x91, 0x5f, 0x62, 0xf9, 0xd7, 0x14, 0xfa, 0x8b, + 0xeb, 0x05, 0x97, 0x03, 0xe1, 0x51, 0xc9, 0x3b, 0x8d, 0x41, 0x86, 0x53, + 0x45, 0xdc, 0x6d, 0xe1, 0xc7, 0x94, 0xfd, 0xdd, 0x57, 0xed, 0xc6, 0xe7, + 0x38, 
0x84, 0xf7, 0xeb, +}; + +/* dh1024 p */ +static const byte dh_p[] = { + 0xb0, 0xa1, 0x08, 0x06, 0x9c, 0x08, 0x13, 0xba, 0x59, 0x06, 0x3c, 0xbc, + 0x30, 0xd5, 0xf5, 0x00, 0xc1, 0x4f, 0x44, 0xa7, 0xd6, 0xef, 0x4a, 0xc6, + 0x25, 0x27, 0x1c, 0xe8, 0xd2, 0x96, 0x53, 0x0a, 0x5c, 0x91, 0xdd, 0xa2, + 0xc2, 0x94, 0x84, 0xbf, 0x7d, 0xb2, 0x44, 0x9f, 0x9b, 0xd2, 0xc1, 0x8a, + 0xc5, 0xbe, 0x72, 0x5c, 0xa7, 0xe7, 0x91, 0xe6, 0xd4, 0x9f, 0x73, 0x07, + 0x85, 0x5b, 0x66, 0x48, 0xc7, 0x70, 0xfa, 0xb4, 0xee, 0x02, 0xc9, 0x3d, + 0x9a, 0x4a, 0xda, 0x3d, 0xc1, 0x46, 0x3e, 0x19, 0x69, 0xd1, 0x17, 0x46, + 0x07, 0xa3, 0x4d, 0x9f, 0x2b, 0x96, 0x17, 0x39, 0x6d, 0x30, 0x8d, 0x2a, + 0xf3, 0x94, 0xd3, 0x75, 0xcf, 0xa0, 0x75, 0xe6, 0xf2, 0x92, 0x1f, 0x1a, + 0x70, 0x05, 0xaa, 0x04, 0x83, 0x57, 0x30, 0xfb, 0xda, 0x76, 0x93, 0x38, + 0x50, 0xe8, 0x27, 0xfd, 0x63, 0xee, 0x3c, 0xe5, 0xb7, 0xc8, 0x09, 0xae, + 0x6f, 0x50, 0x35, 0x8e, 0x84, 0xce, 0x4a, 0x00, 0xe9, 0x12, 0x7e, 0x5a, + 0x31, 0xd7, 0x33, 0xfc, 0x21, 0x13, 0x76, 0xcc, 0x16, 0x30, 0xdb, 0x0c, + 0xfc, 0xc5, 0x62, 0xa7, 0x35, 0xb8, 0xef, 0xb7, 0xb0, 0xac, 0xc0, 0x36, + 0xf6, 0xd9, 0xc9, 0x46, 0x48, 0xf9, 0x40, 0x90, 0x00, 0x2b, 0x1b, 0xaa, + 0x6c, 0xe3, 0x1a, 0xc3, 0x0b, 0x03, 0x9e, 0x1b, 0xc2, 0x46, 0xe4, 0x48, + 0x4e, 0x22, 0x73, 0x6f, 0xc3, 0x5f, 0xd4, 0x9a, 0xd6, 0x30, 0x07, 0x48, + 0xd6, 0x8c, 0x90, 0xab, 0xd4, 0xf6, 0xf1, 0xe3, 0x48, 0xd3, 0x58, 0x4b, + 0xa6, 0xb9, 0xcd, 0x29, 0xbf, 0x68, 0x1f, 0x08, 0x4b, 0x63, 0x86, 0x2f, + 0x5c, 0x6b, 0xd6, 0xb6, 0x06, 0x65, 0xf7, 0xa6, 0xdc, 0x00, 0x67, 0x6b, + 0xbb, 0xc3, 0xa9, 0x41, 0x83, 0xfb, 0xc7, 0xfa, 0xc8, 0xe2, 0x1e, 0x7e, + 0xaf, 0x00, 0x3f, 0x93, +}; + + +/* simple example of using RSA encrypt with Intel QA */ +int main(int argc, char** argv) +{ + int ret; + WC_ASYNC_DEV dev; + byte out[256]; + word32 outLen = sizeof(out); + byte tmp[256]; + word32 tmpLen = sizeof(tmp); +#ifndef NO_RSA + WC_BIGINT d, n; +#endif +#if defined(HAVE_ECC) && defined(HAVE_ECC_DHE) + WC_BIGINT k, xG, yG, xR, yR, 
a, b, q; +#endif +#ifndef NO_DH + WC_BIGINT p; +#endif + +#ifdef QAT_DEBUG + wolfSSL_Debugging_ON(); +#endif + + IntelQaInit(NULL); + +#ifdef QAT_ENABLE_RNG + /* DRBG Test */ + IntelQaOpen(&dev, 0); + ret = IntelQaDrbg(&dev, out, sizeof(out)); + printf("RNG1: Ret=%d\n", ret); + + /* call again using same session */ + ret = IntelQaDrbg(&dev, out, sizeof(out)); + printf("RNG2: Ret=%d\n", ret); + IntelQaClose(&dev); +#endif + +#ifndef NO_RSA + IntelQaOpen(&dev, 0); + /* RSA Test */ + dev.event.ret = WC_PENDING_E; + XMEMSET(out, 0, sizeof(out)); + wc_bigint_init(&d); + wc_bigint_init(&n); + wc_bigint_from_unsigned_bin(&d, rsa_d, sizeof(rsa_d)); + wc_bigint_from_unsigned_bin(&n, rsa_n, sizeof(rsa_n)); + ret = IntelQaRsaPrivate(&dev, (byte*)rsa_in, sizeof(rsa_in), &d, &n, out, + &outLen); + if (ret == 0 || ret == WC_PENDING_E) { + ret = IntelQaPollBlockRet(&dev, WC_PENDING_E); + } + printf("RSA Private: Ret=%d, Out Len=%d\n", ret, outLen); + IntelQaClose(&dev); +#endif /* !NO_RSA */ + +#ifndef NO_AES +#ifdef HAVE_AESGCM + /* AES Test */ + IntelQaOpen(&dev, 0); + dev.event.ret = WC_PENDING_E; + tmpLen = sizeof(aesgcm_t); + XMEMSET(out, 0, sizeof(out)); + XMEMSET(tmp, 0, sizeof(tmp)); + + ret = IntelQaSymAesGcmEncrypt(&dev, out, aesgcm_p, sizeof(aesgcm_p), + aesgcm_k, sizeof(aesgcm_k), aesgcm_iv, sizeof(aesgcm_iv), + tmp, tmpLen, aesgcm_a, sizeof(aesgcm_a)); + if (ret == 0 || ret == WC_PENDING_E) { + ret = IntelQaPollBlockRet(&dev, WC_PENDING_E); + } + printf("AES GCM Encrypt: Ret=%d, Tag Len=%d\n", ret, tmpLen); + IntelQaClose(&dev); +#endif /* HAVE_AESGCM */ +#endif /* NO_AES */ + +#ifdef HAVE_ECC +#ifdef HAVE_ECC_DHE + /* ECDHE Test */ + IntelQaOpen(&dev, 0); + dev.event.ret = WC_PENDING_E; + XMEMSET(out, 0, sizeof(out)); + XMEMSET(tmp, 0, sizeof(tmp)); + wc_bigint_init(&xG); + wc_bigint_init(&yG); + wc_bigint_init(&k); + wc_bigint_init(&a); + wc_bigint_init(&b); + wc_bigint_init(&q); + wc_bigint_from_unsigned_bin(&xG, ecc_xg, sizeof(ecc_xg)); + 
wc_bigint_from_unsigned_bin(&yG, ecc_yg, sizeof(ecc_yg)); + wc_bigint_from_unsigned_bin(&k, ecc_k, sizeof(ecc_k)); + wc_bigint_from_unsigned_bin(&a, ecc_a, sizeof(ecc_a)); + wc_bigint_from_unsigned_bin(&b, ecc_b, sizeof(ecc_b)); + wc_bigint_from_unsigned_bin(&q, ecc_q, sizeof(ecc_q)); + + ret = IntelQaEcdh(&dev, &k, &xG, &yG, out, &outLen, &a, &b, &q, 1); + if (ret == 0 || ret == WC_PENDING_E) { + ret = IntelQaPollBlockRet(&dev, WC_PENDING_E); + } + printf("ECDH: Ret=%d, Result: X Len=%d, Y Len=%d\n", ret, xR.len, yR.len); + IntelQaClose(&dev); +#endif /* HAVE_ECC_DHE */ +#endif /* HAVE_ECC */ + +#ifndef NO_DH + /* DH Test */ + IntelQaOpen(&dev, 0); + dev.event.ret = WC_PENDING_E; + XMEMSET(out, 0, sizeof(out)); + XMEMSET(tmp, 0, sizeof(tmp)); + wc_bigint_init(&p); + wc_bigint_from_unsigned_bin(&p, dh_p, sizeof(dh_p)); + + outLen = 0; + ret = IntelQaDhAgree(&dev, &p, out, &outLen, dh_priv1, sizeof(dh_priv1), + dh_pub2, sizeof(dh_pub2)); + if (ret == 0 || ret == WC_PENDING_E) { + ret = IntelQaPollBlockRet(&dev, WC_PENDING_E); + } + printf("DH Agree1: Ret=%d, Out Len=%d\n", ret, outLen); + + tmpLen = 0; + ret = IntelQaDhAgree(&dev, &p, tmp, &tmpLen, dh_priv2, sizeof(dh_priv2), + dh_pub1, sizeof(dh_pub1)); + if (ret == 0 || ret == WC_PENDING_E) { + ret = IntelQaPollBlockRet(&dev, WC_PENDING_E); + } + printf("DH Agree2: Ret=%d, Out Len=%d\n", ret, tmpLen); + + /* compare results */ + if (ret != 0 || outLen != tmpLen || memcmp(out, tmp, outLen) != 0) { + printf("DH Agree Failed!\n"); + } + else { + printf("DH Agree Match\n"); + } + IntelQaClose(&dev); +#endif /* !NO_DH */ + + (void)tmp; + (void)tmpLen; + + IntelQaDeInit(0); + + return 0; +} + +#endif + +#endif /* HAVE_INTEL_QA */ diff --git a/wolfcrypt/src/port/intel/quickassist_mem.c b/wolfcrypt/src/port/intel/quickassist_mem.c new file mode 100644 index 000000000..4aea5c724 --- /dev/null +++ b/wolfcrypt/src/port/intel/quickassist_mem.c @@ -0,0 +1,1131 @@ +/* quickassist_mem.c + * + * Copyright (C) 2006-2023 wolfSSL 
Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_INTEL_QA +#include + +#include +#include + +#include +#include +#include + +/* use thread local for QAE variables (removing mutex requirement) */ +#include /* for threadId tracking */ +#ifdef USE_QAE_THREAD_LS + #define QAE_THREAD_LS THREAD_LS_T +#else + #define QAE_THREAD_LS +#endif + +/* these are used to align memory to a byte boundary */ +#define ALIGNMENT_BASE (16ul) +#define ALIGNMENT_HW (64ul) +#define WOLF_MAGIC_NUM 0xA576F6C6641736EBUL /* (0xA)WolfAsyn(0xB) */ +#define WOLF_HEADER_ALIGN ALIGNMENT_BASE + +#ifndef QAT_V2 +#include +#include +#include +#include +#include +#include +#include + +#ifdef SAL_IOMMU_CODE + #include +#endif + +/* enable fixed static memory instead of dynamic list */ +#ifdef USE_QAE_STATIC_MEM + /* adjustable parameter for the maximum memory allocations */ + #ifndef QAE_USER_MEM_MAX_COUNT + #define QAE_USER_MEM_MAX_COUNT 16000 + #endif + #define MEM_INVALID_IDX -1 +#endif + +#define QAE_MEM "/dev/qae_mem" +#define PAGE_SHIFT 13 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define SYSTEM_PAGE_SHIFT 12 +#define SYSTEM_PAGE_SIZE (1UL << SYSTEM_PAGE_SHIFT) 
+#define SYSTEM_PAGE_MASK (~(SYSTEM_PAGE_SIZE-1)) +#define USER_MEM_OFFSET (128) +#define QAEM_MAGIC_NUM 0xABCD12345678ECDFUL + +/* define types which need to vary between 32 and 64 bit */ +#ifdef __x86_64__ + #define QAE_UINT Cpa64U + #define QAE_INT Cpa64S +#else + #define QAE_UINT Cpa32U + #define QAE_INT Cpa32S +#endif + +/* IOCTL number for use between the kernel and the user space application */ +#define DEV_MEM_MAGIC 'q' +#define DEV_MEM_CMD_MEMALLOC (0) +#define DEV_MEM_CMD_MEMFREE (1) + +/* IOCTL commands for requesting kernel memory */ +#define DEV_MEM_IOC_MEMALLOC \ + _IOWR(DEV_MEM_MAGIC, DEV_MEM_CMD_MEMALLOC, qae_dev_mem_info_t) + +#define DEV_MEM_IOC_MEMFREE \ + _IOWR(DEV_MEM_MAGIC, DEV_MEM_CMD_MEMFREE, qae_dev_mem_info_t) + + +/* local structures */ +#pragma pack(push) +#pragma pack(1) +typedef struct qae_dev_mem_info_s { + union { + struct qae_dev_mem_info_s *pPrev; + uint64_t padding_pPrev; + }; + union { + struct qae_dev_mem_info_s *pNext; + uint64_t padding_pNext; + }; + uint32_t id; + /* Id of this block */ + uint32_t nodeId; + /* Node id for NUMA */ + uint32_t size; + /* Size of this block (bytes) */ + uint32_t available_size; + /* Available size remained on the page */ + uint16_t allocations; + /* Counter keeping track of number of allocations */ + union { + void *kmalloc_ptr; + uint64_t padding_kmalloc_ptr; + }; + /* Pointer to mem originally returned by kmalloc */ + union { + int32_t *kmalloc_area; + uint64_t padding_kamalloc_area; + }; + /* Pointer to kmalloc'd area rounded up to a page boundary */ + uint64_t phy_addr; + /* Physical address of the kmalloc area */ + union { + void *virt_addr; + uint64_t padding_virt_addr; + }; + /* Base address in user space - i.e. 
virtual address */ +} qae_dev_mem_info_t; + +#ifdef USE_QAE_STATIC_MEM + typedef struct qae_dev_mem_info_ex_s { + qae_dev_mem_info_t mem_info; + int index; /* Index into g_pUserMemList */ + } qae_dev_mem_info_ex_t; +#else + typedef qae_dev_mem_info_t qae_dev_mem_info_ex_t; +#endif + +#pragma pack(pop) + +#endif /* QAT_V2 */ + + +#define QAE_NOT_NUMA_PAGE 0xFFFF +typedef struct qaeMemHeader { +#ifdef WOLFSSL_TRACK_MEMORY + struct qaeMemHeader* next; + struct qaeMemHeader* prev; + #ifdef WOLFSSL_DEBUG_MEMORY + const char* func; + unsigned int line; + #endif +#endif + uint64_t magic; + void* heap; +#ifdef USE_QAE_THREAD_LS + pthread_t threadId; +#endif + size_t size; + word16 count; + word16 isNuma:1; + word16 reservedBits:15; /* use for future bits */ + word16 type; + word16 numa_page_offset; /* use QAE_NOT_NUMA_PAGE if not NUMA */ +} ALIGN16 qaeMemHeader; + +#ifdef WOLFSSL_TRACK_MEMORY + typedef struct qaeMemStats { + long totalAllocs; /* number of allocations */ + long totalDeallocs; /* number of deallocations */ + long totalBytes; /* total number of bytes allocated */ + long peakBytes; /* concurrent max bytes */ + long currentBytes; /* total current bytes in use */ + } qaeMemStats; + + /* track allocations and report at end */ + typedef struct qaeMemList { + qaeMemHeader* head; + qaeMemHeader* tail; + uint32_t count; + } qaeMemList; +#endif /* WOLFSSL_TRACK_MEMORY */ + + +/* local variables */ +#ifndef USE_QAE_THREAD_LS + static pthread_mutex_t g_memLock = PTHREAD_MUTEX_INITIALIZER; +#endif + +#ifndef QAT_V2 +#ifdef USE_QAE_STATIC_MEM + /* Use an array instead of a list */ + static QAE_THREAD_LS qae_dev_mem_info_ex_t* + g_pUserMemList[QAE_USER_MEM_MAX_COUNT]; + /* cache the available sizes to improve userMemLookupBySize performance */ + static QAE_THREAD_LS uint16_t g_avail_size[QAE_USER_MEM_MAX_COUNT]; + /* Count of items in g_pUserMemList and g_avail_size */ + static QAE_THREAD_LS int g_userMemListCount = 0; + static QAE_THREAD_LS int g_lastIndexBySize = 0; 
+#else + static QAE_THREAD_LS qae_dev_mem_info_t *g_pUserMemList = NULL; + static QAE_THREAD_LS qae_dev_mem_info_t *g_pUserMemListHead = NULL; +#endif + +static int g_qaeMemFd = -1; +#endif /* !QAT_V2 */ + +#ifdef WOLFSSL_TRACK_MEMORY + static qaeMemStats g_memStats; + static qaeMemList g_memList; + static pthread_mutex_t g_memStatLock = PTHREAD_MUTEX_INITIALIZER; +#endif + +/* forward declarations */ +#ifndef QAT_V2 +static void* qaeMemAllocNUMA(Cpa32U size, Cpa32U node, Cpa32U alignment, + word16* p_page_offset); +static void qaeMemFreeNUMA(void** ptr, word16 page_offset); +#endif + +static WC_INLINE int qaeMemTypeIsNuma(int type) +{ + int isNuma = 0; + + switch (type) { + case DYNAMIC_TYPE_ASYNC_NUMA: + case DYNAMIC_TYPE_ASYNC_NUMA64: + case DYNAMIC_TYPE_WOLF_BIGINT: + case DYNAMIC_TYPE_PRIVATE_KEY: + case DYNAMIC_TYPE_PUBLIC_KEY: + case DYNAMIC_TYPE_AES_BUFFER: + case DYNAMIC_TYPE_RSA_BUFFER: + case DYNAMIC_TYPE_ECC_BUFFER: + case DYNAMIC_TYPE_SIGNATURE: + case DYNAMIC_TYPE_DIGEST: + case DYNAMIC_TYPE_SECRET: + case DYNAMIC_TYPE_SEED: + case DYNAMIC_TYPE_SALT: + { + isNuma = 1; + break; + } + case DYNAMIC_TYPE_OUT_BUFFER: + case DYNAMIC_TYPE_IN_BUFFER: + { + #if !defined(WC_ASYNC_NO_CRYPT) && !defined(WC_ASYNC_NO_HASH) + isNuma = 1; + #else + isNuma = 0; + #endif + break; + } + default: + isNuma = 0; + break; + } + return isNuma; +} + + +static void _qaeMemFree(void *ptr, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + qaeMemHeader* header = NULL; + size_t size; + void* origPtr = ptr; + + if (ptr == NULL) + return; + + /* adjust for header and align */ + ptr = (byte*)(((size_t)ptr - ((size_t)ptr % WOLF_HEADER_ALIGN)) - + sizeof(qaeMemHeader)); + header = (qaeMemHeader*)ptr; + + /* check for header magic */ + if (header->magic != WOLF_MAGIC_NUM) { + printf("Free: Header magic not found! 
%p\n", ptr); + return; + } + + /* cache values for later */ + size = header->size; + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + printf("Free: %p (%u) at %s:%d, heap %p, type %d, count %d\n", + origPtr, (unsigned int)size, func, line, heap, type, header->count); +#else + (void)func; + (void)line; +#endif +#endif + (void)type; + + /* adjust free count */ + header->count--; + + /* check header count */ + if (header->count > 0) { + /* go ahead and return if still in use */ + return; + } + +#ifdef WOLFSSL_TRACK_MEMORY + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.currentBytes -= size; + g_memStats.totalDeallocs++; + + if (header == g_memList.head && header == g_memList.tail) { + g_memList.head = NULL; + g_memList.tail = NULL; + } + else if (header == g_memList.head) { + g_memList.head = header->next; + g_memList.head->prev = NULL; + } + else if (header == g_memList.tail) { + g_memList.tail = header->prev; + g_memList.tail->next = NULL; + } + else { + qaeMemHeader* next = header->next; + qaeMemHeader* prev = header->prev; + if (next) + next->prev = prev; + if (prev) + prev->next = next; + } + g_memList.count--; + + pthread_mutex_unlock(&g_memStatLock); + } +#endif + + (void)heap; + (void)size; + (void)origPtr; + +#ifdef WOLFSSL_DEBUG_MEMORY + /* make sure magic is gone */ + header->magic = 0; +#endif + + /* free type */ + if (header->isNuma && header->numa_page_offset != QAE_NOT_NUMA_PAGE) { + #ifdef QAT_V2 + qaeMemFreeNUMA(&ptr); + #else + qaeMemFreeNUMA(&ptr, header->numa_page_offset); + #endif + } + else { + free(ptr); + } +} + + +static void* _qaeMemAlloc(size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + void* ptr = NULL; + qaeMemHeader* header = NULL; + int isNuma; + int alignment = ALIGNMENT_BASE; + word16 page_offset = QAE_NOT_NUMA_PAGE; + + /* make sure all allocations are aligned */ + if ((size % WOLF_HEADER_ALIGN) != 0) { + size += (WOLF_HEADER_ALIGN - 
(size % WOLF_HEADER_ALIGN)); + } + + isNuma = qaeMemTypeIsNuma(type); + if (type == DYNAMIC_TYPE_ASYNC_NUMA64) + alignment = ALIGNMENT_HW; + + /* allocate type */ + if (isNuma) { + /* Node is typically 0 */ + #ifdef QAT_V2 + page_offset = 0; + ptr = qaeMemAllocNUMA((Cpa32U)(size + sizeof(qaeMemHeader)), 0, + alignment); + #else + ptr = qaeMemAllocNUMA((Cpa32U)(size + sizeof(qaeMemHeader)), 0, + alignment, &page_offset); + #endif + } + else { + isNuma = 0; + ptr = malloc(size + sizeof(qaeMemHeader)); + } + + /* add header */ + if (ptr) { + header = (qaeMemHeader*)ptr; + ptr = (byte*)ptr + sizeof(qaeMemHeader); + header->magic = WOLF_MAGIC_NUM; + header->heap = heap; + header->size = size; + header->type = type; + header->count = 1; + header->isNuma = isNuma; + header->numa_page_offset = page_offset; + #ifdef USE_QAE_THREAD_LS + header->threadId = pthread_self(); + #endif + + #ifdef WOLFSSL_TRACK_MEMORY + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.totalAllocs++; + g_memStats.totalBytes += size; + g_memStats.currentBytes += size; + if (g_memStats.currentBytes > g_memStats.peakBytes) + g_memStats.peakBytes = g_memStats.currentBytes; + + #ifdef WOLFSSL_DEBUG_MEMORY + header->func = func; + header->line = line; + #endif + + /* Setup event */ + header->next = NULL; + if (g_memList.tail == NULL) { + g_memList.head = header; + } + else { + g_memList.tail->next = header; + header->prev = g_memList.tail; + } + g_memList.tail = header; /* add to the end either way */ + g_memList.count++; + + pthread_mutex_unlock(&g_memStatLock); + } + #endif + } + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + printf("Alloc: %p (%u) at %s:%d, heap %p, type %d\n", + ptr, (unsigned int)size, func, line, heap, type); +#else + (void)func; + (void)line; +#endif +#endif + + (void)heap; + + return ptr; +} + +/* Public Functions */ +void* IntelQaMalloc(size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) 
+{ + void* ptr; + +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Alloc: Error(%d) on mutex lock\n", ret); + return NULL; + } +#endif + + ptr = _qaeMemAlloc(size, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif + + return ptr; +} + +void IntelQaFree(void *ptr, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Free: Error(%d) on mutex lock\n", ret); + return; + } +#endif + + _qaeMemFree(ptr, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif +} + +void* IntelQaRealloc(void *ptr, size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +) +{ + void* newPtr = NULL; + void* origPtr = ptr; + qaeMemHeader* header = NULL; + byte allocNew = 1; + int newIsNuma = -1, ptrIsNuma = -1; + size_t copySize = 0; + +#ifndef USE_QAE_THREAD_LS + int ret = pthread_mutex_lock(&g_memLock); + if (ret != 0) { + printf("Realloc: Error(%d) on mutex lock\n", ret); + return NULL; + } +#endif + + (void)heap; + + if (ptr) { + /* get header pointer and align */ + header = (qaeMemHeader*)(((size_t)ptr - + ((size_t)ptr % WOLF_HEADER_ALIGN)) - sizeof(qaeMemHeader)); + if (header->magic == WOLF_MAGIC_NUM) { + newIsNuma = qaeMemTypeIsNuma(type); + ptrIsNuma = (header->numa_page_offset != QAE_NOT_NUMA_PAGE) ? 
1 : 0; + + /* for non-NUMA, treat as normal REALLOC */ + if (newIsNuma == 0 && ptrIsNuma == 0) { + allocNew = 1; + } + /* confirm input is aligned, otherwise allocate new */ + else if (((size_t)ptr % WOLF_HEADER_ALIGN) != 0) { + allocNew = 1; + } + /* if matching NUMA type and size fits, use existing */ + else if (newIsNuma == ptrIsNuma && header->size >= size) { + + #ifdef USE_QAE_THREAD_LS + if (header->threadId != pthread_self()) { + allocNew = 1; + #if 0 + printf("Realloc %p from different thread! " + "orig %lx this %lx\n", + origPtr, header->threadId, pthread_self()); + #endif + } + else + #endif + { + /* use existing pointer and increment counter */ + header->count++; + newPtr = origPtr; + allocNew = 0; + } + } + + copySize = header->size; + } + else { + copySize = size; + } + } + + if (allocNew) { + newPtr = _qaeMemAlloc(size, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + if (newPtr && ptr) { + /* only copy min of new and old size to new pointer */ + if (copySize > size) + copySize = size; + XMEMCPY(newPtr, ptr, copySize); + + if (newIsNuma == 0 && ptrIsNuma == 0) { + /* for non-NUMA, treat as normal REALLOC and free old pointer */ + _qaeMemFree(ptr, heap, type + #ifdef WOLFSSL_DEBUG_MEMORY + , func, line + #endif + ); + } + } + } + +#ifndef USE_QAE_THREAD_LS + pthread_mutex_unlock(&g_memLock); +#endif + +#ifdef WOLFSSL_DEBUG_MEMORY +#ifdef WOLFSSL_DEBUG_MEMORY_PRINT + if (allocNew) { + printf("Realloc: New %p -> %p (%u) at %s:%d, heap %p, type %d\n", + origPtr, newPtr, (unsigned int)size, func, line, heap, type); + } + else { + printf("Realloc: Reuse %p (%u) at %s:%d, heap %p, type %d, count %d\n", + origPtr, (unsigned int)size, func, line, + header->heap, header->type, header->count); + } +#else + (void)func; + (void)line; +#endif +#endif + + return newPtr; +} + + +#ifdef WOLFSSL_TRACK_MEMORY +int InitMemoryTracker(void) +{ + if (pthread_mutex_lock(&g_memStatLock) == 0) { + g_memStats.totalAllocs = 0; + g_memStats.totalDeallocs= 
0; + g_memStats.totalBytes = 0; + g_memStats.peakBytes = 0; + g_memStats.currentBytes = 0; + + XMEMSET(&g_memList, 0, sizeof(g_memList)); + + pthread_mutex_unlock(&g_memStatLock); + } + + return 0; +} + +void ShowMemoryTracker(void) +{ + if (pthread_mutex_lock(&g_memStatLock) == 0) { + printf("total Allocs = %9ld\n", g_memStats.totalAllocs); + printf("total Deallocs = %9ld\n", g_memStats.totalDeallocs); + printf("total Bytes = %9ld\n", g_memStats.totalBytes); + printf("peak Bytes = %9ld\n", g_memStats.peakBytes); + printf("current Bytes = %9ld\n", g_memStats.currentBytes); + + if (g_memList.count > 0) { + + /* print list of allocations */ + qaeMemHeader* header; + for (header = g_memList.head; + header != NULL; + header = header->next) { + printf("Leak: Ptr %p, Size %u, Type %d, Heap %p" + #ifdef WOLFSSL_DEBUG_MEMORY + ", Func %s, Line %d" + #endif + "\n", + (byte*)header + sizeof(qaeMemHeader), + (unsigned int)header->size, + header->type, header->heap + #ifdef WOLFSSL_DEBUG_MEMORY + , header->func, header->line + #endif + ); + } + } + + pthread_mutex_unlock(&g_memStatLock); + + /* cleanup lock */ + pthread_mutex_destroy(&g_memStatLock); + } +} +#endif /* WOLFSSL_TRACK_MEMORY */ + + + +/************************************** + * Memory functions + *************************************/ + +#ifndef QAT_V2 + +CpaStatus qaeMemInit(void) +{ + if (g_qaeMemFd < 0) { + #ifndef QAT_V2 + g_qaeMemFd = open(QAE_MEM, O_RDWR); + if (g_qaeMemFd < 0) { + printf("unable to open %s %d\n", QAE_MEM, g_qaeMemFd); + return CPA_STATUS_FAIL; + } + #endif + } + + return CPA_STATUS_SUCCESS; +} + +void qaeMemDestroy(void) +{ + close(g_qaeMemFd); + g_qaeMemFd = -1; +} + +#ifdef USE_QAE_STATIC_MEM + +static CpaStatus userMemListAdd(qae_dev_mem_info_t *pMemInfo) +{ + qae_dev_mem_info_ex_t* pMemInfoEx = + (qae_dev_mem_info_ex_t*)pMemInfo->virt_addr; + + if (g_userMemListCount >= QAE_USER_MEM_MAX_COUNT) { + return MEM_INVALID_IDX; + } + g_pUserMemList[g_userMemListCount] = pMemInfoEx; + 
g_avail_size[g_userMemListCount] = pMemInfoEx->mem_info.available_size; + g_lastIndexBySize = g_userMemListCount; + g_userMemListCount++; + return CPA_STATUS_SUCCESS; +} + +static void userMemListFree(qae_dev_mem_info_t *pMemInfo, int memIdx) +{ + if (memIdx < 0 || memIdx >= g_userMemListCount || + g_userMemListCount >= QAE_USER_MEM_MAX_COUNT) { + return; + } + + if (memIdx < g_userMemListCount - 1) { + /* Replace the deleted index with the last one */ + g_pUserMemList[memIdx] = g_pUserMemList[g_userMemListCount - 1]; + g_avail_size[memIdx] = g_avail_size[g_userMemListCount - 1]; + + g_pUserMemList[memIdx]->index = memIdx; + } + g_userMemListCount--; + (void)pMemInfo; +} + +static qae_dev_mem_info_t* userMemLookupBySize(Cpa32U size, int* pMemIdx) +{ + int memIdx; + int count = g_userMemListCount; + int lastIndex = g_lastIndexBySize; + uint16_t *available_size = g_avail_size; + + for (memIdx = lastIndex; memIdx < count; memIdx++) { + if (available_size[memIdx] >= size) { + g_lastIndexBySize = memIdx; + if (pMemIdx) + *pMemIdx = memIdx; + return (qae_dev_mem_info_t *)g_pUserMemList[memIdx]; + } + } + for (memIdx = 0; memIdx < lastIndex && memIdx < count; memIdx++) { + if (available_size[memIdx] >= size) { + g_lastIndexBySize = memIdx; + if (pMemIdx) + *pMemIdx = memIdx; + return (qae_dev_mem_info_t *)g_pUserMemList[memIdx]; + } + } + + return NULL; +} + +static qae_dev_mem_info_t* userMemLookupByVirtAddr(void* virt_addr, + uint32_t page_offset, int* pMemIdx) +{ + qae_dev_mem_info_ex_t *pMemInfoEx = NULL; + void *pageVirtAddr; + int memIdx; + + /* Find the base page virtual address */ + pageVirtAddr = (void *)(((QAE_UINT)virt_addr & SYSTEM_PAGE_MASK) - + (page_offset << SYSTEM_PAGE_SHIFT)); + pMemInfoEx = (qae_dev_mem_info_ex_t*)pageVirtAddr; + + /* Find the index in g_pUserMemList stored directly in + * qae_dev_mem_info_ex_t */ + memIdx = pMemInfoEx->index; + if (memIdx < 0 || memIdx >= g_userMemListCount) { + printf("userMemIndex out of bounds: %d\n", memIdx); + 
return NULL; + } + + if (g_pUserMemList[memIdx] != pMemInfoEx) { + printf("userMemIndex virtual address mismatch (memIdx = %d, %p)\n", + memIdx, pageVirtAddr); + return NULL; + } + + if (pMemIdx) + *pMemIdx = memIdx; + + return (qae_dev_mem_info_t*)pMemInfoEx; +} + +#else + +static CpaStatus userMemListAdd(qae_dev_mem_info_t *pMemInfo) +{ + if (g_pUserMemList == NULL) { + g_pUserMemList = pMemInfo; + pMemInfo->pNext = NULL; + pMemInfo->pPrev = NULL; + g_pUserMemListHead = g_pUserMemList; + } + else { + pMemInfo->pPrev = g_pUserMemList; + g_pUserMemList->pNext = pMemInfo; + pMemInfo->pNext = NULL; + g_pUserMemList = pMemInfo; + } + + return CPA_STATUS_SUCCESS; +} + +static void userMemListFree(qae_dev_mem_info_t *pMemInfo) +{ + qae_dev_mem_info_t *pCurr = NULL; + for (pCurr = g_pUserMemListHead; pCurr != NULL; pCurr = pCurr->pNext) { + if (pCurr == pMemInfo) { + /* If the previous pointer is not NULL */ + if (pCurr->pPrev != NULL) { + pCurr->pPrev->pNext = pCurr->pNext; + if (pCurr->pNext) { + pCurr->pNext->pPrev = pCurr->pPrev; + } else { + g_pUserMemList = pCurr->pPrev; + } + } else if (pCurr->pNext != NULL) { + pCurr->pNext->pPrev = NULL; + g_pUserMemListHead = pCurr->pNext; + } else { + g_pUserMemList = NULL; + g_pUserMemListHead = NULL; + } + break; + } + } +} + + +static qae_dev_mem_info_t* userMemLookupBySize(Cpa32U size) +{ + qae_dev_mem_info_t *pCurr = NULL; + for (pCurr = g_pUserMemListHead; pCurr != NULL; pCurr = pCurr->pNext) { + if (pCurr->available_size >= size) { + return pCurr; + } + } + return NULL; +} + +static qae_dev_mem_info_t* userMemLookupByVirtAddr(void* virt_addr, + uint32_t page_offset) +{ + qae_dev_mem_info_t *pCurr = NULL; + for (pCurr = g_pUserMemListHead; pCurr != NULL; pCurr = pCurr->pNext) { + if ((QAE_UINT)pCurr->virt_addr <= (QAE_UINT)virt_addr && + ((QAE_UINT)pCurr->virt_addr + pCurr->size) > (QAE_UINT)virt_addr) { + return pCurr; + } + } + (void)page_offset; + return NULL; +} + +#endif + + +static void* qaeMemAllocNUMA(Cpa32U 
size, Cpa32U node, Cpa32U alignment, + word16* p_page_offset) +{ + int ret = 0; + qae_dev_mem_info_t* pMemInfo = NULL; + void* pVirtAddress = NULL; + void* pOriginalAddress = NULL; + QAE_UINT padding = 0; + QAE_UINT aligned_address = 0; + const uint64_t magic = QAEM_MAGIC_NUM; +#ifdef USE_QAE_STATIC_MEM + int memIdx; + qae_dev_mem_info_t memInfo; + qae_dev_mem_info_ex_t* pMemInfoEx; +#endif + + if (size == 0 || alignment == 0) { + printf("Invalid size or alignment parameter\n"); + return NULL; + } + if (g_qaeMemFd < 0) { + qaeMemInit(); + } + + if ( (pMemInfo = userMemLookupBySize(size + alignment + #ifdef USE_QAE_STATIC_MEM + , &memIdx + #endif + )) != NULL) + { + /* calculate address */ + pOriginalAddress = (void*)((QAE_UINT)pMemInfo->virt_addr + + (QAE_UINT)(pMemInfo->size - pMemInfo->available_size)); + /* calculate aligned address */ + padding = (QAE_UINT)pOriginalAddress % alignment; + aligned_address = ((QAE_UINT)pOriginalAddress) - padding + alignment; + + /* reduce available size */ + pMemInfo->available_size -= (size + (aligned_address - + (QAE_UINT)pOriginalAddress)); + pMemInfo->allocations += 1; + + #ifdef USE_QAE_STATIC_MEM + /* cache index's available size */ + g_avail_size[memIdx] = pMemInfo->available_size; + #endif + + *p_page_offset = (word16)( + (QAE_UINT)aligned_address >> SYSTEM_PAGE_SHIFT) - + ((QAE_UINT)pMemInfo->virt_addr >> SYSTEM_PAGE_SHIFT); + + return (void*)aligned_address; + } + +#ifdef USE_QAE_STATIC_MEM + pMemInfo = &memInfo; +#else + pMemInfo = malloc(sizeof(qae_dev_mem_info_t)); + if (pMemInfo == NULL) { + printf("unable to allocate pMemInfo buffer\n"); + return NULL; + } +#endif + + pMemInfo->allocations = 0; + pMemInfo->size = USER_MEM_OFFSET + size; + pMemInfo->size = pMemInfo->size % PAGE_SIZE ? 
+        ((pMemInfo->size / PAGE_SIZE) + 1) * PAGE_SIZE :
+        pMemInfo->size;
+#ifdef SAL_IOMMU_CODE
+    pMemInfo->size = icp_sal_iommu_get_remap_size(pMemInfo->size);
+#endif
+    pMemInfo->nodeId = node;
+
+    ret = ioctl(g_qaeMemFd, DEV_MEM_IOC_MEMALLOC, pMemInfo);
+    if (ret != 0) {
+        printf("ioctl call failed: ret %d, errno %d (%s)\n",
+            ret, errno, strerror(errno));
+        return NULL;
+    }
+
+    pMemInfo->virt_addr = mmap((caddr_t)0, pMemInfo->size,
+        PROT_READ|PROT_WRITE, MAP_SHARED, g_qaeMemFd,
+        (pMemInfo->id * getpagesize()));
+
+    if (pMemInfo->virt_addr == (caddr_t)MAP_FAILED) {
+        printf("mmap failed\n");
+        ret = ioctl(g_qaeMemFd, DEV_MEM_IOC_MEMFREE, pMemInfo);
+        if (ret != 0) {
+            printf("ioctl call failed: ret %d, errno %d (%s)\n",
+                ret, errno, strerror(errno));
+        }
+    #ifndef USE_QAE_STATIC_MEM
+        free(pMemInfo);
+    #endif
+        return NULL;
+    }
+
+    pMemInfo->available_size = pMemInfo->size - size - USER_MEM_OFFSET;
+    pMemInfo->allocations = 1;
+    memcpy(pMemInfo->virt_addr, pMemInfo, sizeof(qae_dev_mem_info_t));
+#ifdef USE_QAE_STATIC_MEM
+    pMemInfoEx = (qae_dev_mem_info_ex_t *)pMemInfo->virt_addr;
+    pMemInfoEx->index = g_userMemListCount;
+#endif
+    memcpy(pMemInfo->virt_addr, &magic, sizeof(uint64_t));
+    pVirtAddress = (void *)((QAE_UINT)pMemInfo->virt_addr +
+        USER_MEM_OFFSET);
+
+    if (userMemListAdd(pMemInfo) != CPA_STATUS_SUCCESS) {
+        printf("Error on mem list add\n");
+    #ifndef USE_QAE_STATIC_MEM
+        free(pMemInfo);
+    #endif
+        return NULL;
+    }
+
+    *p_page_offset = 0;
+    return pVirtAddress;
+}
+/* Give *ptr back to its slab; unmap and release the slab once it is unused. */
+static void qaeMemFreeNUMA(void** ptr, word16 page_offset)
+{
+    int ret = 0;
+    qae_dev_mem_info_t *pMemInfo = NULL;
+    void* pVirtAddress = NULL;
+#ifdef USE_QAE_STATIC_MEM
+    qae_dev_mem_info_t memInfo; /* temp buffer */
+    int memIdx;
+#endif
+
+    if (ptr == NULL)
+        return;
+
+    pVirtAddress = *ptr;
+    if (pVirtAddress == NULL) {
+        printf("qaeMemFreeNUMA: Invalid virtual address\n");
+        return;
+    }
+
+    if ((pMemInfo = userMemLookupByVirtAddr(pVirtAddress, page_offset
+        #ifdef USE_QAE_STATIC_MEM
+            , &memIdx
+        #endif
+        )) != NULL)
+    {
+        pMemInfo->allocations -= 1;
+        /* static mode: a table slab with no users gets all its space back */
+    #ifdef USE_QAE_STATIC_MEM
+        if (memIdx < QAE_USER_MEM_MAX_COUNT && pMemInfo->allocations == 0) {
+            pMemInfo->available_size = pMemInfo->size - USER_MEM_OFFSET;
+            g_avail_size[memIdx] = pMemInfo->available_size;
+        }
+    #endif
+        /* slab still has users (or is a static table slab): keep it mapped */
+        if (pMemInfo->allocations != 0
+        #ifdef USE_QAE_STATIC_MEM
+            || memIdx < QAE_USER_MEM_MAX_COUNT
+        #endif
+        ) {
+            *ptr = NULL;
+            return;
+        }
+    }
+    else {
+        printf("userMemLookupByVirtAddr failed\n");
+        return;
+    }
+
+#ifdef USE_QAE_STATIC_MEM
+    /* use a temp copy of memory info */
+    memInfo = *pMemInfo;
+    userMemListFree(pMemInfo->virt_addr, memIdx);
+    pMemInfo = &memInfo;
+#endif
+    /* slab is unused: unmap it, then ask the driver to release it */
+    ret = munmap(pMemInfo->virt_addr, pMemInfo->size);
+    if (ret != 0) {
+        printf("munmap failed, ret = %d\n",ret);
+    }
+
+    ret = ioctl(g_qaeMemFd, DEV_MEM_IOC_MEMFREE, pMemInfo);
+    if (ret != 0) {
+        printf("ioctl call failed, ret = %d\n",ret);
+    }
+
+#ifndef USE_QAE_STATIC_MEM
+    userMemListFree(pMemInfo);
+    free(pMemInfo);
+#endif
+
+    *ptr = NULL;
+
+    return;
+}
+/* Return the physical address for a virtual address inside a slab (0 if NULL) */
+QAE_PHYS_ADDR qaeVirtToPhysNUMA(void* pVirtAddress)
+{
+    qae_dev_mem_info_t *pMemInfo = NULL;
+    void *pVirtPageAddress = NULL;
+    QAE_UINT offset = 0;
+    uint64_t *magic;
+
+    if (pVirtAddress == NULL) {
+        printf("qaeVirtToPhysNUMA: Null virtual address pointer\n");
+        return (QAE_PHYS_ADDR)0;
+    }
+
+    pVirtPageAddress = ((int *)(((
+        (QAE_UINT)pVirtAddress)) & (SYSTEM_PAGE_MASK)));
+    /* walk back page by page to the slab header: magic + its own virt_addr */
+    offset = (QAE_UINT)pVirtAddress - (QAE_UINT)pVirtPageAddress;
+    do {
+        pMemInfo = (qae_dev_mem_info_t *)pVirtPageAddress;
+        magic = (uint64_t *)pMemInfo;
+        if ((QAEM_MAGIC_NUM == *magic) &&
+            (pMemInfo->virt_addr == pVirtPageAddress)) {
+            break;
+        }
+        pVirtPageAddress = (void*)(
+            (QAE_UINT)pVirtPageAddress - SYSTEM_PAGE_SIZE);
+
+        offset += SYSTEM_PAGE_SIZE;
+    } while (pMemInfo->virt_addr != pVirtPageAddress);
+    /* NOTE(review): the walk has no explicit lower bound; confirm callers */
+    return (QAE_PHYS_ADDR)(pMemInfo->phy_addr + offset);
+}
+#endif /* !QAT_V2 */
+
+#endif /* HAVE_INTEL_QA */
diff --git
a/wolfssl/wolfcrypt/async.h b/wolfssl/wolfcrypt/async.h new file mode 100644 index 000000000..8eac4aa9c --- /dev/null +++ b/wolfssl/wolfcrypt/async.h @@ -0,0 +1,442 @@ +/* async.h + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef WOLFSSL_ASYNC_H +#define WOLFSSL_ASYNC_H + +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef WOLFSSL_ASYNC_CRYPT + +#include +#include +#ifdef HAVE_CAVIUM + #include +#elif defined(HAVE_INTEL_QA) + #include +#endif + + +struct WC_ASYNC_DEV; + + +/* Asynchronous crypto using software */ +#ifdef WOLFSSL_ASYNC_CRYPT_SW + enum WC_ASYNC_SW_TYPE { + ASYNC_SW_NONE = 0, +#ifdef HAVE_ECC + ASYNC_SW_ECC_MAKE = 1, + #ifdef HAVE_ECC_SIGN + ASYNC_SW_ECC_SIGN = 2, + #endif + #ifdef HAVE_ECC_VERIFY + ASYNC_SW_ECC_VERIFY = 3, + #endif + #ifdef HAVE_ECC_DHE + ASYNC_SW_ECC_SHARED_SEC = 4, + #endif +#endif /* HAVE_ECC */ +#ifndef NO_RSA + #ifdef WOLFSSL_KEY_GEN + ASYNC_SW_RSA_MAKE = 5, + #endif + ASYNC_SW_RSA_FUNC = 6, +#endif /* !NO_RSA */ +#ifndef NO_DH + ASYNC_SW_DH_AGREE = 7, + ASYNC_SW_DH_GEN = 8, +#endif /* !NO_DH */ +#ifndef NO_AES + ASYNC_SW_AES_CBC_ENCRYPT = 9, + #ifdef HAVE_AES_DECRYPT + ASYNC_SW_AES_CBC_DECRYPT = 10, + #endif + #ifdef HAVE_AESGCM + ASYNC_SW_AES_GCM_ENCRYPT = 11, + #ifdef HAVE_AES_DECRYPT + ASYNC_SW_AES_GCM_DECRYPT = 12, + #endif + #endif /* HAVE_AESGCM */ +#endif /* 
!NO_AES */ +#ifndef NO_DES3 + ASYNC_SW_DES3_CBC_ENCRYPT = 13, + ASYNC_SW_DES3_CBC_DECRYPT = 14, +#endif /* !NO_DES3 */ + }; + +#ifdef HAVE_ECC + struct AsyncCryptSwEccMake { + void* rng; /* WC_RNG */ + void* key; /* ecc_key */ + int curve_id; + int size; + }; + struct AsyncCryptSwEccSign { + const byte* in; + word32 inSz; + void* rng; /* WC_RNG */ + void* key; /* ecc_key */ + void* r; /* mp_int */ + void* s; /* mp_int */ + }; + struct AsyncCryptSwEccVerify { + void* r; /* mp_int */ + void* s; /* mp_int */ + const byte* hash; + word32 hashlen; + int* stat; + void* key; /* ecc_key */ + }; + struct AsyncCryptSwEccSharedSec { + void* private_key; /* ecc_key */ + void* public_point; /* ecc_point */ + byte* out; + word32* outLen; + }; +#endif /* HAVE_ECC */ +#ifndef NO_RSA + #ifdef WOLFSSL_KEY_GEN + struct AsyncCryptSwRsaMake { + void* key; /* RsaKey */ + void* rng; + long e; + int size; + }; + #endif + struct AsyncCryptSwRsaFunc { + const byte* in; + word32 inSz; + byte* out; + word32* outSz; + int type; + void* key; /* RsaKey */ + void* rng; + }; +#endif /* !NO_RSA */ + +#ifndef NO_DH + struct AsyncCryptSwDhAgree { + void* key; /* DhKey */ + byte* agree; + word32* agreeSz; + const byte* priv; + word32 privSz; + const byte* otherPub; + word32 pubSz; + }; + struct AsyncCryptSwDhGen { + void* key; /* DhKey */ + void* rng; /* WC_RNG */ + byte* priv; + word32* privSz; + byte* pub; + word32* pubSz; + }; +#endif /* !NO_DH */ + +#ifndef NO_AES + struct AsyncCryptSwAes { + void* aes; /* Aes */ + byte* out; + const byte* in; + word32 sz; + #ifdef HAVE_AESGCM + const byte* iv; + word32 ivSz; + byte* authTag; + word32 authTagSz; + const byte* authIn; + word32 authInSz; + #endif + }; +#endif /* !NO_AES */ + +#ifndef NO_DES3 + struct AsyncCryptSwDes { + void* des; /* Des */ + byte* out; + const byte* in; + word32 sz; + }; +#endif /* !NO_DES3 */ + + #ifdef __CC_ARM + #pragma push + #pragma anon_unions + #endif + + typedef struct WC_ASYNC_SW { + void* ctx; + #if 
HAVE_ANONYMOUS_INLINE_AGGREGATES + union { + #endif + #ifdef HAVE_ECC + struct AsyncCryptSwEccMake eccMake; + struct AsyncCryptSwEccSign eccSign; + struct AsyncCryptSwEccVerify eccVerify; + struct AsyncCryptSwEccSharedSec eccSharedSec; + #endif /* HAVE_ECC */ + #ifndef NO_RSA + #ifdef WOLFSSL_KEY_GEN + struct AsyncCryptSwRsaMake rsaMake; + #endif + struct AsyncCryptSwRsaFunc rsaFunc; + #endif /* !NO_RSA */ + #ifndef NO_DH + struct AsyncCryptSwDhAgree dhAgree; + struct AsyncCryptSwDhGen dhGen; + #endif /* !NO_DH */ + #ifndef NO_AES + struct AsyncCryptSwAes aes; + #endif /* !NO_AES */ + #ifndef NO_DES3 + struct AsyncCryptSwDes des; + #endif /* !NO_DES3 */ + #if HAVE_ANONYMOUS_INLINE_AGGREGATES + }; /* union */ + #endif + byte type; /* enum WC_ASYNC_SW_TYPE */ + } WC_ASYNC_SW; + + #ifdef __CC_ARM + #pragma pop + #endif + +#endif /* WOLFSSL_ASYNC_CRYPT_SW */ + +/* Performance tuning options */ + +/* determine maximum async pending requests */ +#ifdef HAVE_CAVIUM + #define WOLF_ASYNC_MAX_PENDING CAVIUM_MAX_PENDING +#elif defined(HAVE_INTEL_QA) + #define WOLF_ASYNC_MAX_PENDING QAT_MAX_PENDING +#else + #define WOLF_ASYNC_MAX_PENDING 8 + + #ifdef DEBUG_WOLFSSL + /* Use this to introduce extra delay in simulator at interval */ + #ifndef WOLF_ASYNC_SW_SKIP_MOD + #define WOLF_ASYNC_SW_SKIP_MOD (WOLF_ASYNC_MAX_PENDING / 2) + #endif + #endif +#endif + +/* async thresholds - defaults */ +#ifdef WC_ASYNC_THRESH_NONE + #undef WC_ASYNC_THRESH_AES_CBC + #define WC_ASYNC_THRESH_AES_CBC 1 + + #undef WC_ASYNC_THRESH_AES_GCM + #define WC_ASYNC_THRESH_AES_GCM 1 + + #undef WC_ASYNC_THRESH_DES3_CBC + #define WC_ASYNC_THRESH_DES3_CBC 1 +#else + #ifndef WC_ASYNC_THRESH_AES_CBC + #define WC_ASYNC_THRESH_AES_CBC 1024 + #endif + #ifndef WC_ASYNC_THRESH_AES_GCM + #define WC_ASYNC_THRESH_AES_GCM 128 + #endif + #ifndef WC_ASYNC_THRESH_DES3_CBC + #define WC_ASYNC_THRESH_DES3_CBC 1024 + #endif +#endif /* WC_ASYNC_THRESH_NONE */ + +/* Overrides to allow disabling async support per algorithm */ 
+#ifndef WC_ASYNC_NO_CRYPT
+    #ifndef WC_ASYNC_NO_ARC4
+        #define WC_ASYNC_ENABLE_ARC4
+    #endif
+    #ifndef WC_ASYNC_NO_AES
+        #define WC_ASYNC_ENABLE_AES
+    #endif
+    #ifndef WC_ASYNC_NO_3DES
+        #define WC_ASYNC_ENABLE_3DES
+    #endif
+#endif /* !WC_ASYNC_NO_CRYPT */
+#ifndef WC_ASYNC_NO_PKI
+    #ifndef WC_ASYNC_NO_RSA_KEYGEN
+        #define WC_ASYNC_ENABLE_RSA_KEYGEN
+    #endif
+    #ifndef WC_ASYNC_NO_RSA
+        #define WC_ASYNC_ENABLE_RSA
+    #endif
+    #ifndef WC_ASYNC_NO_ECC
+        #define WC_ASYNC_ENABLE_ECC
+    #endif
+    #ifndef WC_ASYNC_NO_DH
+        #define WC_ASYNC_ENABLE_DH
+    #endif
+#endif /* !WC_ASYNC_NO_PKI */
+#ifndef WC_ASYNC_NO_HASH
+    #ifndef WC_ASYNC_NO_SHA512
+        #define WC_ASYNC_ENABLE_SHA512
+    #endif
+    #ifndef WC_ASYNC_NO_SHA384
+        #define WC_ASYNC_ENABLE_SHA384
+    #endif
+    #ifndef WC_ASYNC_NO_SHA256
+        #define WC_ASYNC_ENABLE_SHA256
+    #endif
+    #ifndef WC_ASYNC_NO_SHA224
+        #define WC_ASYNC_ENABLE_SHA224
+    #endif
+    #ifndef WC_ASYNC_NO_SHA
+        #define WC_ASYNC_ENABLE_SHA
+    #endif
+    #ifndef WC_ASYNC_NO_MD5
+        #define WC_ASYNC_ENABLE_MD5
+    #endif
+    #ifndef WC_ASYNC_NO_HMAC
+        #define WC_ASYNC_ENABLE_HMAC
+    #endif
+    #ifndef WC_ASYNC_NO_SHA3
+        #define WC_ASYNC_ENABLE_SHA3
+    #endif
+#endif /* !WC_ASYNC_NO_HASH */
+#ifndef WC_ASYNC_NO_RNG
+    #define WC_ASYNC_ENABLE_RNG
+#endif
+
+
+/* async marker values */
+#define WOLFSSL_ASYNC_MARKER_INVALID 0x0
+#define WOLFSSL_ASYNC_MARKER_ARC4    0xBEEF0001
+#define WOLFSSL_ASYNC_MARKER_AES     0xBEEF0002
+#define WOLFSSL_ASYNC_MARKER_3DES    0xBEEF0003
+#define WOLFSSL_ASYNC_MARKER_RNG     0xBEEF0004
+#define WOLFSSL_ASYNC_MARKER_HMAC    0xBEEF0005
+#define WOLFSSL_ASYNC_MARKER_RSA     0xBEEF0006
+#define WOLFSSL_ASYNC_MARKER_ECC     0xBEEF0007
+#define WOLFSSL_ASYNC_MARKER_SHA512  0xBEEF0008
+#define WOLFSSL_ASYNC_MARKER_SHA384  0xBEEF0009
+#define WOLFSSL_ASYNC_MARKER_SHA256  0xBEEF000A
+#define WOLFSSL_ASYNC_MARKER_SHA224  0xBEEF000B
+#define WOLFSSL_ASYNC_MARKER_SHA     0xBEEF000C
+#define WOLFSSL_ASYNC_MARKER_MD5     0xBEEF000D
+#define WOLFSSL_ASYNC_MARKER_DH      0xBEEF000E
+#define WOLFSSL_ASYNC_MARKER_SHA3    0xBEEF000F
+
+
+/* event flags (bit mask) */
+enum WC_ASYNC_FLAGS {
+    WC_ASYNC_FLAG_NONE =         0x00000000,
+
+    /* crypto needs to be called again after WC_PENDING_E */
+    WC_ASYNC_FLAG_CALL_AGAIN =   0x00000001
+};
+
+/* async device: marker, heap hint, event and the driver-specific context */
+typedef struct WC_ASYNC_DEV {
+    word32 marker; /* async marker */
+    void* heap;
+
+    /* event */
+    WOLF_EVENT event;
+
+    /* context for driver */
+#ifdef HAVE_CAVIUM
+    CaviumNitroxDev nitrox;
+#elif defined(HAVE_INTEL_QA)
+    IntelQaDev qat;
+#elif defined(WOLFSSL_ASYNC_CRYPT_SW)
+    WC_ASYNC_SW sw;
+#endif
+} WC_ASYNC_DEV;
+
+
+/* Interfaces */
+WOLFSSL_API int wolfAsync_HardwareStart(void);
+WOLFSSL_API void wolfAsync_HardwareStop(void);
+WOLFSSL_API int wolfAsync_DevOpen(int *devId);
+WOLFSSL_API int wolfAsync_DevOpenThread(int *devId, void* threadId);
+WOLFSSL_API int wolfAsync_DevCtxInit(WC_ASYNC_DEV* asyncDev, word32 marker,
+    void* heap, int devId);
+WOLFSSL_API void wolfAsync_DevCtxFree(WC_ASYNC_DEV* asyncDev, word32 marker);
+WOLFSSL_API void wolfAsync_DevClose(int *devId);
+WOLFSSL_API int wolfAsync_DevCopy(WC_ASYNC_DEV* src, WC_ASYNC_DEV* dst);
+
+WOLFSSL_API int wolfAsync_EventInit(WOLF_EVENT* event,
+    enum WOLF_EVENT_TYPE type, void* context, word32 flags);
+WOLFSSL_API int wolfAsync_EventWait(WOLF_EVENT* event);
+WOLFSSL_API int wolfAsync_EventPoll(WOLF_EVENT* event,
+    WOLF_EVENT_FLAG event_flags);
+WOLFSSL_API int wolfAsync_EventPop(WOLF_EVENT* event,
+    enum WOLF_EVENT_TYPE event_type);
+WOLFSSL_API int wolfAsync_EventQueuePush(WOLF_EVENT_QUEUE* queue,
+    WOLF_EVENT* event);
+WOLFSSL_API int wolfAsync_EventQueuePoll(WOLF_EVENT_QUEUE* queue,
+    void* context_filter, WOLF_EVENT** events, int maxEvents,
+    WOLF_EVENT_FLAG event_flags, int* eventCount);
+
+WOLFSSL_API int wc_AsyncHandle(WC_ASYNC_DEV* asyncDev,
+    WOLF_EVENT_QUEUE* queue, word32 flags);
+WOLFSSL_API int wc_AsyncWait(int ret, WC_ASYNC_DEV* asyncDev,
+    word32 flags);
+
+WOLFSSL_API int wc_AsyncSleep(word32 ms);
+
+#ifdef WOLFSSL_ASYNC_CRYPT_SW
+    WOLFSSL_API int wc_AsyncSwInit(WC_ASYNC_DEV* dev, int type);
+#endif
+
+/* Pthread Helpers. NOTE(review): include names were blank here; confirm */
+#ifndef WC_NO_ASYNC_THREADING
+#include <pthread.h>
+#include <sched.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <errno.h>
+
+typedef void* (*AsyncThreadFunc_t) (void *);
+
+#define THREAD_DEFAULT_PRIORITY     (0)
+#define THREAD_DEFAULT_POLICY       SCHED_OTHER
+
+WOLFSSL_API int wc_AsyncGetNumberOfCpus(void);
+WOLFSSL_API int wc_AsyncThreadCreate(pthread_t *thread,
+    AsyncThreadFunc_t function, void* params);
+WOLFSSL_API int wc_AsyncThreadCreate_ex(pthread_t *thread,
+    word32 priority, int policy,
+    AsyncThreadFunc_t function, void* params);
+WOLFSSL_API int wc_AsyncThreadBind(pthread_t *thread, word32 logicalCore);
+WOLFSSL_API int wc_AsyncThreadStart(pthread_t *thread);
+WOLFSSL_API void wc_AsyncThreadExit(void *retval);
+WOLFSSL_API int wc_AsyncThreadKill(pthread_t *thread);
+WOLFSSL_API int wc_AsyncThreadPrioritySet(pthread_t *thread, word32 priority);
+WOLFSSL_API int wc_AsyncThreadSetPolicyAndPriority(pthread_t *thread,
+    word32 policy, word32 priority);
+WOLFSSL_API int wc_AsyncThreadJoin(pthread_t *thread);
+WOLFSSL_API void wc_AsyncThreadYield(void);
+WOLFSSL_API pthread_t wc_AsyncThreadId(void);
+
+#endif /* !WC_NO_ASYNC_THREADING */
+
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif
+
+#endif /* WOLFSSL_ASYNC_H */
diff --git a/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h b/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
new file mode 100644
index 000000000..56e587d9c
--- /dev/null
+++ b/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
@@ -0,0 +1,217 @@
+/* cavium_nitrox.h
+ *
+ * Copyright (C) 2006-2023 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL. (formerly known as CyaSSL)
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef _CAVIUM_NITROX_H_
+#define _CAVIUM_NITROX_H_
+
+#ifdef HAVE_CAVIUM
+
+#ifndef HAVE_CAVIUM_V
+    #include "cavium_sysdep.h"
+#endif
+#include "cavium_common.h"
+
+#define CAVIUM_SSL_GRP      0
+#define CAVIUM_DPORT        256
+
+/* Compatibility with older Cavium SDKs (builds without HAVE_CAVIUM_V) */
+#ifndef HAVE_CAVIUM_V
+    typedef int CspHandle;
+    typedef word32 CavReqId;
+
+    #define AES_128 AES_128_BIT
+    #define AES_192 AES_192_BIT
+    #define AES_256 AES_256_BIT
+
+    #define MAX_TO_POLL 30
+    typedef int context_type_t;
+
+    struct CspMultiRequestStatusBuffer {
+        int count;
+        CspRequestStatusBuffer req[MAX_TO_POLL];
+    };
+    #define AES_CBC 0x3
+    #define AES_GCM 0x7
+#else
+    typedef word64 CavReqId;
+    #define CAVIUM_DEV_ID       0
+    #define CAVIUM_BLOCKING     BLOCKING
+    #define CAVIUM_NON_BLOCKING NON_BLOCKING
+    #define CAVIUM_DIRECT       DMA_DIRECT_DIRECT
+#endif
+
+typedef struct CspMultiRequestStatusBuffer CspMultiRequestStatusBuffer;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #define CAVIUM_REQ_MODE CAVIUM_NON_BLOCKING
+#else
+    #define CAVIUM_REQ_MODE CAVIUM_BLOCKING
+#endif
+
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #ifndef CAVIUM_MAX_PENDING
+        #define CAVIUM_MAX_PENDING  10 /* 90 */
+    #endif
+    #ifndef CAVIUM_MAX_POLL
+        #define CAVIUM_MAX_POLL     MAX_TO_POLL
+    #endif
+#endif
+
+/* Per-context Nitrox state: device, context type/handle, current request */
+typedef struct CaviumNitroxDev {
+    CspHandle      devId;          /* nitrox device id */
+    context_type_t type;           /* Typically CONTEXT_SSL, but also ECC types */
+    word64         contextHandle;  /* nitrox context memory handle */
+    CavReqId       reqId;          /* Current requestId */
+} CaviumNitroxDev;
+
+struct WOLF_EVENT;
+struct WC_ASYNC_DEV;
+struct WC_BIGINT;
+
+/* Wrapper APIs */
+WOLFSSL_LOCAL int NitroxTranslateResponseCode(int ret);
+WOLFSSL_LOCAL CspHandle NitroxGetDeviceHandle(void);
+WOLFSSL_LOCAL CspHandle NitroxOpenDeviceDefault(void);
+WOLFSSL_LOCAL CspHandle NitroxOpenDevice(int dma_mode, int dev_id);
+WOLFSSL_LOCAL int NitroxAllocContext(struct WC_ASYNC_DEV* dev, CspHandle devId,
+    context_type_t type);
+WOLFSSL_LOCAL void NitroxFreeContext(struct WC_ASYNC_DEV* dev);
+WOLFSSL_LOCAL void NitroxCloseDevice(CspHandle devId);
+
+#if defined(WOLFSSL_ASYNC_CRYPT)
+WOLFSSL_LOCAL int NitroxCheckRequest(struct WC_ASYNC_DEV* dev,
+    struct WOLF_EVENT* event);
+WOLFSSL_LOCAL int NitroxCheckRequests(struct WC_ASYNC_DEV* dev,
+    CspMultiRequestStatusBuffer* req_stat_buf);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
+/* Crypto wrappers */
+#ifndef NO_RSA
+    struct RsaKey;
+    WOLFSSL_LOCAL int NitroxRsaExptMod(
+                            const byte* in, word32 inLen,
+                            byte* exponent, word32 expLen,
+                            byte* modulus, word32 modLen,
+                            byte* out, word32* outLen, struct RsaKey* key);
+    WOLFSSL_LOCAL int NitroxRsaPublicEncrypt(const byte* in, word32 inLen,
+                                byte* out, word32 outLen, struct RsaKey* key);
+    WOLFSSL_LOCAL int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen,
+                                byte* out, word32* outLen, struct RsaKey* key);
+    WOLFSSL_LOCAL int NitroxRsaSSL_Sign(const byte* in, word32 inLen,
+                                byte* out, word32 outLen, struct RsaKey* key);
+    WOLFSSL_LOCAL int NitroxRsaSSL_Verify(const byte* in, word32 inLen,
+                                byte* out, word32 *outLen, struct RsaKey* key);
+#endif /* !NO_RSA */
+
+#if defined(HAVE_ECC) && defined(HAVE_CAVIUM_V)
+    struct ecc_key;
+    WOLFSSL_LOCAL int NitroxEccGetSize(struct ecc_key* key);
+    WOLFSSL_LOCAL int NitroxEccRsSplit(struct ecc_key* key,
+        struct WC_BIGINT* r, struct WC_BIGINT* s);
+    WOLFSSL_LOCAL int NitroxEccIsCurveSupported(struct ecc_key* key);
+    WOLFSSL_LOCAL int NitroxEccPad(struct WC_BIGINT* bi, word32 padTo);
+    #ifdef HAVE_ECC_DHE
+        WOLFSSL_LOCAL int NitroxEcdh(struct ecc_key* key,
+            struct WC_BIGINT* k, struct WC_BIGINT* xG, struct WC_BIGINT* yG,
+            byte* out, word32* outlen, struct WC_BIGINT* q);
+    #endif /* HAVE_ECC_DHE */
+    #ifdef HAVE_ECC_SIGN
+        WOLFSSL_LOCAL int NitroxEcdsaSign(struct ecc_key* key,
+            struct WC_BIGINT* m, struct WC_BIGINT* d,
+            struct WC_BIGINT* k,
+            struct WC_BIGINT* r, struct WC_BIGINT* s,
+            struct WC_BIGINT* q, struct WC_BIGINT* n);
+    #endif /* HAVE_ECC_SIGN */
+    #ifdef HAVE_ECC_VERIFY
+        WOLFSSL_LOCAL int NitroxEcdsaVerify(struct ecc_key* key,
+            struct WC_BIGINT* m, struct WC_BIGINT* xp,
+            struct WC_BIGINT* yp, struct WC_BIGINT* r,
+            struct WC_BIGINT* s, struct WC_BIGINT* q,
+            struct WC_BIGINT* n, int* stat);
+    #endif /* HAVE_ECC_VERIFY */
+#endif /* HAVE_ECC && HAVE_CAVIUM_V */
+
+#ifndef NO_AES
+    struct Aes;
+    #ifdef HAVE_AES_CBC
+        WOLFSSL_LOCAL int NitroxAesCbcEncrypt(struct Aes* aes, byte* out,
+            const byte* in, word32 length);
+    #ifdef HAVE_AES_DECRYPT
+        WOLFSSL_LOCAL int NitroxAesCbcDecrypt(struct Aes* aes, byte* out,
+            const byte* in, word32 length);
+    #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef HAVE_AESGCM
+        WOLFSSL_LOCAL int NitroxAesGcmEncrypt(struct Aes* aes,
+            byte* out, const byte* in, word32 sz,
+            const byte* key, word32 keySz,
+            const byte* iv, word32 ivSz,
+            byte* authTag, word32 authTagSz,
+            const byte* authIn, word32 authInSz);
+    #ifdef HAVE_AES_DECRYPT
+        WOLFSSL_LOCAL int NitroxAesGcmDecrypt(struct Aes* aes,
+            byte* out, const byte* in, word32 sz,
+            const byte* key, word32 keySz,
+            const byte* iv, word32 ivSz,
+            const byte* authTag, word32 authTagSz,
+            const byte* authIn, word32 authInSz);
+    #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AESGCM */
+#endif /* !NO_AES */
+
+#ifndef NO_RC4
+    struct Arc4;
+    WOLFSSL_LOCAL int NitroxArc4SetKey(struct Arc4* arc4, const byte* key,
+        word32 length);
+    WOLFSSL_LOCAL int NitroxArc4Process(struct Arc4* arc4, byte* out,
+        const byte* in, word32 length);
+#endif /* !NO_RC4 */
+
+#ifndef NO_DES3
+    struct Des3;
+    WOLFSSL_LOCAL int NitroxDes3SetKey(struct Des3* des3, const byte* key,
+        const byte* iv);
+    WOLFSSL_LOCAL int NitroxDes3CbcEncrypt(struct Des3* des3, byte* out,
+        const byte* in, word32 length);
+    WOLFSSL_LOCAL int NitroxDes3CbcDecrypt(struct Des3* des3, byte* out,
+        const byte* in, word32 length);
+#endif /* !NO_DES3 */
+
+#ifndef NO_HMAC
+    struct Hmac;
+    WOLFSSL_LOCAL int NitroxHmacUpdate(struct Hmac* hmac, const byte* msg,
+        word32 length);
+    WOLFSSL_LOCAL int NitroxHmacFinal(struct Hmac* hmac, byte* hash,
+        word16 hashLen);
+#endif /* !NO_HMAC */
+
+struct WC_RNG;
+WOLFSSL_API int NitroxRngGenerateBlock(struct WC_RNG* rng, byte* output,
+    word32 sz);
+
+
+#endif /* HAVE_CAVIUM */
+
+#endif /* _CAVIUM_NITROX_H_ */
diff --git a/wolfssl/wolfcrypt/port/intel/quickassist.h b/wolfssl/wolfcrypt/port/intel/quickassist.h
new file mode 100644
index 000000000..b43832cd1
--- /dev/null
+++ b/wolfssl/wolfcrypt/port/intel/quickassist.h
@@ -0,0 +1,520 @@
+/* quickassist.h
+ *
+ * Copyright (C) 2006-2023 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL. (formerly known as CyaSSL)
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef _INTEL_QUICKASSIST_H_ +#define _INTEL_QUICKASSIST_H_ + +#ifdef HAVE_INTEL_QA + +#include "cpa.h" +#include "cpa_cy_im.h" +#include "cpa_cy_sym.h" +#include "cpa_cy_rsa.h" +#include "cpa_cy_ln.h" +#include "cpa_cy_ecdh.h" +#include "cpa_cy_ecdsa.h" +#include "cpa_cy_dh.h" +#include "cpa_cy_drbg.h" +#include "cpa_cy_nrbg.h" +#include "cpa_cy_prime.h" + +/* User space utils */ +#include +#include +#include +#include + +#if 0 + /* Optional feature for partial QAT hashing support */ + /* This will process updates through hardware instead of caching them */ + #define QAT_HASH_ENABLE_PARTIAL +#endif +#ifdef QAT_HASH_ENABLE_PARTIAL + #define MAX_QAT_HASH_BUFFERS 2 +#endif + +/* Detect QAT driver version */ +#if defined(CPA_CY_API_VERSION_NUM_MAJOR) && CPA_CY_API_VERSION_NUM_MAJOR > 1 + #define QAT_V2 + #if CPA_CY_API_VERSION_NUM_MAJOR > 2 || ( \ + CPA_CY_API_VERSION_NUM_MAJOR == 2 && ( \ + defined(CPA_CY_API_VERSION_NUM_MINOR) && \ + CPA_CY_API_VERSION_NUM_MINOR >= 4 \ + ) \ + ) + #define QAT_V2_4_PLUS + #endif +#endif + +#ifdef QAT_V2 + /* quickassist/utilities/libusdm_drv/qae_mem.h */ + /* Provides user-space API's for accessing NUMA allocated memory + * through usdm_drv */ + #include "qae_mem.h" +#endif + +#ifdef QAT_USE_POLLING_THREAD + #include +#endif +#ifdef QA_DEMO_MAIN + #include +#endif + + +/* Tunable parameters */ +#ifndef QAT_PROCESS_NAME + #define QAT_PROCESS_NAME "SSL" +#endif +#ifndef QAT_LIMIT_DEV_ACCESS + #define QAT_LIMIT_DEV_ACCESS CPA_FALSE +#endif +#ifndef QAT_MAX_DEVICES + #define QAT_MAX_DEVICES (1) /* maximum number of QAT cards */ +#endif +#ifndef QAT_MAX_PENDING + /* max num of concurrent ops */ + #ifdef WC_NO_ASYNC_THREADING + #define QAT_MAX_PENDING (40) + #elif defined(WC_ASYNC_BENCH_THREAD_COUNT) + #define 
QAT_MAX_PENDING ((40/WC_ASYNC_BENCH_THREAD_COUNT)*2) + #else + #define QAT_MAX_PENDING (15) + #endif +#endif +#ifndef QAT_RETRY_LIMIT + #define QAT_RETRY_LIMIT (100) +#endif +#ifndef QAT_POLL_RESP_QUOTA + #define QAT_POLL_RESP_QUOTA (0) /* all pending */ +#endif + +/* TODO: Tune this value to get best performance */ +#ifndef WC_ASYNC_THRESH_AES_CBC + #define WC_ASYNC_THRESH_AES_CBC 128 +#endif +#ifndef WC_ASYNC_THRESH_AES_GCM + #define WC_ASYNC_THRESH_AES_GCM 128 +#endif +#ifndef WC_ASYNC_THRESH_DES3_CBC + #define WC_ASYNC_THRESH_DES3_CBC 128 +#endif + +/* Macros */ +#define INVALID_STATUS -256 + + +#if !defined(NO_SHA256) || defined(WOLFSSL_SHA512) || \ + defined(WOLFSSL_SHA384) || !defined(NO_HMAC) || !defined(NO_MD5) || \ + defined(WOLFSSL_SHA224) + #define QAT_ENABLE_HASH +#endif +#if !defined(NO_AES) || !defined(NO_DES3) + #define QAT_ENABLE_CRYPTO +#endif +#if !defined(NO_RSA) || defined(HAVE_ECC) || !defined(NO_DH) + #define QAT_ENABLE_PKI +#endif + +/* QAT 1.7 does not support NRBG or DRBG */ +#if !defined(QAT_V2) && !defined(NO_QAT_RNG) + #define QAT_ENABLE_RNG +#endif + +/* Pre-declarations */ +struct WC_ASYNC_DEV; +struct WC_BIGINT; +struct IntelQaDev; +struct WC_RNG; + +#if defined(QAT_ENABLE_HASH) || defined(QAT_ENABLE_CRYPTO) +/* symmetric context */ +typedef struct IntelQaSymCtx { + CpaCySymOpData opData; + CpaCySymSessionCtx symCtxSrc; + CpaCySymSessionCtx symCtx; + word32 symCtxSize; + + /* flags */ + word32 isOpen:1; + word32 isCopy:1; +} IntelQaSymCtx; +#endif + +typedef void (*IntelQaFreeFunc)(struct WC_ASYNC_DEV*); + +#if defined(QAT_ENABLE_PKI) && !defined(NO_RSA) && defined (WOLFSSL_KEY_GEN) + #ifndef QAT_PRIME_GEN_TRIES + /* number of times to try generating a prime candidates + based on generated miller rabbin */ + #define QAT_PRIME_GEN_TRIES 100 + #endif + #ifndef QAT_PRIME_GEN_RETRIES + /* number of times to try new prime candidates */ + #define QAT_PRIME_GEN_RETRIES 1000 + #endif + #ifndef QAT_PRIME_GEN_MR_ROUNDS + /* Miller Rabbin 
Rounds */ + #define QAT_PRIME_GEN_MR_ROUNDS 2 + #endif + + enum { + QAT_PRIME_CHK_STATUS_INIT = 0, + QAT_PRIME_CHK_STATUS_FAILED, + QAT_PRIME_CHK_STATUS_PASSED, + QAT_PRIME_CHK_STATUS_ERROR, + }; +#endif + +#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL) + struct sp_int; + #define MATH_INT_T struct sp_int +#elif defined(USE_FAST_MATH) + struct fp_int; + #define MATH_INT_T struct fp_int +#else + struct mp_int; + #define MATH_INT_T struct mp_int +#endif + +/* QuickAssist device */ +typedef struct IntelQaDev { + CpaInstanceHandle handle; + int devId; + + /* callback return info */ + int ret; + byte* out; + union { + word32* outLenPtr; + word32 outLen; + }; + + /* operations */ + IntelQaFreeFunc freeFunc; + union { + #if defined(QAT_ENABLE_PKI) && !defined(NO_RSA) + #ifdef WOLFSSL_KEY_GEN + struct { + CpaCyPrimeTestOpData* opData; + CpaFlatBuffer* primeCandidates; + byte* pMillerRabinData; + byte testStatus[QAT_PRIME_GEN_TRIES]; + } prime_gen; + struct { + CpaCyRsaKeyGenOpData opData; + CpaCyRsaPrivateKey privateKey; + CpaCyRsaPublicKey publicKey; + struct RsaKey* rsakey; + } rsa_keygen; + #endif /* WOLFSSL_KEY_GEN */ + struct { + CpaCyRsaDecryptOpData opData; + CpaCyRsaPrivateKey privateKey; + CpaFlatBuffer outBuf; + } rsa_priv; + struct { + CpaCyRsaEncryptOpData opData; + CpaCyRsaPublicKey publicKey; + CpaFlatBuffer outBuf; + } rsa_pub; + struct { + CpaCyLnModExpOpData opData; + CpaFlatBuffer target; + } rsa_modexp; + #endif + #ifdef QAT_ENABLE_CRYPTO + struct { + IntelQaSymCtx ctx; + CpaBufferList bufferList; + CpaFlatBuffer flatBuffer; + byte* authTag; + word32 authTagSz; + byte* iv; + word32 ivSz; + } cipher; + #endif + #if defined(QAT_ENABLE_PKI) && defined(HAVE_ECC) + struct { + CpaCyEcPointMultiplyOpData opData; + CpaFlatBuffer pXk; + CpaFlatBuffer pYk; + CpaBoolean multiplyStatus; + + /* output pub */ + MATH_INT_T* pubX; + MATH_INT_T* pubY; + MATH_INT_T* pubZ; + } ecc_mul; + #ifdef HAVE_ECC_DHE + struct { + CpaCyEcdhPointMultiplyOpData opData; + 
CpaFlatBuffer pXk; + CpaFlatBuffer pYk; + CpaBoolean multiplyStatus; + } ecc_ecdh; + #endif + #ifdef HAVE_ECC_SIGN + struct { + CpaCyEcdsaSignRSOpData opData; + CpaFlatBuffer R; + CpaFlatBuffer S; + CpaBoolean signStatus; + + struct WC_BIGINT* pR; + struct WC_BIGINT* pS; + } ecc_sign; + #endif + #ifdef HAVE_ECC_VERIFY + struct { + CpaCyEcdsaVerifyOpData opData; + CpaBoolean verifyStatus; + int* stat; + } ecc_verify; + #endif + #endif /* HAVE_ECC */ + #ifdef QAT_ENABLE_HASH + struct { + IntelQaSymCtx ctx; + CpaBufferList* srcList; + /* tmp buffer to hold anything pending less than block size */ + byte* tmpIn; + word32 tmpInSz; + word32 tmpInBufSz; + + #ifdef QAT_HASH_ENABLE_PARTIAL + int bufferCount; + byte* buffers[MAX_QAT_HASH_BUFFERS]; + word32 buffersSz[MAX_QAT_HASH_BUFFERS]; + #endif + } hash; + #endif + #if defined(QAT_ENABLE_PKI) && !defined(NO_DH) + struct { + CpaCyDhPhase1KeyGenOpData opData; + CpaFlatBuffer pOut; + } dh_gen; + struct { + CpaCyDhPhase2SecretKeyGenOpData opData; + CpaFlatBuffer pOut; + } dh_agree; + #endif + #ifdef QAT_ENABLE_RNG + struct { + CpaCyDrbgGenOpData opData; + CpaCyDrbgSessionHandle handle; + CpaFlatBuffer pOut; + } drbg; + #endif + } op; + +#ifdef QAT_USE_POLLING_THREAD + pthread_t pollingThread; + byte pollingCy; +#endif +} IntelQaDev; + + +/* Interface */ +WOLFSSL_LOCAL int IntelQaHardwareStart(const char* process_name, + int limitDevAccess); +WOLFSSL_LOCAL void IntelQaHardwareStop(void); + +WOLFSSL_LOCAL int IntelQaInit(void* threadId); +WOLFSSL_LOCAL void IntelQaDeInit(int); + +WOLFSSL_LOCAL int IntelQaNumInstances(void); + +WOLFSSL_LOCAL int IntelQaOpen(struct WC_ASYNC_DEV* dev, int devId); +WOLFSSL_LOCAL void IntelQaClose(struct WC_ASYNC_DEV* dev); + +WOLFSSL_LOCAL int IntelQaDevCopy(struct WC_ASYNC_DEV* src, + struct WC_ASYNC_DEV* dst); + +WOLFSSL_LOCAL int IntelQaPoll(struct WC_ASYNC_DEV* dev); + +WOLFSSL_LOCAL int IntelQaGetCyInstanceCount(void); + +#ifndef NO_RSA + #ifdef WOLFSSL_KEY_GEN + WOLFSSL_LOCAL int 
IntelQaGenPrime(struct WC_ASYNC_DEV* dev, + struct WC_RNG* rng, byte* primeBuf, word32 primeSz); + WOLFSSL_LOCAL int IntelQaRsaKeyGen(struct WC_ASYNC_DEV* dev, + struct RsaKey* key, int keyBits, long e, + struct WC_RNG* rng); + #endif + WOLFSSL_LOCAL int IntelQaRsaPrivate(struct WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + struct WC_BIGINT* d, struct WC_BIGINT* n, + byte* out, word32* outLen); + WOLFSSL_LOCAL int IntelQaRsaCrtPrivate(struct WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + struct WC_BIGINT* p, struct WC_BIGINT* q, + struct WC_BIGINT* dP, struct WC_BIGINT* dQ, + struct WC_BIGINT* qInv, + byte* out, word32* outLen); + WOLFSSL_LOCAL int IntelQaRsaPublic(struct WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + struct WC_BIGINT* e, struct WC_BIGINT* n, + byte* out, word32* outLen); + WOLFSSL_LOCAL int IntelQaRsaExptMod(struct WC_ASYNC_DEV* dev, + const byte* in, word32 inLen, + struct WC_BIGINT* e, struct WC_BIGINT* n, + byte* out, word32* outLen); +#endif /* !NO_RSA */ + +#ifndef NO_AES + #ifdef HAVE_AES_CBC + WOLFSSL_LOCAL int IntelQaSymAesCbcEncrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz); + #ifdef HAVE_AES_DECRYPT + WOLFSSL_LOCAL int IntelQaSymAesCbcDecrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz); + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AES_CBC */ + + #ifdef HAVE_AESGCM + WOLFSSL_LOCAL int IntelQaSymAesGcmEncrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz); + #ifdef HAVE_AES_DECRYPT + WOLFSSL_LOCAL int IntelQaSymAesGcmDecrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const 
byte* authIn, word32 authInSz); + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ + +#ifndef NO_DES3 + WOLFSSL_LOCAL int IntelQaSymDes3CbcEncrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz); + WOLFSSL_LOCAL int IntelQaSymDes3CbcDecrypt(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz, + const byte* key, word32 keySz, + byte* iv, word32 ivSz); +#endif /*! NO_DES3 */ + +#ifdef WOLFSSL_SHA512 + WOLFSSL_LOCAL int IntelQaSymSha512(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); + + #ifdef WOLFSSL_SHA384 + WOLFSSL_LOCAL int IntelQaSymSha384(struct WC_ASYNC_DEV* dev, + byte* out, const byte* in, word32 sz); + #endif +#endif + +#ifndef NO_SHA256 + WOLFSSL_LOCAL int IntelQaSymSha256(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); + #ifdef WOLFSSL_SHA224 + WOLFSSL_LOCAL int IntelQaSymSha224(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); + #endif +#endif /* !NO_SHA256 */ + +#ifndef NO_SHA + WOLFSSL_LOCAL int IntelQaSymSha(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); +#endif /* !NO_SHA */ + +#ifndef NO_MD5 + WOLFSSL_LOCAL int IntelQaSymMd5(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); +#endif /* !NO_MD5 */ + +#if defined(WOLFSSL_SHA3) && defined(QAT_V2) + WOLFSSL_LOCAL int IntelQaSymSha3(struct WC_ASYNC_DEV* dev, byte* out, + const byte* in, word32 sz); +#endif + +#ifdef HAVE_ECC + #ifdef HAVE_ECC_DHE + WOLFSSL_LOCAL int IntelQaEccPointMul(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* k, MATH_INT_T* pubX, MATH_INT_T* pubY, + MATH_INT_T* pubZ, + struct WC_BIGINT* xG, struct WC_BIGINT* yG, + struct WC_BIGINT* a, struct WC_BIGINT* b, + struct WC_BIGINT* q, word32 cofactor); + WOLFSSL_LOCAL int IntelQaEcdh(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* k, struct WC_BIGINT* xG, + struct WC_BIGINT* yG, byte* out, word32* outlen, + struct WC_BIGINT* a, struct 
WC_BIGINT* b, + struct WC_BIGINT* q, word32 cofactor); + #endif /* HAVE_ECC_DHE */ + #ifdef HAVE_ECC_SIGN + WOLFSSL_LOCAL int IntelQaEcdsaSign(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* m, struct WC_BIGINT* d, + struct WC_BIGINT* k, + struct WC_BIGINT* r, struct WC_BIGINT* s, + struct WC_BIGINT* a, struct WC_BIGINT* b, + struct WC_BIGINT* q, struct WC_BIGINT* n, + struct WC_BIGINT* xg, struct WC_BIGINT* yg); + #endif /* HAVE_ECC_SIGN */ + #ifdef HAVE_ECC_VERIFY + WOLFSSL_LOCAL int IntelQaEcdsaVerify(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* m, struct WC_BIGINT* xp, + struct WC_BIGINT* yp, struct WC_BIGINT* r, + struct WC_BIGINT* s, struct WC_BIGINT* a, + struct WC_BIGINT* b, struct WC_BIGINT* q, + struct WC_BIGINT* n, struct WC_BIGINT* xg, + struct WC_BIGINT* yg, int* stat); + #endif /* HAVE_ECC_VERIFY */ +#endif /* HAVE_ECC */ + +#ifndef NO_DH + WOLFSSL_LOCAL int IntelQaDhKeyGen(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* p, struct WC_BIGINT* g, struct WC_BIGINT* x, + byte* pub, word32* pubSz); + + + WOLFSSL_LOCAL int IntelQaDhAgree(struct WC_ASYNC_DEV* dev, + struct WC_BIGINT* p, + byte* agree, word32* agreeSz, + const byte* priv, word32 privSz, + const byte* otherPub, word32 pubSz); +#endif /* !NO_DH */ + +#ifndef NO_HMAC + WOLFSSL_LOCAL int IntelQaHmacGetType(int macType, word32* hashAlgorithm); + WOLFSSL_LOCAL int IntelQaHmac(struct WC_ASYNC_DEV* dev, + int macType, byte* keyRaw, word16 keyLen, + byte* out, const byte* in, word32 sz); +#endif /* !NO_HMAC */ + +WOLFSSL_LOCAL int IntelQaDrbg(struct WC_ASYNC_DEV* dev, byte* rngBuf, + word32 rngSz); +WOLFSSL_LOCAL int IntelQaNrbg(CpaFlatBuffer* pBuffer, Cpa32U length); + +#endif /* HAVE_INTEL_QA */ + +#endif /* _INTEL_QUICKASSIST_H_ */ diff --git a/wolfssl/wolfcrypt/port/intel/quickassist_mem.h b/wolfssl/wolfcrypt/port/intel/quickassist_mem.h new file mode 100644 index 000000000..cfdfb3b63 --- /dev/null +++ b/wolfssl/wolfcrypt/port/intel/quickassist_mem.h @@ -0,0 +1,64 @@ +/* quickassist_mem.h + * + * 
Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef _QUICKASSIST_MEM_H_ +#define _QUICKASSIST_MEM_H_ + +#ifdef HAVE_INTEL_QA + +#include <wolfssl/wolfcrypt/port/intel/quickassist.h> + +CpaStatus qaeMemInit(void); +void qaeMemDestroy(void); + +#ifndef QAT_V2 + #define QAE_PHYS_ADDR CpaPhysicalAddr + WOLFSSL_LOCAL QAE_PHYS_ADDR qaeVirtToPhysNUMA(void* pVirtAddress); +#endif /* !QAT_V2 */ + + +#ifdef WOLFSSL_TRACK_MEMORY + WOLFSSL_API int InitMemoryTracker(void); + WOLFSSL_API void ShowMemoryTracker(void); +#endif /* WOLFSSL_TRACK_MEMORY */ + + +WOLFSSL_API void* IntelQaMalloc(size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +); + +WOLFSSL_API void IntelQaFree(void *ptr, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +); + +WOLFSSL_API void* IntelQaRealloc(void *ptr, size_t size, void* heap, int type +#ifdef WOLFSSL_DEBUG_MEMORY + , const char* func, unsigned int line +#endif +); + +#endif /* HAVE_INTEL_QA */ + +#endif /* _QUICKASSIST_MEM_H_ */