Skip to content

Commit bfd52de

Browse files
committed
Performance improvements
AES-GCM: when using assembly, don't generate M0 unless falling back to C is supported, and in that case generate it with new assembly code. HMAC: add option to copy hashes (--enable-hmac-copy, -DWOLFSSL_HMAC_COPY_HASH) to improve performance when using the same key for multiple operations.
1 parent 8f131ff commit bfd52de

8 files changed

Lines changed: 1912 additions & 164 deletions

File tree

.wolfssl_known_macro_extras

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ WOLFSSL_HARDEN_TLS_ALLOW_OLD_TLS
648648
WOLFSSL_HARDEN_TLS_ALLOW_TRUNCATED_HMAC
649649
WOLFSSL_HARDEN_TLS_NO_PKEY_CHECK
650650
WOLFSSL_HARDEN_TLS_NO_SCR_CHECK
651+
WOLFSSL_HMAC_COPY_HASH
651652
WOLFSSL_HOSTNAME_VERIFY_ALT_NAME_ONLY
652653
WOLFSSL_I2D_ECDSA_SIG_ALLOC
653654
WOLFSSL_IAR_ARM_TIME

configure.ac

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,25 @@ AC_ARG_ENABLE([hmac],
295295
[ ENABLED_HMAC=yes ]
296296
)
297297

298+
# enable HMAC hash copying automatically for x86_64 and aarch64 (except Linux kernel module)
299+
HMAC_COPY_DEFAULT=no
300+
if test "$ENABLED_LINUXKM_DEFAULTS" = "no"
301+
then
302+
if test "$host_cpu" = "x86_64" || test "$host_cpu" = "aarch64" || test "$host_cpu" = "amd64"
303+
then
304+
HMAC_COPY_DEFAULT=yes
305+
fi
306+
fi
307+
AC_ARG_ENABLE([hmac-copy],
308+
[AS_HELP_STRING([--enable-hmac-copy],[Enables digest copying implementation for HMAC (default: disabled)])],
309+
[ ENABLED_HMAC_COPY=$enableval ],
310+
[ ENABLED_HMAC_COPY=$HMAC_COPY_DEFAULT ]
311+
)
312+
if test "$ENABLED_HMAC_COPY" = "yes"
313+
then
314+
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HMAC_COPY_HASH"
315+
fi
316+
298317
AC_ARG_ENABLE([do178],
299318
[AS_HELP_STRING([--enable-do178],[Enable DO-178, Will NOT work w/o DO178 license (default: disabled)])],
300319
[ENABLED_DO178=$enableval],

wolfcrypt/benchmark/benchmark.c

Lines changed: 117 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ static int lng_index = 0;
11931193

11941194
#ifndef NO_MAIN_DRIVER
11951195
#ifndef MAIN_NO_ARGS
1196-
static const char* bench_Usage_msg1[][25] = {
1196+
static const char* bench_Usage_msg1[][27] = {
11971197
/* 0 English */
11981198
{ "-? <num> Help, print this usage\n",
11991199
" 0: English, 1: Japanese\n",
@@ -1207,6 +1207,8 @@ static const char* bench_Usage_msg1[][25] = {
12071207
" (if set via -aad_size) <aad_size> bytes.\n"
12081208
),
12091209
"-dgst_full Full digest operation performed.\n",
1210+
"-mac_final MAC update and final operation timed.\n",
1211+
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
12101212
"-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
12111213
"<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
12121214
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@@ -1240,6 +1242,8 @@ static const char* bench_Usage_msg1[][25] = {
12401242
"-aad_size <num> TBD.\n",
12411243
"-all_aad TBD.\n",
12421244
"-dgst_full フルの digest 暗号操作を実施します。\n",
1245+
"-mac_final MAC update and final operation timed.\n",
1246+
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
12431247
"-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
12441248
"<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
12451249
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@@ -2056,6 +2060,8 @@ static int numBlocks = NUM_BLOCKS;
20562060
static word32 bench_size = BENCH_SIZE;
20572061
static int base2 = 1;
20582062
static int digest_stream = 1;
2063+
static int mac_stream = 1;
2064+
static int aead_set_key = 0;
20592065
#ifdef HAVE_CHACHA
20602066
static int encrypt_only = 0;
20612067
#endif
@@ -4505,10 +4511,12 @@ static void bench_aesgcm_internal(int useDeviceID,
45054511
goto exit;
45064512
}
45074513

4508-
ret = wc_AesGcmSetKey(enc[i], key, keySz);
4509-
if (ret != 0) {
4510-
printf("AesGcmSetKey failed, ret = %d\n", ret);
4511-
goto exit;
4514+
if (!aead_set_key) {
4515+
ret = wc_AesGcmSetKey(enc[i], key, keySz);
4516+
if (ret != 0) {
4517+
printf("AesGcmSetKey failed, ret = %d\n", ret);
4518+
goto exit;
4519+
}
45124520
}
45134521
}
45144522

@@ -4522,6 +4530,14 @@ static void bench_aesgcm_internal(int useDeviceID,
45224530
for (i = 0; i < BENCH_MAX_PENDING; i++) {
45234531
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(enc[i]), 0,
45244532
&times, numBlocks, &pending)) {
4533+
if (aead_set_key) {
4534+
ret = wc_AesGcmSetKey(enc[i], key, keySz);
4535+
if (!bench_async_handle(&ret,
4536+
BENCH_ASYNC_GET_DEV(enc[i]), 0,
4537+
&times, &pending)) {
4538+
goto exit_aes_gcm;
4539+
}
4540+
}
45254541
ret = wc_AesGcmEncrypt(enc[i], bench_cipher,
45264542
bench_plain, bench_size,
45274543
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@@ -4560,10 +4576,12 @@ static void bench_aesgcm_internal(int useDeviceID,
45604576
goto exit;
45614577
}
45624578

4563-
ret = wc_AesGcmSetKey(dec[i], key, keySz);
4564-
if (ret != 0) {
4565-
printf("AesGcmSetKey failed, ret = %d\n", ret);
4566-
goto exit;
4579+
if (!aead_set_key) {
4580+
ret = wc_AesGcmSetKey(dec[i], key, keySz);
4581+
if (ret != 0) {
4582+
printf("AesGcmSetKey failed, ret = %d\n", ret);
4583+
goto exit;
4584+
}
45674585
}
45684586
}
45694587

@@ -4576,6 +4594,14 @@ static void bench_aesgcm_internal(int useDeviceID,
45764594
for (i = 0; i < BENCH_MAX_PENDING; i++) {
45774595
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(dec[i]), 0,
45784596
&times, numBlocks, &pending)) {
4597+
if (aead_set_key) {
4598+
ret = wc_AesGcmSetKey(dec[i], key, keySz);
4599+
if (!bench_async_handle(&ret,
4600+
BENCH_ASYNC_GET_DEV(dec[i]), 0,
4601+
&times, &pending)) {
4602+
goto exit_aes_gcm_dec;
4603+
}
4604+
}
45794605
ret = wc_AesGcmDecrypt(dec[i], bench_plain,
45804606
bench_cipher, bench_size,
45814607
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@@ -8300,50 +8326,89 @@ static void bench_hmac(int useDeviceID, int type, int digestSz,
83008326
}
83018327
}
83028328

8303-
bench_stats_start(&count, &start);
8304-
do {
8305-
for (times = 0; times < numBlocks || pending > 0; ) {
8306-
bench_async_poll(&pending);
8329+
if (mac_stream) {
8330+
bench_stats_start(&count, &start);
8331+
do {
8332+
for (times = 0; times < numBlocks || pending > 0; ) {
8333+
bench_async_poll(&pending);
83078334

8308-
/* while free pending slots in queue, submit ops */
8309-
for (i = 0; i < BENCH_MAX_PENDING; i++) {
8310-
if (bench_async_check(&ret,
8311-
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8312-
&times, numBlocks, &pending)) {
8313-
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
8314-
if (!bench_async_handle(&ret,
8315-
BENCH_ASYNC_GET_DEV(hmac[i]),
8316-
0, &times, &pending)) {
8317-
goto exit_hmac;
8335+
/* while free pending slots in queue, submit ops */
8336+
for (i = 0; i < BENCH_MAX_PENDING; i++) {
8337+
if (bench_async_check(&ret,
8338+
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8339+
&times, numBlocks, &pending)) {
8340+
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
8341+
if (!bench_async_handle(&ret,
8342+
BENCH_ASYNC_GET_DEV(hmac[i]),
8343+
0, &times, &pending)) {
8344+
goto exit_hmac;
8345+
}
83188346
}
8319-
}
8320-
} /* for i */
8321-
} /* for times */
8322-
count += times;
8347+
} /* for i */
8348+
} /* for times */
8349+
count += times;
8350+
8351+
times = 0;
8352+
do {
8353+
bench_async_poll(&pending);
83238354

8324-
times = 0;
8355+
for (i = 0; i < BENCH_MAX_PENDING; i++) {
8356+
if (bench_async_check(&ret,
8357+
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8358+
&times, numBlocks, &pending)) {
8359+
ret = wc_HmacFinal(hmac[i], digest[i]);
8360+
if (!bench_async_handle(&ret,
8361+
BENCH_ASYNC_GET_DEV(hmac[i]),
8362+
0, &times, &pending)) {
8363+
goto exit_hmac;
8364+
}
8365+
}
8366+
RECORD_MULTI_VALUE_STATS();
8367+
} /* for i */
8368+
} while (pending > 0);
8369+
} while (bench_stats_check(start)
8370+
#ifdef MULTI_VALUE_STATISTICS
8371+
|| runs < minimum_runs
8372+
#endif
8373+
);
8374+
}
8375+
else {
8376+
bench_stats_start(&count, &start);
83258377
do {
8326-
bench_async_poll(&pending);
8378+
for (times = 0; times < numBlocks || pending > 0; ) {
8379+
bench_async_poll(&pending);
83278380

8328-
for (i = 0; i < BENCH_MAX_PENDING; i++) {
8329-
if (bench_async_check(&ret,
8330-
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8331-
&times, numBlocks, &pending)) {
8332-
ret = wc_HmacFinal(hmac[i], digest[i]);
8333-
if (!bench_async_handle(&ret,
8334-
BENCH_ASYNC_GET_DEV(hmac[i]),
8335-
0, &times, &pending)) {
8336-
goto exit_hmac;
8381+
/* while free pending slots in queue, submit ops */
8382+
for (i = 0; i < BENCH_MAX_PENDING; i++) {
8383+
if (bench_async_check(&ret,
8384+
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8385+
&times, numBlocks, &pending)) {
8386+
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
8387+
if (!bench_async_handle(&ret,
8388+
BENCH_ASYNC_GET_DEV(hmac[i]),
8389+
0, &times, &pending)) {
8390+
goto exit_hmac;
8391+
}
83378392
}
8338-
}
8339-
RECORD_MULTI_VALUE_STATS();
8340-
} /* for i */
8341-
} while (pending > 0);
8342-
} while (bench_stats_check(start)
8343-
#ifdef MULTI_VALUE_STATISTICS
8344-
|| runs < minimum_runs
8345-
#endif
8346-
);
8393+
if (bench_async_check(&ret,
8394+
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
8395+
&times, numBlocks, &pending)) {
8396+
ret = wc_HmacFinal(hmac[i], digest[i]);
8397+
if (!bench_async_handle(&ret,
8398+
BENCH_ASYNC_GET_DEV(hmac[i]),
8399+
0, &times, &pending)) {
8400+
goto exit_hmac;
8401+
}
8402+
}
8403+
} /* for i */
8404+
} /* for times */
8405+
count += times;
8406+
} while (bench_stats_check(start)
8407+
#ifdef MULTI_VALUE_STATISTICS
8408+
|| runs < minimum_runs
8409+
#endif
8410+
);
8411+
}
83478412

83488413
exit_hmac:
83498414
bench_stats_sym_finish(label, useDeviceID, count, bench_size, start, ret);
@@ -14989,6 +15054,7 @@ static void Usage(void)
1498915054
e += 3;
1499015055
#endif
1499115056
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -dgst_full */
15057+
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -mac_final */
1499215058
#ifndef NO_RSA
1499315059
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -rsa_sign */
1499415060
#ifdef WOLFSSL_KEY_GEN
@@ -15186,6 +15252,10 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
1518615252
#endif
1518715253
else if (string_matches(argv[1], "-dgst_full"))
1518815254
digest_stream = 0;
15255+
else if (string_matches(argv[1], "-mac_final"))
15256+
mac_stream = 0;
15257+
else if (string_matches(argv[1], "-aead_set_key"))
15258+
aead_set_key = 1;
1518915259
#ifdef HAVE_CHACHA
1519015260
else if (string_matches(argv[1], "-enc_only"))
1519115261
encrypt_only = 1;

wolfcrypt/src/aes.c

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6633,6 +6633,25 @@ void GenerateM0(Gcm* gcm)
66336633

66346634
#endif /* GCM_TABLE */
66356635

6636+
#if defined(WOLFSSL_AESNI) && defined(USE_INTEL_SPEEDUP)
6637+
#define HAVE_INTEL_AVX1
6638+
#define HAVE_INTEL_AVX2
6639+
#endif
6640+
6641+
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
6642+
defined(WC_C_DYNAMIC_FALLBACK)
6643+
void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
6644+
XASM_LINK("GCM_generate_m0_aesni");
6645+
#ifdef HAVE_INTEL_AVX1
6646+
void GCM_generate_m0_avx1(const unsigned char *h, unsigned char *m)
6647+
XASM_LINK("GCM_generate_m0_avx1");
6648+
#endif
6649+
#ifdef HAVE_INTEL_AVX2
6650+
void GCM_generate_m0_avx2(const unsigned char *h, unsigned char *m)
6651+
XASM_LINK("GCM_generate_m0_avx2");
6652+
#endif
6653+
#endif /* WOLFSSL_AESNI && GCM_TABLE_4BIT && WC_C_DYNAMIC_FALLBACK */
6654+
66366655
/* Software AES - GCM SetKey */
66376656
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
66386657
{
@@ -6702,9 +6721,33 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
67026721
VECTOR_REGISTERS_POP;
67036722
}
67046723
if (ret == 0) {
6705-
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
6706-
GenerateM0(&aes->gcm);
6707-
#endif /* GCM_TABLE */
6724+
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
6725+
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
6726+
if (aes->use_aesni) {
6727+
#if defined(WC_C_DYNAMIC_FALLBACK)
6728+
#ifdef HAVE_INTEL_AVX2
6729+
if (IS_INTEL_AVX2(intel_flags)) {
6730+
GCM_generate_m0_avx2(aes->gcm.H, (byte*)aes->gcm.M0);
6731+
}
6732+
else
6733+
#endif
6734+
#if defined(HAVE_INTEL_AVX1)
6735+
if (IS_INTEL_AVX1(intel_flags)) {
6736+
GCM_generate_m0_avx1(aes->gcm.H, (byte*)aes->gcm.M0);
6737+
}
6738+
else
6739+
#endif
6740+
{
6741+
GCM_generate_m0_aesni(aes->gcm.H, (byte*)aes->gcm.M0);
6742+
}
6743+
#endif
6744+
}
6745+
else
6746+
#endif
6747+
{
6748+
GenerateM0(&aes->gcm);
6749+
}
6750+
#endif /* GCM_TABLE || GCM_TABLE_4BIT */
67086751
}
67096752
#endif /* FREESCALE_LTC_AES_GCM */
67106753

@@ -6727,11 +6770,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
67276770

67286771
#ifdef WOLFSSL_AESNI
67296772

6730-
#if defined(USE_INTEL_SPEEDUP)
6731-
#define HAVE_INTEL_AVX1
6732-
#define HAVE_INTEL_AVX2
6733-
#endif /* USE_INTEL_SPEEDUP */
6734-
67356773
void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
67366774
const unsigned char* addt, const unsigned char* ivec,
67376775
unsigned char *tag, word32 nbytes,

0 commit comments

Comments
 (0)