diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index cee1257e3..bcb81990d 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -7839,10 +7839,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 64; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -7850,7 +7850,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -19407,10 +19407,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 96; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -19418,7 +19418,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -72049,10 +72049,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 128; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -72060,7 +72060,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -73192,7 +73192,7 @@ static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -73579,10 +73579,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 8; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -73590,7 +73590,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -81204,7 +81204,7 @@ static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -81458,10 +81458,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 12; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -81469,7 +81469,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index e58a05803..345e287ad 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -5235,10 +5235,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 32; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -5246,7 +5246,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -12907,10 +12907,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 48; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -12918,7 +12918,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -17806,10 +17806,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 64; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -17817,7 +17817,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -18568,7 +18568,7 @@ static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -18787,10 +18787,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 4; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -18798,7 +18798,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -36352,7 +36352,7 @@ static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -36624,10 +36624,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 6; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -36635,7 +36635,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 90dd61709..9230304e3 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -4545,10 +4545,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 64; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -4556,7 +4556,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -10288,10 +10288,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 96; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -10299,7 +10299,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -14691,10 +14691,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 128; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -14702,7 +14702,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -15852,7 +15852,7 @@ static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -16075,10 +16075,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 8; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -16086,7 +16086,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -21937,7 +21937,7 @@ static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -22191,10 +22191,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 12; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -22202,7 +22202,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 61cb33fab..0d06cffcc 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -3652,10 +3652,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 90; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 23) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -3663,7 +3663,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 23 - s; @@ -7489,10 +7489,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 134; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 23) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -7500,7 +7500,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 23 - s; @@ -11490,10 +11490,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 196; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 21) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -11501,7 +11501,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 21 - s; @@ -12454,7 +12454,7 @@ static int sp_256_point_new_ex_10(void* heap, sp_point_256* sp, sp_point_256** p #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -12742,10 +12742,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 10; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 26) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -12753,7 +12753,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 26 - s; @@ -17779,7 +17779,7 @@ static int sp_384_point_new_ex_15(void* heap, sp_point_384* sp, sp_point_384** p #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -18108,10 +18108,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 15; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 26) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -18119,7 +18119,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 26 - s; diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index d41cfe1a6..6335f60a9 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -3292,10 +3292,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 36; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 57) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -3303,7 +3303,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 57 - s; @@ -7403,10 +7403,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 54; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 57) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -7414,7 +7414,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 57 - s; @@ -11759,10 +11759,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 78; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 53) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -11770,7 +11770,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 53 - s; @@ -12488,7 +12488,7 @@ static int sp_256_point_new_ex_5(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -12759,10 +12759,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 5; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 52) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -12770,7 +12770,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 52 - s; @@ -17566,7 +17566,7 @@ static int sp_384_point_new_ex_7(void* heap, sp_point_384* sp, sp_point_384** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -17868,10 +17868,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 7; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 55) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -17879,7 +17879,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 55 - s; diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index df4a8e77e..133ae6922 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -4297,10 +4297,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 64; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -4308,7 +4308,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -8889,10 +8889,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 96; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -8900,7 +8900,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -12428,10 +12428,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 128; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -12439,7 +12439,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -13587,7 +13587,7 @@ static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -13810,10 +13810,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 8; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -13821,7 +13821,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; @@ -20048,7 +20048,7 @@ static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -20302,10 +20302,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 12; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 32) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -20313,7 +20313,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 32 - s; diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index fd66629e3..6b46e7fd3 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -1650,10 +1650,10 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 32; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -1661,7 +1661,7 @@ static int sp_2048_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -3704,10 +3704,10 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 48; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -3715,7 +3715,7 @@ static int sp_3072_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -5212,10 +5212,10 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 64; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -5223,7 +5223,7 @@ static int sp_4096_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -5693,7 +5693,7 @@ static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -5912,10 +5912,10 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 4; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -5923,7 +5923,7 @@ static int sp_256_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; @@ -23529,7 +23529,7 @@ static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p) #else *p = sp; #endif - if (p == NULL) { + if (*p == NULL) { ret = MEMORY_E; } return ret; @@ -23801,10 +23801,10 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[0] = 0; for (i = 0; i < 6; i++) { - r->dp[j] |= a[i] << s; + r->dp[j] |= (mp_digit)(a[i] << s); r->dp[j] &= (1L << DIGIT_BIT) - 1; s = DIGIT_BIT - s; - r->dp[++j] = a[i] >> s; + r->dp[++j] = (mp_digit)(a[i] >> s); while (s + DIGIT_BIT <= 64) { s += DIGIT_BIT; r->dp[j++] &= (1L << DIGIT_BIT) - 1; @@ -23812,7 +23812,7 @@ static int sp_384_to_mp(const sp_digit* a, mp_int* r) r->dp[j] = 0; } else { - r->dp[j] = a[i] >> s; + r->dp[j] = (mp_digit)(a[i] >> s); } } s = 64 - s; diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index b01c6276f..c6941f1f0 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -37530,13 +37530,13 @@ _sp_256_mont_mul_4: sbbq $0, %r9 # a -= (mu << 32) << 192 subq %rax, %r12 - movq $4294967295, %rax sbbq %rsi, %r13 - movq $18446744069414584321, %rsi sbbq %r8, %r14 sbbq %rdx, %r15 sbbq %rcx, %rbx adcq $0, %r9 + movq $4294967295, %rax + movq $18446744069414584321, %rsi # mask m and sub from result if overflow # m[0] = -1 & mask = mask andq %r9, %rax @@ -37709,13 +37709,13 @@ _sp_256_mont_sqr_4: sbbq $0, %r9 # a -= (mu << 32) << 192 subq %rax, %r12 - movq $4294967295, %rax sbbq %rsi, %r13 - movq $18446744069414584321, %rsi sbbq %r8, %r14 sbbq %rdx, %r15 sbbq %rcx, %rbx adcq $0, %r9 + movq $4294967295, %rax + movq $18446744069414584321, %rsi # mask m and sub from result if overflow # m[0] = -1 & mask = mask andq %r9, %rax @@ -38009,17 +38009,17 @@ _sp_256_mont_add_4: movq 8(%rsi), %rcx movq 16(%rsi), %r8 movq 24(%rsi), %r9 + movq $4294967295, %r10 + movq $18446744069414584321, %r11 addq (%rdx), %rax adcq 8(%rdx), %rcx - movq $4294967295, %r10 adcq 16(%rdx), %r8 - movq $18446744069414584321, %r11 + movq $0, %rsi adcq 24(%rdx), %r9 - movq $0, %rdx - sbbq $0, %rdx - andq %rdx, %r10 - andq %rdx, %r11 - subq %rdx, %rax + sbbq $0, %rsi + andq %rsi, %r10 + andq %rsi, %r11 + subq %rsi, %rax sbbq %r10, %rcx movq %rax, (%rdi) sbbq $0, %r8 @@ -38051,13 +38051,13 @@ _sp_256_mont_dbl_4: movq 8(%rsi), %rax movq 16(%rsi), %rcx movq 24(%rsi), %r8 - xorq %r11, %r11 + movq $4294967295, %r9 + movq $18446744069414584321, %r10 addq %rdx, %rdx adcq %rax, %rax - movq $4294967295, %r9 adcq %rcx, %rcx + movq $0, %r11 adcq %r8, %r8 - movq $18446744069414584321, %r10 sbbq $0, %r11 andq %r11, %r9 andq %r11, %r10 @@ -38093,13 +38093,13 @@ _sp_256_mont_tpl_4: movq 8(%rsi), %rax movq 16(%rsi), %rcx movq 24(%rsi), %r8 - xorq %r11, %r11 + movq $4294967295, %r9 + movq $18446744069414584321, %r10 addq %rdx, %rdx adcq %rax, %rax - movq $4294967295, %r9 adcq %rcx, %rcx + movq $0, %r11 adcq %r8, %r8 - movq $18446744069414584321, %r10 sbbq $0, %r11 andq %r11, %r9 andq %r11, %r10 @@ -38107,13 +38107,13 @@ _sp_256_mont_tpl_4: sbbq %r9, %rax sbbq $0, %rcx sbbq %r10, %r8 - xorq %r11, %r11 + movq $4294967295, %r9 + movq $18446744069414584321, %r10 addq (%rsi), %rdx adcq 8(%rsi), %rax - movq $4294967295, %r9 adcq 16(%rsi), %rcx + movq $0, %r11 adcq 24(%rsi), %r8 - movq $18446744069414584321, %r10 sbbq $0, %r11 andq %r11, %r9 andq %r11, %r10 @@ -38150,17 +38150,17 @@ _sp_256_mont_sub_4: movq 8(%rsi), %rcx movq 16(%rsi), %r8 movq 24(%rsi), %r9 + movq $4294967295, %r10 + movq $18446744069414584321, %r11 subq (%rdx), %rax sbbq 8(%rdx), %rcx - movq $4294967295, %r10 sbbq 16(%rdx), %r8 - movq $18446744069414584321, %r11 + movq $0, %rsi sbbq 24(%rdx), %r9 - movq $0, %rdx - sbbq $0, %rdx - andq %rdx, %r10 - andq %rdx, %r11 - addq %rdx, %rax + sbbq $0, %rsi + andq %rsi, %r10 + andq %rsi, %r11 + addq %rsi, %rax adcq %r10, %rcx movq %rax, (%rdi) adcq $0, %r8 @@ -38370,13 +38370,13 @@ _sp_256_mont_mul_avx2_4: sbbq $0, %r8 # a -= (mu << 32) << 192 subq %rax, %r11 - movq $4294967295, %rax sbbq %rsi, %r12 - movq $18446744069414584321, %rsi sbbq %rbp, %r13 sbbq %rdx, %r14 sbbq %rcx, %r15 adcq $0, %r8 + movq $4294967295, %rax + movq $18446744069414584321, %rsi # mask m and sub from result if overflow # m[0] = -1 & mask = mask andq %r8, %rax @@ -38424,25 +38424,26 @@ _sp_256_mont_sqr_avx2_4: push %rbx # A[0] * A[1] movq (%rsi), %rdx + movq 16(%rsi), %r15 mulxq 8(%rsi), %r9, %r10 # A[0] * A[3] mulxq 24(%rsi), %r11, %r12 # A[2] * A[1] - movq 16(%rsi), %rdx + movq %r15, %rdx mulxq 8(%rsi), %rcx, %rbx - xorq %r15, %r15 - adoxq %rcx, %r11 # A[2] * A[3] mulxq 24(%rsi), %r13, %r14 + xorq %r15, %r15 + adoxq %rcx, %r11 adoxq %rbx, %r12 # A[2] * A[0] mulxq (%rsi), %rcx, %rbx - adoxq %r15, %r13 - adcxq %rcx, %r10 - adoxq %r15, %r14 # A[1] * A[3] movq 8(%rsi), %rdx + adoxq %r15, %r13 mulxq 24(%rsi), %rax, %r8 + adcxq %rcx, %r10 + adoxq %r15, %r14 adcxq %rbx, %r11 adcxq %rax, %r12 adcxq %r8, %r13 @@ -38497,7 +38498,7 @@ _sp_256_mont_sqr_avx2_4: adcq %r10, %rdx # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu # a += mu << 256 - movq $0, %r8 + xorq %r8, %r8 addq %rax, %r12 adcq %rsi, %r13 adcq %rcx, %r14 @@ -38526,13 +38527,13 @@ _sp_256_mont_sqr_avx2_4: sbbq $0, %r8 # a -= (mu << 32) << 192 subq %rax, %r11 - movq $4294967295, %rax sbbq %rsi, %r12 - movq $18446744069414584321, %rsi sbbq %rcx, %r13 sbbq %rdx, %r14 sbbq %rbx, %r15 adcq $0, %r8 + movq $4294967295, %rax + movq $18446744069414584321, %rsi # mask m and sub from result if overflow # m[0] = -1 & mask = mask andq %r8, %rax @@ -38876,102 +38877,103 @@ sp_256_mul_avx2_4: .p2align 4 _sp_256_mul_avx2_4: #endif /* __APPLE__ */ + push %rbx + push %rbp push %r12 push %r13 push %r14 push %r15 - push %rbx - movq %rdx, %rax - # A[0] * B[0] - movq (%rax), %rdx - mulxq (%rsi), %r9, %r10 - # A[2] * B[0] - mulxq 16(%rsi), %r11, %r12 - # A[1] * B[0] - mulxq 8(%rsi), %rcx, %r8 - xorq %rbx, %rbx + movq %rdx, %rbp + # A[0] * B[0] + movq (%rbp), %rdx + mulxq (%rsi), %r8, %r9 + # A[2] * B[0] + mulxq 16(%rsi), %r10, %r11 + # A[1] * B[0] + mulxq 8(%rsi), %rax, %rcx + xorq %r15, %r15 + adcxq %rax, %r9 + # A[1] * B[3] + movq 24(%rbp), %rdx + mulxq 8(%rsi), %r12, %r13 adcxq %rcx, %r10 - # A[1] * B[3] - movq 24(%rax), %rdx - mulxq 8(%rsi), %r13, %r14 - adcxq %r8, %r11 - # A[0] * B[1] - movq 8(%rax), %rdx - mulxq (%rsi), %rcx, %r8 + # A[0] * B[1] + movq 8(%rbp), %rdx + mulxq (%rsi), %rax, %rcx + adoxq %rax, %r9 + # A[2] * B[1] + mulxq 16(%rsi), %rax, %r14 adoxq %rcx, %r10 - # A[2] * B[1] - mulxq 16(%rsi), %rcx, %r15 - adoxq %r8, %r11 - adcxq %rcx, %r12 - # A[1] * B[2] - movq 16(%rax), %rdx - mulxq 8(%rsi), %rcx, %r8 + adcxq %rax, %r11 + # A[1] * B[2] + movq 16(%rbp), %rdx + mulxq 8(%rsi), %rax, %rcx + adcxq %r14, %r12 + adoxq %rax, %r11 adcxq %r15, %r13 adoxq %rcx, %r12 - adcxq %rbx, %r14 - adoxq %r8, %r13 - # A[0] * B[2] - mulxq (%rsi), %rcx, %r8 - adoxq %rbx, %r14 - xorq %r15, %r15 + # A[0] * B[2] + mulxq (%rsi), %rax, %rcx + adoxq %r15, %r13 + xorq %r14, %r14 + adcxq %rax, %r10 + # A[1] * B[1] + movq 8(%rbp), %rdx + mulxq 8(%rsi), %rdx, %rax adcxq %rcx, %r11 - # A[1] * B[1] - movq 8(%rax), %rdx - mulxq 8(%rsi), %rdx, %rcx - adcxq %r8, %r12 - adoxq %rdx, %r11 - # A[3] * B[1] - movq 8(%rax), %rdx - adoxq %rcx, %r12 - mulxq 24(%rsi), %rcx, %r8 + adoxq %rdx, %r10 + # A[3] * B[1] + movq 8(%rbp), %rdx + adoxq %rax, %r11 + mulxq 24(%rsi), %rax, %rcx + adcxq %rax, %r12 + # A[2] * B[2] + movq 16(%rbp), %rdx + mulxq 16(%rsi), %rdx, %rax adcxq %rcx, %r13 - # A[2] * B[2] - movq 16(%rax), %rdx - mulxq 16(%rsi), %rdx, %rcx - adcxq %r8, %r14 - adoxq %rdx, %r13 - # A[3] * B[3] - movq 24(%rax), %rdx - adoxq %rcx, %r14 - mulxq 24(%rsi), %rcx, %r8 - adoxq %rbx, %r15 - adcxq %rcx, %r15 - # A[0] * B[3] - mulxq (%rsi), %rdx, %rcx - adcxq %r8, %rbx - xorq %r8, %r8 - adcxq %rdx, %r12 - # A[3] * B[0] - movq (%rax), %rdx - adcxq %rcx, %r13 - mulxq 24(%rsi), %rdx, %rcx adoxq %rdx, %r12 - adoxq %rcx, %r13 - # A[2] * B[3] - movq 24(%rax), %rdx - mulxq 16(%rsi), %rdx, %rcx - adcxq %rdx, %r14 - # A[3] * B[2] - movq 16(%rax), %rdx + # A[3] * B[3] + movq 24(%rbp), %rdx + adoxq %rax, %r13 + mulxq 24(%rsi), %rax, %rcx + adoxq %r15, %r14 + adcxq %rax, %r14 + # A[0] * B[3] + mulxq (%rsi), %rdx, %rax adcxq %rcx, %r15 - mulxq 24(%rsi), %rcx, %rdx - adcxq %r8, %rbx - adoxq %rcx, %r14 - adoxq %rdx, %r15 - adoxq %r8, %rbx - movq %r9, (%rdi) - movq %r10, 8(%rdi) - movq %r11, 16(%rdi) - movq %r12, 24(%rdi) - movq %r13, 32(%rdi) - movq %r14, 40(%rdi) - movq %r15, 48(%rdi) - movq %rbx, 56(%rdi) - pop %rbx + xorq %rcx, %rcx + adcxq %rdx, %r11 + # A[3] * B[0] + movq 24(%rsi), %rdx + adcxq %rax, %r12 + mulxq (%rbp), %rbx, %rax + adoxq %rbx, %r11 + adoxq %rax, %r12 + # A[3] * B[2] + mulxq 16(%rbp), %rdx, %rax + adcxq %rdx, %r13 + # A[2] * B[3] + movq 24(%rbp), %rdx + adcxq %rax, %r14 + mulxq 16(%rsi), %rax, %rdx + adcxq %rcx, %r15 + adoxq %rax, %r13 + adoxq %rdx, %r14 + adoxq %rcx, %r15 + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) pop %r15 pop %r14 pop %r13 pop %r12 + pop %rbp + pop %rbx repz retq #ifndef __APPLE__ .size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4 @@ -39291,32 +39293,33 @@ sp_256_sqr_avx2_4: .p2align 4 _sp_256_sqr_avx2_4: #endif /* __APPLE__ */ - push %rbx push %r12 push %r13 push %r14 push %r15 + push %rbx # A[0] * A[1] movq (%rsi), %rdx + movq 16(%rsi), %r15 mulxq 8(%rsi), %r9, %r10 # A[0] * A[3] mulxq 24(%rsi), %r11, %r12 # A[2] * A[1] - movq 16(%rsi), %rdx + movq %r15, %rdx mulxq 8(%rsi), %rcx, %rbx - xorq %r15, %r15 - adoxq %rcx, %r11 # A[2] * A[3] mulxq 24(%rsi), %r13, %r14 + xorq %r15, %r15 + adoxq %rcx, %r11 adoxq %rbx, %r12 # A[2] * A[0] mulxq (%rsi), %rcx, %rbx - adoxq %r15, %r13 - adcxq %rcx, %r10 - adoxq %r15, %r14 # A[1] * A[3] movq 8(%rsi), %rdx + adoxq %r15, %r13 mulxq 24(%rsi), %rax, %r8 + adcxq %rcx, %r10 + adoxq %r15, %r14 adcxq %rbx, %r11 adcxq %rax, %r12 adcxq %r8, %r13 @@ -39327,11 +39330,11 @@ _sp_256_sqr_avx2_4: movq (%rsi), %rdx mulxq %rdx, %r8, %rax adcxq %r9, %r9 + adcxq %r10, %r10 + adoxq %rax, %r9 # A[1] * A[1] movq 8(%rsi), %rdx mulxq %rdx, %rcx, %rbx - adcxq %r10, %r10 - adoxq %rax, %r9 adcxq %r11, %r11 adoxq %rcx, %r10 # A[2] * A[2] @@ -39341,10 +39344,10 @@ _sp_256_sqr_avx2_4: adoxq %rbx, %r11 adcxq %r13, %r13 adoxq %rax, %r12 + adcxq %r14, %r14 # A[3] * A[3] movq 24(%rsi), %rdx mulxq %rdx, %rax, %rbx - adcxq %r14, %r14 adoxq %rcx, %r13 adcxq %r15, %r15 adoxq %rax, %r14 @@ -39357,11 +39360,11 @@ _sp_256_sqr_avx2_4: movq %r13, 40(%rdi) movq %r14, 48(%rdi) movq %r15, 56(%rdi) + pop %rbx pop %r15 pop %r14 pop %r13 pop %r12 - pop %rbx repz retq #ifndef __APPLE__ .size sp_256_sqr_avx2_4,.-sp_256_sqr_avx2_4