SP ASM ARM32/Thumb2: inline asm for add and subs

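Implement Montgomery add, sub, double and triple in inline assembly for P384 (sp_384_mont_add_12, sp_384_mont_sub_12, sp_384_mont_dbl_12 and sp_384_mont_tpl_12), for both ARM32 and Thumb2.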
This commit is contained in:
Sean Parkinson
2025-10-28 17:49:40 +10:00
parent e6af5bcd4f
commit 50521699af
2 changed files with 682 additions and 36 deletions

View File

@@ -91860,10 +91860,83 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_add_12(sp_digit* r,
register const sp_digit* m asm ("r3") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, b);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"mov r3, #0\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"adds r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"adcs r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"adcs r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"adc r3, r3, #0\n\t"
"sub %[r], %[r], #48\n\t"
"rsb r3, r3, #0\n\t"
"lsr r12, r3, #1\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"subs r8, r8, r3\n\t"
"sbcs r9, r9, #0\n\t"
"sbcs r10, r10, #0\n\t"
"sbcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"sbcs r8, r8, r12, LSL #1\n\t"
"sbcs r9, r9, r3\n\t"
"sbcs r10, r10, r3\n\t"
"sbcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"sbcs r8, r8, r3\n\t"
"sbcs r9, r9, r3\n\t"
"sbcs r10, r10, r3\n\t"
"sbcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"sbc %[b], %[b], %[b]\n\t"
"sub %[r], %[r], #48\n\t"
"sub r3, r3, %[b]\n\t"
"lsr r12, r3, #1\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"subs r8, r8, r3\n\t"
"sbcs r9, r9, #0\n\t"
"sbcs r10, r10, #0\n\t"
"sbcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"sbcs r8, r8, r12, LSL #1\n\t"
"sbcs r9, r9, r3\n\t"
"sbcs r10, r10, r3\n\t"
"sbcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"sbcs r8, r8, r3\n\t"
"sbcs r9, r9, r3\n\t"
"sbcs r10, r10, r3\n\t"
"sbc r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
}
/* Double a Montgomery form number (r = (a + a) % m).
@@ -91886,10 +91959,73 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_dbl_12(sp_digit* r,
register const sp_digit* m asm ("r2") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"mov r2, #0\n\t"
"ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adds r4, r4, r4\n\t"
"adcs r5, r5, r5\n\t"
"adcs r6, r6, r6\n\t"
"adcs r7, r7, r7\n\t"
"adcs r8, r8, r8\n\t"
"adcs r9, r9, r9\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adcs r4, r4, r4\n\t"
"adcs r5, r5, r5\n\t"
"adcs r6, r6, r6\n\t"
"adcs r7, r7, r7\n\t"
"adcs r8, r8, r8\n\t"
"adcs r9, r9, r9\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adc r2, r2, #0\n\t"
"sub %[r], %[r], #48\n\t"
"rsb r2, r2, #0\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"sbc %[a], %[a], %[a]\n\t"
"sub %[r], %[r], #48\n\t"
"sub r2, r2, %[a]\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbc r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
:
#else
:
: [r] "r" (r), [a] "r" (a), [m] "r" (m)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3"
);
}
/* Triple a Montgomery form number (r = (a + a + a) % m).
@@ -91912,12 +92048,138 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_tpl_12(sp_digit* r,
register const sp_digit* m asm ("r2") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
o = sp_384_add_12(r, r, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"mov r2, #0\n\t"
"ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adds r4, r4, r4\n\t"
"adcs r5, r5, r5\n\t"
"adcs r6, r6, r6\n\t"
"adcs r7, r7, r7\n\t"
"adcs r8, r8, r8\n\t"
"adcs r9, r9, r9\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adcs r4, r4, r4\n\t"
"adcs r5, r5, r5\n\t"
"adcs r6, r6, r6\n\t"
"adcs r7, r7, r7\n\t"
"adcs r8, r8, r8\n\t"
"adcs r9, r9, r9\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"adc r2, r2, #0\n\t"
"sub %[r], %[r], #48\n\t"
"rsb r2, r2, #0\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"sbc r12, r12, r12\n\t"
"sub %[r], %[r], #48\n\t"
"sub r2, r2, r12\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbc r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"sub %[r], %[r], #48\n\t"
"sub %[a], %[a], #48\n\t"
"mov r2, #0\n\t"
"ldm %[a]!, {r4, r5, r6, r7}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adds r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r4, r5, r6, r7}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r4, r5, r6, r7}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r4\n\t"
"adcs r9, r9, r5\n\t"
"adcs r10, r10, r6\n\t"
"adcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"adc r2, r2, #0\n\t"
"sub %[r], %[r], #48\n\t"
"rsb r2, r2, #0\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"sbc r12, r12, r12\n\t"
"sub %[r], %[r], #48\n\t"
"sub r2, r2, r12\n\t"
"lsr r3, r2, #1\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"subs r4, r4, r2\n\t"
"sbcs r5, r5, #0\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r3, LSL #1\n\t"
"sbcs r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"sbcs r4, r4, r2\n\t"
"sbcs r5, r5, r2\n\t"
"sbcs r6, r6, r2\n\t"
"sbcs r7, r7, r2\n\t"
"sbcs r8, r8, r2\n\t"
"sbc r9, r9, r2\n\t"
"stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
:
#else
:
: [r] "r" (r), [a] "r" (a), [m] "r" (m)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r3", "r12"
);
}
#ifdef WOLFSSL_SP_SMALL
@@ -92185,10 +92447,81 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_sub_12(sp_digit* r,
register const sp_digit* m asm ("r3") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_sub_12(r, a, b);
sp_384_cond_add_12(r, r, m, o);
__asm__ __volatile__ (
"mov r3, #0\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"subs r8, r8, r4\n\t"
"sbcs r9, r9, r5\n\t"
"sbcs r10, r10, r6\n\t"
"sbcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"sbcs r8, r8, r4\n\t"
"sbcs r9, r9, r5\n\t"
"sbcs r10, r10, r6\n\t"
"sbcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[a]!, {r8, r9, r10, r11}\n\t"
"ldm %[b]!, {r4, r5, r6, r7}\n\t"
"sbcs r8, r8, r4\n\t"
"sbcs r9, r9, r5\n\t"
"sbcs r10, r10, r6\n\t"
"sbcs r11, r11, r7\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"sbc r3, r3, #0\n\t"
"sub %[r], %[r], #48\n\t"
"lsr r12, r3, #1\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adds r8, r8, r3\n\t"
"adcs r9, r9, #0\n\t"
"adcs r10, r10, #0\n\t"
"adcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r12, LSL #1\n\t"
"adcs r9, r9, r3\n\t"
"adcs r10, r10, r3\n\t"
"adcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r3\n\t"
"adcs r9, r9, r3\n\t"
"adcs r10, r10, r3\n\t"
"adcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"adc r3, r3, #0\n\t"
"sub %[r], %[r], #48\n\t"
"lsr r12, r3, #1\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adds r8, r8, r3\n\t"
"adcs r9, r9, #0\n\t"
"adcs r10, r10, #0\n\t"
"adcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r12, LSL #1\n\t"
"adcs r9, r9, r3\n\t"
"adcs r10, r10, r3\n\t"
"adcs r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
"ldm %[r], {r8, r9, r10, r11}\n\t"
"adcs r8, r8, r3\n\t"
"adcs r9, r9, r3\n\t"
"adcs r10, r10, r3\n\t"
"adc r11, r11, r3\n\t"
"stm %[r]!, {r8, r9, r10, r11}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
:
#else
:
: [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
}
#ifdef WOLFSSL_SP_SMALL

View File

@@ -45199,10 +45199,78 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r,
register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, b);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"MOV r3, #0x0\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"ADDS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"ADCS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"ADCS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"ADC r3, r3, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"RSB r3, r3, #0x0\n\t"
"LSR r12, r3, #1\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SUBS r8, r8, r3\n\t"
"SBCS r9, r9, #0x0\n\t"
"SBCS r10, r10, #0x0\n\t"
"SBCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SBCS r8, r8, r12, LSL #1\n\t"
"SBCS r9, r9, r3\n\t"
"SBCS r10, r10, r3\n\t"
"SBCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SBCS r8, r8, r3\n\t"
"SBCS r9, r9, r3\n\t"
"SBCS r10, r10, r3\n\t"
"SBCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"SBC %[b], %[b], %[b]\n\t"
"SUB %[r], %[r], #0x30\n\t"
"SUB r3, r3, %[b]\n\t"
"LSR r12, r3, #1\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SUBS r8, r8, r3\n\t"
"SBCS r9, r9, #0x0\n\t"
"SBCS r10, r10, #0x0\n\t"
"SBCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SBCS r8, r8, r12, LSL #1\n\t"
"SBCS r9, r9, r3\n\t"
"SBCS r10, r10, r3\n\t"
"SBCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"SBCS r8, r8, r3\n\t"
"SBCS r9, r9, r3\n\t"
"SBCS r10, r10, r3\n\t"
"SBC r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
}
/* Double a Montgomery form number (r = (a + a) % m).
@@ -45225,10 +45293,68 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r,
register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"MOV r2, #0x0\n\t"
"LDM %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r4, r4, r4\n\t"
"ADCS r5, r5, r5\n\t"
"ADCS r6, r6, r6\n\t"
"ADCS r7, r7, r7\n\t"
"ADCS r8, r8, r8\n\t"
"ADCS r9, r9, r9\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADCS r4, r4, r4\n\t"
"ADCS r5, r5, r5\n\t"
"ADCS r6, r6, r6\n\t"
"ADCS r7, r7, r7\n\t"
"ADCS r8, r8, r8\n\t"
"ADCS r9, r9, r9\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADC r2, r2, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"RSB r2, r2, #0x0\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"SBC %[a], %[a], %[a]\n\t"
"SUB %[r], %[r], #0x30\n\t"
"SUB r2, r2, %[a]\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBC r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
: [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3"
);
}
/* Triple a Montgomery form number (r = (a + a + a) % m).
@@ -45251,12 +45377,133 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r,
register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_add_12(r, a, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
o = sp_384_add_12(r, r, a);
sp_384_cond_sub_12(r, r, m, 0 - o);
__asm__ __volatile__ (
"MOV r2, #0x0\n\t"
"LDM %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r4, r4, r4\n\t"
"ADCS r5, r5, r5\n\t"
"ADCS r6, r6, r6\n\t"
"ADCS r7, r7, r7\n\t"
"ADCS r8, r8, r8\n\t"
"ADCS r9, r9, r9\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADCS r4, r4, r4\n\t"
"ADCS r5, r5, r5\n\t"
"ADCS r6, r6, r6\n\t"
"ADCS r7, r7, r7\n\t"
"ADCS r8, r8, r8\n\t"
"ADCS r9, r9, r9\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"ADC r2, r2, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"RSB r2, r2, #0x0\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"SBC r12, r12, r12\n\t"
"SUB %[r], %[r], #0x30\n\t"
"SUB r2, r2, r12\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBC r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"SUB %[r], %[r], #0x30\n\t"
"SUB %[a], %[a], #0x30\n\t"
"MOV r2, #0x0\n\t"
"LDM %[a]!, {r4, r5, r6, r7}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADDS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r4, r5, r6, r7}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r4, r5, r6, r7}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r4\n\t"
"ADCS r9, r9, r5\n\t"
"ADCS r10, r10, r6\n\t"
"ADCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"ADC r2, r2, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"RSB r2, r2, #0x0\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"SBC r12, r12, r12\n\t"
"SUB %[r], %[r], #0x30\n\t"
"SUB r2, r2, r12\n\t"
"LSR r3, r2, #1\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SUBS r4, r4, r2\n\t"
"SBCS r5, r5, #0x0\n\t"
"SBCS r6, r6, #0x0\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r3, LSL #1\n\t"
"SBCS r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
"LDM %[r], {r4, r5, r6, r7, r8, r9}\n\t"
"SBCS r4, r4, r2\n\t"
"SBCS r5, r5, r2\n\t"
"SBCS r6, r6, r2\n\t"
"SBCS r7, r7, r2\n\t"
"SBCS r8, r8, r2\n\t"
"SBC r9, r9, r2\n\t"
"STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
: [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r3", "r12"
);
}
#ifdef WOLFSSL_SP_SMALL
@@ -45524,10 +45771,76 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r,
register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p;
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
sp_digit o;
o = sp_384_sub_12(r, a, b);
sp_384_cond_add_12(r, r, m, o);
__asm__ __volatile__ (
"MOV r3, #0x0\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"SUBS r8, r8, r4\n\t"
"SBCS r9, r9, r5\n\t"
"SBCS r10, r10, r6\n\t"
"SBCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"SBCS r8, r8, r4\n\t"
"SBCS r9, r9, r5\n\t"
"SBCS r10, r10, r6\n\t"
"SBCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[a]!, {r8, r9, r10, r11}\n\t"
"LDM %[b]!, {r4, r5, r6, r7}\n\t"
"SBCS r8, r8, r4\n\t"
"SBCS r9, r9, r5\n\t"
"SBCS r10, r10, r6\n\t"
"SBCS r11, r11, r7\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"SBC r3, r3, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"LSR r12, r3, #1\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADDS r8, r8, r3\n\t"
"ADCS r9, r9, #0x0\n\t"
"ADCS r10, r10, #0x0\n\t"
"ADCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r12, LSL #1\n\t"
"ADCS r9, r9, r3\n\t"
"ADCS r10, r10, r3\n\t"
"ADCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r3\n\t"
"ADCS r9, r9, r3\n\t"
"ADCS r10, r10, r3\n\t"
"ADCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"ADC r3, r3, #0x0\n\t"
"SUB %[r], %[r], #0x30\n\t"
"LSR r12, r3, #1\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADDS r8, r8, r3\n\t"
"ADCS r9, r9, #0x0\n\t"
"ADCS r10, r10, #0x0\n\t"
"ADCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r12, LSL #1\n\t"
"ADCS r9, r9, r3\n\t"
"ADCS r10, r10, r3\n\t"
"ADCS r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
"LDM %[r], {r8, r9, r10, r11}\n\t"
"ADCS r8, r8, r3\n\t"
"ADCS r9, r9, r3\n\t"
"ADCS r10, r10, r3\n\t"
"ADC r11, r11, r3\n\t"
"STM %[r]!, {r8, r9, r10, r11}\n\t"
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12"
);
}
#ifdef WOLFSSL_SP_SMALL