@@ -91860,10 +91860,83 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_add_12(sp_digit* r,
9186091860 register const sp_digit* m asm ("r3") = (const sp_digit*)m_p;
9186191861#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
9186291862
91863- sp_digit o;
91864-
91865- o = sp_384_add_12(r, a, b);
91866- sp_384_cond_sub_12(r, r, m, 0 - o);
91863+ __asm__ __volatile__ (
91864+ "mov r3, #0\n\t"
91865+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
91866+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
91867+ "adds r8, r8, r4\n\t"
91868+ "adcs r9, r9, r5\n\t"
91869+ "adcs r10, r10, r6\n\t"
91870+ "adcs r11, r11, r7\n\t"
91871+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91872+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
91873+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
91874+ "adcs r8, r8, r4\n\t"
91875+ "adcs r9, r9, r5\n\t"
91876+ "adcs r10, r10, r6\n\t"
91877+ "adcs r11, r11, r7\n\t"
91878+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91879+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
91880+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
91881+ "adcs r8, r8, r4\n\t"
91882+ "adcs r9, r9, r5\n\t"
91883+ "adcs r10, r10, r6\n\t"
91884+ "adcs r11, r11, r7\n\t"
91885+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91886+ "adc r3, r3, #0\n\t"
91887+ "sub %[r], %[r], #48\n\t"
91888+ "rsb r3, r3, #0\n\t"
91889+ "lsr r12, r3, #1\n\t"
91890+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91891+ "subs r8, r8, r3\n\t"
91892+ "sbcs r9, r9, #0\n\t"
91893+ "sbcs r10, r10, #0\n\t"
91894+ "sbcs r11, r11, r3\n\t"
91895+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91896+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91897+ "sbcs r8, r8, r12, LSL #1\n\t"
91898+ "sbcs r9, r9, r3\n\t"
91899+ "sbcs r10, r10, r3\n\t"
91900+ "sbcs r11, r11, r3\n\t"
91901+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91902+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91903+ "sbcs r8, r8, r3\n\t"
91904+ "sbcs r9, r9, r3\n\t"
91905+ "sbcs r10, r10, r3\n\t"
91906+ "sbcs r11, r11, r3\n\t"
91907+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91908+ "sbc %[b], %[b], %[b]\n\t"
91909+ "sub %[r], %[r], #48\n\t"
91910+ "sub r3, r3, %[b]\n\t"
91911+ "lsr r12, r3, #1\n\t"
91912+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91913+ "subs r8, r8, r3\n\t"
91914+ "sbcs r9, r9, #0\n\t"
91915+ "sbcs r10, r10, #0\n\t"
91916+ "sbcs r11, r11, r3\n\t"
91917+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91918+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91919+ "sbcs r8, r8, r12, LSL #1\n\t"
91920+ "sbcs r9, r9, r3\n\t"
91921+ "sbcs r10, r10, r3\n\t"
91922+ "sbcs r11, r11, r3\n\t"
91923+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91924+ "ldm %[r], {r8, r9, r10, r11}\n\t"
91925+ "sbcs r8, r8, r3\n\t"
91926+ "sbcs r9, r9, r3\n\t"
91927+ "sbcs r10, r10, r3\n\t"
91928+ "sbc r11, r11, r3\n\t"
91929+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
91930+ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
91931+ : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
91932+ :
91933+ #else
91934+ :
91935+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
91936+ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
91937+ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
91938+ "r12"
91939+ );
9186791940}
9186891941
9186991942/* Double a Montgomery form number (r = (a + a) % m).
@@ -91886,10 +91959,73 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_dbl_12(sp_digit* r,
9188691959 register const sp_digit* m asm ("r2") = (const sp_digit*)m_p;
9188791960#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
9188891961
91889- sp_digit o;
91890-
91891- o = sp_384_add_12(r, a, a);
91892- sp_384_cond_sub_12(r, r, m, 0 - o);
91962+ __asm__ __volatile__ (
91963+ "mov r2, #0\n\t"
91964+ "ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
91965+ "adds r4, r4, r4\n\t"
91966+ "adcs r5, r5, r5\n\t"
91967+ "adcs r6, r6, r6\n\t"
91968+ "adcs r7, r7, r7\n\t"
91969+ "adcs r8, r8, r8\n\t"
91970+ "adcs r9, r9, r9\n\t"
91971+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
91972+ "ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
91973+ "adcs r4, r4, r4\n\t"
91974+ "adcs r5, r5, r5\n\t"
91975+ "adcs r6, r6, r6\n\t"
91976+ "adcs r7, r7, r7\n\t"
91977+ "adcs r8, r8, r8\n\t"
91978+ "adcs r9, r9, r9\n\t"
91979+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
91980+ "adc r2, r2, #0\n\t"
91981+ "sub %[r], %[r], #48\n\t"
91982+ "rsb r2, r2, #0\n\t"
91983+ "lsr r3, r2, #1\n\t"
91984+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
91985+ "subs r4, r4, r2\n\t"
91986+ "sbcs r5, r5, #0\n\t"
91987+ "sbcs r6, r6, #0\n\t"
91988+ "sbcs r7, r7, r2\n\t"
91989+ "sbcs r8, r8, r3, LSL #1\n\t"
91990+ "sbcs r9, r9, r2\n\t"
91991+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
91992+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
91993+ "sbcs r4, r4, r2\n\t"
91994+ "sbcs r5, r5, r2\n\t"
91995+ "sbcs r6, r6, r2\n\t"
91996+ "sbcs r7, r7, r2\n\t"
91997+ "sbcs r8, r8, r2\n\t"
91998+ "sbcs r9, r9, r2\n\t"
91999+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92000+ "sbc %[a], %[a], %[a]\n\t"
92001+ "sub %[r], %[r], #48\n\t"
92002+ "sub r2, r2, %[a]\n\t"
92003+ "lsr r3, r2, #1\n\t"
92004+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92005+ "subs r4, r4, r2\n\t"
92006+ "sbcs r5, r5, #0\n\t"
92007+ "sbcs r6, r6, #0\n\t"
92008+ "sbcs r7, r7, r2\n\t"
92009+ "sbcs r8, r8, r3, LSL #1\n\t"
92010+ "sbcs r9, r9, r2\n\t"
92011+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92012+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92013+ "sbcs r4, r4, r2\n\t"
92014+ "sbcs r5, r5, r2\n\t"
92015+ "sbcs r6, r6, r2\n\t"
92016+ "sbcs r7, r7, r2\n\t"
92017+ "sbcs r8, r8, r2\n\t"
92018+ "sbc r9, r9, r2\n\t"
92019+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92020+ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
92021+ : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
92022+ :
92023+ #else
92024+ :
92025+ : [r] "r" (r), [a] "r" (a), [m] "r" (m)
92026+ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
92027+ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3"
92028+ );
9189392029}
9189492030
9189592031/* Triple a Montgomery form number (r = (a + a + a) % m).
@@ -91912,12 +92048,138 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_tpl_12(sp_digit* r,
9191292048 register const sp_digit* m asm ("r2") = (const sp_digit*)m_p;
9191392049#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
9191492050
91915- sp_digit o;
91916-
91917- o = sp_384_add_12(r, a, a);
91918- sp_384_cond_sub_12(r, r, m, 0 - o);
91919- o = sp_384_add_12(r, r, a);
91920- sp_384_cond_sub_12(r, r, m, 0 - o);
92051+ __asm__ __volatile__ (
92052+ "mov r2, #0\n\t"
92053+ "ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
92054+ "adds r4, r4, r4\n\t"
92055+ "adcs r5, r5, r5\n\t"
92056+ "adcs r6, r6, r6\n\t"
92057+ "adcs r7, r7, r7\n\t"
92058+ "adcs r8, r8, r8\n\t"
92059+ "adcs r9, r9, r9\n\t"
92060+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92061+ "ldm %[a]!, {r4, r5, r6, r7, r8, r9}\n\t"
92062+ "adcs r4, r4, r4\n\t"
92063+ "adcs r5, r5, r5\n\t"
92064+ "adcs r6, r6, r6\n\t"
92065+ "adcs r7, r7, r7\n\t"
92066+ "adcs r8, r8, r8\n\t"
92067+ "adcs r9, r9, r9\n\t"
92068+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92069+ "adc r2, r2, #0\n\t"
92070+ "sub %[r], %[r], #48\n\t"
92071+ "rsb r2, r2, #0\n\t"
92072+ "lsr r3, r2, #1\n\t"
92073+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92074+ "subs r4, r4, r2\n\t"
92075+ "sbcs r5, r5, #0\n\t"
92076+ "sbcs r6, r6, #0\n\t"
92077+ "sbcs r7, r7, r2\n\t"
92078+ "sbcs r8, r8, r3, LSL #1\n\t"
92079+ "sbcs r9, r9, r2\n\t"
92080+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92081+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92082+ "sbcs r4, r4, r2\n\t"
92083+ "sbcs r5, r5, r2\n\t"
92084+ "sbcs r6, r6, r2\n\t"
92085+ "sbcs r7, r7, r2\n\t"
92086+ "sbcs r8, r8, r2\n\t"
92087+ "sbcs r9, r9, r2\n\t"
92088+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92089+ "sbc r12, r12, r12\n\t"
92090+ "sub %[r], %[r], #48\n\t"
92091+ "sub r2, r2, r12\n\t"
92092+ "lsr r3, r2, #1\n\t"
92093+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92094+ "subs r4, r4, r2\n\t"
92095+ "sbcs r5, r5, #0\n\t"
92096+ "sbcs r6, r6, #0\n\t"
92097+ "sbcs r7, r7, r2\n\t"
92098+ "sbcs r8, r8, r3, LSL #1\n\t"
92099+ "sbcs r9, r9, r2\n\t"
92100+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92101+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92102+ "sbcs r4, r4, r2\n\t"
92103+ "sbcs r5, r5, r2\n\t"
92104+ "sbcs r6, r6, r2\n\t"
92105+ "sbcs r7, r7, r2\n\t"
92106+ "sbcs r8, r8, r2\n\t"
92107+ "sbc r9, r9, r2\n\t"
92108+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92109+ "sub %[r], %[r], #48\n\t"
92110+ "sub %[a], %[a], #48\n\t"
92111+ "mov r2, #0\n\t"
92112+ "ldm %[a]!, {r4, r5, r6, r7}\n\t"
92113+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92114+ "adds r8, r8, r4\n\t"
92115+ "adcs r9, r9, r5\n\t"
92116+ "adcs r10, r10, r6\n\t"
92117+ "adcs r11, r11, r7\n\t"
92118+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92119+ "ldm %[a]!, {r4, r5, r6, r7}\n\t"
92120+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92121+ "adcs r8, r8, r4\n\t"
92122+ "adcs r9, r9, r5\n\t"
92123+ "adcs r10, r10, r6\n\t"
92124+ "adcs r11, r11, r7\n\t"
92125+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92126+ "ldm %[a]!, {r4, r5, r6, r7}\n\t"
92127+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92128+ "adcs r8, r8, r4\n\t"
92129+ "adcs r9, r9, r5\n\t"
92130+ "adcs r10, r10, r6\n\t"
92131+ "adcs r11, r11, r7\n\t"
92132+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92133+ "adc r2, r2, #0\n\t"
92134+ "sub %[r], %[r], #48\n\t"
92135+ "rsb r2, r2, #0\n\t"
92136+ "lsr r3, r2, #1\n\t"
92137+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92138+ "subs r4, r4, r2\n\t"
92139+ "sbcs r5, r5, #0\n\t"
92140+ "sbcs r6, r6, #0\n\t"
92141+ "sbcs r7, r7, r2\n\t"
92142+ "sbcs r8, r8, r3, LSL #1\n\t"
92143+ "sbcs r9, r9, r2\n\t"
92144+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92145+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92146+ "sbcs r4, r4, r2\n\t"
92147+ "sbcs r5, r5, r2\n\t"
92148+ "sbcs r6, r6, r2\n\t"
92149+ "sbcs r7, r7, r2\n\t"
92150+ "sbcs r8, r8, r2\n\t"
92151+ "sbcs r9, r9, r2\n\t"
92152+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92153+ "sbc r12, r12, r12\n\t"
92154+ "sub %[r], %[r], #48\n\t"
92155+ "sub r2, r2, r12\n\t"
92156+ "lsr r3, r2, #1\n\t"
92157+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92158+ "subs r4, r4, r2\n\t"
92159+ "sbcs r5, r5, #0\n\t"
92160+ "sbcs r6, r6, #0\n\t"
92161+ "sbcs r7, r7, r2\n\t"
92162+ "sbcs r8, r8, r3, LSL #1\n\t"
92163+ "sbcs r9, r9, r2\n\t"
92164+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92165+ "ldm %[r], {r4, r5, r6, r7, r8, r9}\n\t"
92166+ "sbcs r4, r4, r2\n\t"
92167+ "sbcs r5, r5, r2\n\t"
92168+ "sbcs r6, r6, r2\n\t"
92169+ "sbcs r7, r7, r2\n\t"
92170+ "sbcs r8, r8, r2\n\t"
92171+ "sbc r9, r9, r2\n\t"
92172+ "stm %[r]!, {r4, r5, r6, r7, r8, r9}\n\t"
92173+ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
92174+ : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
92175+ :
92176+ #else
92177+ :
92178+ : [r] "r" (r), [a] "r" (a), [m] "r" (m)
92179+ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
92180+ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
92181+ "r3", "r12"
92182+ );
9192192183}
9192292184
9192392185#ifdef WOLFSSL_SP_SMALL
@@ -92185,10 +92447,81 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_sub_12(sp_digit* r,
9218592447 register const sp_digit* m asm ("r3") = (const sp_digit*)m_p;
9218692448#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
9218792449
92188- sp_digit o;
92189-
92190- o = sp_384_sub_12(r, a, b);
92191- sp_384_cond_add_12(r, r, m, o);
92450+ __asm__ __volatile__ (
92451+ "mov r3, #0\n\t"
92452+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
92453+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
92454+ "subs r8, r8, r4\n\t"
92455+ "sbcs r9, r9, r5\n\t"
92456+ "sbcs r10, r10, r6\n\t"
92457+ "sbcs r11, r11, r7\n\t"
92458+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92459+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
92460+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
92461+ "sbcs r8, r8, r4\n\t"
92462+ "sbcs r9, r9, r5\n\t"
92463+ "sbcs r10, r10, r6\n\t"
92464+ "sbcs r11, r11, r7\n\t"
92465+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92466+ "ldm %[a]!, {r8, r9, r10, r11}\n\t"
92467+ "ldm %[b]!, {r4, r5, r6, r7}\n\t"
92468+ "sbcs r8, r8, r4\n\t"
92469+ "sbcs r9, r9, r5\n\t"
92470+ "sbcs r10, r10, r6\n\t"
92471+ "sbcs r11, r11, r7\n\t"
92472+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92473+ "sbc r3, r3, #0\n\t"
92474+ "sub %[r], %[r], #48\n\t"
92475+ "lsr r12, r3, #1\n\t"
92476+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92477+ "adds r8, r8, r3\n\t"
92478+ "adcs r9, r9, #0\n\t"
92479+ "adcs r10, r10, #0\n\t"
92480+ "adcs r11, r11, r3\n\t"
92481+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92482+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92483+ "adcs r8, r8, r12, LSL #1\n\t"
92484+ "adcs r9, r9, r3\n\t"
92485+ "adcs r10, r10, r3\n\t"
92486+ "adcs r11, r11, r3\n\t"
92487+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92488+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92489+ "adcs r8, r8, r3\n\t"
92490+ "adcs r9, r9, r3\n\t"
92491+ "adcs r10, r10, r3\n\t"
92492+ "adcs r11, r11, r3\n\t"
92493+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92494+ "adc r3, r3, #0\n\t"
92495+ "sub %[r], %[r], #48\n\t"
92496+ "lsr r12, r3, #1\n\t"
92497+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92498+ "adds r8, r8, r3\n\t"
92499+ "adcs r9, r9, #0\n\t"
92500+ "adcs r10, r10, #0\n\t"
92501+ "adcs r11, r11, r3\n\t"
92502+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92503+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92504+ "adcs r8, r8, r12, LSL #1\n\t"
92505+ "adcs r9, r9, r3\n\t"
92506+ "adcs r10, r10, r3\n\t"
92507+ "adcs r11, r11, r3\n\t"
92508+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92509+ "ldm %[r], {r8, r9, r10, r11}\n\t"
92510+ "adcs r8, r8, r3\n\t"
92511+ "adcs r9, r9, r3\n\t"
92512+ "adcs r10, r10, r3\n\t"
92513+ "adc r11, r11, r3\n\t"
92514+ "stm %[r]!, {r8, r9, r10, r11}\n\t"
92515+ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG
92516+ : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
92517+ :
92518+ #else
92519+ :
92520+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
92521+ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
92522+ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
92523+ "r12"
92524+ );
9219292525}
9219392526
9219492527#ifdef WOLFSSL_SP_SMALL
0 commit comments