From 8143005c3816b8fe12d88d559b8bffcedbc31e5d Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 23 Oct 2023 20:05:56 -0400 Subject: [PATCH 1/5] [Zvbc32e] enabling 32-bit vclmul/vclmulh --- disasm/disasm.cc | 2 +- disasm/isa_parser.cc | 2 ++ riscv/decode_macros.h | 1 + riscv/insns/vclmul_vv.h | 8 ++++---- riscv/insns/vclmul_vx.h | 8 ++++---- riscv/insns/vclmulh_vv.h | 4 ++-- riscv/insns/vclmulh_vx.h | 8 ++++---- riscv/isa_parser.h | 2 ++ riscv/zvk_ext_macros.h | 25 +++++++++++++++++++++++++ 9 files changed, 45 insertions(+), 15 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 06ee9b85cc..a248e8aa6d 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2184,7 +2184,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) #undef DISASM_VECTOR_VV_VX_VIU_ZIMM6 } - if (ext_enabled(EXT_ZVBC)) { + if (ext_enabled(EXT_ZVBC) || ext_enabled(EXT_ZVBC32E)) { #define DISASM_VECTOR_VV_VX(name) \ DEFINE_VECTOR_VV(name##_vv); \ DEFINE_VECTOR_VX(name##_vx) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index e2a8f1aecc..a1ce316196 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -282,6 +282,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVBB] = true; } else if (ext_str == "zvbc") { extension_table[EXT_ZVBC] = true; + } else if (ext_str == "zvbc32e") { + extension_table[EXT_ZVBC32E] = true; } else if (ext_str == "zvfbfmin") { extension_table[EXT_ZVFBFMIN] = true; } else if (ext_str == "zvfbfwma") { diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index f9a2f3c71d..ccb3efa7c6 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -163,6 +163,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define require_rv32 require(xlen == 32) #define require_extension(s) require(p->extension_enabled(s)) #define require_either_extension(A,B) require(p->extension_enabled(A) || p->extension_enabled(B)); +#define 
require_either_extension_condition(A,cA, B, cB) require(p->extension_enabled(A) && (cA) || p->extension_enabled(B) && (cB)); #define require_impl(s) require(p->supports_impl(s)) #define require_fp STATE.fflags->verify_permissions(insn, false) #define require_accelerator require(STATE.sstatus->enabled(SSTATUS_XS)) diff --git a/riscv/insns/vclmul_vv.h b/riscv/insns/vclmul_vv.h index 8957738adc..4cee57e062 100644 --- a/riscv/insns/vclmul_vv.h +++ b/riscv/insns/vclmul_vv.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VV_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the low 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the low SEW bits of the (2.SEW)-bit product. // vd = 0; for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { diff --git a/riscv/insns/vclmul_vx.h b/riscv/insns/vclmul_vx.h index 1df7a3a2a4..060d30a985 100644 --- a/riscv/insns/vclmul_vx.h +++ b/riscv/insns/vclmul_vx.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VX_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the low 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the low SEW bits of the (2.SEW)-bit product. 
// vd = 0; for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index 6a54bcfaa6..cb9e45df17 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -2,8 +2,8 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmulh_vx.h b/riscv/insns/vclmulh_vx.h index e874d1df68..8628a6bac6 100644 --- a/riscv/insns/vclmulh_vx.h +++ b/riscv/insns/vclmulh_vx.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VX_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the high 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the high SEW bits of the (2.SEW)-bit product. // vd = 0; for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 2864b6b903..3b47aea378 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -60,9 +60,11 @@ typedef enum { EXT_ZILSD, EXT_ZVBB, EXT_ZVBC, + EXT_ZVBC32E, EXT_ZVFBFMIN, EXT_ZVFBFWMA, EXT_ZVKG, + EXT_ZVKGS, EXT_ZVKNED, EXT_ZVKNHA, EXT_ZVKNHB, diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index 702ad9179a..d8d17462cc 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -29,6 +29,23 @@ require_extension(EXT_ZVBC); \ } while (0) +// Ensures that the ZVBC32e extension (vector carryless multiplication +// with 32-bit elements) is present, and the vector unit is enabled +// and in a valid state. 
+#define require_zvbc32e \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVBC32E); \ + } while (0) + +// Ensures that at least one ZVBC extension (vector carryless multiplication) +// is present, and the vector unit is enabled and in a valid state. +#define require_any_zvbc \ + do { \ + require_vector(true); \ + require_either_extension(EXT_ZVBC, EXT_ZVBC32E); \ + } while (0) + // Ensures that the ZVKG extension (vector Galois Field Multiplication) // is present, and the vector unit is enabled and in a valid state. #define require_zvkg \ @@ -37,6 +54,14 @@ require_extension(EXT_ZVKG); \ } while (0) +// Ensures that the ZVKGS extension (vector Galois Field Multiplication +// with vector-scalar variant) is present, and the vector unit is +// enabled and in a valid state. +#define require_zvkgs \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKGS); \ + } while (0) // Ensures that a ZVK extension supporting SHA-256 is present. // For SHA-256, this support is present in either Zvknha or Zvknhb. // Also ensures that the vector unit is enabled and in a valid state. From 16c21cd7313427ec100f156a8c1495f5ac2e0670 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Wed, 31 Jan 2024 19:28:11 -0800 Subject: [PATCH 2/5] [Zvbc32e] updating vclmulh_vv comment --- riscv/insns/vclmulh_vv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index cb9e45df17..9bf7c429f8 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -7,8 +7,8 @@ require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU. VI_VV_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the high 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the high SEW bits of the (2.SEW)-bit product. 
// vd = 0; for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { From 3df0232a0b63e4f598badb4973fcf51773038fee Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Tue, 23 Jul 2024 12:46:34 -0700 Subject: [PATCH 3/5] Importing new encoding.h with additional vector crypto instructions --- riscv/encoding.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/riscv/encoding.h b/riscv/encoding.h index 4d41b855f2..a44992ce21 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -1918,8 +1918,12 @@ #define MASK_VFWSUB_WF 0xfc00707f #define MATCH_VFWSUB_WV 0xd8001057 #define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VGHSH_VS 0x8e002077 +#define MASK_VGHSH_VS 0xfe00707f #define MATCH_VGHSH_VV 0xb2002077 #define MASK_VGHSH_VV 0xfe00707f +#define MATCH_VGMUL_VS 0xa608a077 +#define MASK_VGMUL_VS 0xfe0ff07f #define MATCH_VGMUL_VV 0xa208a077 #define MASK_VGMUL_VV 0xfe0ff07f #define MATCH_VID_V 0x5008a057 @@ -3790,7 +3794,9 @@ DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vghsh_vs, MATCH_VGHSH_VS, MASK_VGHSH_VS) DECLARE_INSN(vghsh_vv, MATCH_VGHSH_VV, MASK_VGHSH_VV) +DECLARE_INSN(vgmul_vs, MATCH_VGMUL_VS, MASK_VGMUL_VS) DECLARE_INSN(vgmul_vv, MATCH_VGMUL_VV, MASK_VGMUL_VV) DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) From c4cf0bf8e2806ef725b7adf8af1ca73251240105 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Wed, 24 Jul 2024 15:49:57 -0700 Subject: [PATCH 4/5] Adding support for Zvkgs's vgmul.vs and vghsh.vs --- disasm/disasm.cc | 5 +++++ disasm/isa_parser.cc | 2 ++ riscv/insns/vghsh_vs.h | 46 ++++++++++++++++++++++++++++++++++++++++++ riscv/insns/vgmul_vs.h | 41 +++++++++++++++++++++++++++++++++++++ riscv/riscv.mk.in | 9 +++++++++ riscv/zvk_ext_macros.h | 7 +++++++ 6 files changed, 110 insertions(+) create mode 100644 
riscv/insns/vghsh_vs.h create mode 100644 riscv/insns/vgmul_vs.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index a248e8aa6d..c81e3b68c5 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2202,6 +2202,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) DEFINE_VECTOR_VV(vghsh_vv); } + if (ext_enabled(EXT_ZVKGS)) { + DEFINE_VECTOR_V(vgmul_vs); + DEFINE_VECTOR_VV(vghsh_vs); + } + if (ext_enabled(EXT_ZVKNED)) { // Despite their suffixes, the vaes*.{vv,vs} instructions // are really ".v", with the form ".{vv,vs} vd, vs2". diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index a1ce316196..fcdb434e51 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -290,6 +290,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVFBFWMA] = true; } else if (ext_str == "zvkg") { extension_table[EXT_ZVKG] = true; + } else if (ext_str == "zvkgs") { + extension_table[EXT_ZVKGS] = true; } else if (ext_str == "zvkn") { extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKNED] = true; diff --git a/riscv/insns/vghsh_vs.h b/riscv/insns/vghsh_vs.h new file mode 100644 index 0000000000..ced25122d6 --- /dev/null +++ b/riscv/insns/vghsh_vs.h @@ -0,0 +1,46 @@ +// vghsh.vs vd, vs2, vs1 + +#include "zvk_ext_macros.h" + +require_zvkgs; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS1_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. 
+ EGU32x4_t H = P.VU.elt_group(vs2_num, 0); EGU32x4_BREV8(H);, + { + EGU32x4_t Y = P.VU.elt_group(vd_num, idx_eg);; // Current partial hash + EGU32x4_t X = P.VU.elt_group(vs1_num, idx_eg);; // Block cipher output + + EGU32x4_t Z = {}; + + // S = brev8(Y ^ X) + EGU32x4_t S; + EGU32x4_XOR(S, Y, X); + EGU32x4_BREV8(S); + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(S, bit)) { + EGU32x4_XOREQ(Z, H); + } + + const bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1. + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + // Update the destination register. + EGU32x4_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU32x4_COPY(vd, Z); + } +); diff --git a/riscv/insns/vgmul_vs.h b/riscv/insns/vgmul_vs.h new file mode 100644 index 0000000000..80a097d122 --- /dev/null +++ b/riscv/insns/vgmul_vs.h @@ -0,0 +1,41 @@ +// vgmul.vs vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvkgs; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + EGU32x4_t H = P.VU.elt_group(vs2_num, 0); EGU32x4_BREV8(H); + , + { + EGU32x4_t Y = P.VU.elt_group(vd_num, idx_eg); // Multiplier + EGU32x4_BREV8(Y); + EGU32x4_t Z = {}; + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(Y, bit)) { + EGU32x4_XOREQ(Z, H); + } + + bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1 + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + // Update the destination register. 
+ EGU32x4_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU32x4_COPY(vd, Z); + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index e7837816bc..7a1902e448 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1046,6 +1046,10 @@ riscv_insn_ext_zvkg= \ vghsh_vv \ vgmul_vv \ +riscv_insn_ext_zvkgs= \ + vghsh_vs \ + vgmul_vs \ + riscv_insn_ext_zvkned = \ vaesdf_vs \ vaesdf_vv \ @@ -1123,6 +1127,10 @@ riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvksed) \ $(riscv_insn_ext_zvksh) \ +riscv_insn_ext_zvka = \ + $(riscv_insn_ext_zvbc32e) \ + $(riscv_insn_ext_zvkgs) \ + riscv_insn_list = \ $(riscv_insn_ext_i) \ $(riscv_insn_ext_c) \ @@ -1149,6 +1157,7 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ $(riscv_insn_ext_zvk) \ + $(riscv_insn_ext_zvka) \ $(riscv_insn_ext_zvbdot) \ $(riscv_insn_ext_zvldot) \ $(riscv_insn_priv) \ diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index d8d17462cc..db08eb4ec5 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -983,6 +983,13 @@ (DST)[bidx] = (SRC)[bidx]; \ } +// Copies a EGU32x4_t value from 'SRC' into 'DST'. +#define EGU32x4_COPY(DST, SRC) \ + for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ + (DST)[bidx] = (SRC)[bidx]; \ + } + + // Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes // in A (mutated) with the bytes in B (unchanged). 
#define EGU8x16_XOREQ(MUT_A, CONST_B) \ From efbacc59cfec944234a1444ed4ff029a24bf95a3 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 6 Oct 2024 13:22:14 -0700 Subject: [PATCH 5/5] Extending Zvbc32e vclmul[h] to SEW 8 and 16 --- riscv/insns/vclmul_vv.h | 2 +- riscv/insns/vclmul_vx.h | 2 +- riscv/insns/vclmulh_vv.h | 2 +- riscv/insns/vclmulh_vx.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/riscv/insns/vclmul_vv.h b/riscv/insns/vclmul_vv.h index 4cee57e062..ca1a64d37a 100644 --- a/riscv/insns/vclmul_vv.h +++ b/riscv/insns/vclmul_vv.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmul_vx.h b/riscv/insns/vclmul_vx.h index 060d30a985..a929bda00f 100644 --- a/riscv/insns/vclmul_vx.h +++ b/riscv/insns/vclmul_vx.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VX_ULOOP ({ diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index 9bf7c429f8..12d5952003 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmulh_vx.h b/riscv/insns/vclmulh_vx.h index 8628a6bac6..77625672a1 100644 --- a/riscv/insns/vclmulh_vx.h +++ b/riscv/insns/vclmulh_vx.h @@ -3,7 +3,7 @@ #include 
"zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VX_ULOOP ({