# --- T2-COPYRIGHT-NOTE-BEGIN --- # T2 SDE: package/*/linux/rvv-0.7.patch.riscv64 # Copyright (C) 2024 The T2 SDE Project # # This Copyright note is generated by scripts/Create-CopyPatch, # more information can be found in the files COPYING and README. # # This patch file is dual-licensed. It is available under the license the # patched project is licensed under, as long as it is an OpenSource license # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms # of the GNU General Public License version 2 as used by the T2 SDE. # --- T2-COPYRIGHT-NOTE-END --- From: Heiko Stuebner The VCSR CSR contains two elements VXRM[2:1] and VXSAT[0]. Define constants for those to access the elements in a readable way. Acked-by: Guo Ren Reviewed-by: Conor Dooley Signed-off-by: Heiko Stuebner --- arch/riscv/include/asm/csr.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index b98b3b6c9da2..2d79bca6ffe8 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -199,6 +199,11 @@ #define ENVCFG_CBIE_INV _AC(0x3, UL) #define ENVCFG_FIOM _AC(0x1, UL) +/* VCSR flags */ +#define VCSR_VXRM_MASK 3 +#define VCSR_VXRM_SHIFT 1 +#define VCSR_VXSAT_MASK 1 + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 -- 2.39.2 From: Heiko Stuebner There is at least one core implementing the wrong vector specification, which cannot claim to implement the v extension but still is able to do vectors similar to v. To not hack around this by claiming to do v, move the has_vector() return to act similar to riscv_noncoherent_supported() and move to a separate variable that can be set for example from errata code. Signed-off-by: Heiko Stuebner --- arch/riscv/include/asm/vector.h | 5 ++++- arch/riscv/kernel/setup.c | 6 ++++++ arch/riscv/kernel/vector.c | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 04c0b07bf6cd..315c96d2b4d0 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -79,6 +79,9 @@ void __init riscv_v_setup_ctx_cache(void); void riscv_v_thread_alloc(struct task_struct *tsk); +extern bool riscv_v_supported; +void riscv_vector_supported(void); + static inline u32 riscv_v_flags(void) { return READ_ONCE(current->thread.riscv_v_flags); @@ -86,7 +89,7 @@ static __always_inline bool has_vector(void) { - return riscv_has_extension_unlikely(RISCV_ISA_EXT_v); + return riscv_v_supported; } static inline void __riscv_v_vstate_clean(struct pt_regs *regs) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 971fe776e2f8..952dfb90525e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "head.h" @@ -308,6 +309,10 @@ void __init setup_arch(char **cmdline_p) init_rt_signal_env(); apply_boot_alternatives(); + if (IS_ENABLED(CONFIG_RISCV_ISA_V) && + riscv_isa_extension_available(NULL, v)) + riscv_vector_supported(); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) riscv_noncoherent_supported(); diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index f9c8e19ab301..74178fb71805 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -22,6 +22,9 @@ static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); +bool riscv_v_supported; +EXPORT_SYMBOL_GPL(riscv_v_supported); + unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -274,3 +277,8 @@ static int riscv_v_init(void) return riscv_v_sysctl_init(); } core_initcall(riscv_v_init); + +void riscv_vector_supported(void) +{ + riscv_v_supported = true; +} -- 2.39.2 From: Heiko Stuebner T-Head C9xx cores implement an older version (0.7.1) of the vector specification. Relevant changes concerning the kernel are: - different placement of the SR_VS bit for the vector unit status - different encoding of the vsetvli instruction - different instructions for loads and stores And a fixed VLEN of 128. The in-kernel access to vector instances is limited to the save and restore of process states so the above mentioned areas can simply be handled via the alternatives framework, similar to other T-Head specific issues. Signed-off-by: Heiko Stuebner --- arch/riscv/Kconfig.errata | 13 +++ arch/riscv/errata/thead/errata.c | 32 ++++++ arch/riscv/include/asm/csr.h | 24 ++++- arch/riscv/include/asm/errata_list.h | 45 ++++++++- arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++-- arch/riscv/kernel/vector.c | 2 +- 6 files changed, 238 insertions(+), 17 deletions(-) diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 0c8f4652cd82..b461312dd452 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_VECTOR + bool "Apply T-Head Vector errata" + depends on ERRATA_THEAD && RISCV_ISA_V + default y + help + The T-Head C9xx cores implement an earlier version 0.7.1 + of the vector extensions. + + This will apply the necessary errata to handle the non-standard + behaviour via when switch to and from vector mode for processes. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index c259dc925ec1..c41ec84bc8a5 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static bool errata_probe_pbmt(unsigned int stage, @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_vector(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { + /* + * Disable VECTOR to detect illegal usage of vector in kernel. + * This is normally done in _start_kernel but with the + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's + * vector-0.7.1 and the vector-1.0-bits are unused there. + */ + csr_clear(CSR_STATUS, SR_VS_THEAD); + return false; + } + + /* let has_vector() return true and set the static vlen */ + riscv_vector_supported(); + riscv_v_vsize = 128 / 8 * 32; + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + if (errata_probe_vector(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 2d79bca6ffe8..521b3b939e51 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -24,11 +24,25 @@ #define SR_FS_CLEAN _AC(0x00004000, UL) #define SR_FS_DIRTY _AC(0x00006000, UL) -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ -#define SR_VS_OFF _AC(0x00000000, UL) -#define SR_VS_INITIAL _AC(0x00000200, UL) -#define SR_VS_CLEAN _AC(0x00000400, UL) -#define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_OFF _AC(0x00000000, UL) + +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) + +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + +/* + * Always default to vector-1.0 handling in assembly and let the broken + * implementations handle their case separately. + */ +#ifdef __ASSEMBLY__ +#define SR_VS SR_VS_1_0 +#endif #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..ab21fadbe9c6 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -26,6 +26,7 @@ #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_PMU 1 #define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_VECTOR 3 #endif #ifdef __ASSEMBLY__ @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ : "=r" (__ovl) : \ : "memory") +#ifdef CONFIG_ERRATA_THEAD_VECTOR + +#define THEAD_C9XX_CSR_VXSAT 0x9 +#define THEAD_C9XX_CSR_VXRM 0xa + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. + */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 +#define ALT_SR_VS_THEAD_SHIFT 23 + +#define ALT_SR_VS(_val, prot) \ +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ + : "=r"(_val) \ + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(ALT_SR_VS_THEAD_SHIFT)) +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 315c96d2b4d0..fa47f60f81e3 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,55 @@ #include #include #include +#include + +#ifdef CONFIG_ERRATA_THEAD_VECTOR + +static inline unsigned long riscv_sr_vs(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS); + return val; +} + +static inline unsigned long riscv_sr_vs_initial(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_INITIAL); + return val; +} + +static inline unsigned long riscv_sr_vs_clean(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_CLEAN); + return val; +} + +static inline unsigned long riscv_sr_vs_dirty(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_DIRTY); + return val; +} + +#define SR_VS riscv_sr_vs() +#define SR_VS_INITIAL riscv_sr_vs_initial() +#define SR_VS_CLEAN riscv_sr_vs_clean() +#define SR_VS_DIRTY riscv_sr_vs_dirty() + +#else /* CONFIG_ERRATA_THEAD_VECTOR */ + +#define SR_VS SR_VS_1_0 +#define SR_VS_INITIAL SR_VS_INITIAL_1_0 +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 + +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ extern bool riscv_v_supported; void riscv_vector_supported(void); @@ -68,27 +117,76 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. + * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + dest->vlenb = riscv_v_vsize / 32; + asm volatile (ALTERNATIVE( "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" "csrr %3, " __stringify(CSR_VCSR) "\n\t" "csrr %4, " __stringify(CSR_VLENB) "\n\t" + __nops(4), + "csrs sstatus, t1\n\t" + "csrr %0, " __stringify(CSR_VSTART) "\n\t" + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" + "csrr %2, " __stringify(CSR_VL) "\n\t" + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" + "or %3, %3, t4\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr), "=r" (dest->vlenb) : "r"(t1) : "t4"); } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. + */ + asm volatile (ALTERNATIVE( ".option push\n\t" ".option arch, +v\n\t" "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" "csrw " __stringify(CSR_VCSR) ", %3\n\t" + __nops(6), + "csrs sstatus, t1\n\t" + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvl x0, %2, %1\n\t" + ".option pop\n\t" + "csrw " __stringify(CSR_VSTART) ", %0\n\t" + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + "r" (src->vcsr), "r"(t1) : "t4"); } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( + asm volatile (ALTERNATIVE( + "nop\n\t" ".option push\n\t" ".option arch, +v\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ "vle8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + ".option pop\n\t", + "mv t0, %1\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V8T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V16T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) + : "=&r" (vl) : "r" (datap) : "t0", "t4"); __vstate_csr_restore(restore_from); riscv_v_disable(); } diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 74178fb71805..51726890a4d0 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 insn = (u32)regs->badaddr; /* Do not handle if V is not supported, or disabled */ - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + if (!has_vector()) return false; /* If V has been enabled then it is not the first-use trap */ -- 2.39.2