author | Heiko Carstens <hca@linux.ibm.com> | 2024-02-03 11:45:23 +0100
---|---|---
committer | Heiko Carstens <hca@linux.ibm.com> | 2024-02-16 14:30:17 +0100
commit | dcd3e1de9d17dc43dfed87a9fc814b9dec508043 (patch) |
tree | f4dc28be4bafb8cde3a619428250517fac82244d /arch/s390/include/asm/fpu-insn.h |
parent | cb2a1dd589a0ce97429bf2beeb560e5b030c2ccc (diff) |
s390/checksum: provide csum_partial_copy_nocheck()
With csum_partial(), which reads all bytes into registers, it is easy to
also implement csum_partial_copy_nocheck(), which copies the buffer while
calculating its checksum.

For a 512 byte buffer this reduces the runtime by 19%. Compared to the old
generic variant (memcpy() + cksm instruction), the runtime is reduced by 42%.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
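
To illustrate the idea, here is a hedged sketch, not the in-tree arch/s390/lib/csum-partial.c code: once the data is pulled through vector registers for checksumming anyway, the copy falls out almost for free by storing each register with the new fpu_vst()/fpu_vstl() helpers. The load and checksum helpers referenced below (fpu_vl(), fpu_vll(), fpu_vcksm(), fpu_vzero(), fpu_vlgvf()) are assumed to live alongside them in fpu-insn.h, and a real caller must bracket the vector usage with kernel_fpu_begin()/kernel_fpu_end().

```c
/*
 * Illustrative sketch only, not the in-tree implementation. Assumes
 * the caller reserved vector registers 16 and 20 via
 * kernel_fpu_begin()/kernel_fpu_end(), and that the fpu_vl(),
 * fpu_vll(), fpu_vcksm(), fpu_vzero() and fpu_vlgvf() helpers from
 * the same header are available.
 */
static __wsum csum_copy_sketch(void *dst, const void *src, int len, __wsum sum)
{
	fpu_vzero(16);				/* zero the checksum accumulator */
	fpu_vlvgf(16, (__force u32)sum, 1);	/* seed word element 1 with sum */
	while (len >= 16) {
		fpu_vl(20, src);		/* read 16 bytes into %v20 */
		fpu_vst(20, dst);		/* ...and copy them out */
		fpu_vcksm(16, 20, 16);		/* fold %v20 into the checksum */
		src += 16;
		dst += 16;
		len -= 16;
	}
	if (len) {
		fpu_vll(20, len - 1, src);	/* partial load, remainder zeroed */
		fpu_vstl(20, len - 1, dst);	/* partial store of len bytes */
		fpu_vcksm(16, 20, 16);
	}
	return (__force __wsum)fpu_vlgvf(16, 1);
}
```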
Diffstat (limited to 'arch/s390/include/asm/fpu-insn.h')
-rw-r--r-- | arch/s390/include/asm/fpu-insn.h | 58 |
1 file changed, 58 insertions, 0 deletions
```diff
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index 7e9997fa45d3..35c4fbe0bdd6 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -241,6 +241,64 @@ static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
 
 #ifdef CONFIG_CC_IS_CLANG
 
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VST	%[v1],0,,1\n"
+		: [vxr] "=R" (*(__vector128 *)vxr)
+		: [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile("VST	%[v1],%O[vxr],,%R[vxr]\n"
+		     : [vxr] "=Q" (*(__vector128 *)vxr)
+		     : [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_write(vxr, size);
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VSTL	%[v1],%[index],0,1\n"
+		: [vxr] "=R" (*(u8 *)vxr)
+		: [index] "d" (index), [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_write(vxr, size);
+	asm volatile("VSTL	%[v1],%[index],%O[vxr],%R[vxr]\n"
+		     : [vxr] "=Q" (*(u8 *)vxr)
+		     : [index] "d" (index), [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
 #define fpu_vstm(_v1, _v3, _vxrs)					\
 ({									\
 	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
```
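
Two details of the new helpers are worth spelling out. The clang variants load the buffer address into general purpose register 1 with la and pass it as an explicit base register (hence the extra "1" clobber), while the gcc variants let the compiler encode a base+displacement address directly via the "Q" constraint and the %O/%R operand modifiers. And, as the min(index + 1, sizeof(__vector128)) in the instrumentation shows, fpu_vstl() takes the highest byte index to store rather than a length, mirroring the VSTL instruction itself. A minimal hypothetical call, assuming vector register 20 already holds data and the caller is inside a kernel_fpu_begin()/kernel_fpu_end() section:

```c
	u8 buf[5];

	/* index = sizeof(buf) - 1 = 4: stores bytes 0..4, i.e. 5 bytes */
	fpu_vstl(20, sizeof(buf) - 1, buf);
```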