From 25b92693a1b67a47b0c64a3410009d09e9658412 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 13 Feb 2020 12:14:52 +0000 Subject: arm64: mm: convert cpu_do_switch_mm() to C There's no reason that cpu_do_switch_mm() needs to be written as an assembly function, and having it as a C function would make it easier to maintain. This patch converts cpu_do_switch_mm() to C, removing code that this change makes redundant (e.g. the mmid macro). Since the header comment was stale and the prototype now implies all the necessary information, this comment is removed. The 'pgd_phys' argument is made a phys_addr_t to match the return type of virt_to_phys(). At the same time, post_ttbr_update_workaround() is updated to use IS_ENABLED(), which allows the compiler to figure out it can elide calls for !CONFIG_CAVIUM_ERRATUM_27456 builds. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Will Deacon [catalin.marinas@arm.com: change comments from asm-style to C-style] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 6 ------ arch/arm64/include/asm/mmu_context.h | 2 ++ arch/arm64/include/asm/proc-fns.h | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aca337d79d12..af03001293c6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -256,12 +256,6 @@ alternative_endif ldr \rd, [\rn, #VMA_VM_MM] .endm -/* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm /* * read_ctr - read CTR_EL0. If the system has mismatched register fields, * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3827ff4040a3..ab46187c6300 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -46,6 +46,8 @@ static inline void cpu_set_reserved_ttbr0(void) isb(); } +void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); + static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index a2ce65a0c1fa..0d5d1f0525eb 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -13,11 +13,9 @@ #include -struct mm_struct; struct cpu_suspend_ctx; extern void cpu_do_idle(void); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); -- cgit From 90765f745b08fdf069e4562f6985bdba0fefad8d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 28 Feb 2020 12:43:55 +0000 Subject: arm64: Update comment for ASID() macro Commit 25b92693a1b6 ("arm64: mm: convert cpu_do_switch_mm() to C") added a new use of the ASID() macro, so update the comment in asm/mmu.h which reasons about why an atomic reload of 'mm->context.id.counter' is not required. 
Cc: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index e4d862420bb4..21a4bcfdb378 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -23,9 +23,9 @@ typedef struct { } mm_context_t; /* - * This macro is only used by the TLBI code, which cannot race with an - * ASID change and therefore doesn't need to reload the counter using - * atomic64_read. + * This macro is only used by the TLBI and low-level switch_mm() code, + * neither of which can race with an ASID change. We therefore don't + * need to reload the counter using atomic64_read(). */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -- cgit From 2c9d45b43c39e26fd2a73f2203321cdaee98b58b Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:21 +0000 Subject: arm64: add support for the AMU extension v1 The activity monitors extension is an optional extension introduced by the ARMv8.4 CPU architecture. This implements basic support for version 1 of the activity monitors architecture, AMUv1. This support includes: - Extension detection on each CPU (boot, secondary, hotplugged) - Register interface for AMU aarch64 registers Signed-off-by: Ionela Voinescu Reviewed-by: Valentin Schneider Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/cpufeature.h | 5 +++++ arch/arm64/include/asm/sysreg.h | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 865e0253fc1e..185e44aa2713 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -58,7 +58,8 @@ #define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 #define ARM64_HAS_E0PD 49 #define ARM64_HAS_RNG 50 +#define ARM64_HAS_AMU_EXTN 51 -#define ARM64_NCAPS 51 +#define ARM64_NCAPS 52 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92ef9539874a..485e069d8768 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -678,6 +678,11 @@ static inline bool cpu_has_hw_af(void) ID_AA64MMFR1_HADBS_SHIFT); } +#ifdef CONFIG_ARM64_AMU_EXTN +/* Check whether the cpu supports the Activity Monitors Unit (AMU) */ +extern bool cpu_has_amu_feat(int cpu); +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b91570ff9db1..085d248f824e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -386,6 +386,42 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +/* Definitions for system register interface to AMU for ARMv8.4 onwards */ +#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) +#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) +#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1) +#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2) +#define SYS_AMUSERENR_EL0 SYS_AM_EL0(2, 3) +#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4) +#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5) +#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0) +#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1) + +/* + * Group 0 of activity monitors 
(architected): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 010:n<3> n<2:0> + * Type: 11 011 1101 011:n<3> n<2:0> + * n: 0-15 + * + * Group 1 of activity monitors (auxiliary): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 110:n<3> n<2:0> + * Type: 11 011 1101 111:n<3> n<2:0> + * n: 0-15 + */ + +#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPE0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPE1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) + +/* AMU v1: Fixed (architecturally defined) activity monitors */ +#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) +#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1) +#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2) +#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3) + #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) @@ -598,6 +634,7 @@ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_DIT_SHIFT 48 +#define ID_AA64PFR0_AMU_SHIFT 44 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -608,6 +645,7 @@ #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL0_SHIFT 0 +#define ID_AA64PFR0_AMU 0x1 #define ID_AA64PFR0_SVE 0x1 #define ID_AA64PFR0_RAS_V1 0x1 #define ID_AA64PFR0_FP_NI 0xf -- cgit From 87a1f063464afd934f0f22aac710ca65bef77af3 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:22 +0000 Subject: arm64: trap to EL1 accesses to AMU counters from EL0 The activity monitors extension is an optional extension introduced by the ARMv8.4 CPU architecture. In order to access the activity monitors counters safely, if desired, the kernel should detect the presence of the extension through the feature register, and mediate the access. Therefore, disable direct accesses to activity monitors counters from EL0 (userspace) and trap them to EL1 (kernel). To be noted that the ARM64_AMU_EXTN kernel config does not have an effect on this code. Given that the amuserenr_el0 resets to an UNKNOWN value, setting the trap of EL0 accesses to EL1 is always attempted for safety and security considerations. Therefore firmware should still ensure accesses to AMU registers are not trapped in EL2/EL3 as this code cannot be bypassed if the CPU implements the Activity Monitors Unit. 
Signed-off-by: Ionela Voinescu Reviewed-by: James Morse Reviewed-by: Valentin Schneider Reviewed-by: Suzuki K Poulose Cc: Steve Capper Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aca337d79d12..c5487806273f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -430,6 +430,16 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU 9000: .endm +/* + * reset_amuserenr_el0 - reset AMUSERENR_EL0 if AMUv1 present + */ + .macro reset_amuserenr_el0, tmpreg + mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1 + ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4 + cbz \tmpreg, .Lskip_\@ // Skip if no AMU present + msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0 +.Lskip_\@: + .endm /* * copy_page - copy src to dest using temp registers t1-t8 */ -- cgit From 4fcdf106a4330bb5c2306a1efbb3af3b7c0db537 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:23 +0000 Subject: arm64/kvm: disable access to AMU registers from kvm guests Access to the AMU counters should be disabled by default in kvm guests, as information from the counters might reveal activity in other guests or activity on the host. Therefore, disable access to AMU registers from EL0 and EL1 in kvm guests by: - Hiding the presence of the extension in the feature register (SYS_ID_AA64PFR0_EL1) on the VCPU. - Disabling access to the AMU registers before switching to the guest. - Trapping accesses and injecting an undefined instruction into the guest. Signed-off-by: Ionela Voinescu Reviewed-by: Suzuki K Poulose Reviewed-by: Valentin Schneider Acked-by: Marc Zyngier Cc: Will Deacon Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Julien Thierry Cc: James Morse Cc: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_arm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6e5d839f42b5..51c1d9918999 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -267,6 +267,7 @@ /* Hyp Coprocessor Trap Register */ #define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) -- cgit From cd0ed03a8903a0b0c6fc36e32d133d1ddfe70cd6 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:26 +0000 Subject: arm64: use activity monitors for frequency invariance The Frequency Invariance Engine (FIE) is providing a frequency scaling correction factor that helps achieve more accurate load-tracking. So far, for arm and arm64 platforms, this scale factor has been obtained based on the ratio between the current frequency and the maximum supported frequency recorded by the cpufreq policy. The setting of this scale factor is triggered from cpufreq drivers by calling arch_set_freq_scale. The current frequency used in computation is the frequency requested by a governor, but it may not be the frequency that was implemented by the platform. This correction factor can also be obtained using a core counter and a constant counter to get information on the performance (frequency based only) obtained in a period of time. 
This will more accurately reflect the actual current frequency of the CPU, compared with the alternative implementation that reflects the request of a performance level from the OS. Therefore, implement arch_scale_freq_tick to use activity monitors, if present, for the computation of the frequency scale factor. The use of AMU counters depends on: - CONFIG_ARM64_AMU_EXTN - depends on the AMU extension being present - CONFIG_CPU_FREQ - the current frequency obtained using counter information is divided by the maximum frequency obtained from the cpufreq policy. While it is possible to have a combination of CPUs in the system with and without support for activity monitors, the use of counters for frequency invariance is only enabled for a CPU if all related CPUs (CPUs in the same frequency domain) support and have enabled the core and constant activity monitor counters. In this way, there is a clear separation between the policies for which arch_set_freq_scale (cpufreq based FIE) is used, and the policies for which arch_scale_freq_tick (counter based FIE) is used to set the frequency scale factor. For this purpose, a late_initcall_sync is registered to trigger validation work for policies that will enable or disable the use of AMU counters for frequency invariance. If CONFIG_CPU_FREQ is not defined, the use of counters is enabled on all CPUs only if all possible CPUs correctly support the necessary counters. Signed-off-by: Ionela Voinescu Reviewed-by: Lukasz Luba Acked-by: Sudeep Holla Cc: Sudeep Holla Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/topology.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index a4d945db95a2..21d4d40d6243 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -16,6 +16,15 @@ int pcibus_to_node(struct pci_bus *bus); #include +#ifdef CONFIG_ARM64_AMU_EXTN +/* + * Replace task scheduler's default counter-based + * frequency-invariance scale factor setting. 
+ */ +void topology_scale_freq_tick(void); +#define arch_scale_freq_tick topology_scale_freq_tick +#endif /* CONFIG_ARM64_AMU_EXTN */ + /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale -- cgit From 27afb236fe5adaa3911e47c91057ba783549226f Mon Sep 17 00:00:00 2001 From: 王程刚 Date: Mon, 9 Mar 2020 15:21:42 +0800 Subject: arch/arm64: fix typo in a comment Fix typo in a comment in arch/arm64/include/asm/esr.h "Unallocted" -> "Unallocated" Signed-off-by: Chenggang Wang Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index cb29253ae86b..6a395a7e6707 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -60,7 +60,7 @@ #define ESR_ELx_EC_BKPT32 (0x38) /* Unallocated EC: 0x39 */ #define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ -/* Unallocted EC: 0x3B */ +/* Unallocated EC: 0x3B */ #define ESR_ELx_EC_BRK64 (0x3C) /* Unallocated EC: 0x3D - 0x3F */ #define ESR_ELx_EC_MAX (0x3F) -- cgit From e9c7ddbf8b4b6a291bf3b5bfa7c883235164d9be Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 20 Jan 2020 18:52:29 +0000 Subject: arm64: csum: Optimise IPv6 header checksum Throwing our __uint128_t idioms at csum_ipv6_magic() makes it about 1.3x-2x faster across a range of microarchitecture/compiler combinations. Not much in absolute terms, but every little helps. Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/checksum.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 8d2a7de39744..b6f7bc6da5fb 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -5,7 +5,12 @@ #ifndef __ASM_CHECKSUM_H #define __ASM_CHECKSUM_H -#include +#include + +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); static inline __sum16 csum_fold(__wsum csum) { -- cgit From 1db5cdeccd813330aaab19b3fccab15e1d07fe12 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 21 Feb 2020 14:50:21 +0000 Subject: arm64: cpufeature: add cpus_have_final_cap() When cpus_have_const_cap() was originally introduced it was intended to be safe in hyp context, where it is not safe to access the cpu_hwcaps array as cpus_have_cap() did. For more details see commit: a4023f682739439b ("arm64: Add hypervisor safe helper for checking constant capabilities") We then made use of cpus_have_const_cap() throughout the kernel. Subsequently, we had to defer updating the static_key associated with each capability in order to avoid lockdep complaints. To avoid breaking kernel-wide usage of cpus_have_const_cap(), this was updated to fall back to the cpu_hwcaps array if called before the static_keys were updated. As the kvm hyp code was only called later than this, the fallback is redundant but not functionally harmful. For more details, see commit: 63a1e1c95e60e798 ("arm64/cpufeature: don't use mutex in bringup path") Today we have more users of cpus_have_const_cap() which are only called once the relevant static keys are initialized, and it would be beneficial to avoid the redundant code. 
To that end, this patch adds a new cpus_have_final_cap() helper, which is intended to be used in code which is only run once capabilities have been finalized, and will never check the cpus_hwcap array. This helps the compiler to generate better code as it no longer needs to generate code to address and test the cpus_hwcap array. To help catch misuse, cpus_have_final_cap() will BUG() if called before capabilities are finalized. In hyp context, BUG() will result in a hyp panic, but the specific BUG() instance will not be identified in the usual way. Comments are added to the various cpus_have_*_cap() helpers to describe the constraints on when they can be used. For clarity, cpus_have_cap() is moved above the other helpers. Similarly, the helpers are updated to use system_capabilities_finalized() consistently, and this is made __always_inline as required by its new callers. Signed-off-by: Mark Rutland Reviewed-by: Marc Zyngier Reviewed-by: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 58 ++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 11 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92ef9539874a..940b2b67b428 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -390,14 +390,16 @@ unsigned long cpu_get_elf_hwcap2(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) -/* System capability check for constant caps */ -static __always_inline bool __cpus_have_const_cap(int num) +static __always_inline bool system_capabilities_finalized(void) { - if (num >= ARM64_NCAPS) - return false; - return static_branch_unlikely(&cpu_hwcap_keys[num]); + return static_branch_likely(&arm64_const_caps_ready); } +/* + * Test for a capability with a runtime check. + * + * Before the capability is detected, this returns false. + */ static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) @@ -405,14 +407,53 @@ static inline bool cpus_have_cap(unsigned int num) return test_bit(num, cpu_hwcaps); } +/* + * Test for a capability without a runtime check. + * + * Before capabilities are finalized, this returns false. + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. + */ +static __always_inline bool __cpus_have_const_cap(int num) +{ + if (num >= ARM64_NCAPS) + return false; + return static_branch_unlikely(&cpu_hwcap_keys[num]); +} + +/* + * Test for a capability, possibly with a runtime check. + * + * Before capabilities are finalized, this behaves as cpus_have_cap(). + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. + */ static __always_inline bool cpus_have_const_cap(int num) { - if (static_branch_likely(&arm64_const_caps_ready)) + if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else return cpus_have_cap(num); } +/* + * Test for a capability without a runtime check. + * + * Before capabilities are finalized, this will BUG(). + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. 
+ */ +static __always_inline bool cpus_have_final_cap(int num) +{ + if (system_capabilities_finalized()) + return __cpus_have_const_cap(num); + else + BUG(); +} + static inline void cpus_set_cap(unsigned int num) { if (num >= ARM64_NCAPS) { @@ -613,11 +654,6 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } -static inline bool system_capabilities_finalized(void) -{ - return static_branch_likely(&arm64_const_caps_ready); -} - #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 -- cgit From c17a290f7e7e59d24b4507736b7b40b0eb5f8f1f Mon Sep 17 00:00:00 2001 From: "glider@google.com" Date: Thu, 12 Mar 2020 16:59:20 +0100 Subject: arm64: define __alloc_zeroed_user_highpage When running the kernel with init_on_alloc=1, calling the default implementation of __alloc_zeroed_user_highpage() from include/linux/highmem.h leads to double-initialization of the allocated page (first by the page allocator, then by clear_user_page()). Calling alloc_page_vma() with __GFP_ZERO, similarly to e.g. x86, seems to be enough to ensure the user page is zeroed only once. Signed-off-by: Alexander Potapenko Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index d39ddb258a04..75d6cd23a679 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -21,6 +21,10 @@ extern void __cpu_copy_user_page(void *to, const void *from, extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + #define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) #define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) -- cgit From 8e35aa642ee4dab01b16cc4b2df59d1936f3b3c2 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 2 Mar 2020 18:17:50 +0000 Subject: arm64: cpufeature: Extract capped perfmon fields When emulating ID registers there is often a need to cap the version bits of a feature such that the guest will not use features that the host is not aware of. For example, when KVM mediates access to the PMU by emulating register accesses. Let's add a helper that extracts a performance monitors ID field and caps the version to a given value. Fields that identify the version of the Performance Monitors Extension do not follow the standard ID scheme, and instead follow the scheme described in ARM DDI 0487E.a page D13-2825 "Alternative ID scheme used for the Performance Monitors Extension version". The value 0xF means an IMPLEMENTATION DEFINED PMU is present, and values 0x0-0xE can be treated the same as an unsigned field with 0x0 meaning no PMU is present. 
Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose [Mark: rework to handle perfmon fields] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92ef9539874a..186f4e19207e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -447,6 +447,29 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } +/* + * Fields that identify the version of the Performance Monitors Extension do + * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825, + * "Alternative ID scheme used for the Performance Monitors Extension version". + */ +static inline u64 __attribute_const__ +cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) +{ + u64 val = cpuid_feature_extract_unsigned_field(features, field); + u64 mask = GENMASK_ULL(field + 3, field); + + /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ + if (val == 0xf) + val = 0; + + if (val > cap) { + features &= ~mask; + features |= (cap << field) & mask; + } + + return features; +} + static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } -- cgit From c854188ea01062f5a5fd7f05658feb1863774eaa Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 2 Mar 2020 18:17:51 +0000 Subject: KVM: arm64: limit PMU version to PMUv3 for ARMv8.1 We currently expose the PMU version of the host to the guest via emulation of the DFR0_EL1 and AA64DFR0_EL1 debug feature registers. However, many of the features offered beyond PMUv3 for ARMv8.1 are not supported in KVM. Examples of this include support for the PMMIR registers (added in PMUv3 for ARMv8.4) and 64-bit event counters (added in PMUv3 for ARMv8.5). Let's trap the Debug Feature Registers in order to limit PMUVer/PerfMon in the Debug Feature Registers to PMUv3 for ARMv8.1 to avoid unexpected behaviour. Both ID_AA64DFR0.PMUVer and ID_DFR0.PerfMon follow the "Alternative ID scheme used for the Performance Monitors Extension version" where 0xF means an IMPLEMENTATION DEFINED PMU is implemented, and values 0x0-0xE are treated the same as an unsigned field (with 0x0 meaning no PMU is present). As we don't expect to expose an IMPLEMENTATION DEFINED PMU, and our cap is below 0xF, we can treat these fields as unsigned when applying the cap. 
Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose [Mark: make field names consistent, use perfmon cap] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b91570ff9db1..d8f1eed070f0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -702,6 +702,12 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_PMUVER_8_1 0x4 + +#define ID_DFR0_PERFMON_SHIFT 24 + +#define ID_DFR0_PERFMON_8_1 0x4 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 -- cgit From 8673e02e58410e6c4cefa499efa846286e45a991 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 2 Mar 2020 18:17:52 +0000 Subject: arm64: perf: Add support for ARMv8.5-PMU 64-bit counters At present ARMv8 event counters are limited to 32-bits, though by using the CHAIN event it's possible to combine adjacent counters to achieve 64-bits. The perf config1:0 bit can be set to use such a configuration. With the introduction of ARMv8.5-PMU support, all event counters can now be used as 64-bit counters. Let's enable 64-bit event counters where support exists. Unless the user sets config1:0 we will adjust the counter value such that it overflows upon 32-bit overflow. This follows the same behaviour as the cycle counter which has always been (and remains) 64-bits. Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose [Mark: fix ID field names, compare with 8.5 value] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 3 ++- arch/arm64/include/asm/sysreg.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2bdbc79bbd01..e7765b62c712 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -176,9 +176,10 @@ #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d8f1eed070f0..9b66c5b5b36f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -702,7 +702,11 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_PMUVER_8_0 0x1 #define ID_AA64DFR0_PMUVER_8_1 0x4 +#define ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf #define ID_DFR0_PERFMON_SHIFT 24 -- cgit From 6885fb129be30c627eb2f5b1498dba498ff6c037 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Mar 2020 10:01:43 +1100 Subject: arm64: Rename cpu_read_ops() to init_cpu_ops() This renames cpu_read_ops() to init_cpu_ops() as the function is only called in initialization phase. 
Also, we will introduce get_cpu_ops() in the subsequent patches, to retrieve the CPU operations for the given CPU index. The names cpu_read_ops() and get_cpu_ops() would be difficult to distinguish from one another. Signed-off-by: Gavin Shan Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu_ops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 86aabf1e0199..baa13b5db2ca 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -56,11 +56,11 @@ struct cpu_operations { }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -int __init cpu_read_ops(int cpu); +int __init init_cpu_ops(int cpu); -static inline void __init cpu_read_bootcpu_ops(void) +static inline void __init init_bootcpu_ops(void) { - cpu_read_ops(0); + init_cpu_ops(0); } #endif /* ifndef __ASM_CPU_OPS_H */ -- cgit From de58ed5e16e62f36c7ed05552f18b7f9c647dcaf Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Mar 2020 10:01:44 +1100 Subject: arm64: Introduce get_cpu_ops() helper function This introduces get_cpu_ops() to return the CPU operations according to the given CPU index. For now, it simply returns the @cpu_ops[cpu] as before. Also, helper function __cpu_try_die() is introduced to be shared by cpu_die() and ipi_cpu_crash_stop(). So it shouldn't introduce any functional changes. Signed-off-by: Gavin Shan Signed-off-by: Catalin Marinas Acked-by: Mark Rutland --- arch/arm64/include/asm/cpu_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index baa13b5db2ca..d28e8f37d3b4 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -55,8 +55,8 @@ struct cpu_operations { #endif }; -extern const struct cpu_operations *cpu_ops[NR_CPUS]; int __init init_cpu_ops(int cpu); +extern const struct cpu_operations *get_cpu_ops(int cpu); static inline void __init init_bootcpu_ops(void) { -- cgit
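
As a standalone illustration of the ID-register capping scheme that cpuid_feature_cap_perfmon_field() implements in the patches above, the sketch below clamps a hypothetical PMUVer-style field. The field position and the sample register value are assumptions chosen for demonstration only; the clamping logic mirrors the helper added in the patch but is not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Assumed 4-bit PMUVer-style field at bits [11:8] of a sample ID register. */
#define FIELD_SHIFT	8

static uint64_t cap_perfmon_field(uint64_t reg, unsigned int shift, uint64_t cap)
{
	uint64_t val = (reg >> shift) & 0xf;
	uint64_t mask = 0xfULL << shift;

	/* 0xf (IMPLEMENTATION DEFINED) is treated as if no PMU were present. */
	if (val == 0xf)
		val = 0;

	/* Any version newer than the cap is clamped down to the cap. */
	if (val > cap) {
		reg &= ~mask;
		reg |= (cap << shift) & mask;
	}
	return reg;
}

int main(void)
{
	uint64_t reg = 0x6ULL << FIELD_SHIFT;	/* host advertises PMUv3 for ARMv8.5 */

	/* Prints 0x400: the field is capped to 0x4, i.e. PMUv3 for ARMv8.1. */
	printf("0x%llx\n", (unsigned long long)cap_perfmon_field(reg, FIELD_SHIFT, 0x4));
	return 0;
}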