diff options
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r-- | arch/arm64/kvm/hyp/entry.S | 7 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/exception.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/hyp-entry.S | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/gfp.h | 45 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/memory.h | 7 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mm.h | 13 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/cache.S | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 60 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/page_alloc.c | 112 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 33 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 61 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/reserved_mem.c | 3 |
15 files changed, 230 insertions, 149 deletions
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e831d3dfd50d..435346ea1504 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -13,6 +13,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_mte.h> #include <asm/kvm_ptrauth.h> .text @@ -51,6 +52,9 @@ alternative_else_nop_endif add x29, x0, #VCPU_CONTEXT + // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1) + mte_switch_to_guest x29, x1, x2 + // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // when this feature is enabled for kernel code. ptrauth_switch_to_hyp x1, x2, x3, x4, x5 + // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1) + mte_switch_to_hyp x1, x2, x3 + // Restore hyp's sp_el0 restore_sp_el0 x2, x3 diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 11541b94b328..0418399e0a20 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + if (kvm_has_mte(vcpu->kvm)) + new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5f49df4ffdd8..9aa9b73475c9 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -76,6 +76,7 @@ el1_trap: b __guest_exit el1_irq: +el1_fiq: get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit @@ -131,7 +132,6 @@ SYM_CODE_END(\label) invalid_vector el2t_error_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid .ltorg @@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_sync // Synchronous 64-bit EL1 valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_fiq // FIQ 64-bit EL1 valid_vect el1_error // Error 64-bit EL1 valid_vect el1_sync // Synchronous 32-bit EL1 valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_fiq // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index cce43bfe158f..de7e14c862e6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -14,6 +14,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return kvm_has_mte(kern_hyp_va(vcpu->kvm)); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + if (ctxt_has_mte(ctxt)) { + ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); + ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); + } + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); @@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + if (ctxt_has_mte(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); + write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); + } + if (!has_vhe() && cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && ctxt->__hyp_running_vcpu) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 18a4494337bd..fb0f523d1492 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include <nvhe/memory.h> #include <nvhe/spinlock.h> -#define HYP_NO_ORDER UINT_MAX +#define HYP_NO_ORDER USHRT_MAX struct hyp_pool { /* @@ -19,48 +19,13 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned int max_order; + unsigned short max_order; }; -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - p->refcount++; - hyp_spin_unlock(&pool->lock); -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - int ret; - - hyp_spin_lock(&pool->lock); - p->refcount--; - ret = (p->refcount == 0); - hyp_spin_unlock(&pool->lock); - - return ret; -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - if (p->refcount) { - hyp_spin_unlock(&pool->lock); - BUG(); - } - p->refcount = 1; - hyp_spin_unlock(&pool->lock); -} - /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); -void hyp_get_page(void *addr); -void hyp_put_page(void *addr); +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); /* Used pages cannot be freed */ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 42d81ec739fa..9c227d87c36d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -23,7 +23,7 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index fd78bde939ee..592b7edb3edb 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,12 +7,9 @@ #include <linux/types.h> -struct hyp_pool; struct hyp_page { - unsigned int refcount; - unsigned int order; - struct hyp_pool *pool; - struct list_head node; + unsigned short refcount; + unsigned short order; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 0095f6289742..8ec3a5a7744b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void) return res; } -static inline unsigned long host_s2_mem_pgtable_pages(void) +static inline unsigned long host_s2_pgtable_pages(void) { + unsigned long res; + /* * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. */ - return __hyp_pgtable_total_pages() + 16; -} + res = __hyp_pgtable_total_pages() + 16; -static inline unsigned long host_s2_dev_pgtable_pages(void) -{ /* Allow 1 GiB for MMIO mappings */ - return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; } #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..958734f4d6b0 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,7 @@ #include <asm/assembler.h> #include <asm/alternative.h> -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4b60c0056c04..d938ce95d3bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,7 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -static struct hyp_pool host_s2_mem; -static struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_pool; /* * Copies of the host's CPU features registers holding sanitized values. @@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_mem, get_order(size)); + return hyp_alloc_pages(&host_s2_pool, get_order(size)); } static void *host_s2_zalloc_page(void *pool) @@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } -static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +static void host_s2_get_page(void *addr) +{ + hyp_get_page(&host_s2_pool, addr); +} + +static void host_s2_put_page(void *addr) +{ + hyp_put_page(&host_s2_pool, addr); +} + +static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; int ret; - pfn = hyp_virt_to_pfn(mem_pgt_pool); - nr_pages = host_s2_mem_pgtable_pages(); - ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); - if (ret) - return ret; - - pfn = hyp_virt_to_pfn(dev_pgt_pool); - nr_pages = host_s2_dev_pgtable_pages(); - ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + pfn = hyp_virt_to_pfn(pgt_pool_base); + nr_pages = host_s2_pgtable_pages(); + ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); if (ret) return ret; @@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = host_s2_get_page, + .put_page = host_s2_put_page, }; return 0; @@ -86,7 +89,7 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; int ret; @@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); - ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; @@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end) } static inline int __host_stage2_idmap(u64 start, u64 end, - enum kvm_pgtable_prot prot, - struct hyp_pool *pool) + enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, pool); + prot, &host_s2_pool); } static int host_stage2_idmap(u64 addr) @@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr) enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); - struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; int ret; if (is_memory) @@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); - if (is_memory || ret != -ENOMEM) + ret = __host_stage2_idmap(range.start, range.end, prot); + if (ret != -ENOMEM) goto unlock; /* - * host_s2_mem has been provided with enough pages to cover all of - * memory with page granularity, so we should never hit the ENOMEM case. - * However, it is difficult to know how much of the MMIO range we will - * need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of memory + * with page granularity, but it is difficult to know how much of the + * MMIO range we will need to cover upfront, so we may need to 'recycle' + * the pages if we run out. */ ret = host_stage2_unmap_dev_all(); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); + ret = __host_stage2_idmap(range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) hyp_spin_lock(&host_kvm.lock); ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_mem, pkvm_hyp_id); + &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 237e03bf0cb1..41fc25bdfb34 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); - if (!buddy || buddy->order != order || list_empty(&buddy->node)) + if (!buddy || buddy->order != order || buddy->refcount) return NULL; return buddy; } +/* + * Pages that are available for allocation are tracked in free-lists, so we use + * the pages themselves to store the list nodes to avoid wasting space. As the + * allocator always returns zeroed pages (which are zeroed on the hyp_put_page() + * path to optimize allocation speed), we also need to clean-up the list node in + * each page when we take it out of the list. + */ +static inline void page_remove_from_list(struct hyp_page *p) +{ + struct list_head *node = hyp_page_to_virt(p); + + __list_del_entry(node); + memset(node, 0, sizeof(*node)); +} + +static inline void page_add_to_list(struct hyp_page *p, struct list_head *head) +{ + struct list_head *node = hyp_page_to_virt(p); + + INIT_LIST_HEAD(node); + list_add_tail(node, head); +} + +static inline struct hyp_page *node_to_page(struct list_head *node) +{ + return hyp_virt_to_page(node); +} + static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned int order = p->order; + unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool, break; /* Take the buddy out of its list, and coallesce with @p */ - list_del_init(&buddy->node); + page_remove_from_list(buddy); buddy->order = HYP_NO_ORDER; p = min(p, buddy); } /* Mark the new head, and insert it */ p->order = order; - list_add_tail(&p->node, &pool->free_area[order]); -} - -static void hyp_attach_page(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - __hyp_attach_page(pool, p); - hyp_spin_unlock(&pool->lock); + page_add_to_list(p, &pool->free_area[order]); } static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy; - list_del_init(&p->node); + page_remove_from_list(p); while (p->order > order) { /* * The buddy of order n - 1 currently has HYP_NO_ORDER as it @@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, p->order--; buddy = __find_buddy_nocheck(pool, p, p->order); buddy->order = p->order; - list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + page_add_to_list(buddy, &pool->free_area[buddy->order]); } return p; } -void hyp_put_page(void *addr) +static inline void hyp_page_ref_inc(struct hyp_page *p) { - struct hyp_page *p = hyp_virt_to_page(addr); + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} + +static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) +{ if (hyp_page_ref_dec_and_test(p)) - hyp_attach_page(p); + __hyp_attach_page(pool, p); +} + +/* + * Changes to the buddy tree and page refcounts must be done with the hyp_pool + * lock held. If a refcount change requires an update to the buddy tree (e.g. + * hyp_put_page()), both operations must be done within the same critical + * section to guarantee transient states (e.g. a page with null refcount but + * not yet attached to a free list) can't be observed by well-behaved readers. + */ +void hyp_put_page(struct hyp_pool *pool, void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + hyp_spin_lock(&pool->lock); + __hyp_put_page(pool, p); + hyp_spin_unlock(&pool->lock); } -void hyp_get_page(void *addr) +void hyp_get_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); + hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { - unsigned int i = order; + unsigned short i = order; struct hyp_page *p; hyp_spin_lock(&pool->lock); @@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) } /* Extract it from the tree at the right order */ - p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = node_to_page(pool->free_area[i].next); p = __hyp_extract_page(pool, p, order); - hyp_spin_unlock(&pool->lock); hyp_set_page_refcounted(p); + hyp_spin_unlock(&pool->lock); return hyp_page_to_virt(p); } @@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - memset(p, 0, sizeof(*p) * nr_pages); for (i = 0; i < nr_pages; i++) { - p[i].pool = pool; - INIT_LIST_HEAD(&p[i].node); + p[i].order = 0; + hyp_set_page_refcounted(&p[i]); } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) - __hyp_attach_page(pool, &p[i]); + __hyp_put_page(pool, &p[i]); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a3d3a275344e..0b574d106519 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; -static void *host_s2_mem_pgt_base; -static void *host_s2_dev_pgt_base; +static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) @@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; - nr_pages = host_s2_mem_pgtable_pages(); - host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_mem_pgt_base) - return -ENOMEM; - - nr_pages = host_s2_dev_pgtable_pages(); - host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_dev_pgt_base) + nr_pages = host_s2_pgtable_pages(); + host_s2_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_pgt_base) return -ENOMEM; return 0; @@ -134,7 +128,8 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area(params, sizeof(*params)); + dcache_clean_inval_poc((unsigned long)params, + (unsigned long)params + sizeof(*params)); } } @@ -143,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg) return hyp_alloc_pages(&hpool, 0); } +static void hpool_get_page(void *addr) +{ + hyp_get_page(&hpool, addr); +} + +static void hpool_put_page(void *addr) +{ + hyp_put_page(&hpool, addr); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -158,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + ret = kvm_host_prepare_stage2(host_s2_pgt_base); if (ret) goto out; @@ -166,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void) .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = hpool_get_page, + .put_page = hpool_put_page, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 83dc3b271bc5..38ed0f6f2703 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, * you should be running with VHE enabled. */ if (icache_is_vpipt()) - __flush_icache_all(); + icache_inval_all_pou(); __tlb_switch_to_host(&cxt); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..05321f4165e3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, mm_ops->put_page(ptep); } +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) +{ + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); +} + +static bool stage2_pte_executable(kvm_pte_t pte) +{ + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) @@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } + /* Perform CMOs before installation of the guest stage-2 PTE */ + if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), + granule); + + if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) + mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); + smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); @@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) -{ - u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); -} - static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) @@ -839,8 +853,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); + + dcache_clean_inval_poc((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); } if (childp) @@ -861,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) } struct stage2_attr_data { - kvm_pte_t attr_set; - kvm_pte_t attr_clr; - kvm_pte_t pte; - u32 level; + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -873,6 +891,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, { kvm_pte_t pte = *ptep; struct stage2_attr_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; @@ -887,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * but worst-case the access flag update gets lost and will be * set on the next access instead. */ - if (data->pte != pte) + if (data->pte != pte) { + /* + * Invalidate instruction cache before updating the guest + * stage-2 PTE if we are going to add executable permission. + */ + if (mm_ops->icache_inval_pou && + stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); WRITE_ONCE(*ptep, pte); + } return 0; } @@ -903,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, @@ -988,11 +1017,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; + kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); + pte_follow = kvm_pte_follow(pte, mm_ops); + dcache_clean_inval_poc((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 83ca23ac259b..d654921dd09b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_mem_pgtable_pages(); - hyp_mem_pages += host_s2_dev_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages |