summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_64_mmu_hv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_64_mmu_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c742
1 files changed, 378 insertions, 364 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b42812e014c0..f305395cf26e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
@@ -29,7 +18,6 @@
#include <linux/file.h>
#include <linux/debugfs.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
@@ -37,7 +25,10 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
+#include <asm/pte-walk.h>
+#include "book3s.h"
+#include "book3s_hv.h"
#include "trace_hv.h"
//#define DEBUG_RESIZE_HPT 1
@@ -63,17 +54,21 @@ struct kvm_resize_hpt {
struct work_struct work;
u32 order;
- /* These fields protected by kvm->lock */
+ /* These fields protected by kvm->arch.mmu_setup_lock */
+
+ /* Possible values and their usage:
+ * <0 an error occurred during allocation,
+ * -EBUSY allocation is in the progress,
+ * 0 allocation made successfully.
+ */
int error;
- bool prepare_done;
- /* Private to the work thread, until prepare_done is true,
- * then protected by kvm->resize_hpt_sem */
+ /* Private to the work thread, until error != -EBUSY,
+ * then protected by kvm->arch.mmu_setup_lock.
+ */
struct kvm_hpt_info hpt;
};
-static void kvmppc_rmap_reset(struct kvm *kvm);
-
int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
unsigned long hpt = 0;
@@ -103,9 +98,8 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
npte = 1ul << (order - 4);
/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * npte);
+ rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
if (!rev) {
- pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n");
if (cma)
kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
@@ -127,28 +121,31 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
kvm->arch.hpt = *info;
kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
- pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
+ pr_debug("KVM guest htab at %lx (order %ld), LPID %llx\n",
info->virt, (long)info->order, kvm->arch.lpid);
}
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
+int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
{
- long err = -EBUSY;
+ int err = -EBUSY;
struct kvm_hpt_info info;
- if (kvm_is_radix(kvm))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- if (kvm->arch.hpte_setup_done) {
- kvm->arch.hpte_setup_done = 0;
- /* order hpte_setup_done vs. vcpus_running */
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
- kvm->arch.hpte_setup_done = 1;
+ kvm->arch.mmu_ready = 1;
goto out;
}
}
+ if (kvm_is_radix(kvm)) {
+ err = kvmppc_switch_mmu_to_hpt(kvm);
+ if (err)
+ goto out;
+ }
+
if (kvm->arch.hpt.order == order) {
/* We already have a suitable HPT */
@@ -158,8 +155,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
- /* Ensure that each vcpu will flush its TLB on next entry. */
- cpumask_setall(&kvm->arch.need_tlb_flush);
err = 0;
goto out;
}
@@ -175,15 +170,20 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
kvmppc_set_hpt(kvm, &info);
out:
- mutex_unlock(&kvm->lock);
+ if (err == 0)
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return err;
}
void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
vfree(info->rev);
+ info->rev = NULL;
if (info->cma)
- kvm_free_hpt_cma(virt_to_page(info->virt),
+ kvm_free_hpt_cma(virt_to_page((void *)info->virt),
1 << (info->order - PAGE_SHIFT));
else if (info->virt)
free_pages(info->virt, info->order - PAGE_SHIFT);
@@ -257,34 +257,36 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
int kvmppc_mmu_hv_init(void)
{
- unsigned long host_lpid, rsvd_lpid;
+ unsigned long nr_lpids;
- if (!cpu_has_feature(CPU_FTR_HVMODE))
+ if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
- host_lpid = mfspr(SPRN_LPID);
- rsvd_lpid = LPID_RSVD;
-
- kvmppc_init_lpid(rsvd_lpid + 1);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (WARN_ON(mfspr(SPRN_LPID) != 0))
+ return -EINVAL;
+ nr_lpids = 1UL << mmu_lpid_bits;
+ } else {
+ nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;
+ }
- kvmppc_claim_lpid(host_lpid);
- /* rsvd_lpid is reserved for use in partition switching */
- kvmppc_claim_lpid(rsvd_lpid);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ WARN_ON(nr_lpids != 1UL << 12);
+ else
+ WARN_ON(nr_lpids != 1UL << 10);
- return 0;
-}
+ /*
+ * Reserve the last implemented LPID use in partition
+ * switching for POWER7 and POWER8.
+ */
+ nr_lpids -= 1;
+ }
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
+ kvmppc_init_lpid(nr_lpids);
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
+ return 0;
}
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
@@ -293,11 +295,10 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
{
long ret;
- /* Protect linux PTE lookup from page table destruction */
- rcu_read_lock_sched(); /* this disables preemption too */
+ preempt_disable();
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
- current->mm->pgd, false, pte_idx_ret);
- rcu_read_unlock_sched();
+ kvm->mm->pgd, false, pte_idx_ret);
+ preempt_enable();
if (ret == H_TOO_HARD) {
/* this can't happen */
pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -333,7 +334,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
{
unsigned long ra_mask;
- ra_mask = hpte_page_size(v, r) - 1;
+ ra_mask = kvmppc_actual_pgsz(v, r) - 1;
return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
}
@@ -346,8 +347,11 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned long pp, key;
unsigned long v, orig_v, gr;
__be64 *hptep;
- int index;
- int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+ long int index;
+ int virtmode = __kvmppc_get_msr_hv(vcpu) & (data ? MSR_DR : MSR_IR);
+
+ if (kvm_is_radix(vcpu->kvm))
+ return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite);
/* Get SLB entry */
if (virtmode) {
@@ -382,7 +386,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Get PP bits and key for permission check */
pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
- key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ key = (__kvmppc_get_msr_hv(vcpu) & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
key &= slb_v;
/* Calculate permissions */
@@ -412,20 +416,43 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
* embodied here.) If the instruction isn't a load or store, then
* this doesn't return anything useful.
*/
-static int instruction_is_store(unsigned int instr)
+static int instruction_is_store(ppc_inst_t instr)
{
unsigned int mask;
+ unsigned int suffix;
mask = 0x10000000;
- if ((instr & 0xfc000000) == 0x7c000000)
+ suffix = ppc_inst_val(instr);
+ if (ppc_inst_prefixed(instr))
+ suffix = ppc_inst_suffix(instr);
+ else if ((suffix & 0xfc000000) == 0x7c000000)
mask = 0x100; /* major opcode 31 */
- return (instr & mask) != 0;
+ return (suffix & mask) != 0;
}
-int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
unsigned long gpa, gva_t ea, int is_store)
{
- u32 last_inst;
+ ppc_inst_t last_inst;
+ bool is_prefixed = !!(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+
+ /*
+ * Fast path - check if the guest physical address corresponds to a
+ * device on the FAST_MMIO_BUS, if so we can avoid loading the
+ * instruction all together, then we can just handle it and return.
+ */
+ if (is_store) {
+ int idx, ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
+ NULL);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (!ret) {
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + (is_prefixed ? 8 : 4));
+ return RESUME_GUEST;
+ }
+ }
/*
* If we fail, we just return to the guest and try executing it again.
@@ -437,7 +464,16 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
/*
* WARNING: We do not know for sure whether the instruction we just
* read from memory is the same that caused the fault in the first
- * place. If the instruction we read is neither an load or a store,
+ * place.
+ *
+ * If the fault is prefixed but the instruction is not or vice
+ * versa, try again so that we don't advance pc the wrong amount.
+ */
+ if (ppc_inst_prefixed(last_inst) != is_prefixed)
+ return RESUME_GUEST;
+
+ /*
+ * If the instruction we read is neither an load or a store,
* then it can't access memory, so we don't need to worry about
* enforcing access permissions. So, assuming it is a load or
* store, we just check that its direction (load or store) is
@@ -464,10 +500,10 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.paddr_accessed = gpa;
vcpu->arch.vaddr_accessed = ea;
- return kvmppc_emulate_mmio(run, vcpu);
+ return kvmppc_emulate_mmio(vcpu);
}
-int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr)
{
struct kvm *kvm = vcpu->kvm;
@@ -476,20 +512,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
- struct page *page, *pages[1];
- long index, ret, npages;
+ struct page *page;
+ long index, ret;
bool is_ci;
- unsigned int writing, write_ok;
- struct vm_area_struct *vma;
+ bool writing, write_ok;
+ unsigned int shift;
unsigned long rcbits;
long mmio_update;
+ pte_t pte, *ptep;
if (kvm_is_radix(kvm))
- return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
+ return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
/*
* Real-mode code has already searched the HPT and found the
@@ -504,11 +541,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
mmio_update = atomic64_read(&kvm->arch.mmio_update);
if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
r = vcpu->arch.pgfault_cache->rpte;
- psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
+ psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
+ r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1));
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
}
@@ -533,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return RESUME_GUEST;
/* Translate the logical address and get the page */
- psize = hpte_page_size(hpte[0], r);
+ psize = kvmppc_actual_pgsz(hpte[0], r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1));
@@ -544,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
/*
@@ -555,63 +593,50 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return -EFAULT;
/* used to check for invalidations in progress */
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
ret = -EFAULT;
- is_ci = false;
- pfn = 0;
page = NULL;
- pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, writing, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
- if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pte_size = psize;
- is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
- write_ok = vma->vm_flags & VM_WRITE;
- }
- up_read(&current->mm->mmap_sem);
- if (!pfn)
- goto out_put;
- } else {
- page = pages[0];
- pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
- }
- /* if the guest wants write access, see if that is OK */
- if (!writing && hpte_is_writable(r)) {
- pte_t *ptep, pte;
- unsigned long flags;
- /*
- * We need to protect against page table destruction
- * hugepage split and collapse.
- */
- local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep) {
- pte = kvmppc_read_update_linux_pte(ptep, 1);
- if (__pte_write(pte))
- write_ok = 1;
- }
- local_irq_restore(flags);
- }
+
+ pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
+ &write_ok, &page);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+ pte = __pte(0);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!pte_present(pte)) {
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
}
+ hpa = pte_pfn(pte) << PAGE_SHIFT;
+ pte_size = PAGE_SIZE;
+ if (shift)
+ pte_size = 1ul << shift;
+ is_ci = pte_ci(pte);
if (psize > pte_size)
goto out_put;
+ if (pte_size > psize)
+ hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_ci)) {
@@ -625,14 +650,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/*
- * Set the HPTE to point to pfn.
- * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * Set the HPTE to point to hpa.
+ * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
- ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@@ -645,6 +669,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
hnow_r = hpte_new_to_old_r(hnow_r);
}
+
+ /*
+ * If the HPT is being resized, don't update the HPTE,
+ * instead let the guest retry after the resize operation is complete.
+ * The synchronization for mmu_ready test vs. set is provided
+ * by the HPTE lock.
+ */
+ if (!kvm->arch.mmu_ready)
+ goto out_unlock;
+
if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */
@@ -657,7 +691,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Check if we might have been invalidated; let the guest retry if so */
ret = RESUME_GUEST;
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+ if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
unlock_rmap(rmap);
goto out_unlock;
}
@@ -687,20 +721,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
- SetPageDirty(page);
+ set_page_dirty_lock(page);
out_put:
trace_kvm_page_fault_exit(vcpu, hpte, ret);
- if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
- }
+ if (page)
+ put_page(page);
return ret;
out_unlock:
@@ -709,72 +736,31 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
-static void kvmppc_rmap_reset(struct kvm *kvm)
+void kvmppc_rmap_reset(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int srcu_idx;
+ int srcu_idx, bkt;
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
/*
* This assumes it is acceptable to lose reference and
* change bits across a reset.
*/
memset(memslot->arch.rmap, 0,
memslot->npages * sizeof(*memslot->arch.rmap));
+ spin_unlock(&kvm->mmu_lock);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
-typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
- unsigned long gfn);
-
-static int kvm_handle_hva_range(struct kvm *kvm,
- unsigned long start,
- unsigned long end,
- hva_handler_fn handler)
-{
- int ret;
- int retval = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
- unsigned long hva_start, hva_end;
- gfn_t gfn, gfn_end;
-
- hva_start = max(start, memslot->userspace_addr);
- hva_end = min(end, memslot->userspace_addr +
- (memslot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
- /*
- * {gfn(page) | page intersects with [hva_start, hva_end)} =
- * {gfn, gfn+1, ..., gfn_end-1}.
- */
- gfn = hva_to_gfn_memslot(hva_start, memslot);
- gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
-
- for (; gfn < gfn_end; ++gfn) {
- ret = handler(kvm, memslot, gfn);
- retval |= ret;
- }
- }
-
- return retval;
-}
-
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- hva_handler_fn handler)
-{
- return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
-}
-
/* Must be called with both HPTE and rmap locked */
static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+ struct kvm_memory_slot *memslot,
unsigned long *rmapp, unsigned long gfn)
{
__be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
@@ -797,7 +783,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
/* Now check and modify the HPTE */
ptel = rev[i].guest_rpte;
- psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
+ psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) {
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -806,8 +792,8 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
- if (rcbits & HPTE_R_C)
- kvmppc_update_rmap_change(rmapp, psize);
+ if ((rcbits & HPTE_R_C) && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot, gfn, psize);
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
@@ -815,8 +801,8 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
}
}
-static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
- unsigned long gfn)
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
unsigned long i;
__be64 *hptep;
@@ -845,29 +831,25 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
continue;
}
- kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
+ kvmppc_unmap_hpte(kvm, i, memslot, rmapp, gfn);
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
- return 0;
}
-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
+bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- hva_handler_fn handler;
-
- handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
- kvm_handle_hva(kvm, hva, handler);
- return 0;
-}
+ gfn_t gfn;
-int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
-{
- hva_handler_fn handler;
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_rmapp(kvm, range->slot, gfn);
+ }
- handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
- kvm_handle_hva_range(kvm, start, end, handler);
- return 0;
+ return false;
}
void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
@@ -879,11 +861,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
gfn = memslot->base_gfn;
rmapp = memslot->arch.rmap;
+ if (kvm_is_radix(kvm)) {
+ kvmppc_radix_flush_memslot(kvm, memslot);
+ return;
+ }
+
for (n = memslot->npages; n; --n, ++gfn) {
- if (kvm_is_radix(kvm)) {
- kvm_unmap_radix(kvm, memslot, gfn);
- continue;
- }
/*
* Testing the present bit without locking is OK because
* the memslot has been marked invalid already, and hence
@@ -896,13 +879,13 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
}
}
-static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
- unsigned long gfn)
+static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
__be64 *hptep;
- int ret = 0;
+ bool ret = false;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
@@ -910,7 +893,7 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_REFERENCED) {
*rmapp &= ~KVMPPC_RMAP_REFERENCED;
- ret = 1;
+ ret = true;
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
@@ -942,7 +925,7 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
rev[i].guest_rpte |= HPTE_R_R;
note_hpte_modification(kvm, &rev[i]);
}
- ret = 1;
+ ret = true;
}
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
} while ((i = j) != head);
@@ -951,26 +934,34 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
return ret;
}
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
+bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- hva_handler_fn handler;
+ gfn_t gfn;
+ bool ret = false;
- handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
- return kvm_handle_hva_range(kvm, start, end, handler);
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+ }
+
+ return ret;
}
-static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
- unsigned long gfn)
+static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
unsigned long *hp;
- int ret = 1;
+ bool ret = true;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
if (*rmapp & KVMPPC_RMAP_REFERENCED)
- return 1;
+ return true;
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_REFERENCED)
@@ -985,27 +976,21 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
goto out;
} while ((i = j) != head);
}
- ret = 0;
+ ret = false;
out:
unlock_rmap(rmapp);
return ret;
}
-int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
+bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- hva_handler_fn handler;
+ WARN_ON(range->start + 1 != range->end);
- handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
- return kvm_handle_hva(kvm, hva, handler);
-}
-
-void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
- hva_handler_fn handler;
-
- handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
- kvm_handle_hva(kvm, hva, handler);
+ if (kvm_is_radix(kvm))
+ return kvm_test_age_radix(kvm, range->slot, range->start);
+ else
+ return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
static int vcpus_running(struct kvm *kvm)
@@ -1028,14 +1013,6 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
retry:
lock_rmap(rmapp);
- if (*rmapp & KVMPPC_RMAP_CHANGED) {
- long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
- >> KVMPPC_RMAP_CHG_SHIFT;
- *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
- npages_dirty = 1;
- if (change_order > PAGE_SHIFT)
- npages_dirty = 1ul << (change_order - PAGE_SHIFT);
- }
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
return npages_dirty;
@@ -1091,7 +1068,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]);
}
- n = hpte_page_size(v, r);
+ n = kvmppc_actual_pgsz(v, r);
n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (n > npages_dirty)
npages_dirty = n;
@@ -1127,7 +1104,7 @@ void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map)
{
- unsigned long i, j;
+ unsigned long i;
unsigned long *rmapp;
preempt_disable();
@@ -1139,9 +1116,8 @@ long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
* since we always put huge-page HPTEs in the rmap chain
* corresponding to their page base address.
*/
- if (npages && map)
- for (j = i; npages; ++j, --npages)
- __set_bit_le(j, map);
+ if (npages)
+ set_dirty_bits(map, i, npages);
++rmapp;
}
preempt_enable();
@@ -1163,7 +1139,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
goto err;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, 1, pages);
+ npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
if (npages < 1)
goto err;
page = pages[0];
@@ -1185,7 +1161,6 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
struct page *page = virt_to_page(va);
struct kvm_memory_slot *memslot;
unsigned long gfn;
- unsigned long *rmap;
int srcu_idx;
put_page(page);
@@ -1193,20 +1168,12 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
if (!dirty)
return;
- /* We need to mark this page dirty in the rmap chain */
+ /* We need to mark this page dirty in the memslot dirty_bitmap, if any */
gfn = gpa >> PAGE_SHIFT;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
- if (memslot) {
- if (!kvm_is_radix(kvm)) {
- rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
- lock_rmap(rmap);
- *rmap |= KVMPPC_RMAP_CHANGED;
- unlock_rmap(rmap);
- } else if (memslot->dirty_bitmap) {
- mark_page_dirty(kvm, gfn);
- }
- }
+ if (memslot && memslot->dirty_bitmap)
+ set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
@@ -1221,7 +1188,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
if (rc < 0)
return rc;
- resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+ resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__,
resize->hpt.virt);
return 0;
@@ -1239,8 +1206,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
unsigned long vpte, rpte, guest_rpte;
int ret;
struct revmap_entry *rev;
- unsigned long apsize, psize, avpn, pteg, hash;
+ unsigned long apsize, avpn, pteg, hash;
unsigned long new_idx, new_pteg, replace_vpte;
+ int pshift;
hptep = (__be64 *)(old->virt + (idx << 4));
@@ -1261,12 +1229,17 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Nothing to do */
goto out;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = be64_to_cpu(hptep[1]);
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ }
+
/* Unmap */
rev = &old->rev[idx];
guest_rpte = rev->guest_rpte;
ret = -EIO;
- apsize = hpte_page_size(vpte, guest_rpte);
+ apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
if (!apsize)
goto out;
@@ -1281,7 +1254,7 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
lock_rmap(rmapp);
- kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
+ kvmppc_unmap_hpte(kvm, idx, memslot, rmapp, gfn);
unlock_rmap(rmapp);
}
@@ -1290,7 +1263,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Reload PTE after unmap */
vpte = be64_to_cpu(hptep[0]);
-
BUG_ON(vpte & HPTE_V_VALID);
BUG_ON(!(vpte & HPTE_V_ABSENT));
@@ -1299,8 +1271,14 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
- psize = hpte_base_page_size(vpte, rpte);
- avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ rpte = hpte_new_to_old_r(rpte);
+ }
+
+ pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+ avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
if (vpte & HPTE_V_SECONDARY)
pteg = ~pteg;
@@ -1312,34 +1290,34 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
offset = (avpn & 0x1f) << 23;
vsid = avpn >> 5;
/* We can find more bits from the pteg value */
- if (psize < (1ULL << 23))
- offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+ if (pshift < 23)
+ offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
- hash = vsid ^ (offset / psize);
+ hash = vsid ^ (offset >> pshift);
} else {
unsigned long offset, vsid;
/* We only have 40 - 23 bits of seg_off in avpn */
offset = (avpn & 0x1ffff) << 23;
vsid = avpn >> 17;
- if (psize < (1ULL << 23))
- offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
+ if (pshift < 23)
+ offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
- hash = vsid ^ (vsid << 25) ^ (offset / psize);
+ hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
}
new_pteg = hash & new_hash_mask;
- if (vpte & HPTE_V_SECONDARY) {
- BUG_ON(~pteg != (hash & old_hash_mask));
- new_pteg = ~new_pteg;
- } else {
- BUG_ON(pteg != (hash & old_hash_mask));
- }
+ if (vpte & HPTE_V_SECONDARY)
+ new_pteg = ~hash & new_hash_mask;
new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
new_hptep = (__be64 *)(new->virt + (new_idx << 4));
replace_vpte = be64_to_cpu(new_hptep[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+ }
if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
BUG_ON(new->order >= old->order);
@@ -1355,6 +1333,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Discard the previous HPTE */
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = hpte_old_to_new_r(vpte, rpte);
+ vpte = hpte_old_to_new_v(vpte);
+ }
+
new_hptep[1] = cpu_to_be64(rpte);
new->rev[new_idx].guest_rpte = guest_rpte;
/* No need for a barrier, since new HPT isn't active */
@@ -1372,12 +1355,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
unsigned long i;
int rc;
- /*
- * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
- * that POWER9 uses, and could well hit a BUG_ON on POWER9.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- return -EIO;
for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
rc = resize_hpt_rehash_hpte(resize, i);
if (rc != 0)
@@ -1408,21 +1385,28 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
synchronize_srcu_expedited(&kvm->srcu);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
{
- BUG_ON(kvm->arch.resize_hpt != resize);
+ if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock)))
+ return;
if (!resize)
return;
- if (resize->hpt.virt)
- kvmppc_free_hpt(&resize->hpt);
+ if (resize->error != -EBUSY) {
+ if (resize->hpt.virt)
+ kvmppc_free_hpt(&resize->hpt);
+ kfree(resize);
+ }
- kvm->arch.resize_hpt = NULL;
- kfree(resize);
+ if (kvm->arch.resize_hpt == resize)
+ kvm->arch.resize_hpt = NULL;
}
static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,49 +1415,71 @@ static void resize_hpt_prepare_work(struct work_struct *work)
struct kvm_resize_hpt,
work);
struct kvm *kvm = resize->kvm;
- int err;
+ int err = 0;
- resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
- resize->order);
+ if (WARN_ON(resize->error != -EBUSY))
+ return;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+
+ /* Request is still current? */
+ if (kvm->arch.resize_hpt == resize) {
+ /* We may request large allocations here:
+ * do not sleep with kvm->arch.mmu_setup_lock held for a while.
+ */
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+
+ resize_hpt_debug(resize, "%s(): order = %d\n", __func__,
+ resize->order);
- err = resize_hpt_allocate(resize);
+ err = resize_hpt_allocate(resize);
- mutex_lock(&kvm->lock);
+ /* We have strict assumption about -EBUSY
+ * when preparing for HPT resize.
+ */
+ if (WARN_ON(err == -EBUSY))
+ err = -EINPROGRESS;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ /* It is possible that kvm->arch.resize_hpt != resize
+ * after we grab kvm->arch.mmu_setup_lock again.
+ */
+ }
resize->error = err;
- resize->prepare_done = true;
- mutex_unlock(&kvm->lock);
+ if (kvm->arch.resize_hpt != resize)
+ resize_hpt_release(kvm, resize);
+
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
}
-long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
{
unsigned long flags = rhpt->flags;
unsigned long shift = rhpt->shift;
struct kvm_resize_hpt *resize;
int ret;
- if (flags != 0)
+ if (flags != 0 || kvm_is_radix(kvm))
return -EINVAL;
if (shift && ((shift < 18) || (shift > 46)))
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.mmu_setup_lock);
resize = kvm->arch.resize_hpt;
if (resize) {
if (resize->order == shift) {
- /* Suitable resize in progress */
- if (resize->prepare_done) {
- ret = resize->error;
- if (ret != 0)
- resize_hpt_release(kvm, resize);
- } else {
+ /* Suitable resize in progress? */
+ ret = resize->error;
+ if (ret == -EBUSY)
ret = 100; /* estimated time in ms */
- }
+ else if (ret)
+ resize_hpt_release(kvm, resize);
goto out;
}
@@ -1493,6 +1499,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
ret = -ENOMEM;
goto out;
}
+
+ resize->error = -EBUSY;
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1503,7 +1511,7 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
ret = 100; /* estimated time in ms */
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return ret;
}
@@ -1512,62 +1520,58 @@ static void resize_hpt_boot_vcpu(void *opaque)
/* Nothing to do, just force a KVM exit */
}
-long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
- struct kvm_ppc_resize_hpt *rhpt)
+int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
{
unsigned long flags = rhpt->flags;
unsigned long shift = rhpt->shift;
struct kvm_resize_hpt *resize;
- long ret;
+ int ret;
- if (flags != 0)
+ if (flags != 0 || kvm_is_radix(kvm))
return -EINVAL;
if (shift && ((shift < 18) || (shift > 46)))
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.mmu_setup_lock);
resize = kvm->arch.resize_hpt;
/* This shouldn't be possible */
ret = -EIO;
- if (WARN_ON(!kvm->arch.hpte_setup_done))
+ if (WARN_ON(!kvm->arch.mmu_ready))
goto out_no_hpt;
/* Stop VCPUs from running while we mess with the HPT */
- kvm->arch.hpte_setup_done = 0;
+ kvm->arch.mmu_ready = 0;
smp_mb();
/* Boot all CPUs out of the guest so they re-read
- * hpte_setup_done */
+ * mmu_ready */
on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
ret = -ENXIO;
if (!resize || (resize->order != shift))
goto out;
- ret = -EBUSY;
- if (!resize->prepare_done)
- goto out;
-
ret = resize->error;
- if (ret != 0)
+ if (ret)
goto out;
ret = resize_hpt_rehash(resize);
- if (ret != 0)
+ if (ret)
goto out;
resize_hpt_pivot(resize);
out:
/* Let VCPUs run again */
- kvm->arch.hpte_setup_done = 1;
+ kvm->arch.mmu_ready = 1;
smp_mb();
out_no_hpt:
resize_hpt_release(kvm, resize);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return ret;
}
@@ -1704,8 +1708,10 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
int first_pass;
unsigned long hpte[2];
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
+ if (kvm_is_radix(kvm))
+ return 0;
first_pass = ctx->first_pass;
flags = ctx->flags;
@@ -1799,21 +1805,24 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
unsigned long tmp[2];
ssize_t nb;
long int err, ret;
- int hpte_setup;
+ int mmu_ready;
+ int pshift;
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
+ if (kvm_is_radix(kvm))
+ return -EINVAL;
/* lock out vcpus from running while we're doing this */
- mutex_lock(&kvm->lock);
- hpte_setup = kvm->arch.hpte_setup_done;
- if (hpte_setup) {
- kvm->arch.hpte_setup_done = 0; /* temporarily */
- /* order hpte_setup_done vs. vcpus_running */
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ mmu_ready = kvm->arch.mmu_ready;
+ if (mmu_ready) {
+ kvm->arch.mmu_ready = 0; /* temporarily */
+ /* order mmu_ready vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
- kvm->arch.hpte_setup_done = 1;
- mutex_unlock(&kvm->lock);
+ kvm->arch.mmu_ready = 1;
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return -EBUSY;
}
}
@@ -1852,6 +1861,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
err = -EINVAL;
if (!(v & HPTE_V_VALID))
goto out;
+ pshift = kvmppc_hpte_base_page_shift(v, r);
+ if (pshift <= 0)
+ goto out;
lbuf += 2;
nb += HPTE_SIZE;
@@ -1861,20 +1873,23 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
tmp);
if (ret != H_SUCCESS) {
- pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
- "r=%lx\n", ret, i, v, r);
+ pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r);
goto out;
}
- if (!hpte_setup && is_vrma_hpte(v)) {
- unsigned long psize = hpte_base_page_size(v, r);
- unsigned long senc = slb_pgsize_encoding(psize);
- unsigned long lpcr;
+ if (!mmu_ready && is_vrma_hpte(v)) {
+ unsigned long senc, lpcr;
+ senc = slb_pgsize_encoding(1ul << pshift);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr = senc << (LPCR_VRMASD_SH - 4);
- kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
- hpte_setup = 1;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr,
+ LPCR_VRMASD);
+ } else {
+ kvmppc_setup_partition_table(kvm);
+ }
+ mmu_ready = 1;
}
++i;
hptp += 2;
@@ -1890,10 +1905,10 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
}
out:
- /* Order HPTE updates vs. hpte_setup_done */
+ /* Order HPTE updates vs. mmu_ready */
smp_wmb();
- kvm->arch.hpte_setup_done = hpte_setup;
- mutex_unlock(&kvm->lock);
+ kvm->arch.mmu_ready = mmu_ready;
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
if (err)
return err;
@@ -1940,7 +1955,8 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
- kvm_put_kvm(kvm);
+ kfree(ctx);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
@@ -2000,6 +2016,10 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
struct kvm *kvm;
__be64 *hptp;
+ kvm = p->kvm;
+ if (kvm_is_radix(kvm))
+ return 0;
+
ret = mutex_lock_interruptible(&p->mutex);
if (ret)
return ret;
@@ -2022,7 +2042,6 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
}
}
- kvm = p->kvm;
i = p->hpt_index;
hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
@@ -2086,9 +2105,8 @@ static const struct file_operations debugfs_htab_fops = {
void kvmppc_mmu_debugfs_init(struct kvm *kvm)
{
- kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
- kvm->arch.debugfs_dir, kvm,
- &debugfs_htab_fops);
+ debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
+ &debugfs_htab_fops);
}
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
@@ -2097,11 +2115,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
- if (kvm_is_radix(vcpu->kvm))
- mmu->xlate = kvmppc_mmu_radix_xlate;
- else
- mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
+ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}