-rw-r--r--  arch/x86/kvm/vmx/tdx.c | 108
-rw-r--r--  arch/x86/kvm/vmx/tdx.h |   8
2 files changed, 44 insertions(+), 72 deletions(-)
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index addd3678d03d..14cadd0d71c0 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1583,6 +1583,32 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
}
+static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ kvm_pfn_t pfn)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ u64 err, entry, level_state;
+ gpa_t gpa = gfn_to_gpa(gfn);
+
+ lockdep_assert_held(&kvm->slots_lock);
+
+ if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm) ||
+ KVM_BUG_ON(!kvm_tdx->page_add_src, kvm))
+ return -EIO;
+
+ err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
+ kvm_tdx->page_add_src, &entry, &level_state);
+ if (unlikely(tdx_operand_busy(err)))
+ return -EBUSY;
+
+ if (KVM_BUG_ON(err, kvm)) {
+ pr_tdx_error_2(TDH_MEM_PAGE_ADD, err, entry, level_state);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
@@ -1628,19 +1654,10 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
/*
* If the TD isn't finalized/runnable, then userspace is initializing
- * the VM image via KVM_TDX_INIT_MEM_REGION. Increment the number of
- * pages that need to be mapped and initialized via TDH.MEM.PAGE.ADD.
- * KVM_TDX_FINALIZE_VM checks the counter to ensure all pre-mapped
- * pages have been added to the image, to prevent running the TD with a
- * valid mapping in the mirror EPT, but not in the S-EPT.
- */
- if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE)) {
- if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm))
- return -EIO;
-
- atomic64_inc(&kvm_tdx->nr_premapped);
- return 0;
- }
+ * the VM image via KVM_TDX_INIT_MEM_REGION; ADD the page to the TD.
+ */
+ if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE))
+ return tdx_mem_page_add(kvm, gfn, level, pfn);
return tdx_mem_page_aug(kvm, gfn, level, pfn);
}
@@ -1666,39 +1683,6 @@ static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
return 0;
}
-/*
- * Check if the error returned from a SEPT zap SEAMCALL is due to that a page is
- * mapped by KVM_TDX_INIT_MEM_REGION without tdh_mem_page_add() being called
- * successfully.
- *
- * Since tdh_mem_sept_add() must have been invoked successfully before a
- * non-leaf entry present in the mirrored page table, the SEPT ZAP related
- * SEAMCALLs should not encounter err TDX_EPT_WALK_FAILED. They should instead
- * find TDX_EPT_ENTRY_STATE_INCORRECT due to an empty leaf entry found in the
- * SEPT.
- *
- * Further check if the returned entry from SEPT walking is with RWX permissions
- * to filter out anything unexpected.
- *
- * Note: @level is pg_level, not the tdx_level. The tdx_level extracted from
- * level_state returned from a SEAMCALL error is the same as that passed into
- * the SEAMCALL.
- */
-static int tdx_is_sept_zap_err_due_to_premap(struct kvm_tdx *kvm_tdx, u64 err,
- u64 entry, int level)
-{
- if (!err || kvm_tdx->state == TD_STATE_RUNNABLE)
- return false;
-
- if (err != (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))
- return false;
-
- if ((is_last_spte(entry, level) && (entry & VMX_EPT_RWX_MASK)))
- return false;
-
- return true;
-}
-
static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, struct page *page)
{
@@ -1718,12 +1702,6 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
tdx_no_vcpus_enter_stop(kvm);
}
- if (tdx_is_sept_zap_err_due_to_premap(kvm_tdx, err, entry, level)) {
- if (KVM_BUG_ON(atomic64_dec_return(&kvm_tdx->nr_premapped) < 0, kvm))
- return -EIO;
-
- return 0;
- }
if (KVM_BUG_ON(err, kvm)) {
pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
@@ -2839,12 +2817,6 @@ static int tdx_td_finalize(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
return -EINVAL;
- /*
- * Pages are pending for KVM_TDX_INIT_MEM_REGION to issue
- * TDH.MEM.PAGE.ADD().
- */
- if (atomic64_read(&kvm_tdx->nr_premapped))
- return -EINVAL;
cmd->hw_error = tdh_mr_finalize(&kvm_tdx->td);
if (tdx_operand_busy(cmd->hw_error))
@@ -3141,6 +3113,9 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
struct page *src_page;
int ret, i;
+ if (KVM_BUG_ON(kvm_tdx->page_add_src, kvm))
+ return -EIO;
+
/*
* Get the source page if it has been faulted in. Return failure if the
* source page has been swapped out or unmapped in primary memory.
@@ -3151,19 +3126,14 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
if (ret != 1)
return -ENOMEM;
+ kvm_tdx->page_add_src = src_page;
ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn);
- if (ret < 0)
- goto out;
+ kvm_tdx->page_add_src = NULL;
- ret = 0;
- err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
- src_page, &entry, &level_state);
- if (err) {
- ret = unlikely(tdx_operand_busy(err)) ? -EBUSY : -EIO;
- goto out;
- }
+ put_page(src_page);
- KVM_BUG_ON(atomic64_dec_return(&kvm_tdx->nr_premapped) < 0, kvm);
+ if (ret)
+ return ret;
if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
@@ -3176,8 +3146,6 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
}
}
-out:
- put_page(src_page);
return ret;
}
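For illustration only, here is a minimal userspace C sketch of the handoff pattern this patch adopts: the populate path stages the source page in a scratch field, triggers the map path, and the fault callback consumes the staged source (the role played by tdx_mem_page_add() once kvm_tdp_mmu_map_private_pfn() reaches tdx_sept_set_private_spte()). All names below (struct vm, scratch_src, post_populate(), map_private_pfn(), page_add()) are hypothetical stand-ins, not KVM or TDX-module APIs; the real code is serialized by kvm->slots_lock rather than a pthread mutex.

	/*
	 * Minimal sketch (not kernel code) of the scratch-pointer handoff:
	 * stage the source, run the map path that consumes it, then clear
	 * the pointer so it never dangles past the map call.
	 */
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct vm {
		pthread_mutex_t slots_lock;	/* models kvm->slots_lock */
		const char *scratch_src;	/* models kvm_tdx->page_add_src */
	};

	/* Models tdx_mem_page_add(): only legal while a source is staged. */
	static int page_add(struct vm *vm, const char *dst_label)
	{
		if (!vm->scratch_src) {
			fprintf(stderr, "BUG: no staged source for %s\n", dst_label);
			return -1;	/* the patch uses KVM_BUG_ON() and -EIO here */
		}
		printf("ADD %s from staged source \"%s\"\n", dst_label, vm->scratch_src);
		return 0;
	}

	/* Models the fault path that ends in tdx_sept_set_private_spte(). */
	static int map_private_pfn(struct vm *vm, const char *dst_label)
	{
		return page_add(vm, dst_label);
	}

	/* Models tdx_gmem_post_populate(): stage, map, unstage. */
	static int post_populate(struct vm *vm, const char *src, const char *dst_label)
	{
		int ret;

		pthread_mutex_lock(&vm->slots_lock);
		vm->scratch_src = src;
		ret = map_private_pfn(vm, dst_label);
		vm->scratch_src = NULL;
		pthread_mutex_unlock(&vm->slots_lock);
		return ret;
	}

	int main(void)
	{
		struct vm vm = { .slots_lock = PTHREAD_MUTEX_INITIALIZER };

		return post_populate(&vm, "initial image page", "gfn 0x1000") ?
		       EXIT_FAILURE : EXIT_SUCCESS;
	}

Because the ADD now happens synchronously inside the map, there is no window where a mapping exists in the mirror EPT but not in the S-EPT, which is why the nr_premapped counter and its check in tdx_td_finalize() can be dropped.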
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index ca39a9391db1..1239d9845093 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -36,8 +36,12 @@ struct kvm_tdx {
struct tdx_td td;
- /* For KVM_TDX_INIT_MEM_REGION. */
- atomic64_t nr_premapped;
+ /*
+ * Scratch pointer used to pass the source page to tdx_mem_page_add().
+ * Protected by slots_lock, and non-NULL only when mapping a private
+ * pfn via tdx_gmem_post_populate().
+ */
+ struct page *page_add_src;
/*
* Prevent vCPUs from TD entry to ensure SEPT zap related SEAMCALLs do