| author | Maxim Levitsky <mlevitsk@redhat.com> | 2025-06-11 15:45:20 -0700 |
|---|---|---|
| committer | Sean Christopherson <seanjc@google.com> | 2025-06-20 13:53:02 -0700 |
| commit | d921665e01ba86212bdace238bdff123bceffd46 (patch) | |
| tree | 669cb0f54a11e86204c8241092d540a30ccf2565 /arch/x86/kvm/svm/avic.c | |
| parent | bafddc70001d1834b2f2e490e108bbb8812b4bed (diff) | |
KVM: SVM: Add enable_ipiv param, never set IsRunning if disabled
Let userspace "disable" IPI virtualization for AVIC via the enable_ipiv
module param, by never setting IsRunning. SVM doesn't provide a way to
disable IPI virtualization in hardware, but by ensuring CPUs never see
IsRunning=1, every IPI in the guest (except for self-IPIs) will generate a
VM-Exit.
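
A minimal, self-contained sketch of that filtering idea (illustrative only; the helper, variable names, and bit positions are assumptions for the example, not code lifted from the patch): with the param clear, nothing that reaches the real Physical APIC ID table ever carries IsRunning, so guest IPIs always take the VM-Exit path.

```c
/*
 * Illustrative userspace sketch, not kernel code.  With enable_ipiv clear,
 * every value written to the real Physical APIC ID table has IsRunning
 * stripped, so the CPU never observes IsRunning=1 and guest IPIs (other
 * than self-IPIs) always VM-Exit.  Bit positions are assumed.
 */
#include <stdbool.h>
#include <stdint.h>

#define ENTRY_IS_RUNNING_MASK	(1ULL << 62)	/* assumed bit position */
#define ENTRY_VALID_MASK	(1ULL << 63)	/* assumed bit position */

static bool enable_ipiv;	/* mirrors the module param */

static void table_write(uint64_t *table, int id, uint64_t entry)
{
	/* The CPU must never see IsRunning=1 when IPI virt is "disabled". */
	if (!enable_ipiv)
		entry &= ~ENTRY_IS_RUNNING_MASK;
	table[id] = entry;
}
```

In practice the knob is the enable_ipiv module parameter, so disabling it would look something like loading kvm_amd with `avic=1 enable_ipiv=0` (treat the exact invocation as an assumption; only the parameter name comes from the patch).
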
To avoid setting the real IsRunning bit, while still allowing KVM to use
each vCPU's entry to update GA log entries, simply maintain a shadow of
the entry, without propagating IsRunning updates to the real table when
IPI virtualization is disabled.
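
A rough sketch of that split, building on the definitions in the previous snippet (structure and helper names other than avic_physical_id_entry and enable_ipiv are invented for illustration): the shadow always records the full update, and only the filtered value is written to the hardware-visible table.

```c
/* Per-vCPU shadow of the vCPU's Physical APIC ID table entry. */
struct vcpu_sketch {
	uint64_t avic_physical_id_entry;
};

#define ENTRY_HOST_PHYSICAL_ID_MASK	0xFFFULL	/* assumed field width */

static void sketch_vcpu_load(struct vcpu_sketch *svm, uint64_t *table,
			     int id, uint64_t host_apic_id)
{
	uint64_t entry = svm->avic_physical_id_entry;

	entry &= ~ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= host_apic_id & ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= ENTRY_IS_RUNNING_MASK;

	/* The shadow always records the full update (IsRunning included)... */
	svm->avic_physical_id_entry = entry;

	/* ...the real table only sees IsRunning when IPI virt is enabled. */
	table_write(table, id, entry);
}

static void sketch_vcpu_put(struct vcpu_sketch *svm, uint64_t *table, int id)
{
	uint64_t entry = svm->avic_physical_id_entry & ~ENTRY_IS_RUNNING_MASK;

	svm->avic_physical_id_entry = entry;

	/* With IPI virt disabled the table never had IsRunning set. */
	if (enable_ipiv)
		table_write(table, id, entry);
}
```

The IOMMU/GA-log paths then read the shadow, so KVM still sees IsRunning as it tracks it even though the CPU-visible table never does.
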
Providing a way to effectively disable IPI virtualization will allow KVM
to safely enable AVIC on hardware that is susceptible to erratum #1235,
which causes hardware to sometimes fail to detect that the IsRunning bit
has been cleared by software.
Note, the table _must_ be fully populated, as broadcast IPIs skip invalid
entries, i.e. won't generate VM-Exit if every entry is invalid, and so
simply pointing the VMCB at a common dummy table won't work.
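
Hence the init path (first hunk in the diff below) unconditionally installs a Valid entry for every vCPU in the real table, and only the IsRunning updates are filtered afterwards. Continuing the sketch above (backing_page_pa is a stand-in for the vCPU's AVIC backing page address):

```c
/*
 * Every vCPU installs a Valid (but not Running) entry at init time, so a
 * broadcast IPI walking the table finds no invalid slots to skip and is
 * guaranteed to VM-Exit.
 */
static void sketch_init_entry(struct vcpu_sketch *svm, uint64_t *table,
			      int id, uint64_t backing_page_pa)
{
	uint64_t entry = backing_page_pa | ENTRY_VALID_MASK;

	svm->avic_physical_id_entry = entry;
	table[id] = entry;	/* real table stays fully populated */
}
```
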
Alternatively, KVM could allocate a shadow of the entire table, but that'd
be a waste of 4KiB since the per-vCPU entry doesn't actually consume an
additional 8 bytes of memory (vCPU structures are large enough that they
are backed by order-N pages).
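
For scale, assuming the table's usual layout of 512 eight-byte entries (one 4KiB page), the rejected alternative would cost a full page per VM, whereas the chosen approach adds a single u64 to the already page-backed vcpu_svm:

```c
/* Back-of-the-envelope for the rejected full shadow table (sizes assumed). */
#define SHADOW_TABLE_ENTRIES	512
#define SHADOW_ENTRY_BYTES	8

_Static_assert(SHADOW_TABLE_ENTRIES * SHADOW_ENTRY_BYTES == 4096,
	       "a full shadow table would burn a 4KiB page per VM");
```
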
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
[sean: keep "entry" variables, reuse enable_ipiv, split from erratum]
Link: https://lore.kernel.org/r/20250611224604.313496-19-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch/x86/kvm/svm/avic.c')
-rw-r--r--  arch/x86/kvm/svm/avic.c  32
1 file changed, 26 insertions, 6 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index baee6d412e78..d0b845ab66fe 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -293,6 +293,13 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	/* Setting AVIC backing page address in the phy APIC ID table */
 	new_entry = avic_get_backing_page_address(svm) |
 		    AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+	svm->avic_physical_id_entry = new_entry;
+
+	/*
+	 * Initialize the real table, as vCPUs must have a valid entry in order
+	 * for broadcast IPIs to function correctly (broadcast IPIs ignore
+	 * invalid entries, i.e. aren't guaranteed to generate a VM-Exit).
+	 */
 	WRITE_ONCE(kvm_svm->avic_physical_id_table[id], new_entry);
 
 	return 0;
@@ -770,8 +777,6 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
 			   struct amd_iommu_pi_data *pi)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 	unsigned long flags;
 	u64 entry;
 
@@ -789,7 +794,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
 	 * will update the pCPU info when the vCPU awkened and/or scheduled in.
 	 * See also avic_vcpu_load().
 	 */
-	entry = READ_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id]);
+	entry = svm->avic_physical_id_entry;
 	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
 		amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
 				    true, pi->ir_data);
@@ -999,14 +1004,26 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);
 
-	entry = READ_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id]);
+	entry = svm->avic_physical_id_entry;
 	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 
 	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+	svm->avic_physical_id_entry = entry;
+
+	/*
+	 * If IPI virtualization is disabled, clear IsRunning when updating the
+	 * actual Physical ID table, so that the CPU never sees IsRunning=1.
+	 * Keep the APIC ID up-to-date in the entry to minimize the chances of
+	 * things going sideways if hardware peeks at the ID.
+	 */
+	if (!enable_ipiv)
+		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
 	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
+
 	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
 
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
@@ -1031,7 +1048,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
 	 * recursively.
 	 */
-	entry = READ_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id]);
+	entry = svm->avic_physical_id_entry;
 
 	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
 	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
@@ -1050,7 +1067,10 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
+	svm->avic_physical_id_entry = entry;
+
+	if (enable_ipiv)
+		WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
 
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
