summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-02-04 18:30:32 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2026-02-04 18:30:32 +0100
commit0de4a0eec25b9171f2a2abb1a820e125e6797770 (patch)
tree27ed8be329cdb1ba589feeea3f6906d16375f8b2
parente89f0e9a0a007e8c3afb8ecd739c0b3255422b00 (diff)
parentf8ade833b733ae0b72e87ac6d2202a1afbe3eb4a (diff)
Merge tag 'kvm-x86-fixes-6.19-rc8' of https://github.com/kvm-x86/linux into HEAD
Final KVM fixes for 6.19: - Fix a bug where AVIC is incorrectly inhibited when running with x2AVIC disabled via module param (or on a system without x2AVIC). - Fix a dangling device posted IRQs bug by explicitly checking if the irqfd is still active (on the list) when handling an eventfd signal, instead of zeroing the irqfd's routing information when the irqfd is deassigned. Zeroing the irqfd's routing info causes arm64 and x86's to not disable posting for the IRQ (kvm_arch_irq_bypass_del_producer() looks for an MSI), incorrectly leaving the IRQ in posted mode (and leading to use-after-free and memory leaks on AMD in particular). This is both the most pressing and scariest, but it's been in -next for a while. - Disable FORTIFY_SOURCE for KVM selftests to prevent the compiler from generating calls to the checked versions of memset() and friends, which leads to unexpected page faults in guest code due e.g. __memset_chk@plt not being resolved. - Explicitly configure the support XSS from within {svm,vmx}_set_cpu_caps() to fix a bug where VMX will compute the reference VMCS configuration with SHSTK and IBT enabled, but then compute each CPUs local config with SHSTK and IBT disabled if not all CET xfeatures are enabled, e.g. if the kernel is built with X86_KERNEL_IBT=n. The mismatch in features results in differing nVMX setting, and ultimately causes kvm-intel.ko to refuse to load with nested=1.
-rw-r--r--arch/x86/kvm/irq.c3
-rw-r--r--arch/x86/kvm/svm/avic.c4
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c30
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--virt/kvm/eventfd.c44
8 files changed, 52 insertions, 36 deletions
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7cc8950005b6..4c7688670c2d 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
*/
spin_lock_irq(&kvm->irqfds.lock);
- if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
+ if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI ||
+ WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) {
ret = kvm_pi_update_irte(irqfd, NULL);
if (ret)
pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 6b77b2033208..0f6c8596719b 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
+ u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
struct vcpu_svm *svm = to_svm(vcpu);
u32 id = vcpu->vcpu_id;
@@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
* avic_vcpu_load() expects to be called if and only if the vCPU has
* fully initialized AVIC.
*/
- if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
- (id > x2avic_max_physical_id)) {
+ if (id > max_id) {
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
vcpu->arch.apic->apicv_active = false;
return 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 24d59ccfa40d..4394be40fe78 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void)
*/
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
+
+ kvm_setup_xss_caps();
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6b96f7aea20b..8c94241fbcca 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
+
+ kvm_setup_xss_caps();
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63afdb6bb078..72d37c8930ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = {
};
#endif
+void kvm_setup_xss_caps(void)
+{
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ kvm_caps.supported_xss = 0;
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
+
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
{
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!tdp_enabled)
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
- if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
- kvm_caps.supported_xss = 0;
-
- if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
- !kvm_cpu_cap_has(X86_FEATURE_IBT))
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
-
- if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
- kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
- kvm_cpu_cap_clear(X86_FEATURE_IBT);
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
- }
-
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index fdab0ad49098..00de24f55b1f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
+void kvm_setup_xss_caps(void);
+
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
* features for which the current process doesn't (yet) have permission to use.
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index ba5c2b643efa..d45bf4ccb3bf 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0e8b5277be3b..a369b20d47f0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work)
}
-/* assumes kvm->irqfds.lock is held */
-static bool
-irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
+static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
+ /*
+ * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must
+ * be held to prevent false positives (on the irqfd being active), and
+ * while false negatives are impossible as irqfds are never added back
+ * to the list once they're deactivated, the caller must at least hold
+ * SRCU to guard against routing changes if the irqfd is deactivated.
+ */
+ lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) ||
+ srcu_read_lock_held(&irqfd->kvm->irq_srcu));
+
return list_empty(&irqfd->list) ? false : true;
}
/*
* Mark the irqfd as inactive and schedule it for removal
- *
- * assumes kvm->irqfds.lock is held
*/
-static void
-irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
+static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
+ lockdep_assert_held(&irqfd->kvm->irqfds.lock);
+
BUG_ON(!irqfd_is_active(irqfd));
list_del_init(&irqfd->list);
@@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
- /* An event has been signaled, inject an interrupt */
- if (kvm_arch_set_irq_inatomic(&irq, kvm,
+
+ /*
+ * An event has been signaled, inject an interrupt unless the
+ * irqfd is being deassigned (isn't active), in which case the
+ * routing information may be stale (once the irqfd is removed
+ * from the list, it will stop receiving routing updates).
+ */
+ if (unlikely(!irqfd_is_active(irqfd)) ||
+ kvm_arch_set_irq_inatomic(&irq, kvm,
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
@@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
- if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
- /*
- * This clearing of irq_entry.type is needed for when
- * another thread calls kvm_irq_routing_update before
- * we flush workqueue below (we synchronize with
- * kvm_irq_routing_update using irqfds.lock).
- */
- write_seqcount_begin(&irqfd->irq_entry_sc);
- irqfd->irq_entry.type = 0;
- write_seqcount_end(&irqfd->irq_entry_sc);
+ if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi)
irqfd_deactivate(irqfd);
- }
}
spin_unlock_irq(&kvm->irqfds.lock);