summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/io.h26
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_nested.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h3
-rw-r--r--arch/arm64/include/asm/tlbflush.h63
-rw-r--r--arch/arm64/kernel/acpi.c2
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/topology.c21
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/arm.c1
-rw-r--r--arch/arm64/kvm/at.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c37
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c8
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c10
-rw-r--r--arch/arm64/kvm/mmu.c12
-rw-r--r--arch/arm64/kvm/nested.c63
-rw-r--r--arch/arm64/kvm/sys_regs.c3
-rw-r--r--arch/arm64/lib/delay.c6
-rw-r--r--arch/arm64/mm/ioremap.c6
-rw-r--r--arch/arm64/mm/mmap.c12
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/loongarch/kvm/Kconfig1
-rw-r--r--arch/loongarch/kvm/vm.c1
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/mips/kvm/mips.c1
-rw-r--r--arch/powerpc/kvm/Kconfig4
-rw-r--r--arch/powerpc/kvm/powerpc.c6
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs.dtsi2
-rw-r--r--arch/riscv/kvm/Kconfig1
-rw-r--r--arch/riscv/kvm/vm.c1
-rw-r--r--arch/s390/include/asm/idle.h4
-rw-r--r--arch/s390/include/asm/vtime.h34
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/idle.c25
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/irq.c20
-rw-r--r--arch/s390/kernel/vtime.c42
-rw-r--r--arch/s390/kvm/Kconfig2
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/mm/pfault.c4
-rw-r--r--arch/sparc/kernel/iommu.c2
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/um/drivers/ubd_kern.c10
-rw-r--r--arch/x86/entry/entry_fred.c5
-rw-r--r--arch/x86/events/intel/uncore_snbep.c28
-rw-r--r--arch/x86/include/asm/bug.h6
-rw-r--r--arch/x86/include/asm/cfi.h12
-rw-r--r--arch/x86/include/asm/irqflags.h4
-rw-r--r--arch/x86/include/asm/linkage.h4
-rw-r--r--arch/x86/include/asm/percpu.h2
-rw-r--r--arch/x86/include/asm/runtime-const.h6
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/kernel/alternative.c29
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--arch/x86/mm/extable.c7
-rw-r--r--arch/x86/net/bpf_jit_comp.c13
60 files changed, 337 insertions, 267 deletions
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 83e03abbb2ca..8cbd1e96fd50 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -264,19 +264,33 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
pgprot_t *prot);
int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
+void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot);
-#define ioremap_prot ioremap_prot
+static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size,
+ pgprot_t user_prot)
+{
+ pgprot_t prot;
+ ptdesc_t user_prot_val = pgprot_val(user_prot);
+
+ if (WARN_ON_ONCE(!(user_prot_val & PTE_USER)))
+ return NULL;
-#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
+ prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK,
+ user_prot_val & PTE_ATTRINDX_MASK);
+ return __ioremap_prot(phys, size, prot);
+}
+#define ioremap_prot ioremap_prot
+#define ioremap(addr, size) \
+ __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
+ __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
- ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+ __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
#define ioremap_encrypted(addr, size) \
- ioremap_prot((addr), (size), PAGE_KERNEL)
+ __ioremap_prot((addr), (size), PAGE_KERNEL)
/*
* io{read,write}{16,32,64}be() macros
@@ -297,7 +311,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
- return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
+ return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d5a3bbdb95e..2ca264b3db5f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1616,7 +1616,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
#define kvm_has_s1poe(k) \
- (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+ (system_supports_poe() && \
+ kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
#define kvm_has_ras(k) \
(kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 905c658057a4..091544e6af44 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -397,6 +397,8 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
+u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime);
+
#define vncr_fixmap(c) \
({ \
u32 __c = (c); \
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index d27e8872fe3c..2b32639160de 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void)
#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
-#define PAGE_GCS __pgprot(_PAGE_GCS)
-#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
-
#define PIE_E0 ( \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a2d65d7d6aae..1416e652612b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -31,19 +31,11 @@
*/
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op "\n" \
- ALTERNATIVE("nop\n nop", \
- "dsb ish\n tlbi " #op, \
- ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
- "tlbi " #op ", %0\n" \
- ALTERNATIVE("nop\n nop", \
- "dsb ish\n tlbi " #op ", %0", \
- ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
- : : "r" (arg))
+ "tlbi " #op ", %x0\n" \
+ : : "rZ" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
@@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void)
(__pages >> (5 * (scale) + 1)) - 1; \
})
+#define __repeat_tlbi_sync(op, arg...) \
+do { \
+ if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \
+ break; \
+ __tlbi(op, ##arg); \
+ dsb(ish); \
+} while (0)
+
+/*
+ * Complete broadcast TLB maintenance issued by the host which invalidates
+ * stage 1 information in the host's own translation regime.
+ */
+static inline void __tlbi_sync_s1ish(void)
+{
+ dsb(ish);
+ __repeat_tlbi_sync(vale1is, 0);
+}
+
+/*
+ * Complete broadcast TLB maintenance issued by hyp code which invalidates
+ * stage 1 translation information in any translation regime.
+ */
+static inline void __tlbi_sync_s1ish_hyp(void)
+{
+ dsb(ish);
+ __repeat_tlbi_sync(vale2is, 0);
+}
+
/*
* TLB Invalidation
* ================
@@ -279,7 +299,7 @@ static inline void flush_tlb_all(void)
{
dsb(ishst);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
@@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
- dsb(ish);
+ __tlbi_sync_s1ish();
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
@@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
flush_tlb_page_nosync(vma, uaddr);
- dsb(ish);
+ __tlbi_sync_s1ish();
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
- /*
- * TLB flush deferral is not required on systems which are affected by
- * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
- * will have two consecutive TLBI instructions with a dsb(ish) in between
- * defeating the purpose (i.e save overall 'dsb ish' cost).
- */
- if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
- return false;
-
return true;
}
@@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
*/
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- dsb(ish);
+ __tlbi_sync_s1ish();
}
/*
@@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
{
__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
last_level, tlb_level);
- dsb(ish);
+ __tlbi_sync_s1ish();
}
static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
@@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
@@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ishst);
__tlbi(vaae1is, addr);
- dsb(ish);
+ __tlbi_sync_s1ish();
isb();
}
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index af90128cfed5..a9d884fd1d00 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return ioremap_prot(phys, size, prot);
+ return __ioremap_prot(phys, size, prot);
}
/*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 4a609e9b65de..b9d4998c97ef 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
* We pick the reserved-ASID to minimise the impact.
*/
__tlbi(aside1is, __TLBI_VADDR(0, 0));
- dsb(ish);
+ __tlbi_sync_s1ish();
}
ret = caches_clean_inval_user_pou(start, start + chunk);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 3fe1faab0362..b32f13358fbb 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -400,16 +400,25 @@ static inline
int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
{
/*
- * Abort call on counterless CPU or when interrupts are
- * disabled - can lead to deadlock in smp sync call.
+ * Abort call on counterless CPU.
*/
if (!cpu_has_amu_feat(cpu))
return -EOPNOTSUPP;
- if (WARN_ON_ONCE(irqs_disabled()))
- return -EPERM;
-
- smp_call_function_single(cpu, func, val, 1);
+ if (irqs_disabled()) {
+ /*
+ * When IRQs are disabled (tick path: sched_tick ->
+ * topology_scale_freq_tick or cppc_scale_freq_tick), only local
+ * CPU counter reads are allowed. Remote CPU counter read would
+ * require smp_call_function_single() which is unsafe with IRQs
+ * disabled.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id()))
+ return -EPERM;
+ func(val);
+ } else {
+ smp_call_function_single(cpu, func, val, 1);
+ }
return 0;
}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4f803fd1c99a..7d1f22fd490b 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,7 +21,6 @@ menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
- select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 29f0326f7e00..410ffd41fd73 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -358,7 +358,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_USER_MEMORY:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 885bd5bb2f41..6588ea251ed7 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -540,31 +540,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
- if (wr->nG) {
- u64 asid_ttbr, tcr;
-
- switch (wi->regime) {
- case TR_EL10:
- tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
- asid_ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL1));
- break;
- case TR_EL20:
- tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- asid_ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- break;
- default:
- BUG();
- }
-
- wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- wr->asid &= GENMASK(7, 0);
- }
+ if (wr->nG)
+ wr->asid = get_asid_by_regime(vcpu, wi->regime);
return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index ae8391baebc3..218976287d3f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
*/
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8e29d7734a15..2f029bfe4755 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -342,6 +342,7 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
hyp_vm->kvm.arch.flags = host_arch_flags;
+ hyp_vm->kvm.arch.flags &= ~BIT_ULL(KVM_ARCH_FLAG_ID_REGS_INITIALIZED);
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
@@ -391,7 +392,7 @@ static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
return;
- sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
+ sve_state = hyp_vcpu->vcpu.arch.sve_state;
hyp_unpin_shared_mem(sve_state,
sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
}
@@ -471,6 +472,35 @@ err:
return ret;
}
+static int vm_copy_id_regs(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
+ const struct kvm *host_kvm = hyp_vm->host_kvm;
+ struct kvm *kvm = &hyp_vm->kvm;
+
+ if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &host_kvm->arch.flags))
+ return -EINVAL;
+
+ if (test_and_set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
+ return 0;
+
+ memcpy(kvm->arch.id_regs, host_kvm->arch.id_regs, sizeof(kvm->arch.id_regs));
+
+ return 0;
+}
+
+static int pkvm_vcpu_init_sysregs(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ int ret = 0;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+ else
+ ret = vm_copy_id_regs(hyp_vcpu);
+
+ return ret;
+}
+
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
struct pkvm_hyp_vm *hyp_vm,
struct kvm_vcpu *host_vcpu)
@@ -490,8 +520,9 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
- if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
- kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+ ret = pkvm_vcpu_init_sysregs(hyp_vcpu);
+ if (ret)
+ goto done;
ret = pkvm_vcpu_init_traps(hyp_vcpu);
if (ret)
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 48da9ca9763f..3dc1ce0d27fe 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0e4ddd28ef5d..9b480f947da2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
*unmapped += granule;
}
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index ec2569818629..35855dadfb1b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
}
/*
@@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
default:
ret = -EINVAL;
}
- dsb(ish);
+ __tlbi_sync_s1ish_hyp();
isb();
if (mmu)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 070a01e53fcb..ec2eee857208 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1754,14 +1754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
/*
- * Both the canonical IPA and fault IPA must be hugepage-aligned to
- * ensure we find the right PFN and lay down the mapping in the right
- * place.
+ * Both the canonical IPA and fault IPA must be aligned to the
+ * mapping size to ensure we find the right PFN and lay down the
+ * mapping in the right place.
*/
- if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) {
- fault_ipa &= ~(vma_pagesize - 1);
- ipa &= ~(vma_pagesize - 1);
- }
+ fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize);
+ ipa = ALIGN_DOWN(ipa, vma_pagesize);
gfn = ipa >> PAGE_SHIFT;
mte_allowed = kvm_vma_mte_allowed(vma);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 620126d1f0dc..12c9f6e8dfda 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -854,6 +854,33 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
return kvm_inject_nested_sync(vcpu, esr_el2);
}
+u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ enum vcpu_sysreg ttbr_elx;
+ u64 tcr;
+ u16 asid;
+
+ switch (regime) {
+ case TR_EL10:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
+ ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL1 : TTBR0_EL1;
+ break;
+ case TR_EL20:
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL2 : TTBR0_EL2;
+ break;
+ default:
+ BUG();
+ }
+
+ asid = FIELD_GET(TTBRx_EL1_ASID, vcpu_read_sys_reg(vcpu, ttbr_elx));
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+ !(tcr & TCR_ASID16))
+ asid &= GENMASK(7, 0);
+
+ return asid;
+}
+
static void invalidate_vncr(struct vncr_tlb *vt)
{
vt->valid = false;
@@ -1154,9 +1181,6 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
int i;
- if (!kvm->arch.nested_mmus_size)
- return;
-
for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
@@ -1336,20 +1360,8 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return false;
- if (vt->wr.nG) {
- u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- u64 ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- u16 asid;
-
- asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- asid &= GENMASK(7, 0);
-
- return asid == vt->wr.asid;
- }
+ if (vt->wr.nG)
+ return get_asid_by_regime(vcpu, TR_EL20) == vt->wr.asid;
return true;
}
@@ -1452,21 +1464,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
if (read_vncr_el2(vcpu) != vt->gva)
return;
- if (vt->wr.nG) {
- u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
- u64 ttbr = ((tcr & TCR_A1) ?
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
- vcpu_read_sys_reg(vcpu, TTBR0_EL2));
- u16 asid;
-
- asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
- if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
- !(tcr & TCR_ASID16))
- asid &= GENMASK(7, 0);
-
- if (asid != vt->wr.asid)
- return;
- }
+ if (vt->wr.nG && get_asid_by_regime(vcpu, TR_EL20) != vt->wr.asid)
+ return;
vt->cpu = smp_processor_id();
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a7cd0badc20c..1b4cacb6e918 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1816,6 +1816,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1POE |
ID_AA64MMFR3_EL1_S1PIE;
+
+ if (!system_supports_poe())
+ val &= ~ID_AA64MMFR3_EL1_S1POE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ID_MMFR4_EL1_CCIDX;
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index d02341303899..e278e060e78a 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops)
* Note that userspace cannot change the offset behind our back either,
* as the vcpu mutex is held as long as KVM_RUN is in progress.
*/
-#define __delay_cycles() __arch_counter_get_cntvct_stable()
+static cycles_t notrace __delay_cycles(void)
+{
+ guard(preempt_notrace)();
+ return __arch_counter_get_cntvct_stable();
+}
void __delay(unsigned long cycles)
{
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b12cbed9b5ad..6eef48ef6278 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
return 0;
}
-void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- pgprot_t pgprot)
+void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size,
+ pgprot_t pgprot)
{
unsigned long last_addr = phys_addr + size - 1;
@@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
return generic_ioremap_prot(phys_addr, size, pgprot);
}
-EXPORT_SYMBOL(ioremap_prot);
+EXPORT_SYMBOL(__ioremap_prot);
/*
* Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 08ee177432c2..92b2f5097a96 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};
+static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
+
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
@@ -73,9 +75,11 @@ static int __init adjust_protection_map(void)
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}
- if (lpa2_is_enabled())
+ if (lpa2_is_enabled()) {
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
+ gcs_page_prot &= ~PTE_SHARED;
+ }
return 0;
}
@@ -87,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
- prot = _PAGE_GCS_RO;
+ /* Honour mprotect(PROT_NONE) on shadow stack mappings */
+ if (vm_flags & VM_ACCESS_FLAGS)
+ prot = gcs_page_prot;
+ else
+ prot = pgprot_val(protection_map[VM_NONE]);
} else {
prot = pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 356d33c7a4ae..adf84962d579 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2119,7 +2119,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
- sizeof(u32), &header, &image_ptr,
+ sizeof(u64), &header, &image_ptr,
jit_fill_hole);
if (!ro_header) {
prog = orig_prog;
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index ed4f724db774..8e5213609975 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -28,7 +28,6 @@ config KVM
select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMIO
select VIRT_XFER_TO_GUEST_WORK
select SCHED_INFO
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 63fd40530aa9..5282158b8122 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -118,7 +118,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_READONLY_MEM:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_MP_STATE:
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index cc13cc35f208..b1b9a1d67758 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -23,7 +23,6 @@ config KVM
select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_GENERIC_HARDWARE_ENABLING
select HAVE_KVM_READONLY_MEM
help
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index b0fb92fda4d4..29d9f630edfb 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1035,7 +1035,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_READONLY_MEM:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c9a2d50ff1b0..9a0d1c1aca6c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -38,7 +38,6 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR_POSSIBLE
bool
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
config KVM_BOOK3S_HV_POSSIBLE
bool
@@ -81,7 +80,6 @@ config KVM_BOOK3S_64_HV
tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_BOOK3S_HV_PMU
select CMA
help
@@ -203,7 +201,6 @@ config KVM_E500V2
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
- select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -220,7 +217,6 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
- select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9a89a6d98f97..00302399fc37 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -623,12 +623,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
!kvmppc_hv_ops->enable_nested(NULL));
break;
-#endif
- case KVM_CAP_SYNC_MMU:
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
- r = 1;
- break;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_HTAB_FD:
r = hv_enabled;
break;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 5c2963e269b8..a0ffedc2d344 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -428,6 +428,7 @@
clocks = <&clkcfg CLK_CAN0>, <&clkcfg CLK_MSSPLL3>;
interrupt-parent = <&plic>;
interrupts = <56>;
+ resets = <&mss_top_sysreg CLK_CAN0>;
status = "disabled";
};
@@ -437,6 +438,7 @@
clocks = <&clkcfg CLK_CAN1>, <&clkcfg CLK_MSSPLL3>;
interrupt-parent = <&plic>;
interrupts = <57>;
+ resets = <&mss_top_sysreg CLK_CAN1>;
status = "disabled";
};
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 77379f77840a..ec2cee0a39e0 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -30,7 +30,6 @@ config KVM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_MMIO
select VIRT_XFER_TO_GUEST_WORK
- select KVM_GENERIC_MMU_NOTIFIER
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
help
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 7cbd2340c190..58bce57dc55b 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -181,7 +181,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_USER_MEMORY:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_READONLY_MEM:
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 09f763b9eb40..32536ee34aa0 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -19,9 +19,9 @@ struct s390_idle_data {
unsigned long mt_cycles_enter[8];
};
+DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
+
extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
-void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
-
#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index 9d25fb35a042..b1db75d14e9d 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -2,6 +2,12 @@
#ifndef _S390_VTIME_H
#define _S390_VTIME_H
+#include <asm/lowcore.h>
+#include <asm/cpu_mf.h>
+#include <asm/idle.h>
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
static inline void update_timer_sys(void)
{
struct lowcore *lc = get_lowcore();
@@ -20,4 +26,32 @@ static inline void update_timer_mcck(void)
lc->last_update_timer = lc->mcck_enter_timer;
}
+static inline void update_timer_idle(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
+ u64 cycles_new[8];
+ int i, mtid;
+
+ mtid = smp_cpu_mtid;
+ if (mtid) {
+ stcctm(MT_DIAG, mtid, cycles_new);
+ for (i = 0; i < mtid; i++)
+ __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
+ }
+ /*
+ * This is a bit subtle: Forward last_update_clock so it excludes idle
+ * time. For correct steal time calculation in do_account_vtime() add
+ * passed wall time before idle_enter to steal_timer:
+ * During the passed wall time before idle_enter CPU time may have
+ * been accounted to system, hardirq, softirq, etc. lowcore fields.
+ * The accounted CPU times will be subtracted again from steal_timer
+ * when accumulated steal time is calculated in do_account_vtime().
+ */
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
+}
+
#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index dd55cc6bbc28..fb67b4abe68c 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -56,8 +56,6 @@ long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
-DECLARE_PER_CPU(u64, mt_cycles[8]);
-
unsigned long stack_alloc(void);
void stack_free(unsigned long stack);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 39cb8d0ae348..1f1b06b6b4ef 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -15,37 +15,22 @@
#include <trace/events/power.h>
#include <asm/cpu_mf.h>
#include <asm/cputime.h>
+#include <asm/idle.h>
#include <asm/nmi.h>
#include <asm/smp.h>
-#include "entry.h"
-static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
- struct lowcore *lc = get_lowcore();
unsigned long idle_time;
- u64 cycles_new[8];
- int i;
- if (smp_cpu_mtid) {
- stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
- for (i = 0; i < smp_cpu_mtid; i++)
- this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
- }
-
- idle_time = lc->int_clock - idle->clock_idle_enter;
-
- lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
- lc->last_update_clock = lc->int_clock;
-
- lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
- lc->last_update_timer = lc->sys_enter_timer;
+ idle_time = get_lowcore()->int_clock - idle->clock_idle_enter;
/* Account time spent with enabled wait psw loaded as idle time. */
- WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
- WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ __atomic64_add(idle_time, &idle->idle_time);
+ __atomic64_add_const(1, &idle->idle_count);
account_idle_time(cputime_to_nsecs(idle_time));
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index dcdc7e274848..049c557c452f 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2377,7 +2377,7 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void s390_reset_system(void)
+void __no_stack_protector s390_reset_system(void)
{
/* Disable prefixing */
set_prefix(0);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f81723bc8856..7fdf960191d3 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -146,6 +146,12 @@ void noinstr do_io_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle) {
+ update_timer_idle();
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+ }
+
irq_enter_rcu();
if (user_mode(regs)) {
@@ -154,7 +160,6 @@ void noinstr do_io_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -171,9 +176,6 @@ void noinstr do_io_irq(struct pt_regs *regs)
set_irq_regs(old_regs);
irqentry_exit(regs, state);
-
- if (from_idle)
- regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
}
void noinstr do_ext_irq(struct pt_regs *regs)
@@ -182,6 +184,12 @@ void noinstr do_ext_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
bool from_idle;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+ if (from_idle) {
+ update_timer_idle();
+ regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+ }
+
irq_enter_rcu();
if (user_mode(regs)) {
@@ -194,7 +202,6 @@ void noinstr do_ext_irq(struct pt_regs *regs)
regs->int_parm = get_lowcore()->ext_params;
regs->int_parm_long = get_lowcore()->ext_params2;
- from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -203,9 +210,6 @@ void noinstr do_ext_irq(struct pt_regs *regs)
irq_exit_rcu();
set_irq_regs(old_regs);
irqentry_exit(regs, state);
-
- if (from_idle)
- regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
}
static void show_msi_interrupt(struct seq_file *p, int irq)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 234a0ba30510..bf48744d0912 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -48,8 +48,7 @@ static inline void set_vtimer(u64 expires)
static inline int virt_timer_forward(u64 elapsed)
{
- BUG_ON(!irqs_disabled());
-
+ lockdep_assert_irqs_disabled();
if (list_empty(&virt_timer_list))
return 0;
elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
@@ -137,23 +136,16 @@ static int do_account_vtime(struct task_struct *tsk)
lc->system_timer += timer;
/* Update MT utilization calculation */
- if (smp_cpu_mtid &&
- time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
/* Calculate cputime delta */
- user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(lc->user_timer));
- guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(lc->guest_timer));
- system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(lc->system_timer));
- hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(lc->hardirq_timer));
- softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(lc->softirq_timer));
- lc->steal_timer +=
- clock - user - guest - system - hardirq - softirq;
+ user = update_tsk_timer(&tsk->thread.user_timer, lc->user_timer);
+ guest = update_tsk_timer(&tsk->thread.guest_timer, lc->guest_timer);
+ system = update_tsk_timer(&tsk->thread.system_timer, lc->system_timer);
+ hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, lc->hardirq_timer);
+ softirq = update_tsk_timer(&tsk->thread.softirq_timer, lc->softirq_timer);
+ lc->steal_timer += clock - user - guest - system - hardirq - softirq;
/* Push account value */
if (user) {
@@ -225,10 +217,6 @@ static u64 vtime_delta(void)
return timer - lc->last_update_timer;
}
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
void vtime_account_kernel(struct task_struct *tsk)
{
struct lowcore *lc = get_lowcore();
@@ -238,27 +226,17 @@ void vtime_account_kernel(struct task_struct *tsk)
lc->guest_timer += delta;
else
lc->system_timer += delta;
-
- virt_timer_forward(delta);
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_softirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->softirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->softirq_timer += vtime_delta();
}
void vtime_account_hardirq(struct task_struct *tsk)
{
- u64 delta = vtime_delta();
-
- get_lowcore()->hardirq_timer += delta;
-
- virt_timer_forward(delta);
+ get_lowcore()->hardirq_timer += vtime_delta();
}
/*
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 917ac740513e..5b835bc6a194 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -28,9 +28,7 @@ config KVM
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
select KVM_VFIO
- select MMU_NOTIFIER
select VIRT_XFER_TO_GUEST_WORK
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMU_LOCKLESS_AGING
help
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7a175d86cef0..bc7d6fa66eaf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -601,7 +601,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
switch (ext) {
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
- case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 2f829448c719..6ecd6b0a22a8 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -62,7 +62,7 @@ int __pfault_init(void)
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
: [rc] "+d" (rc)
- : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+ : [refbk] "a" (virt_to_phys(&pfault_init_refbk)), "m" (pfault_init_refbk)
: "cc");
return rc;
}
@@ -84,7 +84,7 @@ void __pfault_fini(void)
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
:
- : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+ : [refbk] "a" (virt_to_phys(&pfault_fini_refbk)), "m" (pfault_fini_refbk)
: "cc");
}
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 46ef88bc9c26..7613ab0ffb89 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -312,6 +312,8 @@ static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys,
if (direction != DMA_TO_DEVICE)
iopte_protection |= IOPTE_WRITE;
+ phys &= IO_PAGE_MASK;
+
for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE)
iopte_val(*base) = iopte_protection | phys;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 440284cc804e..61f14b4c8f90 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -410,6 +410,8 @@ static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys,
iommu_batch_start(dev, prot, entry);
+ phys &= IO_PAGE_MASK;
+
for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) {
long err = iommu_batch_add(phys, mask);
if (unlikely(err < 0L))
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 012b2bcaa8a0..20fc33300a95 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -69,11 +69,11 @@ struct io_thread_req {
};
-static struct io_thread_req * (*irq_req_buffer)[];
+static struct io_thread_req **irq_req_buffer;
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;
-static struct io_thread_req * (*io_req_buffer)[];
+static struct io_thread_req **io_req_buffer;
static struct io_thread_req *io_remainder;
static int io_remainder_size;
@@ -398,7 +398,7 @@ static int thread_fd = -1;
static int bulk_req_safe_read(
int fd,
- struct io_thread_req * (*request_buffer)[],
+ struct io_thread_req **request_buffer,
struct io_thread_req **remainder,
int *remainder_size,
int max_recs
@@ -465,7 +465,7 @@ static irqreturn_t ubd_intr(int irq, void *dev)
&irq_remainder, &irq_remainder_size,
UBD_REQ_BUFFER_SIZE)) >= 0) {
for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
- ubd_end_request((*irq_req_buffer)[i]);
+ ubd_end_request(irq_req_buffer[i]);
}
if (len < 0 && len != -EAGAIN)
@@ -1512,7 +1512,7 @@ void *io_thread(void *arg)
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
- struct io_thread_req *req = (*io_req_buffer)[count];
+ struct io_thread_req *req = io_req_buffer[count];
int i;
io_count++;
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index a9b72997103d..88c757ac8ccd 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -160,8 +160,6 @@ void __init fred_complete_exception_setup(void)
static noinstr void fred_extint(struct pt_regs *regs)
{
unsigned int vector = regs->fred_ss.vector;
- unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
- NR_SYSTEM_VECTORS);
if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
return;
@@ -170,7 +168,8 @@ static noinstr void fred_extint(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
instrumentation_begin();
- sysvec_table[index](regs);
+ sysvec_table[array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
+ NR_SYSTEM_VECTORS)](regs);
instrumentation_end();
irqentry_exit(regs, state);
} else {
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 5ed6e0b7e715..0a1d08136cc1 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = {
.attr_update = uncore_alias_groups,
};
+static struct uncore_event_desc gnr_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type gnr_uncore_imc = {
+ SPR_UNCORE_MMIO_COMMON_FORMAT(),
+ .name = "imc",
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .event_descs = gnr_uncore_imc_events,
+};
+
static struct intel_uncore_type gnr_uncore_pciex8 = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "pciex8",
@@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
NULL,
&spr_uncore_pcu,
&gnr_uncore_ubox,
- &spr_uncore_imc,
+ &gnr_uncore_imc,
NULL,
&gnr_uncore_upi,
NULL,
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 9b4e04690e1a..80c1696d8d59 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -7,7 +7,7 @@
#include <linux/objtool.h>
#include <asm/asm.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct bug_entry;
extern void __WARN_trap(struct bug_entry *bug, ...);
#endif
@@ -137,7 +137,7 @@ do { \
#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/static_call_types.h>
DECLARE_STATIC_CALL(WARN_trap, __WARN_trap);
@@ -153,7 +153,7 @@ struct arch_va_list {
struct sysv_va_list args;
};
extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define __WARN_bug_entry(flags, format) ({ \
struct bug_entry *bug; \
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index c40b9ebc1fb4..ab3fbbd947ed 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -111,6 +111,12 @@ extern bhi_thunk __bhi_args_end[];
struct pt_regs;
+#ifdef CONFIG_CALL_PADDING
+#define CFI_OFFSET (CONFIG_FUNCTION_PADDING_CFI+5)
+#else
+#define CFI_OFFSET 5
+#endif
+
#ifdef CONFIG_CFI
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
#define __bpfcall
@@ -119,11 +125,9 @@ static inline int cfi_get_offset(void)
{
switch (cfi_mode) {
case CFI_FINEIBT:
- return 16;
+ return /* fineibt_prefix_size */ 16;
case CFI_KCFI:
- if (IS_ENABLED(CONFIG_CALL_PADDING))
- return 16;
- return 5;
+ return CFI_OFFSET;
default:
return 0;
}
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a1193e9d65f2..462754b0bf8a 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -77,7 +77,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
#endif
#ifndef CONFIG_PARAVIRT
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
@@ -95,7 +95,7 @@ static __always_inline void halt(void)
{
native_halt();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 9d38ae744a2e..a7294656ad90 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -68,7 +68,7 @@
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
* CFI symbol layout changes.
*
- * Without CALL_THUNKS:
+ * Without CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
@@ -77,7 +77,7 @@
* .long __kcfi_typeid_##name
* name:
*
- * With CALL_THUNKS:
+ * With CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index c55058f3d75e..409981468cba 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -20,7 +20,7 @@
#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
-#else /* !__ASSEMBLY__: */
+#else /* !__ASSEMBLER__: */
#include <linux/args.h>
#include <linux/bits.h>
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index e5a13dc8816e..4cd94fdcb45e 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -6,7 +6,7 @@
#error "Cannot use runtime-const infrastructure from modules"
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro RUNTIME_CONST_PTR sym reg
movq $0x0123456789abcdef, %\reg
@@ -16,7 +16,7 @@
.popsection
.endm
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define runtime_const_ptr(sym) ({ \
typeof(sym) __ret; \
@@ -74,5 +74,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
}
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 869b88061801..3f24cc472ce9 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -25,6 +25,8 @@ extern int ibt_selftest_noendbr(void);
void handle_invalid_op(struct pt_regs *regs);
#endif
+noinstr bool handle_bug(struct pt_regs *regs);
+
static inline int get_si_code(unsigned long condition)
{
if (condition & DR_STEP)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a888ae0f01fb..e87da25d1236 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1182,7 +1182,7 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
poison_endbr(addr);
if (IS_ENABLED(CONFIG_FINEIBT))
- poison_cfi(addr - 16);
+ poison_cfi(addr - CFI_OFFSET);
}
}
@@ -1389,6 +1389,8 @@ extern u8 fineibt_preamble_end[];
#define fineibt_preamble_ud 0x13
#define fineibt_preamble_hash 5
+#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
+
/*
* <fineibt_caller_start>:
* 0: b8 78 56 34 12 mov $0x12345678, %eax
@@ -1634,7 +1636,7 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
* have determined there are no indirect calls to it and we
* don't need no CFI either.
*/
- if (!is_endbr(addr + 16))
+ if (!is_endbr(addr + CFI_OFFSET))
continue;
hash = decode_preamble_hash(addr, &arity);
@@ -1642,6 +1644,15 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
addr, addr, 5, addr))
return -EINVAL;
+ /*
+ * FineIBT relies on being at func-16, so if the preamble is
+	 * actually larger than that, place it at the tail end.
+ *
+ * NOTE: this is possible with things like DEBUG_CALL_THUNKS
+ * and DEBUG_FORCE_FUNCTION_ALIGN_64B.
+ */
+ addr += CFI_OFFSET - fineibt_prefix_size;
+
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
@@ -1664,10 +1675,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- if (!exact_endbr(addr + 16))
+ if (!exact_endbr(addr + CFI_OFFSET))
continue;
- poison_endbr(addr + 16);
+ poison_endbr(addr + CFI_OFFSET);
}
}
@@ -1772,7 +1783,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
if (FINEIBT_WARN(fineibt_preamble_size, 20) ||
FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) ||
FINEIBT_WARN(fineibt_caller_size, 14) ||
- FINEIBT_WARN(fineibt_paranoid_size, 20))
+ FINEIBT_WARN(fineibt_paranoid_size, 20) ||
+ WARN_ON_ONCE(CFI_OFFSET < fineibt_prefix_size))
return;
if (cfi_mode == CFI_AUTO) {
@@ -1886,6 +1898,11 @@ static void poison_cfi(void *addr)
switch (cfi_mode) {
case CFI_FINEIBT:
/*
+ * FineIBT preamble is at func-16.
+ */
+ addr += CFI_OFFSET - fineibt_prefix_size;
+
+ /*
* FineIBT prefix should start with an ENDBR.
*/
if (!is_endbr(addr))
@@ -1923,8 +1940,6 @@ static void poison_cfi(void *addr)
}
}
-#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
-
/*
* When regs->ip points to a 0xD6 byte in the FineIBT preamble,
* return true and fill out target and type.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5a6a772e0a6c..4dbff8ef9b1c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -397,7 +397,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
ILL_ILLOPN, error_get_trap_addr(regs));
}
-static noinstr bool handle_bug(struct pt_regs *regs)
+noinstr bool handle_bug(struct pt_regs *regs)
{
unsigned long addr = regs->ip;
bool handled = false;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d916bd766c94..801bf9e520db 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,7 +20,6 @@ if VIRTUALIZATION
config KVM_X86
def_tristate KVM if (KVM_INTEL != n || KVM_AMD != n)
select KVM_COMMON
- select KVM_GENERIC_MMU_NOTIFIER
select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
select KVM_MMU_LOCKLESS_AGING
select HAVE_KVM_IRQCHIP
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3fb64905d190..a03530795707 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4805,7 +4805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
- case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_NMI:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IOEVENTFD:
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 2fdc1f1f5adb..6b9ff1c6cafa 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -411,14 +411,11 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
return;
if (trapnr == X86_TRAP_UD) {
- if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
- /* Skip the ud2. */
- regs->ip += LEN_UD2;
+ if (handle_bug(regs))
return;
- }
/*
- * If this was a BUG and report_bug returns or if this
+ * If this was a BUG and handle_bug returns or if this
* was just a normal #UD, we want to continue onward and
* crash.
*/
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8f10080e6fe3..e9b78040d703 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -438,17 +438,8 @@ static void emit_kcfi(u8 **pprog, u32 hash)
EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
#ifdef CONFIG_CALL_PADDING
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
+ for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++)
+ EMIT1(0x90);
#endif
EMIT_ENDBR();