summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/GEN-for-each-reg.h31
-rw-r--r--arch/x86/include/asm/Kbuild9
-rw-r--r--arch/x86/include/asm/a.out-core.h67
-rw-r--r--arch/x86/include/asm/acenv.h19
-rw-r--r--arch/x86/include/asm/acpi.h109
-rw-r--r--arch/x86/include/asm/acrn.h92
-rw-r--r--arch/x86/include/asm/agp.h10
-rw-r--r--arch/x86/include/asm/alternative-asm.h103
-rw-r--r--arch/x86/include/asm/alternative.h382
-rw-r--r--arch/x86/include/asm/amd/hsmp.h16
-rw-r--r--arch/x86/include/asm/amd/ibs.h158
-rw-r--r--arch/x86/include/asm/amd/nb.h77
-rw-r--r--arch/x86/include/asm/amd/node.h59
-rw-r--r--arch/x86/include/asm/amd_nb.h127
-rw-r--r--arch/x86/include/asm/apb_timer.h49
-rw-r--r--arch/x86/include/asm/apic.h504
-rw-r--r--arch/x86/include/asm/apic_flat_64.h8
-rw-r--r--arch/x86/include/asm/apicdef.h297
-rw-r--r--arch/x86/include/asm/arch_hweight.h16
-rw-r--r--arch/x86/include/asm/archrandom.h98
-rw-r--r--arch/x86/include/asm/asm-prototypes.h32
-rw-r--r--arch/x86/include/asm/asm.h219
-rw-r--r--arch/x86/include/asm/atomic.h152
-rw-r--r--arch/x86/include/asm/atomic64_32.h319
-rw-r--r--arch/x86/include/asm/atomic64_64.h180
-rw-r--r--arch/x86/include/asm/audit.h14
-rw-r--r--arch/x86/include/asm/barrier.h57
-rw-r--r--arch/x86/include/asm/bitops.h381
-rw-r--r--arch/x86/include/asm/boot.h84
-rw-r--r--arch/x86/include/asm/bootparam_utils.h67
-rw-r--r--arch/x86/include/asm/bug.h202
-rw-r--r--arch/x86/include/asm/bugs.h10
-rw-r--r--arch/x86/include/asm/cache.h2
-rw-r--r--arch/x86/include/asm/cacheflush.h2
-rw-r--r--arch/x86/include/asm/cacheinfo.h14
-rw-r--r--arch/x86/include/asm/calgary.h70
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/cfi.h164
-rw-r--r--arch/x86/include/asm/checksum.h13
-rw-r--r--arch/x86/include/asm/checksum_32.h45
-rw-r--r--arch/x86/include/asm/checksum_64.h23
-rw-r--r--arch/x86/include/asm/clocksource.h20
-rw-r--r--arch/x86/include/asm/cmdline.h4
-rw-r--r--arch/x86/include/asm/cmpxchg.h79
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h214
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h83
-rw-r--r--arch/x86/include/asm/coco.h50
-rw-r--r--arch/x86/include/asm/compat.h177
-rw-r--r--arch/x86/include/asm/cpu.h67
-rw-r--r--arch/x86/include/asm/cpu_device_id.h201
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h114
-rw-r--r--arch/x86/include/asm/cpufeature.h174
-rw-r--r--arch/x86/include/asm/cpufeatures.h776
-rw-r--r--arch/x86/include/asm/cpuid/api.h292
-rw-r--r--arch/x86/include/asm/cpuid/types.h127
-rw-r--r--arch/x86/include/asm/cpuidle_haltpoll.h8
-rw-r--r--arch/x86/include/asm/cpumask.h39
-rw-r--r--arch/x86/include/asm/crash.h3
-rw-r--r--arch/x86/include/asm/crash_reserve.h44
-rw-r--r--arch/x86/include/asm/crypto/aes.h12
-rw-r--r--arch/x86/include/asm/crypto/camellia.h96
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h122
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h42
-rw-r--r--arch/x86/include/asm/crypto/serpent-sse2.h64
-rw-r--r--arch/x86/include/asm/crypto/twofish.h28
-rw-r--r--arch/x86/include/asm/current.h16
-rw-r--r--arch/x86/include/asm/debugreg.h131
-rw-r--r--arch/x86/include/asm/delay.h4
-rw-r--r--arch/x86/include/asm/desc.h92
-rw-r--r--arch/x86/include/asm/desc_defs.h92
-rw-r--r--arch/x86/include/asm/device.h16
-rw-r--r--arch/x86/include/asm/disabled-features.h89
-rw-r--r--arch/x86/include/asm/div64.h52
-rw-r--r--arch/x86/include/asm/dma-direct.h9
-rw-r--r--arch/x86/include/asm/dma-mapping.h26
-rw-r--r--arch/x86/include/asm/dma.h10
-rw-r--r--arch/x86/include/asm/doublefault.h17
-rw-r--r--arch/x86/include/asm/dwarf2.h50
-rw-r--r--arch/x86/include/asm/e820/api.h3
-rw-r--r--arch/x86/include/asm/e820/types.h11
-rw-r--r--arch/x86/include/asm/edac.h2
-rw-r--r--arch/x86/include/asm/efi.h453
-rw-r--r--arch/x86/include/asm/elf.h78
-rw-r--r--arch/x86/include/asm/elfcore-compat.h31
-rw-r--r--arch/x86/include/asm/emulate_prefix.h14
-rw-r--r--arch/x86/include/asm/enclu.h9
-rw-r--r--arch/x86/include/asm/entry-common.h112
-rw-r--r--arch/x86/include/asm/entry_arch.h56
-rw-r--r--arch/x86/include/asm/error-injection.h13
-rw-r--r--arch/x86/include/asm/extable.h42
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h71
-rw-r--r--arch/x86/include/asm/fb.h22
-rw-r--r--arch/x86/include/asm/fixmap.h36
-rw-r--r--arch/x86/include/asm/floppy.h28
-rw-r--r--arch/x86/include/asm/fpu.h13
-rw-r--r--arch/x86/include/asm/fpu/api.h156
-rw-r--r--arch/x86/include/asm/fpu/internal.h610
-rw-r--r--arch/x86/include/asm/fpu/regset.h9
-rw-r--r--arch/x86/include/asm/fpu/sched.h55
-rw-r--r--arch/x86/include/asm/fpu/signal.h21
-rw-r--r--arch/x86/include/asm/fpu/types.h360
-rw-r--r--arch/x86/include/asm/fpu/xcr.h35
-rw-r--r--arch/x86/include/asm/fpu/xstate.h138
-rw-r--r--arch/x86/include/asm/frame.h74
-rw-r--r--arch/x86/include/asm/fred.h119
-rw-r--r--arch/x86/include/asm/fsgsbase.h53
-rw-r--r--arch/x86/include/asm/ftrace.h117
-rw-r--r--arch/x86/include/asm/futex.h124
-rw-r--r--arch/x86/include/asm/gart.h5
-rw-r--r--arch/x86/include/asm/geode.h5
-rw-r--r--arch/x86/include/asm/gsseg.h66
-rw-r--r--arch/x86/include/asm/hardirq.h29
-rw-r--r--arch/x86/include/asm/highmem.h24
-rw-r--r--arch/x86/include/asm/hpet.h13
-rw-r--r--arch/x86/include/asm/hugetlb.h14
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h138
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h869
-rw-r--r--arch/x86/include/asm/hyperv_timer.h9
-rw-r--r--arch/x86/include/asm/hypervisor.h15
-rw-r--r--arch/x86/include/asm/i8259.h4
-rw-r--r--arch/x86/include/asm/ia32.h37
-rw-r--r--arch/x86/include/asm/ia32_unistd.h12
-rw-r--r--arch/x86/include/asm/ibt.h117
-rw-r--r--arch/x86/include/asm/idtentry.h775
-rw-r--r--arch/x86/include/asm/imr.h6
-rw-r--r--arch/x86/include/asm/inat.h56
-rw-r--r--arch/x86/include/asm/inat_types.h16
-rw-r--r--arch/x86/include/asm/init.h3
-rw-r--r--arch/x86/include/asm/insn-eval.h26
-rw-r--r--arch/x86/include/asm/insn.h199
-rw-r--r--arch/x86/include/asm/inst.h165
-rw-r--r--arch/x86/include/asm/intel-family.h258
-rw-r--r--arch/x86/include/asm/intel-mid.h117
-rw-r--r--arch/x86/include/asm/intel_ds.h16
-rw-r--r--arch/x86/include/asm/intel_mid_vrtc.h10
-rw-r--r--arch/x86/include/asm/intel_pconfig.h65
-rw-r--r--arch/x86/include/asm/intel_pmc_ipc.h91
-rw-r--r--arch/x86/include/asm/intel_pt.h6
-rw-r--r--arch/x86/include/asm/intel_punit_ipc.h7
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h82
-rw-r--r--arch/x86/include/asm/intel_telemetry.h57
-rw-r--r--arch/x86/include/asm/invpcid.h7
-rw-r--r--arch/x86/include/asm/io.h155
-rw-r--r--arch/x86/include/asm/io_apic.h88
-rw-r--r--arch/x86/include/asm/io_bitmap.h52
-rw-r--r--arch/x86/include/asm/iomap.h29
-rw-r--r--arch/x86/include/asm/iommu.h30
-rw-r--r--arch/x86/include/asm/iommu_table.h102
-rw-r--r--arch/x86/include/asm/iosf_mbi.h10
-rw-r--r--arch/x86/include/asm/ipi.h110
-rw-r--r--arch/x86/include/asm/irq.h29
-rw-r--r--arch/x86/include/asm/irq_regs.h32
-rw-r--r--arch/x86/include/asm/irq_remapping.h59
-rw-r--r--arch/x86/include/asm/irq_stack.h241
-rw-r--r--arch/x86/include/asm/irq_vectors.h28
-rw-r--r--arch/x86/include/asm/irq_work.h2
-rw-r--r--arch/x86/include/asm/irqdomain.h15
-rw-r--r--arch/x86/include/asm/irqflags.h160
-rw-r--r--arch/x86/include/asm/ist.h11
-rw-r--r--arch/x86/include/asm/jump_label.h91
-rw-r--r--arch/x86/include/asm/kasan.h5
-rw-r--r--arch/x86/include/asm/kaslr.h2
-rw-r--r--arch/x86/include/asm/kdebug.h6
-rw-r--r--arch/x86/include/asm/kexec.h190
-rw-r--r--arch/x86/include/asm/kfence.h73
-rw-r--r--arch/x86/include/asm/kmap_types.h13
-rw-r--r--arch/x86/include/asm/kmsan.h102
-rw-r--r--arch/x86/include/asm/kprobes.h65
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h154
-rw-r--r--arch/x86/include/asm/kvm-x86-pmu-ops.h27
-rw-r--r--arch/x86/include/asm/kvm_emulate.h457
-rw-r--r--arch/x86/include/asm/kvm_host.h2157
-rw-r--r--arch/x86/include/asm/kvm_page_track.h63
-rw-r--r--arch/x86/include/asm/kvm_para.h66
-rw-r--r--arch/x86/include/asm/kvm_types.h22
-rw-r--r--arch/x86/include/asm/kvm_vcpu_regs.h25
-rw-r--r--arch/x86/include/asm/kvmclock.h14
-rw-r--r--arch/x86/include/asm/linkage.h143
-rw-r--r--arch/x86/include/asm/livepatch.h40
-rw-r--r--arch/x86/include/asm/local.h62
-rw-r--r--arch/x86/include/asm/local64.h1
-rw-r--r--arch/x86/include/asm/mc146818rtc.h2
-rw-r--r--arch/x86/include/asm/mce.h243
-rw-r--r--arch/x86/include/asm/mcsafe_test.h75
-rw-r--r--arch/x86/include/asm/mem_encrypt.h57
-rw-r--r--arch/x86/include/asm/memtype.h29
-rw-r--r--arch/x86/include/asm/microcode.h168
-rw-r--r--arch/x86/include/asm/microcode_amd.h58
-rw-r--r--arch/x86/include/asm/microcode_intel.h85
-rw-r--r--arch/x86/include/asm/mman.h15
-rw-r--r--arch/x86/include/asm/mmu.h45
-rw-r--r--arch/x86/include/asm/mmu_context.h263
-rw-r--r--arch/x86/include/asm/mmx.h15
-rw-r--r--arch/x86/include/asm/mmzone.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h56
-rw-r--r--arch/x86/include/asm/mmzone_64.h18
-rw-r--r--arch/x86/include/asm/module.h66
-rw-r--r--arch/x86/include/asm/mpspec.h113
-rw-r--r--arch/x86/include/asm/mpx.h115
-rw-r--r--arch/x86/include/asm/mshyperv.h528
-rw-r--r--arch/x86/include/asm/msi.h59
-rw-r--r--arch/x86/include/asm/msidef.h57
-rw-r--r--arch/x86/include/asm/msr-index.h613
-rw-r--r--arch/x86/include/asm/msr.h309
-rw-r--r--arch/x86/include/asm/mtrr.h88
-rw-r--r--arch/x86/include/asm/mwait.h104
-rw-r--r--arch/x86/include/asm/nmi.h57
-rw-r--r--arch/x86/include/asm/nops.h198
-rw-r--r--arch/x86/include/asm/nospec-branch.h646
-rw-r--r--arch/x86/include/asm/numa.h35
-rw-r--r--arch/x86/include/asm/numa_32.h13
-rw-r--r--arch/x86/include/asm/olpc.h31
-rw-r--r--arch/x86/include/asm/orc_header.h19
-rw-r--r--arch/x86/include/asm/orc_lookup.h14
-rw-r--r--arch/x86/include/asm/orc_types.h71
-rw-r--r--arch/x86/include/asm/page.h25
-rw-r--r--arch/x86/include/asm/page_32.h22
-rw-r--r--arch/x86/include/asm/page_32_types.h36
-rw-r--r--arch/x86/include/asm/page_64.h72
-rw-r--r--arch/x86/include/asm/page_64_types.h57
-rw-r--r--arch/x86/include/asm/page_types.h34
-rw-r--r--arch/x86/include/asm/paravirt.h565
-rw-r--r--arch/x86/include/asm/paravirt_api_clock.h1
-rw-r--r--arch/x86/include/asm/paravirt_types.h475
-rw-r--r--arch/x86/include/asm/pat.h27
-rw-r--r--arch/x86/include/asm/pc-conf-reg.h33
-rw-r--r--arch/x86/include/asm/pci.h75
-rw-r--r--arch/x86/include/asm/pci_64.h28
-rw-r--r--arch/x86/include/asm/pci_x86.h35
-rw-r--r--arch/x86/include/asm/percpu.h1023
-rw-r--r--arch/x86/include/asm/perf_event.h575
-rw-r--r--arch/x86/include/asm/perf_event_p4.h4
-rw-r--r--arch/x86/include/asm/pgalloc.h103
-rw-r--r--arch/x86/include/asm/pgtable-2level.h26
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h14
-rw-r--r--arch/x86/include/asm/pgtable-3level.h246
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h20
-rw-r--r--arch/x86/include/asm/pgtable-invert.h4
-rw-r--r--arch/x86/include/asm/pgtable.h844
-rw-r--r--arch/x86/include/asm/pgtable_32.h56
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h53
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h56
-rw-r--r--arch/x86/include/asm/pgtable_64.h80
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h94
-rw-r--r--arch/x86/include/asm/pgtable_areas.h22
-rw-r--r--arch/x86/include/asm/pgtable_types.h304
-rw-r--r--arch/x86/include/asm/pkeys.h24
-rw-r--r--arch/x86/include/asm/pkru.h62
-rw-r--r--arch/x86/include/asm/platform_sst_audio.h8
-rw-r--r--arch/x86/include/asm/posted_intr.h187
-rw-r--r--arch/x86/include/asm/preempt.h65
-rw-r--r--arch/x86/include/asm/processor-cyrix.h29
-rw-r--r--arch/x86/include/asm/processor-flags.h6
-rw-r--r--arch/x86/include/asm/processor.h788
-rw-r--r--arch/x86/include/asm/prom.h14
-rw-r--r--arch/x86/include/asm/proto.h20
-rw-r--r--arch/x86/include/asm/pti.h8
-rw-r--r--arch/x86/include/asm/ptrace.h238
-rw-r--r--arch/x86/include/asm/purgatory.h14
-rw-r--r--arch/x86/include/asm/pvclock-abi.h4
-rw-r--r--arch/x86/include/asm/pvclock.h6
-rw-r--r--arch/x86/include/asm/qspinlock.h41
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h64
-rw-r--r--arch/x86/include/asm/realmode.h31
-rw-r--r--arch/x86/include/asm/reboot.h12
-rw-r--r--arch/x86/include/asm/refcount.h112
-rw-r--r--arch/x86/include/asm/required-features.h106
-rw-r--r--arch/x86/include/asm/resctrl.h203
-rw-r--r--arch/x86/include/asm/resctrl_sched.h93
-rw-r--r--arch/x86/include/asm/rio.h64
-rw-r--r--arch/x86/include/asm/rmwcc.h37
-rw-r--r--arch/x86/include/asm/rqspinlock.h33
-rw-r--r--arch/x86/include/asm/runtime-const.h78
-rw-r--r--arch/x86/include/asm/rwsem.h237
-rw-r--r--arch/x86/include/asm/seccomp.h22
-rw-r--r--arch/x86/include/asm/sections.h6
-rw-r--r--arch/x86/include/asm/segment.h88
-rw-r--r--arch/x86/include/asm/set_memory.h73
-rw-r--r--arch/x86/include/asm/setup.h100
-rw-r--r--arch/x86/include/asm/setup_data.h32
-rw-r--r--arch/x86/include/asm/sev-common.h261
-rw-r--r--arch/x86/include/asm/sev-internal.h87
-rw-r--r--arch/x86/include/asm/sev.h682
-rw-r--r--arch/x86/include/asm/sgx.h430
-rw-r--r--arch/x86/include/asm/shared/io.h34
-rw-r--r--arch/x86/include/asm/shared/msr.h30
-rw-r--r--arch/x86/include/asm/shared/tdx.h191
-rw-r--r--arch/x86/include/asm/shstk.h46
-rw-r--r--arch/x86/include/asm/sigframe.h6
-rw-r--r--arch/x86/include/asm/sighandling.h32
-rw-r--r--arch/x86/include/asm/signal.h16
-rw-r--r--arch/x86/include/asm/simd.h6
-rw-r--r--arch/x86/include/asm/smap.h115
-rw-r--r--arch/x86/include/asm/smp.h143
-rw-r--r--arch/x86/include/asm/softirq_stack.h11
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/include/asm/spec-ctrl.h23
-rw-r--r--arch/x86/include/asm/special_insns.h241
-rw-r--r--arch/x86/include/asm/spinlock_types.h22
-rw-r--r--arch/x86/include/asm/sta2x11.h13
-rw-r--r--arch/x86/include/asm/stackprotector.h98
-rw-r--r--arch/x86/include/asm/stacktrace.h32
-rw-r--r--arch/x86/include/asm/static_call.h83
-rw-r--r--arch/x86/include/asm/string.h26
-rw-r--r--arch/x86/include/asm/string_32.h152
-rw-r--r--arch/x86/include/asm/string_64.h122
-rw-r--r--arch/x86/include/asm/suspend_32.h15
-rw-r--r--arch/x86/include/asm/suspend_64.h15
-rw-r--r--arch/x86/include/asm/svm.h528
-rw-r--r--arch/x86/include/asm/swiotlb.h30
-rw-r--r--arch/x86/include/asm/switch_to.h56
-rw-r--r--arch/x86/include/asm/sync_bitops.h31
-rw-r--r--arch/x86/include/asm/sync_core.h91
-rw-r--r--arch/x86/include/asm/syscall.h210
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h287
-rw-r--r--arch/x86/include/asm/syscalls.h38
-rw-r--r--arch/x86/include/asm/sysfb.h98
-rw-r--r--arch/x86/include/asm/tce.h48
-rw-r--r--arch/x86/include/asm/tdx.h240
-rw-r--r--arch/x86/include/asm/tdx_global_metadata.h44
-rw-r--r--arch/x86/include/asm/text-patching.h210
-rw-r--r--arch/x86/include/asm/thermal.h15
-rw-r--r--arch/x86/include/asm/thread_info.h173
-rw-r--r--arch/x86/include/asm/time.h3
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/timex.h9
-rw-r--r--arch/x86/include/asm/tlb.h152
-rw-r--r--arch/x86/include/asm/tlbbatch.h5
-rw-r--r--arch/x86/include/asm/tlbflush.h692
-rw-r--r--arch/x86/include/asm/topology.h189
-rw-r--r--arch/x86/include/asm/trace/common.h16
-rw-r--r--arch/x86/include/asm/trace/exceptions.h54
-rw-r--r--arch/x86/include/asm/trace/fpu.h37
-rw-r--r--arch/x86/include/asm/trace/hyperv.h17
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h18
-rw-r--r--arch/x86/include/asm/trace/mpx.h134
-rw-r--r--arch/x86/include/asm/trap_pf.h32
-rw-r--r--arch/x86/include/asm/trapnr.h44
-rw-r--r--arch/x86/include/asm/traps.h167
-rw-r--r--arch/x86/include/asm/tsc.h71
-rw-r--r--arch/x86/include/asm/uaccess.h930
-rw-r--r--arch/x86/include/asm/uaccess_32.h30
-rw-r--r--arch/x86/include/asm/uaccess_64.h308
-rw-r--r--arch/x86/include/asm/umip.h4
-rw-r--r--arch/x86/include/asm/unaccepted_memory.h27
-rw-r--r--arch/x86/include/asm/unaligned.h15
-rw-r--r--arch/x86/include/asm/unistd.h22
-rw-r--r--arch/x86/include/asm/unwind.h36
-rw-r--r--arch/x86/include/asm/unwind_hints.h110
-rw-r--r--arch/x86/include/asm/unwind_user.h41
-rw-r--r--arch/x86/include/asm/uprobes.h31
-rw-r--r--arch/x86/include/asm/user_32.h4
-rw-r--r--arch/x86/include/asm/user_64.h4
-rw-r--r--arch/x86/include/asm/uv/bios.h111
-rw-r--r--arch/x86/include/asm/uv/uv.h33
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h861
-rw-r--r--arch/x86/include/asm/uv/uv_geo.h103
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h304
-rw-r--r--arch/x86/include/asm/uv/uv_irq.h1
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h7733
-rw-r--r--arch/x86/include/asm/vdso.h23
-rw-r--r--arch/x86/include/asm/vdso/clocksource.h12
-rw-r--r--arch/x86/include/asm/vdso/getrandom.h32
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h336
-rw-r--r--arch/x86/include/asm/vdso/processor.h27
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h22
-rw-r--r--arch/x86/include/asm/vermagic.h64
-rw-r--r--arch/x86/include/asm/vgtod.h92
-rw-r--r--arch/x86/include/asm/video.h23
-rw-r--r--arch/x86/include/asm/virtext.h129
-rw-r--r--arch/x86/include/asm/vm86.h3
-rw-r--r--arch/x86/include/asm/vmalloc.h26
-rw-r--r--arch/x86/include/asm/vmware.h327
-rw-r--r--arch/x86/include/asm/vmx.h292
-rw-r--r--arch/x86/include/asm/vmxfeatures.h93
-rw-r--r--arch/x86/include/asm/vsyscall.h16
-rw-r--r--arch/x86/include/asm/vvar.h51
-rw-r--r--arch/x86/include/asm/word-at-a-time.h86
-rw-r--r--arch/x86/include/asm/x86_init.h115
-rw-r--r--arch/x86/include/asm/xen/cpuid.h23
-rw-r--r--arch/x86/include/asm/xen/events.h3
-rw-r--r--arch/x86/include/asm/xen/hypercall.h308
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h58
-rw-r--r--arch/x86/include/asm/xen/interface.h24
-rw-r--r--arch/x86/include/asm/xen/interface_32.h4
-rw-r--r--arch/x86/include/asm/xen/interface_64.h6
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h38
-rw-r--r--arch/x86/include/asm/xen/page.h42
-rw-r--r--arch/x86/include/asm/xen/pci.h18
-rw-r--r--arch/x86/include/asm/xen/swiotlb-xen.h16
-rw-r--r--arch/x86/include/asm/xor.h52
-rw-r--r--arch/x86/include/asm/xor_32.h52
-rw-r--r--arch/x86/include/asm/xor_avx.h36
-rw-r--r--arch/x86/include/uapi/asm/Kbuild3
-rw-r--r--arch/x86/include/uapi/asm/amd_hsmp.h477
-rw-r--r--arch/x86/include/uapi/asm/auxvec.h4
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h59
-rw-r--r--arch/x86/include/uapi/asm/byteorder.h2
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h22
-rw-r--r--arch/x86/include/uapi/asm/e820.h4
-rw-r--r--arch/x86/include/uapi/asm/elf.h16
-rw-r--r--arch/x86/include/uapi/asm/errno.h1
-rw-r--r--arch/x86/include/uapi/asm/fcntl.h1
-rw-r--r--arch/x86/include/uapi/asm/hwcap2.h9
-rw-r--r--arch/x86/include/uapi/asm/ioctl.h1
-rw-r--r--arch/x86/include/uapi/asm/ioctls.h1
-rw-r--r--arch/x86/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h685
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h36
-rw-r--r--arch/x86/include/uapi/asm/ldt.h4
-rw-r--r--arch/x86/include/uapi/asm/mce.h4
-rw-r--r--arch/x86/include/uapi/asm/mman.h23
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h11
-rw-r--r--arch/x86/include/uapi/asm/msr.h4
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h14
-rw-r--r--arch/x86/include/uapi/asm/param.h1
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h26
-rw-r--r--arch/x86/include/uapi/asm/prctl.h44
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h17
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h6
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h4
-rw-r--r--arch/x86/include/uapi/asm/resource.h1
-rw-r--r--arch/x86/include/uapi/asm/sembuf.h6
-rw-r--r--arch/x86/include/uapi/asm/setup_data.h94
-rw-r--r--arch/x86/include/uapi/asm/sgx.h238
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h14
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/x86/include/uapi/asm/sigcontext32.h2
-rw-r--r--arch/x86/include/uapi/asm/signal.h35
-rw-r--r--arch/x86/include/uapi/asm/socket.h1
-rw-r--r--arch/x86/include/uapi/asm/sockios.h1
-rw-r--r--arch/x86/include/uapi/asm/svm.h73
-rw-r--r--arch/x86/include/uapi/asm/termbits.h1
-rw-r--r--arch/x86/include/uapi/asm/termios.h1
-rw-r--r--arch/x86/include/uapi/asm/types.h7
-rw-r--r--arch/x86/include/uapi/asm/unistd.h9
-rw-r--r--arch/x86/include/uapi/asm/vm86.h4
-rw-r--r--arch/x86/include/uapi/asm/vmx.h30
439 files changed, 29410 insertions, 23460 deletions
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
new file mode 100644
index 000000000000..07949102a08d
--- /dev/null
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * These are in machine order; things rely on that.
+ */
+#ifdef CONFIG_64BIT
+GEN(rax)
+GEN(rcx)
+GEN(rdx)
+GEN(rbx)
+GEN(rsp)
+GEN(rbp)
+GEN(rsi)
+GEN(rdi)
+GEN(r8)
+GEN(r9)
+GEN(r10)
+GEN(r11)
+GEN(r12)
+GEN(r13)
+GEN(r14)
+GEN(r15)
+#else
+GEN(eax)
+GEN(ecx)
+GEN(edx)
+GEN(ebx)
+GEN(esp)
+GEN(ebp)
+GEN(esi)
+GEN(edi)
+#endif
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a0ab9ab61c75..4566000e15c4 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,13 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
generated-y += syscalls_32.h
generated-y += syscalls_64.h
+generated-y += syscalls_x32.h
generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
+generated-y += cpufeaturemasks.h
-generic-y += dma-contiguous.h
generic-y += early_ioremap.h
-generic-y += export.h
+generic-y += fprobe.h
generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
+generic-y += mmzone.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
deleted file mode 100644
index 7d3ece8bfb61..000000000000
--- a/arch/x86/include/asm/a.out-core.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_X86_A_OUT_CORE_H
-#define _ASM_X86_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-#ifdef CONFIG_X86_32
-
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/mm_types.h>
-
-#include <asm/debugreg.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
-{
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1)))
- >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- aout_dump_debugregs(dump);
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
- >> PAGE_SHIFT;
-
- dump->regs.bx = regs->bx;
- dump->regs.cx = regs->cx;
- dump->regs.dx = regs->dx;
- dump->regs.si = regs->si;
- dump->regs.di = regs->di;
- dump->regs.bp = regs->bp;
- dump->regs.ax = regs->ax;
- dump->regs.ds = (u16)regs->ds;
- dump->regs.es = (u16)regs->es;
- dump->regs.fs = (u16)regs->fs;
- dump->regs.gs = get_user_gs(regs);
- dump->regs.orig_ax = regs->orig_ax;
- dump->regs.ip = regs->ip;
- dump->regs.cs = (u16)regs->cs;
- dump->regs.flags = regs->flags;
- dump->regs.sp = regs->sp;
- dump->regs.ss = (u16)regs->ss;
-
- dump->u_fpvalid = dump_fpu(regs, &dump->i387);
-}
-
-#endif /* CONFIG_X86_32 */
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_A_OUT_CORE_H */
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 1b010a859b8b..d937c55e717e 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* X86 specific ACPICA environments and implementation
*
* Copyright (C) 2014, Intel Corporation
* Author: Lv Zheng <lv.zheng@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _ASM_X86_ACENV_H
@@ -16,7 +13,19 @@
/* Asm macros */
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside virtual machine, the kernel can bypass cache flushing.
+ * Changing sleep state in a virtual machine doesn't affect the host system
+ * sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE() \
+do { \
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \
+ wbinvd(); \
+} while (0)
int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2f01eb4d6208..a03aa6f999d1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -1,37 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_ACPI_H
#define _ASM_X86_ACPI_H
/*
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
#include <asm/numa.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
-#include <asm/realmode.h>
#include <asm/x86_init.h>
+#include <asm/cpufeature.h>
+#include <asm/irq_vectors.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -48,6 +35,7 @@ extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
+extern bool acpi_int_src_ovr[NR_IRQS_LEGACY];
extern u8 acpi_sci_flags;
extern u32 acpi_sci_override_gsi;
@@ -68,6 +56,8 @@ static inline void disable_acpi(void)
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+extern int acpi_blacklisted(void);
+
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
static inline void acpi_disable_pci(void)
{
@@ -79,7 +69,21 @@ static inline void acpi_disable_pci(void)
extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
-#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
+unsigned long acpi_get_wakeup_address(void);
+
+static inline bool acpi_skip_set_wakeup_address(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+
+#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+
+union acpi_subtable_headers;
+
+int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end);
+
+void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
/*
* Check if the CPU can handle C2 and deeper
@@ -110,23 +114,42 @@ static inline bool arch_has_acpi_pdc(void)
c->x86_vendor == X86_VENDOR_CENTAUR);
}
-static inline void arch_acpi_set_pdc_bits(u32 *buf)
+static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
{
struct cpuinfo_x86 *c = &cpu_data(0);
- buf[2] |= ACPI_PDC_C_CAPABILITY_SMP;
+ *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP;
+
+ /* Enable coordination with firmware's _TSD info */
+ *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD;
if (cpu_has(c, X86_FEATURE_EST))
- buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
+ *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP;
if (cpu_has(c, X86_FEATURE_ACPI))
- buf[2] |= ACPI_PDC_T_FFH;
+ *cap |= ACPI_PROC_CAP_T_FFH;
+
+ if (cpu_has(c, X86_FEATURE_HWP))
+ *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF;
/*
- * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+ * If mwait/monitor is unsupported, C_C1_FFH and
+ * C2/C3_FFH will be disabled.
*/
- if (!cpu_has(c, X86_FEATURE_MWAIT))
- buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+ if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+ boot_option_idle_override == IDLE_NOMWAIT)
+ *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
+
+ if (xen_initial_domain()) {
+ /*
+ * When Linux is running as Xen dom0, the hypervisor is the
+ * entity in charge of the processor power management, and so
+ * Xen needs to check the OS capabilities reported in the
+ * processor capabilities buffer matches what the hypervisor
+ * driver supports.
+ */
+ xen_sanitize_proc_cap_bits(cap);
+ }
}
static inline bool acpi_has_cpu_in_madt(void)
@@ -134,16 +157,31 @@ static inline bool acpi_has_cpu_in_madt(void)
return !!acpi_lapic;
}
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static __always_inline void acpi_arch_set_root_pointer(u64 addr)
+{
+ x86_init.acpi.set_root_pointer(addr);
+}
+
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
-static inline u64 acpi_arch_get_root_pointer(void)
+static __always_inline u64 acpi_arch_get_root_pointer(void)
{
return x86_init.acpi.get_root_pointer();
}
void acpi_generic_reduced_hw_init(void);
+void x86_default_set_root_pointer(u64 addr);
u64 x86_default_get_root_pointer(void);
+#ifdef CONFIG_XEN_PV
+/* A Xen PV domain needs a special acpi_os_ioremap() handling. */
+extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys,
+ acpi_size size);
+void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
+#endif
+
#else /* !CONFIG_ACPI */
#define acpi_lapic 0
@@ -155,6 +193,8 @@ static inline void disable_acpi(void) { }
static inline void acpi_generic_reduced_hw_init(void) { }
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
static inline u64 x86_default_get_root_pointer(void)
{
return 0;
@@ -168,7 +208,7 @@ static inline u64 x86_default_get_root_pointer(void)
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
-#define acpi_unlazy_tlb(x) leave_mm(x)
+struct cper_ia_proc_ctx;
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
@@ -188,6 +228,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
*/
return PAGE_KERNEL_NOENC;
}
+
+int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+ u64 lapic_id);
+#else
+static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+ u64 lapic_id)
+{
+ return -EINVAL;
+}
#endif
#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT)
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
new file mode 100644
index 000000000000..fab11192c60a
--- /dev/null
+++ b/arch/x86/include/asm/acrn.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ACRN_H
+#define _ASM_X86_ACRN_H
+
+/*
+ * This CPUID returns feature bitmaps in EAX.
+ * Guest VM uses this to detect the appropriate feature bit.
+ */
+#define ACRN_CPUID_FEATURES 0x40000001
+/* Bit 0 indicates whether guest VM is privileged */
+#define ACRN_FEATURE_PRIVILEGED_VM BIT(0)
+
+/*
+ * Timing Information.
+ * This leaf returns the current TSC frequency in kHz.
+ *
+ * EAX: (Virtual) TSC frequency in kHz.
+ * EBX, ECX, EDX: RESERVED (reserved fields are set to zero).
+ */
+#define ACRN_CPUID_TIMING_INFO 0x40000010
+
+void acrn_setup_intr_handler(void (*handler)(void));
+void acrn_remove_intr_handler(void);
+
+static inline u32 acrn_cpuid_base(void)
+{
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return cpuid_base_hypervisor("ACRNACRNACRN", 0);
+
+ return 0;
+}
+
+static inline unsigned long acrn_get_tsc_khz(void)
+{
+ return cpuid_eax(ACRN_CPUID_TIMING_INFO);
+}
+
+/*
+ * Hypercalls for ACRN
+ *
+ * - VMCALL instruction is used to implement ACRN hypercalls.
+ * - ACRN hypercall ABI:
+ * - Hypercall number is passed in R8 register.
+ * - Up to 2 arguments are passed in RDI, RSI.
+ * - Return value will be placed in RAX.
+ *
+ * Because GCC doesn't support R8 register as direct register constraints, use
+ * supported constraint as input with a explicit MOV to R8 in beginning of asm.
+ */
+static inline long acrn_hypercall0(unsigned long hcall_id)
+{
+ long result;
+
+ asm volatile("movl %1, %%r8d\n\t"
+ "vmcall\n\t"
+ : "=a" (result)
+ : "g" (hcall_id)
+ : "r8", "memory");
+
+ return result;
+}
+
+static inline long acrn_hypercall1(unsigned long hcall_id,
+ unsigned long param1)
+{
+ long result;
+
+ asm volatile("movl %1, %%r8d\n\t"
+ "vmcall\n\t"
+ : "=a" (result)
+ : "g" (hcall_id), "D" (param1)
+ : "r8", "memory");
+
+ return result;
+}
+
+static inline long acrn_hypercall2(unsigned long hcall_id,
+ unsigned long param1,
+ unsigned long param2)
+{
+ long result;
+
+ asm volatile("movl %1, %%r8d\n\t"
+ "vmcall\n\t"
+ : "=a" (result)
+ : "g" (hcall_id), "D" (param1), "S" (param2)
+ : "r8", "memory");
+
+ return result;
+}
+
+#endif /* _ASM_X86_ACRN_H */
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index 8e25bf4f323a..c8c111d8fbd7 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -2,14 +2,14 @@
#ifndef _ASM_X86_AGP_H
#define _ASM_X86_AGP_H
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#include <asm/cacheflush.h>
/*
* Functions to keep the agpgart mappings coherent with the MMU. The
* GART gives the CPU a physical alias of pages in memory. The alias
* region is mapped uncacheable. Make sure there are no conflicting
- * mappings with different cachability attributes for the same
+ * mappings with different cacheability attributes for the same
* page. This avoids data corruption on some CPUs.
*/
@@ -23,10 +23,4 @@
*/
#define flush_agp_cache() wbinvd()
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order) \
- ((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order) \
- free_pages((unsigned long)(table), (order))
-
#endif /* _ASM_X86_AGP_H */
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 31b627b43a8e..000000000000
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-
-#ifdef CONFIG_SMP
- .macro LOCK_PREFIX
-672: lock
- .pushsection .smp_locks,"a"
- .balign 4
- .long 672b - .
- .popsection
- .endm
-#else
- .macro LOCK_PREFIX
- .endm
-#endif
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
- .long \orig - .
- .long \alt - .
- .word \feature
- .byte \orig_len
- .byte \alt_len
- .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
-.endm
-
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
- \oldinstr
-141:
- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 0660e14690c8..03364510d5fe 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -2,12 +2,23 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
-
#include <linux/types.h>
-#include <linux/stddef.h>
#include <linux/stringify.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
+#include <asm/bug.h>
+
+#define ALT_FLAGS_SHIFT 16
+
+#define ALT_FLAG_NOT (1 << 0)
+#define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature))
+#define ALT_FLAG_DIRECT_CALL (1 << 1)
+#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature))
+#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/stddef.h>
/*
* Alternative inline assembly for SMP.
@@ -38,22 +49,46 @@
".popsection\n" \
"671:"
-#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock "
#else /* ! CONFIG_SMP */
#define LOCK_PREFIX_HERE ""
#define LOCK_PREFIX ""
#endif
+/*
+ * The patching flags are part of the upper bits of the @ft_flags parameter when
+ * specifying them. The split is currently like this:
+ *
+ * [31... flags ...16][15... CPUID feature bit ...0]
+ *
+ * but since this is all hidden in the macros argument being split, those fields can be
+ * extended in the future to fit in a u64 or however the need arises.
+ */
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
- u16 cpuid; /* cpuid bit set for replacement */
+
+ union {
+ struct {
+ u32 cpuid: 16; /* CPUID bit set for replacement */
+ u32 flags: 16; /* patching control flags */
+ };
+ u32 ft_flags;
+ };
+
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction */
- u8 padlen; /* length of build-time padding */
} __packed;
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
+extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
+extern s32 __smp_locks[], __smp_locks_end[];
+
/*
* Debug flag that can be tested to see whether alternative
* instructions were patched in already:
@@ -62,9 +97,71 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
+extern void apply_seal_endbr(s32 *start, s32 *end);
+extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
+ s32 *start_cfi, s32 *end_cfi);
struct module;
+struct callthunk_sites {
+ s32 *call_start, *call_end;
+};
+
+#ifdef CONFIG_CALL_THUNKS
+extern void callthunks_patch_builtin_calls(void);
+extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
+ struct module *mod);
+extern void *callthunks_translate_call_dest(void *dest);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
+#else
+static __always_inline void callthunks_patch_builtin_calls(void) {}
+static __always_inline void
+callthunks_patch_module_calls(struct callthunk_sites *sites,
+ struct module *mod) {}
+static __always_inline void *callthunks_translate_call_dest(void *dest)
+{
+ return dest;
+}
+static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
+ void *func, void *ip)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_init_mod(struct module *mod);
+extern void its_fini_mod(struct module *mod);
+extern void its_free_mod(struct module *mod);
+extern u8 *its_static_thunk(int reg);
+#else /* CONFIG_MITIGATION_ITS */
+static inline void its_init_mod(struct module *mod) { }
+static inline void its_fini_mod(struct module *mod) { }
+static inline void its_free_mod(struct module *mod) { }
+static inline u8 *its_static_thunk(int reg)
+{
+ WARN_ONCE(1, "ITS not compiled in");
+
+ return NULL;
+}
+#endif
+
+#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)
+extern bool cpu_wants_rethunk(void);
+extern bool cpu_wants_rethunk_at(void *addr);
+#else
+static __always_inline bool cpu_wants_rethunk(void)
+{
+ return false;
+}
+static __always_inline bool cpu_wants_rethunk_at(void *addr)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
@@ -85,74 +182,55 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
-
-#define alt_end_marker "663"
-#define alt_slen "662b-661b"
-#define alt_pad_len alt_end_marker"b-662b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-
-#define __OLDINSTR(oldinstr, num) \
- "661:\n\t" oldinstr "\n662:\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
-
-#define OLDINSTR(oldinstr, num) \
- __OLDINSTR(oldinstr, num) \
- alt_end_marker ":\n"
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
-
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "661:\n\t" oldinstr "\n662:\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(feature, num) \
- " .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
- " .word " __stringify(feature) "\n" /* feature bit */ \
+#define ALT_CALL_INSTR "call BUG_func"
+
+#define alt_slen "772b-771b"
+#define alt_total_slen "773b-771b"
+#define alt_rlen "775f-774f"
+
+#define OLDINSTR(oldinstr) \
+ "# ALT: oldinstr\n" \
+ "771:\n\t" oldinstr "\n772:\n" \
+ "# ALT: padding\n" \
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "773:\n"
+
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions, \"aM\", @progbits, " \
+ __stringify(ALT_INSTR_SIZE) "\n" \
+ " .long 771b - .\n" /* label */ \
+ " .long 774f - .\n" /* new instruction */ \
+ " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */ \
- " .byte " alt_pad_len "\n" /* pad len */
-
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
-
-/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, newinstr, feature) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ " .byte " alt_rlen "\n" /* replacement len */ \
".popsection\n"
-#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature1, 1) \
- ALTINSTR_ENTRY(feature2, 2) \
- ".popsection\n" \
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ANNOTATE_DATA_SPECIAL "\n" \
+ "# ALT: replacement\n" \
+ "774:\n\t" newinstr "\n775:\n" \
".popsection\n"
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ OLDINSTR(oldinstr) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
+ ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
+
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags)
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \
+ newinstr3, ft_flags3)
+
/*
* Alternative instructions for different CPU types or capabilities.
*
@@ -165,11 +243,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+#define alternative(oldinstr, newinstr, ft_flags) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
-#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory")
/*
* Alternative inline assembly with input.
@@ -177,38 +255,33 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Peculiarities:
* No memory clobber here.
* Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
* Leaving an unused argument 0 to keep API compatibility.
*/
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
- : : "i" (0), ## input)
-
-/*
- * This is similar to alternative_input. But it has two features and
- * respective instructions.
- *
- * If CPU has feature2, newinstr2 is used.
- * Otherwise, if CPU has feature1, newinstr1 is used.
- * Otherwise, oldinstr is used.
- */
-#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
- feature2, input...) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
- newinstr2, feature2) \
+#define alternative_input(oldinstr, newinstr, ft_flags, input...) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
+#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: output : "i" (0), ## input)
-/* Like alternative_io, but for replacing a direct call with another one. */
-#define alternative_call(oldfunc, newfunc, feature, output, input...) \
- asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+/*
+ * Like alternative_io, but for replacing a direct call with another one.
+ *
+ * Use the %c operand modifier which is the generic way to print a bare
+ * constant expression with all syntax-specific punctuation omitted. %P
+ * is the x86-specific variant which can handle constants too, for
+ * historical reasons, but it should be used primarily for PIC
+ * references: i.e., if used for a function, it would add the PLT
+ * suffix.
+ */
+#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \
+ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new] "i" (newfunc) \
+ COMMA(input) \
+ : clobbers)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -216,26 +289,113 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
- output, input...) \
- asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
- "call %P[new2]", feature2) \
- : output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input)
+#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
+ output, input, clobbers...) \
+ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
+ "call %c[new2]", ft_flags2) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2) \
+ COMMA(input) \
+ : clobbers)
+
+#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__
+
+/* Macro for creating assembler functions avoiding any C magic. */
+#define DEFINE_ASM_FUNC(func, instr, sec) \
+ asm (".pushsection " #sec ", \"ax\"\n" \
+ ".global " #func "\n\t" \
+ ".type " #func ", @function\n\t" \
+ ASM_FUNC_ALIGN "\n" \
+ #func ":\n\t" \
+ ASM_ENDBR \
+ instr "\n\t" \
+ ASM_RET \
+ ".size " #func ", . - " #func "\n\t" \
+ ".popsection")
+
+void BUG_func(void);
+void nop_func(void);
+
+#else /* __ASSEMBLER__ */
+
+#ifdef CONFIG_SMP
+ .macro LOCK_PREFIX
+672: lock
+ .pushsection .smp_locks,"a"
+ .balign 4
+ .long 672b - .
+ .popsection
+ .endm
+#else
+ .macro LOCK_PREFIX
+ .endm
+#endif
/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
*/
-#define ASM_OUTPUT2(a...) a
+.macro altinstr_entry orig alt ft_flags orig_len alt_len
+ .long \orig - .
+ .long \alt - .
+ .4byte \ft_flags
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+.macro ALT_CALL_INSTR
+ call BUG_func
+.endm
/*
- * use this macro if you need clobbers but no inputs in
- * alternative_{input,io,call}()
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
*/
-#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#define __ALTERNATIVE(oldinst, newinst, flag) \
+740: \
+ oldinst ; \
+741: \
+ .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\
+742: \
+ .pushsection .altinstructions, "aM", @progbits, ALT_INSTR_SIZE ;\
+ altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+743: \
+ ANNOTATE_DATA_SPECIAL ; \
+ newinst ; \
+744: \
+ .popsection ;
+
+.macro ALTERNATIVE oldinstr, newinstr, ft_flags
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
+.endm
-#endif /* __ASSEMBLY__ */
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @feature2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2)
+.endm
+
+.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
+ __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2),
+ \newinstr3, \ft_flags3)
+.endm
+
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
+ newinstr_yes, ft_flags
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd/hsmp.h b/arch/x86/include/asm/amd/hsmp.h
new file mode 100644
index 000000000000..2137f62853ed
--- /dev/null
+++ b/arch/x86/include/asm/amd/hsmp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_AMD_HSMP_H_
+#define _ASM_X86_AMD_HSMP_H_
+
+#include <uapi/asm/amd_hsmp.h>
+
+#if IS_ENABLED(CONFIG_AMD_HSMP)
+int hsmp_send_message(struct hsmp_message *msg);
+#else
+static inline int hsmp_send_message(struct hsmp_message *msg)
+{
+ return -ENODEV;
+}
+#endif
+
+#endif /*_ASM_X86_AMD_HSMP_H_*/
diff --git a/arch/x86/include/asm/amd/ibs.h b/arch/x86/include/asm/amd/ibs.h
new file mode 100644
index 000000000000..3ee5903982c2
--- /dev/null
+++ b/arch/x86/include/asm/amd/ibs.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AMD_IBS_H
+#define _ASM_X86_AMD_IBS_H
+
+/*
+ * From PPR Vol 1 for AMD Family 19h Model 01h B1
+ * 55898 Rev 0.35 - Feb 5, 2021
+ */
+
+#include <asm/msr-index.h>
+
+/* IBS_OP_DATA2 DataSrc */
+#define IBS_DATA_SRC_LOC_CACHE 2
+#define IBS_DATA_SRC_DRAM 3
+#define IBS_DATA_SRC_REM_CACHE 4
+#define IBS_DATA_SRC_IO 7
+
+/* IBS_OP_DATA2 DataSrc Extension */
+#define IBS_DATA_SRC_EXT_LOC_CACHE 1
+#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2
+#define IBS_DATA_SRC_EXT_DRAM 3
+#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5
+#define IBS_DATA_SRC_EXT_PMEM 6
+#define IBS_DATA_SRC_EXT_IO 7
+#define IBS_DATA_SRC_EXT_EXT_MEM 8
+#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12
+
+/*
+ * IBS Hardware MSRs
+ */
+
+/* MSR 0xc0011030: IBS Fetch Control */
+union ibs_fetch_ctl {
+ __u64 val;
+ struct {
+ __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
+ fetch_cnt:16, /* 16-31: instruction fetch count */
+ fetch_lat:16, /* 32-47: instruction fetch latency */
+ fetch_en:1, /* 48: instruction fetch enable */
+ fetch_val:1, /* 49: instruction fetch valid */
+ fetch_comp:1, /* 50: instruction fetch complete */
+ ic_miss:1, /* 51: i-cache miss */
+ phy_addr_valid:1,/* 52: physical address valid */
+ l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
+ * (needs IbsPhyAddrValid) */
+ l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
+ l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
+ rand_en:1, /* 57: random tagging enable */
+ fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
+ * (needs IbsFetchComp) */
+ l3_miss_only:1, /* 59: Collect L3 miss samples only */
+ fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
+ fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
+ reserved:2; /* 62-63: reserved */
+ };
+};
+
+/* MSR 0xc0011033: IBS Execution Control */
+union ibs_op_ctl {
+ __u64 val;
+ struct {
+ __u64 opmaxcnt:16, /* 0-15: periodic op max. count */
+ l3_miss_only:1, /* 16: Collect L3 miss samples only */
+ op_en:1, /* 17: op sampling enable */
+ op_val:1, /* 18: op sample valid */
+ cnt_ctl:1, /* 19: periodic op counter control */
+ opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
+ reserved0:5, /* 27-31: reserved */
+ opcurcnt:27, /* 32-58: periodic op counter current count */
+ ldlat_thrsh:4, /* 59-62: Load Latency threshold */
+ ldlat_en:1; /* 63: Load Latency enabled */
+ };
+};
+
+/* MSR 0xc0011035: IBS Op Data 1 */
+union ibs_op_data {
+ __u64 val;
+ struct {
+ __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
+ tag_to_ret_ctr:16, /* 15-31: op tag to retire count */
+ reserved1:2, /* 32-33: reserved */
+ op_return:1, /* 34: return op */
+ op_brn_taken:1, /* 35: taken branch op */
+ op_brn_misp:1, /* 36: mispredicted branch op */
+ op_brn_ret:1, /* 37: branch op retired */
+ op_rip_invalid:1, /* 38: RIP is invalid */
+ op_brn_fuse:1, /* 39: fused branch op */
+ op_microcode:1, /* 40: microcode op */
+ reserved2:23; /* 41-63: reserved */
+ };
+};
+
+/* MSR 0xc0011036: IBS Op Data 2 */
+union ibs_op_data2 {
+ __u64 val;
+ struct {
+ __u64 data_src_lo:3, /* 0-2: data source low */
+ reserved0:1, /* 3: reserved */
+ rmt_node:1, /* 4: destination node */
+ cache_hit_st:1, /* 5: cache hit state */
+ data_src_hi:2, /* 6-7: data source high */
+ reserved1:56; /* 8-63: reserved */
+ };
+};
+
+/* MSR 0xc0011037: IBS Op Data 3 */
+union ibs_op_data3 {
+ __u64 val;
+ struct {
+ __u64 ld_op:1, /* 0: load op */
+ st_op:1, /* 1: store op */
+ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
+ dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */
+ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
+ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
+ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
+ dc_miss:1, /* 7: data cache miss */
+ dc_mis_acc:1, /* 8: misaligned access */
+ reserved:4, /* 9-12: reserved */
+ dc_wc_mem_acc:1, /* 13: write combining memory access */
+ dc_uc_mem_acc:1, /* 14: uncacheable memory access */
+ dc_locked_op:1, /* 15: locked operation */
+ dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
+ dc_lin_addr_valid:1, /* 17: data cache linear address valid */
+ dc_phy_addr_valid:1, /* 18: data cache physical address valid */
+ dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */
+ l2_miss:1, /* 20: L2 cache miss */
+ sw_pf:1, /* 21: software prefetch */
+ op_mem_width:4, /* 22-25: load/store size in bytes */
+ op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
+ dc_miss_lat:16, /* 32-47: data cache miss latency */
+ tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
+ };
+};
+
+/* MSR 0xc001103c: IBS Fetch Control Extended */
+union ic_ibs_extd_ctl {
+ __u64 val;
+ struct {
+ __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
+ reserved:48; /* 16-63: reserved */
+ };
+};
+
+/*
+ * IBS driver related
+ */
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+#endif /* _ASM_X86_AMD_IBS_H */
diff --git a/arch/x86/include/asm/amd/nb.h b/arch/x86/include/asm/amd/nb.h
new file mode 100644
index 000000000000..ddb5108cf46c
--- /dev/null
+++ b/arch/x86/include/asm/amd/nb.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AMD_NB_H
+#define _ASM_X86_AMD_NB_H
+
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <asm/amd/node.h>
+
+struct amd_nb_bus_dev_range {
+ u8 bus;
+ u8 dev_base;
+ u8 dev_limit;
+};
+
+extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
+
+extern bool early_is_amd_nb(u32 value);
+extern struct resource *amd_get_mmconfig_range(struct resource *res);
+extern void amd_flush_garts(void);
+extern int amd_numa_init(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, unsigned long);
+
+struct amd_l3_cache {
+ unsigned indices;
+ u8 subcaches[4];
+};
+
+struct amd_northbridge {
+ struct pci_dev *misc;
+ struct pci_dev *link;
+ struct amd_l3_cache l3_cache;
+};
+
+struct amd_northbridge_info {
+ u16 num;
+ u64 flags;
+ struct amd_northbridge *nb;
+};
+
+#define AMD_NB_GART BIT(0)
+#define AMD_NB_L3_INDEX_DISABLE BIT(1)
+#define AMD_NB_L3_PARTITIONING BIT(2)
+
+#ifdef CONFIG_AMD_NB
+
+u16 amd_nb_num(void);
+bool amd_nb_has_feature(unsigned int feature);
+struct amd_northbridge *node_to_amd_nb(int node);
+
+static inline bool amd_gart_present(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return false;
+
+ /* GART present only on Fam15h, up to model 0fh */
+ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+ (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
+ return true;
+
+ return false;
+}
+
+#else
+
+#define amd_nb_num(x) 0
+#define amd_nb_has_feature(x) false
+static inline struct amd_northbridge *node_to_amd_nb(int node)
+{
+ return NULL;
+}
+#define amd_gart_present(x) false
+
+#endif
+
+
+#endif /* _ASM_X86_AMD_NB_H */
diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h
new file mode 100644
index 000000000000..a672b8765fa8
--- /dev/null
+++ b/arch/x86/include/asm/amd/node.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Node helper functions and common defines
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ *
+ * Note:
+ * Items in this file may only be used in a single place.
+ * However, it's prudent to keep all AMD Node functionality
+ * in a unified place rather than spreading throughout the
+ * kernel.
+ */
+
+#ifndef _ASM_X86_AMD_NODE_H_
+#define _ASM_X86_AMD_NODE_H_
+
+#include <linux/pci.h>
+
+#define MAX_AMD_NUM_NODES 8
+#define AMD_NODE0_PCI_SLOT 0x18
+
+struct pci_dev *amd_node_get_func(u16 node, u8 func);
+
+static inline u16 amd_num_nodes(void)
+{
+ return topology_amd_nodes_per_pkg() * topology_max_packages();
+}
+
+#ifdef CONFIG_AMD_NODE
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
+int __must_check amd_smn_write(u16 node, u32 address, u32 value);
+
+/* Should only be used by the HSMP driver. */
+int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write);
+#else
+static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; }
+static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; }
+
+static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_AMD_NODE */
+
+/* helper for use with read_poll_timeout */
+static inline int smn_read_register(u32 reg)
+{
+ int data, rc;
+
+ rc = amd_smn_read(0, reg, &data);
+ if (rc)
+ return rc;
+
+ return data;
+}
+#endif /*_ASM_X86_AMD_NODE_H_*/
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
deleted file mode 100644
index 1ae4e5791afa..000000000000
--- a/arch/x86/include/asm/amd_nb.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_AMD_NB_H
-#define _ASM_X86_AMD_NB_H
-
-#include <linux/ioport.h>
-#include <linux/pci.h>
-#include <linux/refcount.h>
-
-struct amd_nb_bus_dev_range {
- u8 bus;
- u8 dev_base;
- u8 dev_limit;
-};
-
-extern const struct pci_device_id amd_nb_misc_ids[];
-extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
-
-extern bool early_is_amd_nb(u32 value);
-extern struct resource *amd_get_mmconfig_range(struct resource *res);
-extern int amd_cache_northbridges(void);
-extern void amd_flush_garts(void);
-extern int amd_numa_init(void);
-extern int amd_get_subcaches(int);
-extern int amd_set_subcaches(int, unsigned long);
-
-extern int amd_smn_read(u16 node, u32 address, u32 *value);
-extern int amd_smn_write(u16 node, u32 address, u32 value);
-extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
-
-struct amd_l3_cache {
- unsigned indices;
- u8 subcaches[4];
-};
-
-struct threshold_block {
- unsigned int block; /* Number within bank */
- unsigned int bank; /* MCA bank the block belongs to */
- unsigned int cpu; /* CPU which controls MCA bank */
- u32 address; /* MSR address for the block */
- u16 interrupt_enable; /* Enable/Disable APIC interrupt */
- bool interrupt_capable; /* Bank can generate an interrupt. */
-
- u16 threshold_limit; /*
- * Value upon which threshold
- * interrupt is generated.
- */
-
- struct kobject kobj; /* sysfs object */
- struct list_head miscj; /*
- * List of threshold blocks
- * within a bank.
- */
-};
-
-struct threshold_bank {
- struct kobject *kobj;
- struct threshold_block *blocks;
-
- /* initialized to the number of CPUs on the node sharing this bank */
- refcount_t cpus;
-};
-
-struct amd_northbridge {
- struct pci_dev *root;
- struct pci_dev *misc;
- struct pci_dev *link;
- struct amd_l3_cache l3_cache;
- struct threshold_bank *bank4;
-};
-
-struct amd_northbridge_info {
- u16 num;
- u64 flags;
- struct amd_northbridge *nb;
-};
-
-#define AMD_NB_GART BIT(0)
-#define AMD_NB_L3_INDEX_DISABLE BIT(1)
-#define AMD_NB_L3_PARTITIONING BIT(2)
-
-#ifdef CONFIG_AMD_NB
-
-u16 amd_nb_num(void);
-bool amd_nb_has_feature(unsigned int feature);
-struct amd_northbridge *node_to_amd_nb(int node);
-
-static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
-{
- struct pci_dev *misc;
- int i;
-
- for (i = 0; i != amd_nb_num(); i++) {
- misc = node_to_amd_nb(i)->misc;
-
- if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
- PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
- return i;
- }
-
- WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
- return 0;
-}
-
-static inline bool amd_gart_present(void)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- return false;
-
- /* GART present only on Fam15h, upto model 0fh */
- if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
- return true;
-
- return false;
-}
-
-#else
-
-#define amd_nb_num(x) 0
-#define amd_nb_has_feature(x) false
-#define node_to_amd_nb(x) NULL
-#define amd_gart_present(x) false
-
-#endif
-
-
-#endif /* _ASM_X86_AMD_NB_H */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
deleted file mode 100644
index 0acbac299e49..000000000000
--- a/arch/x86/include/asm/apb_timer.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare
- *
- * (C) Copyright 2009 Intel Corporation
- * Author: Jacob Pan (jacob.jun.pan@intel.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * Note:
- */
-
-#ifndef ASM_X86_APBT_H
-#define ASM_X86_APBT_H
-#include <linux/sfi.h>
-
-#ifdef CONFIG_APB_TIMER
-
-/* default memory mapped register base */
-#define LNW_SCU_ADDR 0xFF100000
-#define LNW_EXT_TIMER_OFFSET 0x1B800
-#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET)
-#define LNW_EXT_TIMER_PGOFFSET 0x800
-
-/* APBT clock speed range from PCLK to fabric base, 25-100MHz */
-#define APBT_MAX_FREQ 50000000
-#define APBT_MIN_FREQ 1000000
-#define APBT_MMAP_SIZE 1024
-
-#define APBT_DEV_USED 1
-
-extern void apbt_time_init(void);
-extern unsigned long apbt_quick_calibrate(void);
-extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
-extern void apbt_setup_secondary_clock(void);
-
-extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
-extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
-extern int sfi_mtimer_num;
-
-#else /* CONFIG_APB_TIMER */
-
-static inline unsigned long apbt_quick_calibrate(void) {return 0; }
-static inline void apbt_time_init(void) { }
-
-#endif
-#endif /* ASM_X86_APBT_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 130e81e10fc7..a26e66d66444 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -1,7 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_APIC_H
#define _ASM_X86_APIC_H
#include <linux/cpumask.h>
+#include <linux/static_call.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
@@ -11,9 +13,16 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
+#include <asm/posted_intr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP 0 /* Default */
+#define APIC_EXTNMI_ALL 1
+#define APIC_EXTNMI_NONE 2
+
/*
* Debugging macros
*/
@@ -21,38 +30,40 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
-/* Macros for apic_extnmi which controls external NMI masking */
-#define APIC_EXTNMI_BSP 0 /* Default */
-#define APIC_EXTNMI_ALL 1
-#define APIC_EXTNMI_NONE 2
-
/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
+ * Define the default level of output to be very little This can be turned
+ * up by using apic=verbose for more information and apic=debug for _lots_
+ * of information. apic_verbosity is defined in apic.c
*/
-#define apic_printk(v, s, a...) do { \
- if ((v) <= apic_verbosity) \
- printk(s, ##a); \
- } while (0)
-
+#define apic_printk(v, s, a...) \
+do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+} while (0)
+
+#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a)
+#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a)
+#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a)
+/* Unconditional debug prints for code which is guarded by apic_verbosity already */
+#define apic_dbg(s, a...) printk(KERN_DEBUG s, ##a)
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-extern void generic_apic_probe(void);
+extern void x86_32_probe_apic(void);
#else
-static inline void generic_apic_probe(void)
-{
-}
+static inline void x86_32_probe_apic(void) { }
#endif
+extern u32 cpuid_to_apicid[];
+
+#define CPU_ACPIID_INVALID U32_MAX
+
#ifdef CONFIG_X86_LOCAL_APIC
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
extern int local_apic_timer_c2_ok;
-extern int disable_apic;
-extern unsigned int lapic_timer_frequency;
+extern bool apic_is_disabled;
+extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
@@ -63,20 +74,6 @@ enum apic_intr_mode_id {
APIC_SYMMETRIC_IO_NO_ROUTING
};
-#ifdef CONFIG_SMP
-extern void __inquire_remote_apic(int apicid);
-#else /* CONFIG_SMP */
-static inline void __inquire_remote_apic(int apicid)
-{
-}
-#endif /* CONFIG_SMP */
-
-static inline void default_inquire_remote_apic(int apicid)
-{
- if (apic_verbosity >= APIC_DEBUG)
- __inquire_remote_apic(apicid);
-}
-
/*
* With 82489DX we can't rely on apic feature bit
* retrieved via cpuid but still have to deal with
@@ -87,7 +84,7 @@ static inline void default_inquire_remote_apic(int apicid)
*/
static inline bool apic_from_smp_config(void)
{
- return smp_found_config && !disable_apic;
+ return smp_found_config && !apic_is_disabled;
}
/*
@@ -97,24 +94,25 @@ static inline bool apic_from_smp_config(void)
#include <asm/paravirt.h>
#endif
-extern int setup_profiling_timer(unsigned int);
-
static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
- ASM_OUTPUT2("=r" (v), "=m" (*addr)),
- ASM_OUTPUT2("0" (v), "m" (*addr)));
+ alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+ ASM_OUTPUT("=r" (v), "=m" (*addr)),
+ ASM_INPUT("0" (v), "m" (*addr)));
}
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
+}
+
+static inline void native_apic_mem_eoi(void)
+{
+ native_apic_mem_write(APIC_EOI, APIC_EOI_ACK);
}
-extern void native_apic_wait_icr_idle(void);
-extern u32 native_safe_apic_wait_icr_idle(void);
extern void native_apic_icr_write(u32 low, u32 id);
extern u64 native_apic_icr_read(void);
@@ -122,22 +120,22 @@ static inline bool apic_is_x2apic_enabled(void)
{
u64 msr;
- if (rdmsrl_safe(MSR_IA32_APICBASE, &msr))
+ if (rdmsrq_safe(MSR_IA32_APICBASE, &msr))
return false;
return msr & X2APIC_ENABLE;
}
extern void enable_IR_x2apic(void);
-extern int get_physical_broadcast(void);
-
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
+extern void apic_soft_disable(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
+extern void apic_intr_mode_select(void);
extern void apic_intr_mode_init(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
@@ -146,15 +144,14 @@ extern void setup_secondary_APIC_clock(void);
extern void lapic_update_tsc_freq(void);
#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
+static inline bool apic_force_enable(unsigned long addr)
{
- return -1;
+ return false;
}
#else
-extern int apic_force_enable(unsigned long addr);
+extern bool apic_force_enable(unsigned long addr);
#endif
-extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
@@ -172,8 +169,20 @@ static inline int apic_is_clustered_box(void)
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
extern void lapic_online(void);
extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
+
+extern void apic_send_IPI_allbutself(unsigned int vector);
+
+extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present);
+extern void topology_register_boot_apic(u32 apic_id);
+extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id);
+extern void topology_hotunplug_apic(unsigned int cpu);
+extern void topology_apply_cmdline_limits_early(void);
+extern void topology_init_possible_cpus(void);
+extern void topology_reset_possible_cpus_up(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
@@ -184,34 +193,28 @@ static inline void disable_local_APIC(void) { }
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
static inline void init_bsp_APIC(void) { }
+static inline void apic_intr_mode_select(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
+static inline void topology_apply_cmdline_limits_early(void) { }
+static inline void topology_init_possible_cpus(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
-/*
- * Make previous memory operations globally visible before
- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
- * mfence for this.
- */
-static inline void x2apic_wrmsr_fence(void)
-{
- asm volatile("mfence" : : : "memory");
-}
-
static inline void native_apic_msr_write(u32 reg, u32 v)
{
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
reg == APIC_LVR)
return;
- wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
+ wrmsrq(APIC_BASE_MSR + (reg >> 4), v);
}
-static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
+static inline void native_apic_msr_eoi(void)
{
- __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+ native_wrmsrq(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK);
}
static inline u32 native_apic_msr_read(u32 reg)
@@ -221,38 +224,26 @@ static inline u32 native_apic_msr_read(u32 reg)
if (reg == APIC_DFR)
return -1;
- rdmsrl(APIC_BASE_MSR + (reg >> 4), msr);
+ rdmsrq(APIC_BASE_MSR + (reg >> 4), msr);
return (u32)msr;
}
-static inline void native_x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return;
-}
-
-static inline u32 native_safe_x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return 0;
-}
-
static inline void native_x2apic_icr_write(u32 low, u32 id)
{
- wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+ wrmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
}
static inline u64 native_x2apic_icr_read(void)
{
unsigned long val;
- rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ rdmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), val);
return val;
}
extern int x2apic_mode;
extern int x2apic_phys;
-extern void __init check_x2apic(void);
+extern void __init x2apic_set_max_apicid(u32 apicid);
extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
{
@@ -261,19 +252,18 @@ static inline int x2apic_enabled(void)
#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC))
#else /* !CONFIG_X86_X2APIC */
-static inline void check_x2apic(void) { }
static inline void x2apic_setup(void) { }
static inline int x2apic_enabled(void) { return 0; }
-
+static inline u32 native_apic_msr_read(u32 reg) { BUG(); }
#define x2apic_mode (0)
#define x2apic_supported() (0)
#endif /* !CONFIG_X86_X2APIC */
+extern void __init check_x2apic(void);
struct irq_data;
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Generic APIC sub-arch data struct.
*
@@ -283,8 +273,8 @@ struct irq_data;
*/
struct apic {
/* Hotpath functions first */
- void (*eoi_write)(u32 reg, u32 v);
- void (*native_eoi_write)(u32 reg, u32 v);
+ void (*eoi)(void);
+ void (*native_eoi)(void);
void (*write)(u32 reg, u32 v);
u32 (*read)(u32 reg);
@@ -299,11 +289,10 @@ struct apic {
void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);
- /* dest_logical is used by the IPI functions */
- u32 dest_logical;
- u32 disable_esr;
- u32 irq_delivery_mode;
- u32 irq_dest_mode;
+ u32 disable_esr : 1,
+ dest_mode_logical : 1,
+ x2apic_set_max_apicid : 1,
+ nmi_to_offline_cpu : 1;
u32 (*calc_dest_apicid)(unsigned int cpu);
@@ -311,45 +300,47 @@ struct apic {
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
+ /* The limit of the APIC ID space. */
+ u32 max_apic_id;
+
/* Probe, setup and smpboot functions */
int (*probe)(void);
+ void (*setup)(void);
+ void (*teardown)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- int (*apic_id_valid)(u32 apicid);
- int (*apic_id_registered)(void);
- bool (*check_apicid_used)(physid_mask_t *map, int apicid);
void (*init_apic_ldr)(void);
- void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
- void (*setup_apic_routing)(void);
- int (*cpu_present_to_apicid)(int mps_cpu);
- void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
- int (*check_phys_apicid_present)(int phys_apicid);
- int (*phys_pkg_id)(int cpuid_apic, int index_msb);
+ u32 (*cpu_present_to_apicid)(int mps_cpu);
- u32 (*get_apic_id)(unsigned long x);
- u32 (*set_apic_id)(unsigned int id);
+ u32 (*get_apic_id)(u32 id);
/* wakeup_secondary_cpu */
- int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
-
- void (*inquire_remote_apic)(int apicid);
-
-#ifdef CONFIG_X86_32
- /*
- * Called very early during boot from get_smp_config(). It should
- * return the logical apicid. x86_[bios]_cpu_to_apicid is
- * initialized before this function is called.
- *
- * If logical apicid can't be determined that early, the function
- * may return BAD_APICID. Logical apicid will be configured after
- * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
- * won't be applied properly during early boot in this case.
- */
- int (*x86_32_early_logical_apicid)(int cpu);
-#endif
+ int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+ /* wakeup secondary CPU using 64-bit wakeup point */
+ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+
+ void (*update_vector)(unsigned int cpu, unsigned int vector, bool set);
+
char *name;
};
+struct apic_override {
+ void (*eoi)(void);
+ void (*native_eoi)(void);
+ void (*write)(u32 reg, u32 v);
+ u32 (*read)(u32 reg);
+ void (*send_IPI)(int cpu, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+ void (*send_IPI_self)(int vector);
+ u64 (*icr_read)(void);
+ void (*icr_write)(u32 low, u32 high);
+ int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+};
+
/*
* Pointer to the local APIC driver in use on this system (there's
* always just one such driver in use - the kernel decides via an
@@ -361,19 +352,11 @@ extern struct apic *apic;
* APIC drivers are probed based on how they are listed in the .apicdrivers
* section. So the order is important and enforced by the ordering
* of different apic driver files in the Makefile.
- *
- * For the files having two apic drivers, we use apic_drivers()
- * to enforce the order with in them.
*/
#define apic_driver(sym) \
static const struct apic *__apicdrivers_##sym __used \
__aligned(sizeof(struct apic *)) \
- __section(.apicdrivers) = { &sym }
-
-#define apic_drivers(sym1, sym2) \
- static struct apic *__apicdrivers_##sym1##sym2[2] __used \
- __aligned(sizeof(struct apic *)) \
- __section(.apicdrivers) = { &sym1, &sym2 }
+ __section(".apicdrivers") = { &sym }
extern struct apic *__apicdrivers[], *__apicdrivers_end[];
@@ -381,48 +364,121 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
* APIC functionality to boot other CPUs - only used on SMP:
*/
#ifdef CONFIG_SMP
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
extern int lapic_can_unplug_cpu(void);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
+extern struct apic_override __x86_apic_override;
+
+void __init apic_setup_apic_calls(void);
+void __init apic_install_driver(struct apic *driver);
+
+#define apic_update_callback(_callback, _fn) { \
+ __x86_apic_override._callback = _fn; \
+ apic->_callback = _fn; \
+ static_call_update(apic_call_##_callback, _fn); \
+ pr_info("APIC: %s() replaced with %ps()\n", #_callback, _fn); \
+}
+
+#define DECLARE_APIC_CALL(__cb) \
+ DECLARE_STATIC_CALL(apic_call_##__cb, *apic->__cb)
+
+DECLARE_APIC_CALL(eoi);
+DECLARE_APIC_CALL(native_eoi);
+DECLARE_APIC_CALL(icr_read);
+DECLARE_APIC_CALL(icr_write);
+DECLARE_APIC_CALL(read);
+DECLARE_APIC_CALL(send_IPI);
+DECLARE_APIC_CALL(send_IPI_mask);
+DECLARE_APIC_CALL(send_IPI_mask_allbutself);
+DECLARE_APIC_CALL(send_IPI_allbutself);
+DECLARE_APIC_CALL(send_IPI_all);
+DECLARE_APIC_CALL(send_IPI_self);
+DECLARE_APIC_CALL(wait_icr_idle);
+DECLARE_APIC_CALL(wakeup_secondary_cpu);
+DECLARE_APIC_CALL(wakeup_secondary_cpu_64);
+DECLARE_APIC_CALL(write);
+
+static __always_inline u32 apic_read(u32 reg)
+{
+ return static_call(apic_call_read)(reg);
+}
+
+static __always_inline void apic_write(u32 reg, u32 val)
+{
+ static_call(apic_call_write)(reg, val);
+}
+
+static __always_inline void apic_eoi(void)
+{
+ static_call(apic_call_eoi)();
+}
+
+static __always_inline void apic_native_eoi(void)
+{
+ static_call(apic_call_native_eoi)();
+}
+
+static __always_inline u64 apic_icr_read(void)
+{
+ return static_call(apic_call_icr_read)();
+}
+
+static __always_inline void apic_icr_write(u32 low, u32 high)
+{
+ static_call(apic_call_icr_write)(low, high);
+}
+
+static __always_inline void __apic_send_IPI(int cpu, int vector)
+{
+ static_call(apic_call_send_IPI)(cpu, vector);
+}
-static inline u32 apic_read(u32 reg)
+static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
- return apic->read(reg);
+ static_call_mod(apic_call_send_IPI_mask)(mask, vector);
}
-static inline void apic_write(u32 reg, u32 val)
+static __always_inline void __apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
- apic->write(reg, val);
+ static_call(apic_call_send_IPI_mask_allbutself)(mask, vector);
}
-static inline void apic_eoi(void)
+static __always_inline void __apic_send_IPI_allbutself(int vector)
{
- apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
+ static_call(apic_call_send_IPI_allbutself)(vector);
}
-static inline u64 apic_icr_read(void)
+static __always_inline void __apic_send_IPI_all(int vector)
{
- return apic->icr_read();
+ static_call(apic_call_send_IPI_all)(vector);
}
-static inline void apic_icr_write(u32 low, u32 high)
+static __always_inline void __apic_send_IPI_self(int vector)
{
- apic->icr_write(low, high);
+ static_call_mod(apic_call_send_IPI_self)(vector);
}
-static inline void apic_wait_icr_idle(void)
+static __always_inline void apic_wait_icr_idle(void)
{
- apic->wait_icr_idle();
+ static_call_cond(apic_call_wait_icr_idle)();
}
-static inline u32 safe_apic_wait_icr_idle(void)
+static __always_inline u32 safe_apic_wait_icr_idle(void)
{
- return apic->safe_wait_icr_idle();
+ return apic->safe_wait_icr_idle ? apic->safe_wait_icr_idle() : 0;
}
-extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v));
+static __always_inline bool apic_id_valid(u32 apic_id)
+{
+ return apic_id <= apic->max_apic_id;
+}
+
+static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ if (apic->update_vector)
+ apic->update_vector(cpu, vector, set);
+}
#else /* CONFIG_X86_LOCAL_APIC */
@@ -433,29 +489,88 @@ static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
-static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {}
+static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
+static inline void apic_setup_apic_calls(void) { }
+static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { }
+
+#define apic_update_callback(_callback, _fn) do { } while (0)
#endif /* CONFIG_X86_LOCAL_APIC */
extern void apic_ack_irq(struct irq_data *data);
-static inline void ack_APIC_irq(void)
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+ u32 irr = apic_read(APIC_IRR + APIC_VECTOR_TO_REG_OFFSET(vector));
+
+ return !!(irr & (1U << APIC_VECTOR_TO_BIT_NUMBER(vector)));
+}
+
+static inline bool is_vector_pending(unsigned int vector)
+{
+ return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
+}
+
+#define MAX_APIC_VECTOR 256
+#define APIC_VECTORS_PER_REG 32
+
+/*
+ * Vector states are maintained by APIC in 32-bit registers that are
+ * 16 bytes aligned. The status of each vector is kept in a single
+ * bit.
+ */
+static inline int apic_find_highest_vector(void *bitmap)
+{
+ int vec;
+ u32 *reg;
+
+ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0; vec -= APIC_VECTORS_PER_REG) {
+ reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec);
+ if (*reg)
+ return __fls(*reg) + vec;
+ }
+
+ return -1;
+}
+
+static inline u32 apic_get_reg(void *regs, int reg)
+{
+ return *((u32 *) (regs + reg));
+}
+
+static inline void apic_set_reg(void *regs, int reg, u32 val)
+{
+ *((u32 *) (regs + reg)) = val;
+}
+
+static __always_inline u64 apic_get_reg64(void *regs, int reg)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ return *((u64 *) (regs + reg));
+}
+
+static __always_inline void apic_set_reg64(void *regs, int reg, u64 val)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ *((u64 *) (regs + reg)) = val;
+}
+
+static inline void apic_clear_vector(int vec, void *bitmap)
{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction
- * ... yummie.
- */
- apic_eoi();
+ clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
}
-static inline unsigned default_get_apic_id(unsigned long x)
+static inline void apic_set_vector(int vec, void *bitmap)
{
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+ set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
- if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
- return (x >> 24) & 0xFF;
- else
- return (x >> 24) & 0x0F;
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+ return test_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
}
/*
@@ -464,83 +579,52 @@ static inline unsigned default_get_apic_id(unsigned long x)
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
-#ifdef CONFIG_X86_64
-extern void apic_send_IPI_self(int vector);
-
-DECLARE_PER_CPU(int, x2apic_extra_bits);
-#endif
-
-extern void generic_bigsmp_probe(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
-#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-
-DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
-
extern struct apic apic_noop;
-static inline unsigned int read_apic_id(void)
+static inline u32 read_apic_id(void)
{
- unsigned int reg = apic_read(APIC_ID);
+ u32 reg = apic_read(APIC_ID);
return apic->get_apic_id(reg);
}
-extern int default_apic_id_valid(u32 apicid);
+#ifdef CONFIG_X86_64
+typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip);
extern int default_acpi_madt_oem_check(char *, char *);
-extern void default_setup_apic_routing(void);
+extern void x86_64_probe_apic(void);
+#else
+static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; }
+static inline void x86_64_probe_apic(void) { }
+#endif
extern u32 apic_default_calc_apicid(unsigned int cpu);
extern u32 apic_flat_calc_apicid(unsigned int cpu);
-extern bool default_check_apicid_used(physid_mask_t *map, int apicid);
-extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
-extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int phys_apicid);
+extern u32 default_cpu_present_to_apicid(int mps_cpu);
-#endif /* CONFIG_X86_LOCAL_APIC */
+void apic_send_nmi_to_offline_cpu(unsigned int cpu);
-#ifdef CONFIG_SMP
-bool apic_id_is_primary_thread(unsigned int id);
-#else
-static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
-#endif
-
-extern void irq_enter(void);
-extern void irq_exit(void);
+#else /* CONFIG_X86_LOCAL_APIC */
-static inline void entering_irq(void)
-{
- irq_enter();
- kvm_set_cpu_l1tf_flush_l1d();
-}
+static inline u32 read_apic_id(void) { return 0; }
-static inline void entering_ack_irq(void)
-{
- entering_irq();
- ack_APIC_irq();
-}
+#endif /* !CONFIG_X86_LOCAL_APIC */
-static inline void ipi_entering_ack_irq(void)
-{
- irq_enter();
- ack_APIC_irq();
- kvm_set_cpu_l1tf_flush_l1d();
-}
+#ifdef CONFIG_SMP
+void apic_smt_update(void);
+#else
+static inline void apic_smt_update(void) { }
+#endif
-static inline void exiting_irq(void)
-{
- irq_exit();
-}
+struct msi_msg;
+struct irq_cfg;
-static inline void exiting_ack_irq(void)
-{
- ack_APIC_irq();
- irq_exit();
-}
+extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
+ bool dmar);
extern void ioapic_zap_locks(void);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
deleted file mode 100644
index d3a2b3876ce6..000000000000
--- a/arch/x86/include/asm/apic_flat_64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_APIC_FLAT_64_H
-#define _ASM_X86_APIC_FLAT_64_H
-
-extern void flat_init_apic_ldr(void);
-
-#endif
-
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 05e694ed8386..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_APICDEF_H
#define _ASM_X86_APICDEF_H
+#include <linux/bits.h>
+
/*
* Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
*
@@ -18,6 +20,13 @@
*/
#define IO_APIC_SLOT_SIZE 1024
+#define APIC_DELIVERY_MODE_FIXED 0
+#define APIC_DELIVERY_MODE_LOWESTPRIO 1
+#define APIC_DELIVERY_MODE_SMI 2
+#define APIC_DELIVERY_MODE_NMI 4
+#define APIC_DELIVERY_MODE_INIT 5
+#define APIC_DELIVERY_MODE_EXTINT 7
+
#define APIC_ID 0x20
#define APIC_LVR 0x30
@@ -89,18 +98,12 @@
#define APIC_DM_EXTINT 0x00700
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
+#define SET_XAPIC_DEST_FIELD(x) ((x) << 24)
#define APIC_LVTT 0x320
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18)
-#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3)
-#define SET_APIC_TIMER_BASE(x) (((x) << 18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
#define APIC_LVT_TIMER_ONESHOT (0 << 17)
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
@@ -132,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
@@ -144,9 +149,10 @@
#define APIC_EILVT_MASKED (1 << 16)
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-#define APIC_BASE_MSR 0x800
-#define XAPIC_ENABLE (1UL << 11)
-#define X2APIC_ENABLE (1UL << 10)
+#define APIC_BASE_MSR 0x800
+#define APIC_X2APIC_ID_MSR 0x802
+#define XAPIC_ENABLE BIT(11)
+#define X2APIC_ENABLE BIT(10)
#ifdef CONFIG_X86_32
# define MAX_IO_APICS 64
@@ -168,279 +174,10 @@
#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { /* LVT - Thermal Sensor */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_thermal;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
#ifdef CONFIG_X86_32
#define BAD_APICID 0xFFu
#else
#define BAD_APICID 0xFFFFu
#endif
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index fc0693569f7a..b5982b94bdba 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -12,15 +12,14 @@
#define REG_OUT "a"
#endif
-#define __HAVE_ARCH_SW_HWEIGHT
-
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
- asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE("call __sw_hweight32",
+ "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
@@ -46,9 +45,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res;
- asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE("call __sw_hweight64",
+ "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 3ac991d81e74..4c305305871b 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* This file is part of the Linux kernel.
*
* Copyright (c) 2011-2014, Intel Corporation
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#ifndef ASM_X86_ARCHRANDOM_H
@@ -28,101 +15,46 @@
#define RDRAND_RETRY_LOOPS 10
-#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
-#define RDSEED_INT ".byte 0x0f,0xc7,0xf8"
-#ifdef CONFIG_X86_64
-# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
-# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8"
-#else
-# define RDRAND_LONG RDRAND_INT
-# define RDSEED_LONG RDSEED_INT
-#endif
-
/* Unconditional execution of RDRAND and RDSEED */
-static inline bool rdrand_long(unsigned long *v)
-{
- bool ok;
- unsigned int retry = RDRAND_RETRY_LOOPS;
- do {
- asm volatile(RDRAND_LONG
- CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
- if (ok)
- return true;
- } while (--retry);
- return false;
-}
-
-static inline bool rdrand_int(unsigned int *v)
+static inline bool __must_check rdrand_long(unsigned long *v)
{
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_INT
- CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ asm volatile("rdrand %[out]"
+ : "=@ccc" (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
return false;
}
-static inline bool rdseed_long(unsigned long *v)
+static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
- asm volatile(RDSEED_LONG
- CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ asm volatile("rdseed %[out]"
+ : "=@ccc" (ok), [out] "=r" (*v));
return ok;
}
-static inline bool rdseed_int(unsigned int *v)
-{
- bool ok;
- asm volatile(RDSEED_INT
- CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
- return ok;
-}
-
-/* Conditional execution based on CPU type */
-#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
-
/*
* These are the generic interfaces; they must not be declared if the
- * stubs in <linux/random.h> are to be invoked,
- * i.e. CONFIG_ARCH_RANDOM is not defined.
+ * stubs in <linux/random.h> are to be invoked.
*/
-#ifdef CONFIG_ARCH_RANDOM
-
-static inline bool arch_get_random_long(unsigned long *v)
-{
- return arch_has_random() ? rdrand_long(v) : false;
-}
-
-static inline bool arch_get_random_int(unsigned int *v)
-{
- return arch_has_random() ? rdrand_int(v) : false;
-}
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
- return arch_has_random_seed() ? rdseed_long(v) : false;
+ return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0;
}
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
{
- return arch_has_random_seed() ? rdseed_int(v) : false;
+ return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 1 : 0;
}
-extern void x86_init_rdrand(struct cpuinfo_x86 *c);
-
-#else /* !CONFIG_ARCH_RANDOM */
-
-static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { }
-
-#endif /* !CONFIG_ARCH_RANDOM */
+#ifndef CONFIG_UML
+void x86_init_rdrand(struct cpuinfo_x86 *c);
+#endif
#endif /* ASM_X86_ARCHRANDOM_H */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b9125..11c6fecc3ad7 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -1,41 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/ftrace.h>
#include <linux/uaccess.h>
+#include <linux/pgtable.h>
#include <asm/string.h>
#include <asm/page.h>
#include <asm/checksum.h>
+#include <asm/mce.h>
#include <asm-generic/asm-prototypes.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
#include <asm/asm.h>
+#include <asm/fred.h>
+#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
-#ifndef CONFIG_X86_CMPXCHG64
+#ifndef CONFIG_X86_CX8
extern void cmpxchg8b_emu(void);
#endif
-#ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
-#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
-INDIRECT_THUNK(8)
-INDIRECT_THUNK(9)
-INDIRECT_THUNK(10)
-INDIRECT_THUNK(11)
-INDIRECT_THUNK(12)
-INDIRECT_THUNK(13)
-INDIRECT_THUNK(14)
-INDIRECT_THUNK(15)
+#ifdef CONFIG_STACKPROTECTOR
+extern unsigned long __ref_stack_chk_guard;
#endif
-INDIRECT_THUNK(ax)
-INDIRECT_THUNK(bx)
-INDIRECT_THUNK(cx)
-INDIRECT_THUNK(dx)
-INDIRECT_THUNK(si)
-INDIRECT_THUNK(di)
-INDIRECT_THUNK(bp)
-#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 6467757bb39f..0e8c611bc9e2 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -2,24 +2,31 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
-# define __ASM_FORM(x) x
-# define __ASM_FORM_RAW(x) x
-# define __ASM_FORM_COMMA(x) x,
+#include <linux/annotate.h>
+
+#ifdef __ASSEMBLER__
+# define __ASM_FORM(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
+# define __ASM_REGPFX %
#else
-# define __ASM_FORM(x) " " #x " "
-# define __ASM_FORM_RAW(x) #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) ","
+# define __ASM_REGPFX %%
#endif
+#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
#ifndef __x86_64__
/* 32 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(a)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+# define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
/* 64 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(b)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+# define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
@@ -46,6 +53,9 @@
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)
+/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */
+#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip))
+
#ifndef __x86_64__
/* 32 bit */
@@ -105,102 +115,106 @@
#endif
-/*
- * Macros to generate condition code outputs from inline assembly,
- * The output operand must be type "bool".
- */
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
-# define CC_OUT(c) "=@cc" #c
-#else
-# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
-# define CC_OUT(c) [_cc_ ## c] "=qm"
-#endif
-
-/* Exception table entry */
-#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- .pushsection "__ex_table","a" ; \
- .balign 4 ; \
- .long (from) - . ; \
- .long (to) - . ; \
- .long (handler) - . ; \
- .popsection
+#ifndef __ASSEMBLER__
+static __always_inline __pure void *rip_rel_ptr(void *p)
+{
+ asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
-# define _ASM_EXTABLE(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+ return p;
+}
+#endif
-# define _ASM_EXTABLE_UA(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+#ifdef __KERNEL__
-# define _ASM_EXTABLE_FAULT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+#ifndef COMPILE_OFFSETS
+#include <asm/asm-offsets.h>
+#endif
-# define _ASM_EXTABLE_EX(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+# include <asm/extable_fixup_types.h>
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+/* Exception table entry */
+#ifdef __ASSEMBLER__
+
+# define _ASM_EXTABLE_TYPE(from, to, type) \
+ .pushsection "__ex_table", "aM", @progbits, EXTABLE_SIZE ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .long type ; \
+ .popsection
-# define _ASM_NOKPROBE(entry) \
+# ifdef CONFIG_KPROBES
+# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
_ASM_PTR (entry); \
.popsection
-
-.macro ALIGN_DESTINATION
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
- .endm
-
-#else
-# define _EXPAND_EXTABLE_HANDLE(x) #x
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- " .pushsection \"__ex_table\",\"a\"\n" \
+# else
+# define _ASM_NOKPROBE(entry)
+# endif
+
+#else /* ! __ASSEMBLER__ */
+
+# define DEFINE_EXTABLE_TYPE_REG \
+ ".macro extable_type_reg type:req reg:req\n" \
+ ".set .Lfound, 0\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".long \\type + (.Lregnr << 8)\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".long \\type + (.Lregnr << 8)\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".if (.Lfound != 1)\n" \
+ ".error \"extable_type_reg: bad register argument\"\n" \
+ ".endif\n" \
+ ".endm\n"
+
+# define UNDEFINE_EXTABLE_TYPE_REG \
+ ".purgem extable_type_reg\n"
+
+# define _ASM_EXTABLE_TYPE(from, to, type) \
+ " .pushsection __ex_table, \"aM\", @progbits, " \
+ __stringify(EXTABLE_SIZE) "\n" \
" .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
- " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .long " __stringify(type) " \n" \
" .popsection\n"
-# define _ASM_EXTABLE(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
-
-# define _ASM_EXTABLE_UA(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
-
-# define _ASM_EXTABLE_FAULT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \
+ " .pushsection __ex_table, \"aM\", @progbits, " \
+ __stringify(EXTABLE_SIZE) "\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ DEFINE_EXTABLE_TYPE_REG \
+ "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\
+ UNDEFINE_EXTABLE_TYPE_REG \
+ " .popsection\n"
-# define _ASM_EXTABLE_EX(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+/* For C file, we already have NOKPROBE_SYMBOL macro */
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+/* Insert a comma if args are non-empty */
+#define COMMA(x...) __COMMA(x)
+#define __COMMA(...) , ##__VA_ARGS__
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-#endif
+/*
+ * Combine multiple asm inline constraint args into a single arg for passing to
+ * another macro.
+ */
+#define ASM_OUTPUT(x...) x
+#define ASM_INPUT(x...) x
-#ifndef __ASSEMBLY__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -209,6 +223,35 @@
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif /* __ASSEMBLER__ */
+
+#define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
+
+#define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
+
+#define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
+
+/*
+ * Both i386 and x86_64 returns 64-bit values in edx:eax for certain
+ * instructions, but GCC's "A" constraint has different meanings.
+ * For i386, "A" means exactly edx:eax, while for x86_64 it
+ * means rax *or* rdx.
+ *
+ * These helpers wrapping these semantic differences save one instruction
+ * clearing the high half of 'low':
+ */
+#ifdef CONFIG_X86_64
+# define EAX_EDX_DECLARE_ARGS(val, low, high) unsigned long low, high
+# define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
+# define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
+#else
+# define EAX_EDX_DECLARE_ARGS(val, low, high) u64 val
+# define EAX_EDX_VAL(val, low, high) (val)
+# define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
+#endif /* __KERNEL__ */
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..75743f1dfd4e 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -14,206 +14,115 @@
* resource counting etc..
*/
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * arch_atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
static __always_inline int arch_atomic_read(const atomic_t *v)
{
/*
* Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
* it's non-inlined function that increases binary size and stack usage.
*/
- return READ_ONCE((v)->counter);
+ return __READ_ONCE((v)->counter);
}
-/**
- * arch_atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
- WRITE_ONCE(v->counter, i);
+ __WRITE_ONCE(v->counter, i);
}
-/**
- * arch_atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "addl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "addl %1, %0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub - subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "subl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subl %1, %0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
}
#define arch_atomic_sub_and_test arch_atomic_sub_and_test
-/**
- * arch_atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic_inc(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
+ asm_inline volatile(LOCK_PREFIX "incl %0"
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
-/**
- * arch_atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic_dec(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
+ asm_inline volatile(LOCK_PREFIX "decl %0"
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
-/**
- * arch_atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
}
#define arch_atomic_dec_and_test arch_atomic_dec_and_test
-/**
- * arch_atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
}
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
-/**
- * arch_atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
}
#define arch_atomic_add_negative arch_atomic_add_negative
-/**
- * arch_atomic_add_return - add integer and return
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
+#define arch_atomic_add_return arch_atomic_add_return
-/**
- * arch_atomic_sub_return - subtract integer and return
- * @v: pointer of type atomic_t
- * @i: integer value to subtract
- *
- * Atomically subtracts @i from @v and returns @v - @i
- */
-static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
-{
- return arch_atomic_add_return(-i, v);
-}
+#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return xadd(&v->counter, i);
}
+#define arch_atomic_fetch_add arch_atomic_fetch_add
-static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
-{
- return xadd(&v->counter, -i);
-}
+#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return arch_cmpxchg(&v->counter, old, new);
}
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
-#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
- return try_cmpxchg(&v->counter, old, new);
+ return arch_try_cmpxchg(&v->counter, old, new);
}
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
-static inline int arch_atomic_xchg(atomic_t *v, int new)
+static __always_inline int arch_atomic_xchg(atomic_t *v, int new)
{
return arch_xchg(&v->counter, new);
}
+#define arch_atomic_xchg arch_atomic_xchg
-static inline void arch_atomic_and(int i, atomic_t *v)
+static __always_inline void arch_atomic_and(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "andl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
}
-static inline int arch_atomic_fetch_and(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
{
int val = arch_atomic_read(v);
@@ -221,16 +130,17 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v)
return val;
}
+#define arch_atomic_fetch_and arch_atomic_fetch_and
-static inline void arch_atomic_or(int i, atomic_t *v)
+static __always_inline void arch_atomic_or(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "orl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
}
-static inline int arch_atomic_fetch_or(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v)
{
int val = arch_atomic_read(v);
@@ -238,16 +148,17 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v)
return val;
}
+#define arch_atomic_fetch_or arch_atomic_fetch_or
-static inline void arch_atomic_xor(int i, atomic_t *v)
+static __always_inline void arch_atomic_xor(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "xorl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
}
-static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v)
{
int val = arch_atomic_read(v);
@@ -255,6 +166,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
return val;
}
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
@@ -262,6 +174,4 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
# include <asm/atomic64_64.h>
#endif
-#include <asm-generic/atomic-instrumented.h>
-
#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6a5b0ec460da..ab838205c1c6 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -9,11 +9,37 @@
/* An 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
+/*
+ * Read an atomic64_t non-atomically.
+ *
+ * This is intended to be used in cases where a subsequent atomic operation
+ * will handle the torn value, and can be used to prime the first iteration
+ * of unconditional try_cmpxchg() loops, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ *
+ * This is NOT safe to use where the value is not always checked by a
+ * subsequent atomic operation, such as in conditional try_cmpxchg() loops
+ * that can break before the atomic operation, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do {
+ * if (condition(val))
+ * break;
+ * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ */
+static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
+{
+ /* See comment in arch_atomic_read(). */
+ return __READ_ONCE(v->counter);
+}
+
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
@@ -22,16 +48,20 @@ typedef struct {
ATOMIC64_EXPORT(atomic64_##sym)
#endif
-#ifdef CONFIG_X86_CMPXCHG64
-#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %P[func]" \
- : out : [func] "i" (atomic64_##g##_cx8), ## in)
+#ifdef CONFIG_X86_CX8
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ asm volatile("call %c[func]" \
+ : ALT_OUTPUT_SP(out) \
+ : [func] "i" (atomic64_##g##_cx8) \
+ COMMA(in) \
+ : clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define __alternative_atomic64(f, g, out, in...) \
- alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
- X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, ASM_OUTPUT(out), \
+ ASM_INPUT(in), clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
ATOMIC64_DECL_ONE(sym##_386)
@@ -42,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386);
ATOMIC64_DECL_ONE(dec_386);
#endif
-#define alternative_atomic64(f, out, in...) \
- __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+#define alternative_atomic64(f, out, in, clobbers...) \
+ __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers)
ATOMIC64_DECL(read);
ATOMIC64_DECL(set);
@@ -61,204 +91,154 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-/**
- * arch_atomic64_cmpxchg - cmpxchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @o: expected value
- * @n: new value
- *
- * Atomically sets @v to @n if it was equal to @o and returns
- * the old value.
- */
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+{
+ return arch_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
- long long n)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
- return arch_cmpxchg64(&v->counter, o, n);
+ return arch_try_cmpxchg64(&v->counter, old, new);
}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-/**
- * arch_atomic64_xchg - xchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @n: value to assign
- *
- * Atomically xchgs the value of @v to @n and returns
- * the old value.
- */
-static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
- long long o;
+ s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- alternative_atomic64(xchg, "=&A" (o),
- "S" (v), "b" (low), "c" (high)
- : "memory");
+ alternative_atomic64(xchg,
+ "=&A" (o),
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "memory");
return o;
}
+#define arch_atomic64_xchg arch_atomic64_xchg
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: value to assign
- *
- * Atomically sets the value of @v to @n.
- */
-static inline void arch_atomic64_set(atomic64_t *v, long long i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- alternative_atomic64(set, /* no output */,
- "S" (v), "b" (low), "c" (high)
- : "eax", "edx", "memory");
+ alternative_atomic64(set,
+ /* no output */,
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "eax", "edx", "memory");
}
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically reads the value of @v and returns it.
- */
-static inline long long arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
- long long r;
- alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+ s64 r;
+ alternative_atomic64(read, "=&A" (r), "c" (v), "memory");
return r;
}
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + *@v
- */
-static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
+#define arch_atomic64_add_return arch_atomic64_add_return
-/*
- * Other variants with different arithmetic operators:
- */
-static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
+#define arch_atomic64_sub_return arch_atomic64_sub_return
-static inline long long arch_atomic64_inc_return(atomic64_t *v)
+static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
- long long a;
- alternative_atomic64(inc_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ s64 a;
+ alternative_atomic64(inc_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
-static inline long long arch_atomic64_dec_return(atomic64_t *v)
+static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
- long long a;
- alternative_atomic64(dec_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ s64 a;
+ alternative_atomic64(dec_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
-static inline long long arch_atomic64_add(long long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
- return i;
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
-static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
- return i;
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
-static inline void arch_atomic64_inc(atomic64_t *v)
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- __alternative_atomic64(inc, inc_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(inc, inc_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
-static inline void arch_atomic64_dec(atomic64_t *v)
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- __alternative_atomic64(dec, dec_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(dec, dec_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if the add was done, zero otherwise.
- */
-static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
- long long u)
+static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
alternative_atomic64(add_unless,
- ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
- "S" (v) : "memory");
+ ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)),
+ "S" (v),
+ "memory");
return (int)a;
}
+#define arch_atomic64_add_unless arch_atomic64_add_unless
-static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
+static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- alternative_atomic64(inc_not_zero, "=&a" (r),
- "S" (v) : "ecx", "edx", "memory");
+ alternative_atomic64(inc_not_zero,
+ "=&a" (r),
+ "S" (v),
+ "ecx", "edx", "memory");
return r;
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long long r;
- alternative_atomic64(dec_if_positive, "=&A" (r),
- "S" (v) : "ecx", "memory");
+ s64 r;
+ alternative_atomic64(dec_if_positive,
+ "=&A" (r),
+ "S" (v),
+ "ecx", "memory");
return r;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
@@ -266,69 +246,66 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void arch_atomic64_and(long long i, atomic64_t *v)
+static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
-static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
-static inline void arch_atomic64_or(long long i, atomic64_t *v)
+static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
-static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
-static inline void arch_atomic64_xor(long long i, atomic64_t *v)
+static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
-static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
- return old;
+ return val;
}
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v))
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..87b496325b5b 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -10,196 +10,113 @@
#define ATOMIC64_INIT(i) { (i) }
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
- *
- * Atomically reads the value of @v.
- * Doesn't imply a read memory barrier.
- */
-static inline long arch_atomic64_read(const atomic64_t *v)
-{
- return READ_ONCE((v)->counter);
-}
-
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-static inline void arch_atomic64_set(atomic64_t *v, long i)
-{
- WRITE_ONCE(v->counter, i);
-}
-
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
-static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
-{
- asm volatile(LOCK_PREFIX "addq %1,%0"
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+ return __READ_ONCE((v)->counter);
+}
+
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
+{
+ __WRITE_ONCE(v->counter, i);
+}
+
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
+{
+ asm_inline volatile(LOCK_PREFIX "addq %1, %0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "subq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subq %1, %0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
+static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "incq %0"
+ asm_inline volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "decq %0"
+ asm_inline volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
+static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
-/**
- * arch_atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
+static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
+static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
#define arch_atomic64_add_negative arch_atomic64_add_negative
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
+#define arch_atomic64_add_return arch_atomic64_add_return
-static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
-{
- return arch_atomic64_add_return(-i, v);
-}
+#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
-static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
-{
- return xadd(&v->counter, -i);
-}
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
-static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg(&v->counter, old, new);
}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
- return try_cmpxchg(&v->counter, old, new);
+ return arch_try_cmpxchg(&v->counter, old, new);
}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static inline long arch_atomic64_xchg(atomic64_t *v, long new)
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
}
+#define arch_atomic64_xchg arch_atomic64_xchg
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "andq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
}
-static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -207,16 +124,17 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
} while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
return val;
}
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
-static inline void arch_atomic64_or(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "orq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
}
-static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -224,16 +142,17 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
} while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
return val;
}
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
-static inline void arch_atomic64_xor(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "xorq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
}
-static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -241,5 +160,6 @@ static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
} while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
return val;
}
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h
new file mode 100644
index 000000000000..fa918f01333e
--- /dev/null
+++ b/arch/x86/include/asm/audit.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AUDIT_H
+#define _ASM_X86_AUDIT_H
+
+int ia32_classify_syscall(unsigned int syscall);
+
+extern unsigned ia32_dir_class[];
+extern unsigned ia32_write_class[];
+extern unsigned ia32_read_class[];
+extern unsigned ia32_chattr_class[];
+extern unsigned ia32_signal_class[];
+
+
+#endif /* _ASM_X86_AUDIT_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 14de0432d288..db70832232d4 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -12,16 +12,16 @@
*/
#ifdef CONFIG_X86_32
-#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
+#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
+#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
+#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
#else
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
-#define wmb() asm volatile("sfence" ::: "memory")
+#define __mb() asm volatile("mfence":::"memory")
+#define __rmb() asm volatile("lfence":::"memory")
+#define __wmb() asm volatile("sfence" ::: "memory")
#endif
/**
@@ -33,33 +33,25 @@
* Returns:
* 0 - (index < size)
*/
-static inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
-{
- unsigned long mask;
-
- asm volatile ("cmp %1,%2; sbb %0,%0;"
- :"=r" (mask)
- :"g"(size),"r" (index)
- :"cc");
- return mask;
-}
-
-/* Override the default implementation from linux/nospec.h. */
-#define array_index_mask_nospec array_index_mask_nospec
+#define array_index_mask_nospec(idx,sz) ({ \
+ typeof((idx)+(sz)) __idx = (idx); \
+ typeof(__idx) __sz = (sz); \
+ unsigned long __mask; \
+ asm volatile ("cmp %1,%2; sbb %0,%0" \
+ :"=r" (__mask) \
+ :ASM_INPUT_G (__sz), \
+ "r" (__idx) \
+ :"cc"); \
+ __mask; })
/* Prevent speculative execution past this barrier. */
-#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
- "lfence", X86_FEATURE_LFENCE_RDTSC)
+#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
-#define dma_rmb() barrier()
-#define dma_wmb() barrier()
+#define __dma_rmb() barrier()
+#define __dma_wmb() barrier()
+
+#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
-#ifdef CONFIG_X86_32
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
-#else
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
-#endif
#define __smp_rmb() dma_rmb()
#define __smp_wmb() barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
@@ -80,8 +72,11 @@ do { \
})
/* Atomic operations are already serializing on x86 */
-#define __smp_mb__before_atomic() barrier()
-#define __smp_mb__after_atomic() barrier()
+#define __smp_mb__before_atomic() do { } while (0)
+#define __smp_mb__after_atomic() do { } while (0)
+
+/* Writing to CR3 provides a full memory barrier in switch_mm(). */
+#define smp_mb__after_switch_mm() do { } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ad7b210aa3f6..c2ce213f2b9b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,245 +36,130 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
-/* Technically wrong, but this avoids compilation errors on some gcc
- versions. */
-#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
-#else
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
-#endif
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
-#define ADDR BITOP_ADDR(addr)
+#define ADDR RLONG_ADDR(addr)
/*
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
-#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
static __always_inline void
-set_bit(long nr, volatile unsigned long *addr)
+arch_set_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
- asm volatile(LOCK_PREFIX "orb %1,%0"
+ if (__builtin_constant_p(nr)) {
+ asm_inline volatile(LOCK_PREFIX "orb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" ((u8)CONST_MASK(nr))
+ : "iq" (CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
- : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
static __always_inline void
-clear_bit(long nr, volatile unsigned long *addr)
+arch_clear_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
- asm volatile(LOCK_PREFIX "andb %1,%0"
+ if (__builtin_constant_p(nr)) {
+ asm_inline volatile(LOCK_PREFIX "andb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" ((u8)~CONST_MASK(nr)));
+ : "iq" (~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
- clear_bit(nr, addr);
+ arch_clear_bit(nr, addr);
}
-static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "andb %2,%1"
- CC_SET(s)
- : CC_OUT(s) (negative), ADDR
- : "ir" ((char) ~(1 << nr)) : "memory");
+ asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
+ : "=@ccs" (negative), WBYTE_ADDR(addr)
+ : "iq" ((char)mask) : "memory");
return negative;
}
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
-// Let everybody know we have it
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
- */
-static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- barrier();
- __clear_bit(nr, addr);
+ arch___clear_bit(nr, addr);
}
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __always_inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_change_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
- asm volatile(LOCK_PREFIX "xorb %1,%0"
+ if (__builtin_constant_p(nr)) {
+ asm_inline volatile(LOCK_PREFIX "xorb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" ((u8)CONST_MASK(nr)));
+ : "iq" (CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
static __always_inline bool
-test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
- return test_and_set_bit(nr, addr);
+ return arch_test_and_set_bit(nr, addr);
}
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
asm(__ASM_SIZE(bts) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : "=@ccc" (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- *
+/*
* Note: the operation is performed atomically with respect to
* the local CPU, but not other CPUs. Portable code should not
* rely on this behaviour.
@@ -282,39 +167,31 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
asm volatile(__ASM_SIZE(btr) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : "=@ccc" (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
-/* WARNING: non atomic and it can be reordered! */
-static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
asm volatile(__ASM_SIZE(btc) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr) : "memory");
+ : "=@ccc" (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
@@ -325,31 +202,51 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
+static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("testb %2,%1"
+ : "=@ccnz" (oldbit)
+ : "m" (((unsigned char *)addr)[nr >> 3]),
+ "i" (1 << (nr & 7))
+ :"memory");
+
+ return oldbit;
+}
+
static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
bool oldbit;
asm volatile(__ASM_SIZE(bt) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
- : "m" (*(unsigned long *)addr), "Ir" (nr));
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
}
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static bool test_bit(int nr, const volatile unsigned long *addr);
-#endif
+static __always_inline bool
+arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
+{
+ return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) :
+ variable_test_bit(nr, addr);
+}
-#define test_bit(nr, addr) \
- (__builtin_constant_p((nr)) \
- ? constant_test_bit((nr), (addr)) \
- : variable_test_bit((nr), (addr)))
+static __always_inline bool
+arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
+ variable_test_bit(nr, addr);
+}
+
+static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word)
+{
+ asm("tzcnt %1,%0"
+ : "=r" (word)
+ : ASM_INPUT_RM (word));
+ return word;
+}
/**
* __ffs - find first set bit in word
@@ -357,12 +254,14 @@ static bool test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __ffs(unsigned long word)
+#define __ffs(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)__builtin_ctzl(word) : \
+ variable__ffs(word))
+
+static __always_inline __attribute_const__ unsigned long variable_ffz(unsigned long word)
{
- asm("rep; bsf %1,%0"
- : "=r" (word)
- : "rm" (word));
- return word;
+ return variable__ffs(~word);
}
/**
@@ -371,13 +270,10 @@ static __always_inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static __always_inline unsigned long ffz(unsigned long word)
-{
- asm("rep; bsf %1,%0"
- : "=r" (word)
- : "r" (~word));
- return word;
-}
+#define ffz(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)__builtin_ctzl(~word) : \
+ variable_ffz(word))
/*
* __fls: find last set bit in word
@@ -385,29 +281,21 @@ static __always_inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned long __fls(unsigned long word)
{
+ if (__builtin_constant_p(word))
+ return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
asm("bsr %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
#undef ADDR
#ifdef __KERNEL__
-/**
- * ffs - find first set bit in word
- * @x: the word to search
- *
- * This is defined the same way as the libc and compiler builtin ffs
- * routines, therefore differs in spirit from the other bitops.
- *
- * ffs(value) returns 0 if value is 0 or the position of the first
- * set bit if value is nonzero. The first (least significant) bit
- * is at position 1.
- */
-static __always_inline int ffs(int x)
+static __always_inline __attribute_const__ int variable_ffs(int x)
{
int r;
@@ -423,7 +311,7 @@ static __always_inline int ffs(int x)
*/
asm("bsfl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
@@ -438,6 +326,19 @@ static __always_inline int ffs(int x)
}
/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
+
+/**
* fls - find last set bit in word
* @x: the word to search
*
@@ -448,10 +349,13 @@ static __always_inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static __always_inline int fls(unsigned int x)
+static __always_inline __attribute_const__ int fls(unsigned int x)
{
int r;
+ if (__builtin_constant_p(x))
+ return x ? 32 - __builtin_clz(x) : 0;
+
#ifdef CONFIG_X86_64
/*
* AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
@@ -464,7 +368,7 @@ static __always_inline int fls(unsigned int x)
*/
asm("bsrl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
@@ -490,9 +394,12 @@ static __always_inline int fls(unsigned int x)
* at position 64.
*/
#ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
int bitpos = -1;
+
+ if (__builtin_constant_p(x))
+ return x ? 64 - __builtin_clzll(x) : 0;
/*
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
* dest reg is undefined if x==0, but their CPU architect says its
@@ -500,21 +407,23 @@ static __always_inline int fls64(__u64 x)
*/
asm("bsrq %1,%q0"
: "+r" (bitpos)
- : "rm" (x));
+ : ASM_INPUT_RM (x));
return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
-#include <asm-generic/bitops/find.h>
-
#include <asm-generic/bitops/sched.h>
#include <asm/arch_hweight.h>
#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 680c320363db..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -6,11 +6,6 @@
#include <asm/pgtable_types.h>
#include <uapi/asm/boot.h>
-/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
- + (CONFIG_PHYSICAL_ALIGN - 1)) \
- & ~(CONFIG_PHYSICAL_ALIGN - 1))
-
/* Minimum kernel alignment, as a power of two */
#ifdef CONFIG_X86_64
# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
@@ -24,35 +19,82 @@
# error "Invalid value for CONFIG_PHYSICAL_ALIGN"
#endif
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
# define BOOT_HEAP_SIZE 0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+/*
+ * Zstd needs to allocate the ZSTD_DCtx in order to decompress the kernel.
+ * The ZSTD_DCtx is ~160KB, so set the heap size to 192KB because it is a
+ * round number and to allow some slack.
+ */
+# define BOOT_HEAP_SIZE 0x30000
+#else
# define BOOT_HEAP_SIZE 0x10000
#endif
#ifdef CONFIG_X86_64
# define BOOT_STACK_SIZE 0x4000
+/*
+ * Used by decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 table that maps everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
+ */
# define BOOT_INIT_PGT_SIZE (6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
+
/*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ * - 5-level paging needs 1 level5 table;
+ * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ * assuming all of them cross 256T boundary:
+ * + 4*2 level4 table;
+ * + 4*2 level3 table;
+ * + 4*2 level2 table;
+ * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ * + 1 level4 table;
+ * + 1 level3 table;
+ * + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare table in case decompressor touches anything beyond what is
+ * accounted above. Warn if it happens.
*/
-# ifdef CONFIG_X86_VERBOSE_BOOTUP
-# define BOOT_PGT_SIZE (19*4096)
-# else /* !CONFIG_X86_VERBOSE_BOOTUP */
-# define BOOT_PGT_SIZE (17*4096)
-# endif
-# else /* !CONFIG_RANDOMIZE_BASE */
-# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
-# endif
+# define BOOT_PGT_SIZE_WARN (28*4096)
+# define BOOT_PGT_SIZE (32*4096)
#else /* !CONFIG_X86_64 */
# define BOOT_STACK_SIZE 0x1000
#endif
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
+
+#ifndef __ASSEMBLER__
+extern unsigned int output_len;
+extern const unsigned long kernel_text_size;
+extern const unsigned long kernel_inittext_offset;
+extern const unsigned long kernel_inittext_size;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ void (*error)(char *x));
+
+extern struct boot_params *boot_params_ptr;
+extern unsigned long *trampoline_32bit;
+extern const u16 trampoline_ljmp_imm_offset;
+
+void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
+#endif
+
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index f6f6ef436599..d90ae472fb76 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -18,13 +18,27 @@
* Note: efi_info is commonly left uninitialized, but that field has a
* private magic, so it is better to leave it unchanged.
*/
+
+#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); })
+
+#define BOOT_PARAM_PRESERVE(struct_member) \
+ { \
+ .start = offsetof(struct boot_params, struct_member), \
+ .len = sizeof_mbr(struct boot_params, struct_member), \
+ }
+
+struct boot_params_to_save {
+ unsigned int start;
+ unsigned int len;
+};
+
static void sanitize_boot_params(struct boot_params *boot_params)
{
/*
* IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear
* this field. The purpose of this field is to guarantee
* compliance with the x86 boot spec located in
- * Documentation/x86/boot.txt . That spec says that the
+ * Documentation/arch/x86/boot.rst . That spec says that the
* *whole* structure should be cleared, after which only the
* portion defined by struct setup_header (boot_params->hdr)
* should be copied in.
@@ -35,21 +49,42 @@ static void sanitize_boot_params(struct boot_params *boot_params)
* problems again.
*/
if (boot_params->sentinel) {
- /* fields in boot_params are left uninitialized, clear them */
- boot_params->acpi_rsdp_addr = 0;
- memset(&boot_params->ext_ramdisk_image, 0,
- (char *)&boot_params->efi_info -
- (char *)&boot_params->ext_ramdisk_image);
- memset(&boot_params->kbd_status, 0,
- (char *)&boot_params->hdr -
- (char *)&boot_params->kbd_status);
- memset(&boot_params->_pad7[0], 0,
- (char *)&boot_params->edd_mbr_sig_buffer[0] -
- (char *)&boot_params->_pad7[0]);
- memset(&boot_params->_pad8[0], 0,
- (char *)&boot_params->eddbuf[0] -
- (char *)&boot_params->_pad8[0]);
- memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+ static struct boot_params scratch;
+ char *bp_base = (char *)boot_params;
+ char *save_base = (char *)&scratch;
+ int i;
+
+ const struct boot_params_to_save to_save[] = {
+ BOOT_PARAM_PRESERVE(screen_info),
+ BOOT_PARAM_PRESERVE(apm_bios_info),
+ BOOT_PARAM_PRESERVE(tboot_addr),
+ BOOT_PARAM_PRESERVE(ist_info),
+ BOOT_PARAM_PRESERVE(hd0_info),
+ BOOT_PARAM_PRESERVE(hd1_info),
+ BOOT_PARAM_PRESERVE(sys_desc_table),
+ BOOT_PARAM_PRESERVE(olpc_ofw_header),
+ BOOT_PARAM_PRESERVE(efi_info),
+ BOOT_PARAM_PRESERVE(alt_mem_k),
+ BOOT_PARAM_PRESERVE(scratch),
+ BOOT_PARAM_PRESERVE(e820_entries),
+ BOOT_PARAM_PRESERVE(eddbuf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+ BOOT_PARAM_PRESERVE(secure_boot),
+ BOOT_PARAM_PRESERVE(hdr),
+ BOOT_PARAM_PRESERVE(e820_table),
+ BOOT_PARAM_PRESERVE(eddbuf),
+ BOOT_PARAM_PRESERVE(cc_blob_address),
+ };
+
+ memset(&scratch, 0, sizeof(scratch));
+
+ for (i = 0; i < ARRAY_SIZE(to_save); i++) {
+ memcpy(save_base + to_save[i].start,
+ bp_base + to_save[i].start, to_save[i].len);
+ }
+
+ memcpy(boot_params, save_base, sizeof(*boot_params));
}
}
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 6804d6642767..d561a8443c13 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -3,83 +3,193 @@
#define _ASM_X86_BUG_H
#include <linux/stringify.h>
+#include <linux/instrumentation.h>
+#include <linux/objtool.h>
+#include <asm/asm.h>
+
+#ifndef __ASSEMBLY__
+struct bug_entry;
+extern void __WARN_trap(struct bug_entry *bug, ...);
+#endif
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
- *
- * Since various instruction decoders/specs disagree on the encoding of
- * UD0/UD1.
*/
-
-#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
-#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
-#define ASM_UD2 ".byte 0x0f, 0x0b"
-
-#define INSN_UD0 0xff0f
+#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
#define INSN_UD2 0x0b0f
-
#define LEN_UD2 2
+#define ASM_UDB _ASM_BYTES(0xd6)
+#define INSN_UDB 0xd6
+#define LEN_UDB 1
+
+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_ASOP 0x67
+#define INSN_LOCK 0xf0
+#define OPCODE_ESCAPE 0x0f
+#define SECOND_BYTE_OPCODE_UD1 0xb9
+#define SECOND_BYTE_OPCODE_UD2 0x0b
+
+#define BUG_NONE 0xffff
+#define BUG_UD2 0xfffe
+#define BUG_UD1 0xfffd
+#define BUG_UD1_UBSAN 0xfffc
+#define BUG_UD1_WARN 0xfffb
+#define BUG_UDB 0xffd6
+#define BUG_LOCK 0xfff0
+
#ifdef CONFIG_GENERIC_BUG
-#ifdef CONFIG_X86_32
-# define __BUG_REL(val) ".long " __stringify(val)
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define __BUG_ENTRY_VERBOSE(file, line) \
+ "\t.long " file " - .\t# bug_entry::file\n" \
+ "\t.word " line "\t# bug_entry::line\n"
#else
-# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
+#define __BUG_ENTRY_VERBOSE(file, line)
#endif
-#ifdef CONFIG_DEBUG_BUGVERBOSE
+#if defined(CONFIG_X86_64) || defined(CONFIG_DEBUG_BUGVERBOSE_DETAILED)
+#define HAVE_ARCH_BUG_FORMAT
+#define __BUG_ENTRY_FORMAT(format) \
+ "\t.long " format " - .\t# bug_entry::format\n"
+#else
+#define __BUG_ENTRY_FORMAT(format)
+#endif
-#define _BUG_FLAGS(ins, flags) \
-do { \
- asm volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
- "\t.word %c1" "\t# bug_entry::line\n" \
- "\t.word %c2" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c3\n" \
- ".popsection" \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
+#ifdef CONFIG_X86_64
+#define HAVE_ARCH_BUG_FORMAT_ARGS
+#endif
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
+#define __BUG_ENTRY(format, file, line, flags) \
+ "\t.long 1b - ." "\t# bug_entry::bug_addr\n" \
+ __BUG_ENTRY_FORMAT(format) \
+ __BUG_ENTRY_VERBOSE(file, line) \
+ "\t.word " flags "\t# bug_entry::flags\n"
+
+#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \
+ ".pushsection __bug_table,\"aw\"\n\t" \
+ ANNOTATE_DATA_SPECIAL "\n\t" \
+ "2:\n\t" \
+ __BUG_ENTRY(format, file, line, flags) \
+ "\t.org 2b + " size "\n" \
+ ".popsection\n" \
+ extra
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED
+#define WARN_CONDITION_STR(cond_str) cond_str
+#else
+#define WARN_CONDITION_STR(cond_str) ""
+#endif
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(cond_str, ins, flags, extra) \
do { \
- asm volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t.word %c0" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c1\n" \
- ".popsection" \
- : : "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
+ asm_inline volatile("1:\t" ins "\n" \
+ _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
+ "%c[line]", "%c[fl]", \
+ "%c[size]", extra) \
+ : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \
+ [file] "i" (__FILE__), \
+ [line] "i" (__LINE__), \
+ [fl] "i" (flags), \
+ [size] "i" (sizeof(struct bug_entry))); \
} while (0)
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
+#define ARCH_WARN_ASM(file, line, flags, size) \
+ ".pushsection .rodata.str1.1, \"aMS\", @progbits, 1\n" \
+ "99:\n" \
+ "\t.string \"\"\n" \
+ ".popsection\n" \
+ "1:\t " ASM_UD2 "\n" \
+ _BUG_FLAGS_ASM("99b", file, line, flags, size, "")
#else
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+#define _BUG_FLAGS(cond_str, ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
#define HAVE_ARCH_BUG
#define BUG() \
do { \
- _BUG_FLAGS(ASM_UD2, 0); \
- unreachable(); \
+ instrumentation_begin(); \
+ _BUG_FLAGS("", ASM_UD2, 0, ""); \
+ __builtin_unreachable(); \
} while (0)
-#define __WARN_FLAGS(flags) \
-do { \
- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
- annotate_reachable(); \
+/*
+ * This instrumentation_begin() is strictly speaking incorrect; but it
+ * suppresses the complaints from WARN()s in noinstr code. If such a WARN()
+ * were to trigger, we'd rather wreck the machine in an attempt to get the
+ * message out than not know about it.
+ */
+
+#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b)
+
+#define __WARN_FLAGS(cond_str, flags) \
+do { \
+ auto __flags = BUGFLAG_WARNING|(flags); \
+ instrumentation_begin(); \
+ _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \
+ instrumentation_end(); \
+} while (0)
+
+#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
+
+#ifndef __ASSEMBLY__
+#include <linux/static_call_types.h>
+DECLARE_STATIC_CALL(WARN_trap, __WARN_trap);
+
+struct pt_regs;
+struct sysv_va_list { /* from AMD64 System V ABI */
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+};
+struct arch_va_list {
+ unsigned long regs[6];
+ struct sysv_va_list args;
+};
+extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs);
+#endif /* __ASSEMBLY__ */
+
+#define __WARN_bug_entry(flags, format) ({ \
+ struct bug_entry *bug; \
+ asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \
+ _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
+ "%c[line]", "%c[fl]", \
+ "%c[size]", "") \
+ : [addr] "=r" (bug) \
+ : [fmt] "i" (format), \
+ [file] "i" (__FILE__), \
+ [line] "i" (__LINE__), \
+ [fl] "i" (flags), \
+ [size] "i" (sizeof(struct bug_entry))); \
+ bug; })
+
+#define __WARN_print_arg(flags, format, arg...) \
+do { \
+ int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \
+ static_call_mod(WARN_trap)(__WARN_bug_entry(__flags, format), ## arg); \
+ asm (""); /* inhibit tail-call optimization */ \
} while (0)
+#define __WARN_printf(taint, fmt, arg...) \
+ __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg)
+
+#define WARN_ONCE(cond, format, arg...) ({ \
+ int __ret_warn_on = !!(cond); \
+ if (unlikely(__ret_warn_on)) { \
+ __WARN_print_arg(BUGFLAG_ONCE|BUGFLAG_TAINT(TAINT_WARN),\
+ format, ## arg); \
+ } \
+ __ret_warn_on; \
+})
+
+#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */
+
#include <asm-generic/bug.h>
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f..f25ca2d709d4 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -4,18 +4,12 @@
#include <asm/processor.h>
-extern void check_bugs(void);
-
-#if defined(CONFIG_CPU_SUP_INTEL)
-void check_mpx_erratum(struct cpuinfo_x86 *c);
-#else
-static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
-#endif
-
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
static inline int ppro_with_ram_bug(void) { return 0; }
#endif
+extern void cpu_bugs_smt_update(void);
+
#endif /* _ASM_X86_BUGS_H */
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index abe08690a887..69404eae9983 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -8,7 +8,7 @@
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_mostly __section(".data..read_mostly")
#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 63feaf2a5f93..b192d917a6d0 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_CACHEFLUSH_H
#define _ASM_X86_CACHEFLUSH_H
+#include <linux/mm.h>
+
/* Caches aren't brain-dead on the intel. */
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
index 86b63c7feab7..5aa061199866 100644
--- a/arch/x86/include/asm/cacheinfo.h
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -2,7 +2,17 @@
#ifndef _ASM_X86_CACHEINFO_H
#define _ASM_X86_CACHEINFO_H
-void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
-void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
+/* Kernel controls MTRR and/or PAT MSRs. */
+extern unsigned int memory_caching_control;
+#define CACHE_MTRR 0x01
+#define CACHE_PAT 0x02
+
+void cache_disable(void);
+void cache_enable(void);
+void set_cache_aps_delayed_init(bool val);
+bool get_cache_aps_delayed_init(void);
+void cache_bp_init(void);
+void cache_bp_restore(void);
+void cache_aps_init(void);
#endif /* _ASM_X86_CACHEINFO_H */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
deleted file mode 100644
index a8303ebe089f..000000000000
--- a/arch/x86/include/asm/calgary.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Derived from include/asm-powerpc/iommu.h
- *
- * Copyright IBM Corporation, 2006-2007
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_X86_CALGARY_H
-#define _ASM_X86_CALGARY_H
-
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/timer.h>
-#include <asm/types.h>
-
-struct iommu_table {
- const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
- unsigned long it_base; /* mapped address of tce table */
- unsigned long it_hint; /* Hint for next alloc */
- unsigned long *it_map; /* A simple allocation bitmap for now */
- void __iomem *bbar; /* Bridge BAR */
- u64 tar_val; /* Table Address Register */
- struct timer_list watchdog_timer;
- spinlock_t it_lock; /* Protects it_map */
- unsigned int it_size; /* Size of iommu table in entries */
- unsigned char it_busno; /* Bus number this table belongs to */
-};
-
-struct cal_chipset_ops {
- void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev);
- void (*tce_cache_blast)(struct iommu_table *tbl);
- void (*dump_error_regs)(struct iommu_table *tbl);
-};
-
-#define TCE_TABLE_SIZE_UNSPECIFIED ~0
-#define TCE_TABLE_SIZE_64K 0
-#define TCE_TABLE_SIZE_128K 1
-#define TCE_TABLE_SIZE_256K 2
-#define TCE_TABLE_SIZE_512K 3
-#define TCE_TABLE_SIZE_1M 4
-#define TCE_TABLE_SIZE_2M 5
-#define TCE_TABLE_SIZE_4M 6
-#define TCE_TABLE_SIZE_8M 7
-
-extern int use_calgary;
-
-#ifdef CONFIG_CALGARY_IOMMU
-extern int detect_calgary(void);
-#else
-static inline int detect_calgary(void) { return -ENODEV; }
-#endif
-
-#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
index 2930f560d7f3..e1f965bb1e31 100644
--- a/arch/x86/include/asm/ce4100.h
+++ b/arch/x86/include/asm/ce4100.h
@@ -4,4 +4,10 @@
int ce4100_pci_init(void);
+#ifdef CONFIG_SERIAL_8250
+void __init sdv_serial_fixup(void);
+#else
+static inline void sdv_serial_fixup(void) {};
+#endif
+
#endif
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
new file mode 100644
index 000000000000..c40b9ebc1fb4
--- /dev/null
+++ b/arch/x86/include/asm/cfi.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CFI_H
+#define _ASM_X86_CFI_H
+
+/*
+ * Clang Control Flow Integrity (CFI) support.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#include <linux/bug.h>
+#include <asm/ibt.h>
+
+/*
+ * An overview of the various calling conventions...
+ *
+ * Traditional:
+ *
+ * foo:
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * IBT:
+ *
+ * foo:
+ * endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * kCFI:
+ *
+ * __cfi_foo:
+ * movl $0x12345678, %eax
+ * # 11 nops when CONFIG_CALL_PADDING
+ * foo:
+ * endbr64 # when IBT
+ * ... code here ...
+ * ret
+ *
+ * direct call:
+ * call foo # / call foo+4 when IBT
+ *
+ * indirect call:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d # -15 when CONFIG_CALL_PADDING
+ * jz 1f
+ * ud2
+ * 1:call *%r11
+ *
+ *
+ * FineIBT (builds as kCFI + CALL_PADDING + IBT + RETPOLINE and runtime patches into):
+ *
+ * __cfi_foo:
+ * endbr64
+ * subl 0x12345678, %eax
+ * jne.32,pn foo+3
+ * foo:
+ * nopl -42(%rax) # was endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $0x12345678, %eax
+ * lea -0x10(%r11), %r11
+ * nop5
+ * call *%r11
+ *
+ */
+enum cfi_mode {
+ CFI_AUTO, /* FineIBT if hardware has IBT, otherwise kCFI */
+ CFI_OFF, /* Taditional / IBT depending on .config */
+ CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */
+ CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
+};
+
+extern enum cfi_mode cfi_mode;
+
+#ifdef CONFIG_FINEIBT_BHI
+extern bool cfi_bhi;
+#else
+#define cfi_bhi (0)
+#endif
+
+typedef u8 bhi_thunk[32];
+extern bhi_thunk __bhi_args[];
+extern bhi_thunk __bhi_args_end[];
+
+struct pt_regs;
+
+#ifdef CONFIG_CFI
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+
+static inline int cfi_get_offset(void)
+{
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ return 16;
+ case CFI_KCFI:
+ if (IS_ENABLED(CONFIG_CALL_PADDING))
+ return 16;
+ return 5;
+ default:
+ return 0;
+ }
+}
+#define cfi_get_offset cfi_get_offset
+
+extern u32 cfi_get_func_hash(void *func);
+#define cfi_get_func_hash cfi_get_func_hash
+
+extern int cfi_get_func_arity(void *func);
+
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ return false;
+}
+
+#endif
+
+#else
+static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+ return BUG_TRAP_TYPE_NONE;
+}
+static inline int cfi_get_func_arity(void *func)
+{
+ return 0;
+}
+#endif /* CONFIG_CFI */
+
+#if HAS_KERNEL_IBT == 1
+#define CFI_NOSEAL(x) asm(IBT_NOSEAL(__stringify(x)))
+#endif
+
+#endif /* _ASM_X86_CFI_H */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index d79d1e622dcf..6df6ece8a28e 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,6 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
#else
-# include <asm/checksum_64.h>
+# define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+# include <asm/checksum_32.h>
+# else
+# include <asm/checksum_64.h>
+# endif
#endif
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index f57b94e02c57..17da95387997 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -27,9 +27,7 @@ asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum);
* better 64-bit) boundary
*/
-asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
- int len, __wsum sum,
- int *src_err_ptr, int *dst_err_ptr);
+asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
/*
* Note: when you get a NULL pointer exception here this means someone
@@ -38,24 +36,21 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
* If you use these functions directly please don't forget the
* access_ok().
*/
-static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum)
+static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
- return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+ return csum_partial_copy_generic(src, dst, len);
}
-static inline __wsum csum_partial_copy_from_user(const void __user *src,
- void *dst,
- int len, __wsum sum,
- int *err_ptr)
+static inline __wsum csum_and_copy_from_user(const void __user *src,
+ void *dst, int len)
{
__wsum ret;
might_sleep();
- stac();
- ret = csum_partial_copy_generic((__force void *)src, dst,
- len, sum, err_ptr, NULL);
- clac();
+ if (!user_access_begin(src, len))
+ return 0;
+ ret = csum_partial_copy_generic((__force void *)src, dst, len);
+ user_access_end();
return ret;
}
@@ -173,27 +168,19 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
/*
* Copy and checksum to user
*/
-#define HAVE_CSUM_COPY_USER
static inline __wsum csum_and_copy_to_user(const void *src,
void __user *dst,
- int len, __wsum sum,
- int *err_ptr)
+ int len)
{
__wsum ret;
might_sleep();
- if (access_ok(dst, len)) {
- stac();
- ret = csum_partial_copy_generic(src, (__force void *)dst,
- len, sum, NULL, err_ptr);
- clac();
- return ret;
- }
-
- if (len)
- *err_ptr = -EFAULT;
-
- return (__force __wsum)-1; /* invalid checksum */
+ if (!user_access_begin(dst, len))
+ return 0;
+
+ ret = csum_partial_copy_generic(src, (__force void *)dst, len);
+ user_access_end();
+ return ret;
}
#endif /* _ASM_X86_CHECKSUM_32_H */
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 3ec6d3267cf9..4d4a47a3a8ab 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -9,7 +9,6 @@
*/
#include <linux/compiler.h>
-#include <linux/uaccess.h>
#include <asm/byteorder.h>
/**
@@ -129,26 +128,12 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER 1
-
-
/* Do not call this directly. Use the wrappers below */
-extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
- int len, __wsum sum,
- int *src_err_ptr, int *dst_err_ptr);
-
-
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst,
- int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum);
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
-/* Old names. To be removed. */
-#define csum_and_copy_to_user csum_partial_copy_to_user
-#define csum_and_copy_from_user csum_partial_copy_from_user
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
/**
* ip_compute_csum - Compute an 16bit IP checksum.
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index dc4cfc888d6d..dc9dc7b3911a 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -4,14 +4,18 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
-#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
-#define VCLOCK_MAX 3
+#include <asm/vdso/clocksource.h>
-struct arch_clocksource_data {
- int vclock_mode;
-};
+extern unsigned int vclocks_used;
+
+static inline bool vclock_was_used(int vclock)
+{
+ return READ_ONCE(vclocks_used) & (1U << vclock);
+}
+
+static inline void vclocks_set_used(unsigned int which)
+{
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which));
+}
#endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index 6faaf27e8899..6cbd9ae58b21 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,6 +2,10 @@
#ifndef _ASM_X86_CMDLINE_H
#define _ASM_X86_CMDLINE_H
+#include <asm/setup.h>
+
+extern char builtin_cmdline[COMMAND_LINE_SIZE];
+
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
int cmdline_find_option(const char *cmdline_ptr, const char *option,
char *buffer, int bufsize);
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a8bfac131256..a88b06f1c35e 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -22,7 +22,7 @@ extern void __add_wrong_size(void)
/*
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
* -1 because sizeof will never return -1, thereby making those switch
- * case statements guaranteeed dead code which the compiler will
+ * case statements guaranteed dead code which the compiler will
* eliminate, and allowing the "missing symbol in the default case" to
* indicate a usage error.
*/
@@ -44,22 +44,22 @@ extern void __add_wrong_size(void)
__typeof__ (*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
- asm volatile (lock #op "b %b0, %1\n" \
+ asm_inline volatile (lock #op "b %b0, %1" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
- asm volatile (lock #op "w %w0, %1\n" \
+ asm_inline volatile (lock #op "w %w0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
- asm volatile (lock #op "l %0, %1\n" \
+ asm_inline volatile (lock #op "l %0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
- asm volatile (lock #op "q %q0, %1\n" \
+ asm_inline volatile (lock #op "q %q0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
@@ -91,7 +91,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
- asm volatile(lock "cmpxchgb %2,%1" \
+ asm_inline volatile(lock "cmpxchgb %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
@@ -100,7 +100,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
- asm volatile(lock "cmpxchgw %2,%1" \
+ asm_inline volatile(lock "cmpxchgw %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -109,7 +109,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
- asm volatile(lock "cmpxchgl %2,%1" \
+ asm_inline volatile(lock "cmpxchgl %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -118,7 +118,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
- asm volatile(lock "cmpxchgq %2,%1" \
+ asm_inline volatile(lock "cmpxchgq %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -134,7 +134,7 @@ extern void __add_wrong_size(void)
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define __sync_cmpxchg(ptr, old, new, size) \
- __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+ __raw_cmpxchg((ptr), (old), (new), (size), "lock ")
#define __cmpxchg_local(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "")
@@ -165,9 +165,8 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
- asm volatile(lock "cmpxchgb %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
@@ -177,9 +176,8 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
- asm volatile(lock "cmpxchgw %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -189,9 +187,8 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
- asm volatile(lock "cmpxchgl %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -201,9 +198,8 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
- asm volatile(lock "cmpxchgq %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -221,9 +217,21 @@ extern void __add_wrong_size(void)
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
-#define try_cmpxchg(ptr, pold, new) \
+#define __sync_try_cmpxchg(ptr, pold, new, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ")
+
+#define __try_cmpxchg_local(ptr, pold, new, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "")
+
+#define arch_try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define arch_sync_try_cmpxchg(ptr, pold, new) \
+ __sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
+#define arch_try_cmpxchg_local(ptr, pold, new) \
+ __try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
+
/*
* xadd() adds "inc" to "*ptr" and atomically returns the previous
* value of "*ptr".
@@ -233,29 +241,4 @@ extern void __add_wrong_size(void)
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
-#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
- __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
- VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
- asm volatile(pfx "cmpxchg%c5b %1" \
- CC_SET(e) \
- : CC_OUT(e) (__ret), \
- "+m" (*(p1)), "+m" (*(p2)), \
- "+a" (__old1), "+d" (__old2) \
- : "i" (2 * sizeof(long)), \
- "b" (__new1), "c" (__new2)); \
- __ret; \
-})
-
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
-
-#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
-
#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 1a2eafca7038..1f80a62be969 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -3,113 +3,153 @@
#define _ASM_X86_CMPXCHG_32_H
/*
- * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you
+ * Note: if you use __cmpxchg64(), or their variants,
* you need to test for the feature in boot_cpu_data.
*/
-/*
- * CMPXCHG8B only writes to the target if we had the previous
- * value in registers, otherwise it acts as a read and gives us the
- * "new previous" value. That is why there is a loop. Preloading
- * EDX:EAX is a performance optimization: in the common case it means
- * we need only one locked operation.
- *
- * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
- * least an FPU save and/or %cr0.ts manipulation.
- *
- * cmpxchg8b must be used with the lock prefix here to allow the
- * instruction to be executed atomically. We need to have the reader
- * side to see the coherent 64bit value.
- */
-static inline void set_64bit(volatile u64 *ptr, u64 value)
+union __u64_halves {
+ u64 full;
+ struct {
+ u32 low, high;
+ };
+};
+
+#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+
+static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
- u32 low = value;
- u32 high = value >> 32;
- u64 prev = *ptr;
-
- asm volatile("\n1:\t"
- LOCK_PREFIX "cmpxchg8b %0\n\t"
- "jnz 1b"
- : "=m" (*ptr), "+A" (prev)
- : "b" (low), "c" (high)
- : "memory");
+ return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
}
-#ifdef CONFIG_X86_CMPXCHG64
-#define arch_cmpxchg64(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#define arch_cmpxchg64_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#endif
+static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64(ptr, old, new,);
+}
+
+#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
+ : "=@ccz" (ret), \
+ [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
-static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
- u64 prev;
- asm volatile(LOCK_PREFIX "cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
}
-static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
- u64 prev;
- asm volatile("cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_try_cmpxchg64(ptr, oldp, new,);
}
-#ifndef CONFIG_X86_CMPXCHG64
+#ifdef CONFIG_X86_CX8
+
+#define arch_cmpxchg64 __cmpxchg64
+
+#define arch_cmpxchg64_local __cmpxchg64_local
+
+#define arch_try_cmpxchg64 __try_cmpxchg64
+
+#define arch_try_cmpxchg64_local __try_cmpxchg64_local
+
+#else
+
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/
-#define arch_cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io(LOCK_PREFIX_HERE \
- "call cmpxchg8b_emu", \
- "lock; cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
-
-
-#define arch_cmpxchg64_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io("call cmpxchg8b_emu", \
- "cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
+#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock ");
+}
+#define arch_cmpxchg64 arch_cmpxchg64
+
+static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, ,);
+}
+#define arch_cmpxchg64_local arch_cmpxchg64_local
+
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("=@ccz" (ret), \
+ "+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock ");
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
+static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
+}
+#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
#endif
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
+#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8)
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 072e5459fe2f..5afea056fb20 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -2,11 +2,6 @@
#ifndef _ASM_X86_CMPXCHG_64_H
#define _ASM_X86_CMPXCHG_64_H
-static inline void set_64bit(volatile u64 *ptr, u64 val)
-{
- *ptr = val;
-}
-
#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
@@ -19,6 +14,82 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
arch_cmpxchg_local((ptr), (o), (n)); \
})
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg((ptr), (po), (n)); \
+})
+
+#define arch_try_cmpxchg64_local(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg_local((ptr), (po), (n)); \
+})
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX);
+}
+#define arch_cmpxchg128 arch_cmpxchg128
+
+static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new,);
+}
+#define arch_cmpxchg128_local arch_cmpxchg128_local
+
+#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
+ : "=@ccz" (ret), \
+ [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX);
+}
+#define arch_try_cmpxchg128 arch_try_cmpxchg128
+
+static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new,);
+}
+#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local
+
+#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
new file mode 100644
index 000000000000..e1dbf8df1b69
--- /dev/null
+++ b/arch/x86/include/asm/coco.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_COCO_H
+#define _ASM_X86_COCO_H
+
+#include <asm/asm.h>
+#include <asm/types.h>
+
+enum cc_vendor {
+ CC_VENDOR_NONE,
+ CC_VENDOR_AMD,
+ CC_VENDOR_INTEL,
+};
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+extern enum cc_vendor cc_vendor;
+extern u64 cc_mask;
+
+static inline u64 cc_get_mask(void)
+{
+ return cc_mask;
+}
+
+static inline void cc_set_mask(u64 mask)
+{
+ cc_mask = mask;
+}
+
+u64 cc_mkenc(u64 val);
+u64 cc_mkdec(u64 val);
+void cc_random_init(void);
+#else
+#define cc_vendor (CC_VENDOR_NONE)
+static inline u64 cc_get_mask(void)
+{
+ return 0;
+}
+
+static inline u64 cc_mkenc(u64 val)
+{
+ return val;
+}
+
+static inline u64 cc_mkdec(u64 val)
+{
+ return val;
+}
+static inline void cc_random_init(void) { }
+#endif
+
+#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..b1221da477b7 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -12,34 +12,35 @@
#include <asm/user32.h>
#include <asm/unistd.h>
-#include <asm-generic/compat.h>
-
-#define COMPAT_USER_HZ 100
-#define COMPAT_UTS_MACHINE "i686\0\0"
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+#define __compat_uid_t __compat_uid_t
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
+
+#define compat_dev_t compat_dev_t
typedef u16 compat_dev_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_statfs compat_statfs
+
+#include <asm-generic/compat.h>
+
+#define COMPAT_UTS_MACHINE "i686\0\0"
+
typedef u16 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
-typedef __kernel_fsid_t compat_fsid_t;
-typedef s64 __attribute__((aligned(4))) compat_s64;
-typedef u64 __attribute__((aligned(4))) compat_u64;
struct compat_stat {
- compat_dev_t st_dev;
- u16 __pad1;
+ u32 st_dev;
compat_ino_t st_ino;
compat_mode_t st_mode;
compat_nlink_t st_nlink;
__compat_uid_t st_uid;
__compat_gid_t st_gid;
- compat_dev_t st_rdev;
- u16 __pad2;
+ u32 st_rdev;
u32 st_size;
u32 st_blksize;
u32 st_blocks;
@@ -53,29 +54,11 @@ struct compat_stat {
u32 __unused5;
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
/*
- * IA32 uses 4 byte alignment for 64 bit quantities,
- * so we need to pack this structure.
+ * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the
+ * compat flock64 structure.
*/
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-} __attribute__((packed));
+#define __ARCH_NEED_COMPAT_FLOCK64_PACKED
struct compat_statfs {
int f_type;
@@ -92,122 +75,11 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-typedef u32 compat_old_sigset_t; /* at least 32 bits */
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-
-struct compat_ipc64_perm {
- compat_key_t key;
- __compat_uid32_t uid;
- __compat_gid32_t gid;
- __compat_uid32_t cuid;
- __compat_gid32_t cgid;
- unsigned short mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- compat_ulong_t unused1;
- compat_ulong_t unused2;
-};
-
-struct compat_semid64_ds {
- struct compat_ipc64_perm sem_perm;
- compat_ulong_t sem_otime;
- compat_ulong_t sem_otime_high;
- compat_ulong_t sem_ctime;
- compat_ulong_t sem_ctime_high;
- compat_ulong_t sem_nsems;
- compat_ulong_t __unused3;
- compat_ulong_t __unused4;
-};
-
-struct compat_msqid64_ds {
- struct compat_ipc64_perm msg_perm;
- compat_ulong_t msg_stime;
- compat_ulong_t msg_stime_high;
- compat_ulong_t msg_rtime;
- compat_ulong_t msg_rtime_high;
- compat_ulong_t msg_ctime;
- compat_ulong_t msg_ctime_high;
- compat_ulong_t msg_cbytes;
- compat_ulong_t msg_qnum;
- compat_ulong_t msg_qbytes;
- compat_pid_t msg_lspid;
- compat_pid_t msg_lrpid;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
-struct compat_shmid64_ds {
- struct compat_ipc64_perm shm_perm;
- compat_size_t shm_segsz;
- compat_ulong_t shm_atime;
- compat_ulong_t shm_atime_high;
- compat_ulong_t shm_dtime;
- compat_ulong_t shm_dtime_high;
- compat_ulong_t shm_ctime;
- compat_ulong_t shm_ctime_high;
- compat_pid_t shm_cpid;
- compat_pid_t shm_lpid;
- compat_ulong_t shm_nattch;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
-/*
- * The type of struct elf_prstatus.pr_reg in compatible core dumps.
- */
-typedef struct user_regs_struct compat_elf_gregset_t;
-
-/* Full regset -- prstatus on x32, otherwise on ia32 */
-#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296)
-#define SET_PR_FPVALID(S, V, R) \
- do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
- while (0)
-
#ifdef CONFIG_X86_X32_ABI
#define COMPAT_USE_64BIT_TIME \
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
#endif
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
- compat_uptr_t sp;
-
- if (test_thread_flag(TIF_IA32)) {
- sp = task_pt_regs(current)->sp;
- } else {
- /* -128 for the x32 ABI redzone */
- sp = task_pt_regs(current)->sp - 128;
- }
-
- return (void __user *)round_down(sp - len, 16);
-}
-
static inline bool in_x32_syscall(void)
{
#ifdef CONFIG_X86_X32_ABI
@@ -228,10 +100,15 @@ static inline bool in_compat_syscall(void)
return in_32bit_syscall();
}
#define in_compat_syscall in_compat_syscall /* override the generic impl */
+#define compat_need_64bit_alignment_fixup in_ia32_syscall
#endif
struct compat_siginfo;
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const kernel_siginfo_t *from, bool x32_ABI);
+
+#ifdef CONFIG_X86_X32_ABI
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const kernel_siginfo_t *from);
+#define copy_siginfo_to_user32 copy_siginfo_to_user32
+#endif /* CONFIG_X86_X32_ABI */
#endif /* _ASM_X86_COMPAT_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index adc6cc86b062..ad235dda1ded 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,37 +7,64 @@
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/percpu.h>
+#include <asm/ibt.h>
-#ifdef CONFIG_SMP
-
-extern void prefill_possible_map(void);
-
-#else /* CONFIG_SMP */
-
-static inline void prefill_possible_map(void) {}
-
+#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define cpu_acpi_id(cpu) 0
-#define safe_smp_processor_id() 0
-
#endif /* CONFIG_SMP */
-struct x86_cpu {
- struct cpu cpu;
-};
-
#ifdef CONFIG_HOTPLUG_CPU
-extern int arch_register_cpu(int num);
-extern void arch_unregister_cpu(int);
-extern void start_cpu0(void);
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-extern int _debug_hotplug_cpu(int cpu, int action);
-#endif
+extern void soft_restart_cpu(void);
#endif
+extern void ap_init_aperfmperf(void);
+
int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
+extern void __init sld_setup(struct cpuinfo_x86 *c);
+extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
+extern bool handle_guest_split_lock(unsigned long ip);
+extern void handle_bus_lock(struct pt_regs *regs);
+void split_lock_init(void);
+void bus_lock_init(void);
+#else
+static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
+static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ return false;
+}
+
+static inline bool handle_guest_split_lock(unsigned long ip)
+{
+ return false;
+}
+
+static inline void handle_bus_lock(struct pt_regs *regs) {}
+static inline void split_lock_init(void) {}
+static inline void bus_lock_init(void) {}
+#endif
+
+#ifdef CONFIG_IA32_FEAT_CTL
+void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
+#else
+static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
+#endif
+
+extern __noendbr void cet_disable(void);
+
+struct cpu_signature;
+
+void intel_collect_cpu_info(struct cpu_signature *sig);
+
+extern u64 x86_read_arch_cap_msr(void);
+bool intel_find_matching_signature(void *mc, struct cpu_signature *sig);
+int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
+
+extern struct cpumask cpus_stop_mask;
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index baeba0567126..6be777a06944 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -1,14 +1,207 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CPU_DEVICE_ID
-#define _CPU_DEVICE_ID 1
+#ifndef _ASM_X86_CPU_DEVICE_ID
+#define _ASM_X86_CPU_DEVICE_ID
+
+/*
+ * Can't use <linux/bitfield.h> because it generates expressions that
+ * cannot be used in structure initializers. Bitfield construction
+ * here must match the union in struct cpuinfo_86:
+ * union {
+ * struct {
+ * __u8 x86_model;
+ * __u8 x86;
+ * __u8 x86_vendor;
+ * __u8 x86_reserved;
+ * };
+ * __u32 x86_vfm;
+ * };
+ */
+#define VFM_MODEL_BIT 0
+#define VFM_FAMILY_BIT 8
+#define VFM_VENDOR_BIT 16
+#define VFM_RSVD_BIT 24
+
+#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
+#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
+#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
+
+#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
+#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
+#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
+
+#define VFM_MAKE(_vendor, _family, _model) ( \
+ ((_model) << VFM_MODEL_BIT) | \
+ ((_family) << VFM_FAMILY_BIT) | \
+ ((_vendor) << VFM_VENDOR_BIT) \
+)
/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
+ *
+ * The wildcard initializers are in mod_devicetable.h because
+ * file2alias needs them. Sigh.
*/
-
#include <linux/mod_devicetable.h>
+/* Get the INTEL_FAM* model defines */
+#include <asm/intel-family.h>
+/* And the X86_VENDOR_* ones */
+#include <asm/processor.h>
+
+/* Centaur FAM6 models */
+#define X86_CENTAUR_FAM6_C7_A 0xa
+#define X86_CENTAUR_FAM6_C7_D 0xd
+#define X86_CENTAUR_FAM6_NANO 0xf
+
+/* x86_cpu_id::flags */
+#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0)
+
+/**
+ * X86_MATCH_CPU - Base macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * Use only if you need all selectors. Otherwise use one of the shorter
+ * macros of the X86_MATCH_* family. If there is no matching shorthand
+ * macro, consider to add one. If you really need to wrap one of the macros
+ * into another macro at the usage site for good reasons, then please
+ * start this local macro with X86_MATCH to allow easy grepping.
+ */
+#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \
+ .vendor = _vendor, \
+ .family = _family, \
+ .model = _model, \
+ .steppings = _steppings, \
+ .feature = _feature, \
+ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
+ .type = _type, \
+ .driver_data = (unsigned long) _data \
+}
+
+/**
+ * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ */
+#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ */
+#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_FEATURE - Macro for matching a CPU feature
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ */
+#define X86_MATCH_FEATURE(feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @model: The model number, model constant or X86_MODEL_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \
+ X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VENDOR_FAM - Match vendor and family
+ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@vendor
+ * @family: The family number or X86_FAMILY_ANY
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ */
+#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VFM - Match encoded vendor/family/model
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM(vfm, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
+
+#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/**
+ * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings
+ * range.
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @min_step: Lowest stepping number to match
+ * @max_step: Highest stepping number to match
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \
+ X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
+
+/**
+ * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @type: CPU type e.g. P-core, E-core
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, type, data)
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table);
-#endif
+#endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 29c706415443..462fc34f1317 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPU_ENTRY_AREA_H
#define _ASM_X86_CPU_ENTRY_AREA_H
@@ -6,6 +6,78 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
+
+#ifdef CONFIG_X86_64
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ
+#else
+#define VC_EXCEPTION_STKSZ 0
+#endif
+
+/* Macro to enforce the same ordering and stack sizes */
+#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
+ char DF_stack_guard[guardsize]; \
+ char DF_stack[EXCEPTION_STKSZ]; \
+ char NMI_stack_guard[guardsize]; \
+ char NMI_stack[EXCEPTION_STKSZ]; \
+ char DB_stack_guard[guardsize]; \
+ char DB_stack[EXCEPTION_STKSZ]; \
+ char MCE_stack_guard[guardsize]; \
+ char MCE_stack[EXCEPTION_STKSZ]; \
+ char VC_stack_guard[guardsize]; \
+ char VC_stack[optional_stack_size]; \
+ char VC2_stack_guard[guardsize]; \
+ char VC2_stack[optional_stack_size]; \
+ char IST_top_guard[guardsize]; \
+
+/* The exception stacks' physical storage. No guard pages required */
+struct exception_stacks {
+ ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ)
+};
+
+/* The effective cpu entry area mapping with guard pages. */
+struct cea_exception_stacks {
+ ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
+};
+
+/*
+ * The exception stack ordering in [cea_]exception_stacks
+ */
+enum exception_stack_ordering {
+ ESTACK_DF,
+ ESTACK_NMI,
+ ESTACK_DB,
+ ESTACK_MCE,
+ ESTACK_VC,
+ ESTACK_VC2,
+ N_EXCEPTION_STACKS
+};
+
+#define CEA_ESTACK_SIZE(st) \
+ sizeof(((struct cea_exception_stacks *)0)->st## _stack)
+
+#define CEA_ESTACK_BOT(ceastp, st) \
+ ((unsigned long)&(ceastp)->st## _stack)
+
+#define CEA_ESTACK_TOP(ceastp, st) \
+ (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
+
+#define CEA_ESTACK_OFFS(st) \
+ offsetof(struct cea_exception_stacks, st## _stack)
+
+#define CEA_ESTACK_PAGES \
+ (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
+
+#endif
+
+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+ unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+ struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
@@ -20,10 +92,19 @@ struct cpu_entry_area {
/*
* The GDT is just below entry_stack and thus serves (on x86_64) as
- * a a read-only guard page.
+ * a read-only guard page. On 32-bit the GDT must be writeable, so
+ * it needs an extra guard page.
*/
+#ifdef CONFIG_X86_32
+ char guard_entry_stack[PAGE_SIZE];
+#endif
struct entry_stack_page entry_stack_page;
+#ifdef CONFIG_X86_32
+ char guard_doublefault_stack[PAGE_SIZE];
+ struct doublefault_stack doublefault_stack;
+#endif
+
/*
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
* we need task switches to work, and task switches write to the TSS.
@@ -32,14 +113,10 @@ struct cpu_entry_area {
#ifdef CONFIG_X86_64
/*
- * Exception stacks used for IST entries.
- *
- * In the future, this should have a separate slot for each stack
- * with guard pages between them.
+ * Exception stacks used for IST entries with guard pages.
*/
- char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+ struct cea_exception_stacks estacks;
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
@@ -50,30 +127,27 @@ struct cpu_entry_area {
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
-#endif
};
-#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
-
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
-static inline struct entry_stack *cpu_entry_stack(int cpu)
+static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
{
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
+#define __this_cpu_ist_top_va(name) \
+ CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
+
+#define __this_cpu_ist_bottom_va(name) \
+ CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name)
+
#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ce95b8cbd229..3ddc1d33399b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,10 +4,12 @@
#include <asm/processor.h>
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
#include <asm/asm.h>
#include <linux/bitops.h>
+#include <asm/alternative.h>
+#include <asm/cpufeaturemasks.h>
enum cpuid_leafs
{
@@ -22,105 +24,48 @@ enum cpuid_leafs
CPUID_LNX_3,
CPUID_7_0_EBX,
CPUID_D_1_EAX,
- CPUID_F_0_EDX,
- CPUID_F_1_EDX,
+ CPUID_LNX_4,
+ CPUID_7_1_EAX,
CPUID_8000_0008_EBX,
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
- CPUID_8000_0007_EBX,
+ CPUID_LNX_6,
CPUID_7_EDX,
+ CPUID_8000_001F_EAX,
+ CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
-#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
-#define X86_CAP_FMT "%s"
-#define x86_cap_flag(flag) x86_cap_flags[flag]
-#else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
-#endif
/*
* In order to save room, we index into this array by doing
* X86_BUG_<name> - NCAPINTS*32.
*/
extern const char * const x86_bug_flags[NBUGINTS*32];
+#define x86_bug_flag(flag) x86_bug_flags[flag]
#define test_cpu_cap(c, bit) \
- test_bit(bit, (unsigned long *)((c)->x86_capability))
-
-/*
- * There are 32 bits/features in each mask word. The high bits
- * (selected with (bit>>5) give us the word number and the low 5
- * bits give us the bit/feature number inside the word.
- * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
- * see if it is set in the mask word.
- */
-#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
- (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
-
-#define REQUIRED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
- REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 19))
-
-#define DISABLED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
- DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+ arch_test_bit(bit, (unsigned long *)((c)->x86_capability))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
- (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
- x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+ x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
/*
- * This macro is for detection of features which need kernel
- * infrastructure to be used. It may *not* directly test the CPU
- * itself. Use the cpu_has() family if you want true runtime
- * testing of CPU features, like in hypervisor code where you are
- * supporting a possible guest feature where host support for it
+ * This is the default CPU features testing macro to use in code.
+ *
+ * It is for detection of features which need kernel infrastructure to be
+ * used. It may *not* directly test the CPU itself. Use the cpu_has() family
+ * if you want true runtime testing of CPU features, like in hypervisor code
+ * where you are supporting a possible guest feature where host support for it
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
@@ -132,70 +77,38 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
extern void setup_clear_cpu_cap(unsigned int bit);
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
-
-#define setup_force_cpu_cap(bit) do { \
- set_cpu_cap(&boot_cpu_data, bit); \
+void check_cpufeature_deps(struct cpuinfo_x86 *c);
+
+#define setup_force_cpu_cap(bit) do { \
+ \
+ if (!boot_cpu_has(bit)) \
+ WARN_ON(alternatives_patched); \
+ \
+ set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
-#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO)
-
-/*
- * Workaround for the sake of BPF compilation which utilizes kernel
- * headers, but clang does not support ASM GOTO and fails the build.
- */
-#ifndef __BPF_TRACING__
-#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
-#endif
-
-#define static_cpu_has(bit) boot_cpu_has(bit)
-
-#else
-
/*
- * Static testing of CPU features. Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
+ * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know
+ * that this is only used on a fallback path and will sometimes cause
+ * it to manifest the address of boot_cpu_data in a register, fouling
+ * the mainline (post-initialization) code.
*/
-static __always_inline __pure bool _static_cpu_has(u16 bit)
+static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P[always]\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P[feature]\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
+ asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]")
+ ".pushsection .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ ANNOTATE_DATA_SPECIAL "\n"
+ " testb %[bitnum], %a[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".popsection\n"
: : [feature] "i" (bit),
- [always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
@@ -209,7 +122,6 @@ t_no:
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
-#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
@@ -226,5 +138,5 @@ t_no:
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6d6122524711..c3b53beb1300 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -2,187 +2,178 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 19 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
+#define NBUGINTS 2 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature
+ * bit is not displayed in /proc/cpuinfo at all.
*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c as well.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
+#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */
+/* Free ( 3*32+ 7) */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -190,196 +181,393 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
-#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
-#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
-#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
-#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
-#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */
+#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
+#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */
+#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0xf, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */
+#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */
+#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */
+#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */
+#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
+#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
+#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
-#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
-#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
-#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
-#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
+#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
+#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
+#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */
+#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */
+#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */
+#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */
+#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
-#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
-#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
-
-/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */
+#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
+
+/*
+ * Linux-defined word for use with scattered/synthetic bits.
+ */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+
+#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */
+#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */
+#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */
+#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
+
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
+#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */
+#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
+
+/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+
+#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
+
+#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
+#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
+#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */
+#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /*
+ * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs.
+ * (SRSO_MSR_FIX in the official doc).
+ */
+
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
+#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
+#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */
+
+#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO (21*32+19) /*
+ * Clear CPU buffers before VM-Enter if the vCPU
+ * can access host MMIO (ignored for all intents
+ * and purposes if CLEAR_CPU_BUF_VM is set).
+ */
+#define X86_FEATURE_X2AVIC_EXT (21*32+20) /* AMD SVM x2AVIC support for 4k vCPUs */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
-#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
-#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
+/* BUG word 2 */
+#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
+#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
+#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
+#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
new file mode 100644
index 000000000000..44fa82e1267c
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/api.h
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_API_H
+#define _ASM_X86_CPUID_API_H
+
+#include <asm/cpuid/types.h>
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+#include <asm/string.h>
+
+/*
+ * Raw CPUID accessors:
+ */
+
+#ifdef CONFIG_X86_32
+bool cpuid_feature(void);
+#else
+static inline bool cpuid_feature(void)
+{
+ return true;
+}
+#endif
+
+static inline void native_cpuid(u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+#define NATIVE_CPUID_REG(reg) \
+static inline u32 native_cpuid_##reg(u32 op) \
+{ \
+ u32 eax = op, ebx, ecx = 0, edx; \
+ \
+ native_cpuid(&eax, &ebx, &ecx, &edx); \
+ \
+ return reg; \
+}
+
+/*
+ * Native CPUID functions returning a single datum:
+ */
+NATIVE_CPUID_REG(eax)
+NATIVE_CPUID_REG(ebx)
+NATIVE_CPUID_REG(ecx)
+NATIVE_CPUID_REG(edx)
+
+#ifdef CONFIG_PARAVIRT_XXL
+# include <asm/paravirt.h>
+#else
+# define __cpuid native_cpuid
+#endif
+
+/*
+ * Generic CPUID function
+ *
+ * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(u32 op,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ECX */
+static inline void cpuid_count(u32 op, int count,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum:
+ */
+
+static inline u32 cpuid_eax(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return eax;
+}
+
+static inline u32 cpuid_ebx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ebx;
+}
+
+static inline u32 cpuid_ecx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ecx;
+}
+
+static inline u32 cpuid_edx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return edx;
+}
+
+static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs)
+{
+ regs[CPUID_EAX] = leaf;
+ regs[CPUID_ECX] = subleaf;
+ __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
+}
+
+#define cpuid_subleaf(leaf, subleaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
+}
+
+#define cpuid_leaf(leaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, 0, (u32 *)(regs)); \
+}
+
+static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
+ enum cpuid_regs_idx regidx, u32 *reg)
+{
+ u32 regs[4];
+
+ __cpuid_read(leaf, subleaf, regs);
+ *reg = regs[regidx];
+}
+
+#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
+}
+
+#define cpuid_leaf_reg(leaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
+}
+
+/*
+ * Hypervisor-related APIs:
+ */
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+ switch (function) {
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0xf:
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x17:
+ case 0x18:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ case 0x24:
+ case 0x8000001d:
+ return true;
+ }
+
+ return false;
+}
+
+#define for_each_possible_cpuid_base_hypervisor(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
+static inline u32 cpuid_base_hypervisor(const char *sig, u32 leaves)
+{
+ u32 base, eax, signature[3];
+
+ for_each_possible_cpuid_base_hypervisor(base) {
+ cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
+
+ /*
+ * This must not compile to "call memcmp" because it's called
+ * from PVH early boot code before instrumentation is set up
+ * and memcmp() itself may be instrumented.
+ */
+ if (!__builtin_memcmp(sig, signature, 12) &&
+ (leaves == 0 || ((eax - base) >= leaves)))
+ return base;
+ }
+
+ return 0;
+}
+
+/*
+ * CPUID(0x2) parsing:
+ */
+
+/**
+ * cpuid_leaf_0x2() - Return sanitized CPUID(0x2) register output
+ * @regs: Output parameter
+ *
+ * Query CPUID(0x2) and store its output in @regs. Force set any
+ * invalid 1-byte descriptor returned by the hardware to zero (the NULL
+ * cache/TLB descriptor) before returning it to the caller.
+ *
+ * Use for_each_cpuid_0x2_desc() to iterate over the register output in
+ * parsed form.
+ */
+static inline void cpuid_leaf_0x2(union leaf_0x2_regs *regs)
+{
+ cpuid_leaf(0x2, regs);
+
+ /*
+ * All Intel CPUs must report an iteration count of 1. In case
+ * of bogus hardware, treat all returned descriptors as NULL.
+ */
+ if (regs->desc[0] != 0x01) {
+ for (int i = 0; i < 4; i++)
+ regs->regv[i] = 0;
+ return;
+ }
+
+ /*
+ * The most significant bit (MSB) of each register must be clear.
+ * If a register is invalid, replace its descriptors with NULL.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (regs->reg[i].invalid)
+ regs->regv[i] = 0;
+ }
+}
+
+/**
+ * for_each_cpuid_0x2_desc() - Iterator for parsed CPUID(0x2) descriptors
+ * @_regs: CPUID(0x2) register output, as returned by cpuid_leaf_0x2()
+ * @_ptr: u8 pointer, for macro internal use only
+ * @_desc: Pointer to the parsed CPUID(0x2) descriptor at each iteration
+ *
+ * Loop over the 1-byte descriptors in the passed CPUID(0x2) output registers
+ * @_regs. Provide the parsed information for each descriptor through @_desc.
+ *
+ * To handle cache-specific descriptors, switch on @_desc->c_type. For TLB
+ * descriptors, switch on @_desc->t_type.
+ *
+ * Example usage for cache descriptors::
+ *
+ * const struct leaf_0x2_table *desc;
+ * union leaf_0x2_regs regs;
+ * u8 *ptr;
+ *
+ * cpuid_leaf_0x2(&regs);
+ * for_each_cpuid_0x2_desc(regs, ptr, desc) {
+ * switch (desc->c_type) {
+ * ...
+ * }
+ * }
+ */
+#define for_each_cpuid_0x2_desc(_regs, _ptr, _desc) \
+ for (_ptr = &(_regs).desc[1]; \
+ _ptr < &(_regs).desc[16] && (_desc = &cpuid_0x2_table[*_ptr]); \
+ _ptr++)
+
+/*
+ * CPUID(0x80000006) parsing:
+ */
+
+static inline bool cpuid_amd_hygon_has_l3_cache(void)
+{
+ return cpuid_edx(0x80000006);
+}
+
+#endif /* _ASM_X86_CPUID_API_H */
diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
new file mode 100644
index 000000000000..8a00364b79de
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_TYPES_H
+#define _ASM_X86_CPUID_TYPES_H
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+/*
+ * Types for raw CPUID access:
+ */
+
+struct cpuid_regs {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+enum cpuid_regs_idx {
+ CPUID_EAX = 0,
+ CPUID_EBX,
+ CPUID_ECX,
+ CPUID_EDX,
+};
+
+#define CPUID_LEAF_MWAIT 0x05
+#define CPUID_LEAF_DCA 0x09
+#define CPUID_LEAF_XSTATE 0x0d
+#define CPUID_LEAF_TSC 0x15
+#define CPUID_LEAF_FREQ 0x16
+#define CPUID_LEAF_TILE 0x1d
+
+/*
+ * Types for CPUID(0x2) parsing:
+ */
+
+struct leaf_0x2_reg {
+ u32 : 31,
+ invalid : 1;
+};
+
+union leaf_0x2_regs {
+ struct leaf_0x2_reg reg[4];
+ u32 regv[4];
+ u8 desc[16];
+};
+
+/*
+ * Leaf 0x2 1-byte descriptors' cache types
+ * To be used for their mappings at cpuid_0x2_table[]
+ *
+ * Start at 1 since type 0 is reserved for HW byte descriptors which are
+ * not recognized by the kernel; i.e., those without an explicit mapping.
+ */
+enum _cache_table_type {
+ CACHE_L1_INST = 1,
+ CACHE_L1_DATA,
+ CACHE_L2,
+ CACHE_L3
+ /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */
+} __packed;
+#ifndef __CHECKER__
+static_assert(sizeof(enum _cache_table_type) == 1);
+#endif
+
+/*
+ * Ensure that leaf 0x2 cache and TLB type values do not intersect,
+ * since they share the same type field at struct cpuid_0x2_table.
+ */
+#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1)
+
+/*
+ * Leaf 0x2 1-byte descriptors' TLB types
+ * To be used for their mappings at cpuid_0x2_table[]
+ */
+enum _tlb_table_type {
+ TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN,
+ TLB_INST_4M,
+ TLB_INST_2M_4M,
+ TLB_INST_ALL,
+
+ TLB_DATA_4K,
+ TLB_DATA_4M,
+ TLB_DATA_2M_4M,
+ TLB_DATA_4K_4M,
+ TLB_DATA_1G,
+ TLB_DATA_1G_2M_4M,
+
+ TLB_DATA0_4K,
+ TLB_DATA0_4M,
+ TLB_DATA0_2M_4M,
+
+ STLB_4K,
+ STLB_4K_2M,
+} __packed;
+#ifndef __CHECKER__
+static_assert(sizeof(enum _tlb_table_type) == 1);
+#endif
+
+/*
+ * Combined parsing table for leaf 0x2 cache and TLB descriptors.
+ */
+
+struct leaf_0x2_table {
+ union {
+ enum _cache_table_type c_type;
+ enum _tlb_table_type t_type;
+ };
+ union {
+ short c_size;
+ short entries;
+ };
+};
+
+extern const struct leaf_0x2_table cpuid_0x2_table[256];
+
+/*
+ * All of leaf 0x2's one-byte TLB descriptors implies the same number of entries
+ * for their respective TLB types. TLB descriptor 0x63 is an exception: it
+ * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages.
+ *
+ * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry
+ * count for dTLB 1GB pages is already encoded at the cpuid_0x2_table[]'s mapping.
+ */
+#define TLB_0x63_2M_4M_ENTRIES 32
+
+#endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..c8b39c6716ff
--- /dev/null
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+void arch_haltpoll_enable(unsigned int cpu);
+void arch_haltpoll_disable(unsigned int cpu);
+
+#endif
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 6722ffcef2e6..9df9e9cde670 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -1,15 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPUMASK_H
#define _ASM_X86_CPUMASK_H
-#ifndef __ASSEMBLY__
-#include <linux/cpumask.h>
+#ifndef __ASSEMBLER__
-extern cpumask_var_t cpu_callin_mask;
-extern cpumask_var_t cpu_callout_mask;
-extern cpumask_var_t cpu_initialized_mask;
-extern cpumask_var_t cpu_sibling_setup_mask;
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
extern void setup_cpu_local_masks(void);
-#endif /* __ASSEMBLY__ */
+/*
+ * NMI and MCE exceptions need cpu_is_offline() _really_ early,
+ * provide an arch_ special for them to avoid instrumentation.
+ */
+#if NR_CPUS > 1
+static __always_inline bool arch_cpu_online(int cpu)
+{
+ return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
+}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+#else
+static __always_inline bool arch_cpu_online(int cpu)
+{
+ return cpu == 0;
+}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ return;
+}
+#endif
+
+#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index 0acf5ee45a21..8b6bd63530dc 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -2,8 +2,9 @@
#ifndef _ASM_X86_CRASH_H
#define _ASM_X86_CRASH_H
+struct kimage;
+
int crash_load_segments(struct kimage *image);
-int crash_copy_backup_region(struct kimage *image);
int crash_setup_memmap_entries(struct kimage *image,
struct boot_params *params);
void crash_smp_send_stop(void);
diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h
new file mode 100644
index 000000000000..7835b2cdff04
--- /dev/null
+++ b/arch/x86/include/asm/crash_reserve.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_CRASH_RESERVE_H
+#define _X86_CRASH_RESERVE_H
+
+/* 16M alignment for crash kernel regions */
+#define CRASH_ALIGN SZ_16M
+
+/*
+ * Keep the crash kernel below this limit.
+ *
+ * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
+ * due to mapping restrictions.
+ *
+ * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
+ * the upper limit of system RAM in 4-level paging mode. Since the kdump
+ * jump could be from 5-level paging to 4-level paging, the jump will fail if
+ * the kernel is put above 64 TB, and during the 1st kernel bootup there's
+ * no good way to detect the paging mode of the target kernel which will be
+ * loaded for dumping.
+ */
+extern unsigned long swiotlb_size_or_default(void);
+
+#ifdef CONFIG_X86_32
+# define CRASH_ADDR_LOW_MAX SZ_512M
+# define CRASH_ADDR_HIGH_MAX SZ_512M
+#else
+# define CRASH_ADDR_LOW_MAX SZ_4G
+# define CRASH_ADDR_HIGH_MAX SZ_64T
+#endif
+
+# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default()
+
+static inline unsigned long crash_low_size_default(void)
+{
+#ifdef CONFIG_X86_64
+ return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
+#else
+ return 0;
+#endif
+}
+
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
+#endif /* _X86_CRASH_RESERVE_H */
diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h
deleted file mode 100644
index c508521dd190..000000000000
--- a/arch/x86/include/asm/crypto/aes.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_AES_H
-#define ASM_X86_AES_H
-
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
- const u8 *src);
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
- const u8 *src);
-#endif
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
deleted file mode 100644
index a5d86fc0593f..000000000000
--- a/arch/x86/include/asm/crypto/camellia.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_CAMELLIA_H
-#define ASM_X86_CAMELLIA_H
-
-#include <crypto/b128ops.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-
-#define CAMELLIA_MIN_KEY_SIZE 16
-#define CAMELLIA_MAX_KEY_SIZE 32
-#define CAMELLIA_BLOCK_SIZE 16
-#define CAMELLIA_TABLE_BYTE_LEN 272
-#define CAMELLIA_PARALLEL_BLOCKS 2
-
-struct crypto_skcipher;
-
-struct camellia_ctx {
- u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
- u32 key_length;
-};
-
-struct camellia_xts_ctx {
- struct camellia_ctx tweak_ctx;
- struct camellia_ctx crypt_ctx;
-};
-
-extern int __camellia_setkey(struct camellia_ctx *cctx,
- const unsigned char *key,
- unsigned int key_len, u32 *flags);
-
-extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen);
-
-/* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, true);
-}
-
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, true);
-}
-
-/* glue helpers */
-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-
-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-
-#endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
deleted file mode 100644
index d1818634ae7e..000000000000
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Shared glue code for 128bit block ciphers
- */
-
-#ifndef _CRYPTO_GLUE_HELPER_H
-#define _CRYPTO_GLUE_HELPER_H
-
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <asm/fpu/api.h>
-#include <crypto/b128ops.h>
-
-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-
-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
-
-struct common_glue_func_entry {
- unsigned int num_blocks; /* number of blocks that @fn will process */
- union {
- common_glue_func_t ecb;
- common_glue_cbc_func_t cbc;
- common_glue_ctr_func_t ctr;
- common_glue_xts_func_t xts;
- } fn_u;
-};
-
-struct common_glue_ctx {
- unsigned int num_funcs;
- int fpu_blocks_limit; /* -1 means fpu not needed at all */
-
- /*
- * First funcs entry must have largest num_blocks and last funcs entry
- * must have num_blocks == 1!
- */
- struct common_glue_func_entry funcs[];
-};
-
-static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
- struct skcipher_walk *walk,
- bool fpu_enabled, unsigned int nbytes)
-{
- if (likely(fpu_blocks_limit < 0))
- return false;
-
- if (fpu_enabled)
- return true;
-
- /*
- * Vector-registers are only used when chunk to be processed is large
- * enough, so do not enable FPU until it is necessary.
- */
- if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
- return false;
-
- /* prevent sleeping if FPU is in use */
- skcipher_walk_atomise(walk);
-
- kernel_fpu_begin();
- return true;
-}
-
-static inline void glue_fpu_end(bool fpu_enabled)
-{
- if (fpu_enabled)
- kernel_fpu_end();
-}
-
-static inline void le128_to_be128(be128 *dst, const le128 *src)
-{
- dst->a = cpu_to_be64(le64_to_cpu(src->a));
- dst->b = cpu_to_be64(le64_to_cpu(src->b));
-}
-
-static inline void be128_to_le128(le128 *dst, const be128 *src)
-{
- dst->a = cpu_to_le64(be64_to_cpu(src->a));
- dst->b = cpu_to_le64(be64_to_cpu(src->b));
-}
-
-static inline void le128_inc(le128 *i)
-{
- u64 a = le64_to_cpu(i->a);
- u64 b = le64_to_cpu(i->b);
-
- b++;
- if (!b)
- a++;
-
- i->a = cpu_to_le64(a);
- i->b = cpu_to_le64(b);
-}
-
-extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req);
-
-extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
- struct skcipher_request *req);
-
-extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req);
-
-extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req);
-
-extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req,
- common_glue_func_t tweak_fn, void *tweak_ctx,
- void *crypt_ctx);
-
-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
- le128 *iv, common_glue_func_t fn);
-
-#endif /* _CRYPTO_GLUE_HELPER_H */
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
deleted file mode 100644
index db7c9cc32234..000000000000
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_SERPENT_AVX_H
-#define ASM_X86_SERPENT_AVX_H
-
-#include <crypto/b128ops.h>
-#include <crypto/serpent.h>
-#include <linux/types.h>
-
-struct crypto_skcipher;
-
-#define SERPENT_PARALLEL_BLOCKS 8
-
-struct serpent_xts_ctx {
- struct serpent_ctx tweak_ctx;
- struct serpent_ctx crypt_ctx;
-};
-
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src);
-
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-
-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-
-extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen);
-
-#endif
diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h
deleted file mode 100644
index 1a345e8a7496..000000000000
--- a/arch/x86/include/asm/crypto/serpent-sse2.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_SERPENT_SSE2_H
-#define ASM_X86_SERPENT_SSE2_H
-
-#include <linux/crypto.h>
-#include <crypto/serpent.h>
-
-#ifdef CONFIG_X86_32
-
-#define SERPENT_PARALLEL_BLOCKS 4
-
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_4way(ctx, dst, src, false);
-}
-
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_4way(ctx, dst, src, true);
-}
-
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- serpent_dec_blk_4way(ctx, dst, src);
-}
-
-#else
-
-#define SERPENT_PARALLEL_BLOCKS 8
-
-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_8way(ctx, dst, src, false);
-}
-
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_8way(ctx, dst, src, true);
-}
-
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- serpent_dec_blk_8way(ctx, dst, src);
-}
-
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
deleted file mode 100644
index f618bf272b90..000000000000
--- a/arch/x86/include/asm/crypto/twofish.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_TWOFISH_H
-#define ASM_X86_TWOFISH_H
-
-#include <linux/crypto.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-
-/* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-
-#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 3e204e6140b5..cc4a3f725b37 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -2,21 +2,31 @@
#ifndef _ASM_X86_CURRENT_H
#define _ASM_X86_CURRENT_H
+#include <linux/build_bug.h>
#include <linux/compiler.h>
+
+#ifndef __ASSEMBLER__
+
+#include <linux/cache.h>
#include <asm/percpu.h>
-#ifndef __ASSEMBLY__
struct task_struct;
-DECLARE_PER_CPU(struct task_struct *, current_task);
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override,
+ const_current_task);
static __always_inline struct task_struct *get_current(void)
{
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_current_task);
+
return this_cpu_read_stable(current_task);
}
#define current get_current()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 9e5ca30738e5..a2c1f2d24b64 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -2,10 +2,21 @@
#ifndef _ASM_X86_DEBUGREG_H
#define _ASM_X86_DEBUGREG_H
-
#include <linux/bug.h>
+#include <linux/percpu.h>
#include <uapi/asm/debugreg.h>
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+
+/*
+ * Define bits that are always set to 1 in DR7, only bit 10 is
+ * architecturally reserved to '1'.
+ *
+ * This is also the init/reset value for DR7.
+ */
+#define DR7_FIXED_1 0x00000400
+
DECLARE_PER_CPU(unsigned long, cpu_dr7);
#ifndef CONFIG_PARAVIRT_XXL
@@ -18,9 +29,9 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7);
native_set_debugreg(register, value)
#endif
-static inline unsigned long native_get_debugreg(int regno)
+static __always_inline unsigned long native_get_debugreg(int regno)
{
- unsigned long val = 0; /* Damn you, gcc! */
+ unsigned long val;
switch (regno) {
case 0:
@@ -39,7 +50,20 @@ static inline unsigned long native_get_debugreg(int regno)
asm("mov %%db6, %0" :"=r" (val));
break;
case 7:
- asm("mov %%db7, %0" :"=r" (val));
+ /*
+ * Use "asm volatile" for DR7 reads to forbid re-ordering them
+ * with other code.
+ *
+ * This is needed because a DR7 access can cause a #VC exception
+ * when running under SEV-ES. Taking a #VC exception is not a
+ * safe thing to do just anywhere in the entry code and
+ * re-ordering might place the access into an unsafe location.
+ *
+ * This happened in the NMI handler, where the DR7 read was
+ * re-ordered to happen before the call to sev_es_ist_enter(),
+ * causing stack recursion.
+ */
+ asm volatile("mov %%db7, %0" : "=r" (val));
break;
default:
BUG();
@@ -47,7 +71,7 @@ static inline unsigned long native_get_debugreg(int regno)
return val;
}
-static inline void native_set_debugreg(int regno, unsigned long value)
+static __always_inline void native_set_debugreg(int regno, unsigned long value)
{
switch (regno) {
case 0:
@@ -66,7 +90,16 @@ static inline void native_set_debugreg(int regno, unsigned long value)
asm("mov %0, %%db6" ::"r" (value));
break;
case 7:
- asm("mov %0, %%db7" ::"r" (value));
+ /*
+ * Use "asm volatile" for DR7 writes to forbid re-ordering them
+ * with other code.
+ *
+ * While is didn't happen with a DR7 write (see the DR7 read
+ * comment above which explains where it happened), add the
+ * "asm volatile" here too to avoid similar problems in the
+ * future.
+ */
+ asm volatile("mov %0, %%db7" ::"r" (value));
break;
default:
BUG();
@@ -75,8 +108,8 @@ static inline void native_set_debugreg(int regno, unsigned long value)
static inline void hw_breakpoint_disable(void)
{
- /* Zero the control register for HW Breakpoint */
- set_debugreg(0UL, 7);
+ /* Reset the control register for HW Breakpoint */
+ set_debugreg(DR7_FIXED_1, 7);
/* Zero-out the individual HW breakpoint address registers */
set_debugreg(0UL, 0);
@@ -85,40 +118,80 @@ static inline void hw_breakpoint_disable(void)
set_debugreg(0UL, 3);
}
-static inline int hw_breakpoint_active(void)
+static __always_inline bool hw_breakpoint_active(void)
{
return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
}
-extern void aout_dump_debugregs(struct user *dump);
-
extern void hw_breakpoint_restore(void);
-#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(int, debug_stack_usage);
-static inline void debug_stack_usage_inc(void)
+static __always_inline unsigned long local_db_save(void)
{
- __this_cpu_inc(debug_stack_usage);
+ unsigned long dr7;
+
+ if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active())
+ return 0;
+
+ get_debugreg(dr7, 7);
+
+ /* Architecturally set bit */
+ dr7 &= ~DR7_FIXED_1;
+ if (dr7)
+ set_debugreg(DR7_FIXED_1, 7);
+
+ /*
+ * Ensure the compiler doesn't lower the above statements into
+ * the critical section; disabling breakpoints late would not
+ * be good.
+ */
+ barrier();
+
+ return dr7;
}
-static inline void debug_stack_usage_dec(void)
+
+static __always_inline void local_db_restore(unsigned long dr7)
{
- __this_cpu_dec(debug_stack_usage);
+ /*
+ * Ensure the compiler doesn't raise this statement into
+ * the critical section; enabling breakpoints early would
+ * not be good.
+ */
+ barrier();
+ if (dr7)
+ set_debugreg(dr7, 7);
}
-int is_debug_stack(unsigned long addr);
-void debug_stack_set_zero(void);
-void debug_stack_reset(void);
-#else /* !X86_64 */
-static inline int is_debug_stack(unsigned long addr) { return 0; }
-static inline void debug_stack_set_zero(void) { }
-static inline void debug_stack_reset(void) { }
-static inline void debug_stack_usage_inc(void) { }
-static inline void debug_stack_usage_dec(void) { }
-#endif /* X86_64 */
#ifdef CONFIG_CPU_SUP_AMD
-extern void set_dr_addr_mask(unsigned long mask, int dr);
+extern void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr);
+extern unsigned long amd_get_dr_addr_mask(unsigned int dr);
#else
-static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
+static inline void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { }
+static inline unsigned long amd_get_dr_addr_mask(unsigned int dr)
+{
+ return 0;
+}
+#endif
+
+static inline unsigned long get_debugctlmsr(void)
+{
+ unsigned long debugctlmsr = 0;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return 0;
#endif
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+
+ return debugctlmsr;
+}
+
+static inline void update_debugctlmsr(unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return;
+#endif
+ wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+}
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index de9e7841f953..630891d25819 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -3,8 +3,10 @@
#define _ASM_X86_DELAY_H
#include <asm-generic/delay.h>
+#include <linux/init.h>
-void use_tsc_delay(void);
+void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 68a99d2a5f33..ec95fe44fa3a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -9,6 +9,7 @@
#include <asm/irq_vectors.h>
#include <asm/cpu_entry_area.h>
+#include <linux/debug_locks.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -40,11 +41,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->l = 0;
}
-extern struct desc_ptr idt_descr;
-extern gate_desc idt_table[];
-extern const struct desc_ptr debug_idt_descr;
-extern gate_desc debug_idt_table[];
-
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
@@ -214,7 +210,7 @@ static inline void native_load_gdt(const struct desc_ptr *dtr)
asm volatile("lgdt %0"::"m" (*dtr));
}
-static inline void native_load_idt(const struct desc_ptr *dtr)
+static __always_inline void native_load_idt(const struct desc_ptr *dtr)
{
asm volatile("lidt %0"::"m" (*dtr));
}
@@ -229,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr)
asm volatile("sidt %0":"=m" (*dtr));
}
+static inline void native_gdt_invalidate(void)
+{
+ const struct desc_ptr invalid_gdt = {
+ .address = 0,
+ .size = 0
+ };
+
+ native_load_gdt(&invalid_gdt);
+}
+
+static inline void native_idt_invalidate(void)
+{
+ const struct desc_ptr invalid_idt = {
+ .address = 0,
+ .size = 0
+ };
+
+ native_load_idt(&invalid_idt);
+}
+
/*
* The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
* a read-only remapping. To prevent a page fault, the GDT is switched to the
@@ -386,66 +402,48 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit1 = (limit >> 16) & 0xf;
}
-void update_intr_gate(unsigned int n, const void *addr);
-void alloc_intr_gate(unsigned int n, const void *addr);
-
-extern unsigned long system_vectors[];
-
-#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(u32, debug_idt_ctr);
-static inline bool is_debug_idt_enabled(void)
+static inline void init_idt_data(struct idt_data *data, unsigned int n,
+ const void *addr)
{
- if (this_cpu_read(debug_idt_ctr))
- return true;
+ BUG_ON(n > 0xFF);
- return false;
+ memset(data, 0, sizeof(*data));
+ data->vector = n;
+ data->addr = addr;
+ data->segment = __KERNEL_CS;
+ data->bits.type = GATE_INTERRUPT;
+ data->bits.p = 1;
}
-static inline void load_debug_idt(void)
-{
- load_idt((const struct desc_ptr *)&debug_idt_descr);
-}
-#else
-static inline bool is_debug_idt_enabled(void)
+static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
{
- return false;
-}
+ unsigned long addr = (unsigned long) d->addr;
-static inline void load_debug_idt(void)
-{
-}
+ gate->offset_low = (u16) addr;
+ gate->segment = (u16) d->segment;
+ gate->bits = d->bits;
+ gate->offset_middle = (u16) (addr >> 16);
+#ifdef CONFIG_X86_64
+ gate->offset_high = (u32) (addr >> 32);
+ gate->reserved = 0;
#endif
-
-/*
- * The load_current_idt() must be called with interrupts disabled
- * to avoid races. That way the IDT will always be set back to the expected
- * descriptor. It's also called when a CPU is being initialized, and
- * that doesn't need to disable interrupts, as nothing should be
- * bothering the CPU then.
- */
-static inline void load_current_idt(void)
-{
- if (is_debug_idt_enabled())
- load_debug_idt();
- else
- load_idt((const struct desc_ptr *)&idt_descr);
}
+extern unsigned long system_vectors[];
+
+extern void load_current_idt(void);
extern void idt_setup_early_handler(void);
extern void idt_setup_early_traps(void);
extern void idt_setup_traps(void);
extern void idt_setup_apic_and_irq_gates(void);
+extern bool idt_is_f00f_address(unsigned long address);
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
-extern void idt_setup_ist_traps(void);
-extern void idt_setup_debugidt_traps(void);
#else
static inline void idt_setup_early_pf(void) { }
-static inline void idt_setup_ist_traps(void) { }
-static inline void idt_setup_debugidt_traps(void) { }
#endif
-extern void idt_invalidate(void *addr);
+extern void idt_invalidate(void);
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index a91f3b6e4f2a..7e6b9314758a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -8,7 +8,57 @@
* archs.
*/
-#ifndef __ASSEMBLY__
+/*
+ * Low-level interface mapping flags/field names to bits
+ */
+
+/* Flags for _DESC_S (non-system) descriptors */
+#define _DESC_ACCESSED 0x0001
+#define _DESC_DATA_WRITABLE 0x0002
+#define _DESC_CODE_READABLE 0x0002
+#define _DESC_DATA_EXPAND_DOWN 0x0004
+#define _DESC_CODE_CONFORMING 0x0004
+#define _DESC_CODE_EXECUTABLE 0x0008
+
+/* Common flags */
+#define _DESC_S 0x0010
+#define _DESC_DPL(dpl) ((dpl) << 5)
+#define _DESC_PRESENT 0x0080
+
+#define _DESC_LONG_CODE 0x2000
+#define _DESC_DB 0x4000
+#define _DESC_GRANULARITY_4K 0x8000
+
+/* System descriptors have a numeric "type" field instead of flags */
+#define _DESC_SYSTEM(code) (code)
+
+/*
+ * High-level interface mapping intended usage to low-level combinations
+ * of flags
+ */
+
+#define _DESC_DATA (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \
+ _DESC_DATA_WRITABLE)
+#define _DESC_CODE (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \
+ _DESC_CODE_READABLE | _DESC_CODE_EXECUTABLE)
+
+#define DESC_DATA16 (_DESC_DATA)
+#define DESC_CODE16 (_DESC_CODE)
+
+#define DESC_DATA32 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_DATA32_BIOS (_DESC_DATA | _DESC_DB)
+
+#define DESC_CODE32 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_CODE32_BIOS (_DESC_CODE | _DESC_DB)
+
+#define DESC_TSS32 (_DESC_SYSTEM(9) | _DESC_PRESENT)
+
+#define DESC_DATA64 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB)
+#define DESC_CODE64 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_LONG_CODE)
+
+#define DESC_USER (_DESC_DPL(3))
+
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -22,19 +72,19 @@ struct desc_struct {
#define GDT_ENTRY_INIT(flags, base, limit) \
{ \
- .limit0 = (u16) (limit), \
- .limit1 = ((limit) >> 16) & 0x0F, \
- .base0 = (u16) (base), \
- .base1 = ((base) >> 16) & 0xFF, \
- .base2 = ((base) >> 24) & 0xFF, \
- .type = (flags & 0x0f), \
- .s = (flags >> 4) & 0x01, \
- .dpl = (flags >> 5) & 0x03, \
- .p = (flags >> 7) & 0x01, \
- .avl = (flags >> 12) & 0x01, \
- .l = (flags >> 13) & 0x01, \
- .d = (flags >> 14) & 0x01, \
- .g = (flags >> 15) & 0x01, \
+ .limit0 = ((limit) >> 0) & 0xFFFF, \
+ .limit1 = ((limit) >> 16) & 0x000F, \
+ .base0 = ((base) >> 0) & 0xFFFF, \
+ .base1 = ((base) >> 16) & 0x00FF, \
+ .base2 = ((base) >> 24) & 0x00FF, \
+ .type = ((flags) >> 0) & 0x000F, \
+ .s = ((flags) >> 4) & 0x0001, \
+ .dpl = ((flags) >> 5) & 0x0003, \
+ .p = ((flags) >> 7) & 0x0001, \
+ .avl = ((flags) >> 12) & 0x0001, \
+ .l = ((flags) >> 13) & 0x0001, \
+ .d = ((flags) >> 14) & 0x0001, \
+ .g = ((flags) >> 15) & 0x0001, \
}
enum {
@@ -74,6 +124,13 @@ struct idt_bits {
p : 1;
} __attribute__((packed));
+struct idt_data {
+ unsigned int vector;
+ unsigned int segment;
+ struct idt_bits bits;
+ const void *addr;
+};
+
struct gate_struct {
u16 offset_low;
u16 segment;
@@ -87,6 +144,7 @@ struct gate_struct {
typedef struct gate_struct gate_desc;
+#ifndef _SETUP
static inline unsigned long gate_offset(const gate_desc *g)
{
#ifdef CONFIG_X86_64
@@ -101,13 +159,17 @@ static inline unsigned long gate_segment(const gate_desc *g)
{
return g->segment;
}
+#endif
struct desc_ptr {
unsigned short size;
unsigned long address;
} __attribute__((packed)) ;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
+
+/* Boot IDT definitions */
+#define BOOT_IDT_ENTRIES 32
/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index a8f6c809d9b1..7c0a52ca2f4d 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -3,24 +3,8 @@
#define _ASM_X86_DEVICE_H
struct dev_archdata {
-#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
- void *iommu; /* hook for IOMMU specific extension */
-#endif
-#ifdef CONFIG_STA2X11
- bool is_sta2x11;
-#endif
};
-#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
-struct dma_domain {
- struct list_head node;
- const struct dma_map_ops *dma_ops;
- int domain_nr;
-};
-void add_dma_domain(struct dma_domain *domain);
-void del_dma_domain(struct dma_domain *domain);
-#endif
-
struct pdev_archdata {
};
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index a5ea841cc6d2..000000000000
--- a/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX 0
-#else
-# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
-#endif
-
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP 0
-#else
-# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
-#ifdef CONFIG_X86_INTEL_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
-#define DISABLED_MASK12 0
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 20a46150e0a8..30fd06ede751 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
}
#define div_u64_rem div_u64_rem
+/*
+ * gcc tends to zero extend 32bit values and do full 64bit maths.
+ * Define asm functions that avoid this.
+ * (clang generates better code for the C versions.)
+ */
+#ifndef __clang__
static inline u64 mul_u32_u32(u32 a, u32 b)
{
u32 high, low;
@@ -71,8 +77,54 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
}
#define mul_u32_u32 mul_u32_u32
+static inline u64 add_u64_u32(u64 a, u32 b)
+{
+ u32 high = a >> 32, low = a;
+
+ asm ("addl %[b], %[low]; adcl $0, %[high]"
+ : [low] "+r" (low), [high] "+r" (high)
+ : [b] "rm" (b) );
+
+ return low | (u64)high << 32;
+}
+#define add_u64_u32 add_u64_u32
+#endif
+
+/*
+ * __div64_32() is never called on x86, so prevent the
+ * generic definition from getting built.
+ */
+#define __div64_32
+
#else
# include <asm-generic/div64.h>
+
+/*
+ * Will generate an #DE when the result doesn't fit u64, could fix with an
+ * __ex_table[] entry when it becomes an issue.
+ */
+static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div)
+{
+ u64 rdx;
+
+ asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul));
+
+ if (!statically_true(!add))
+ asm ("addq %[add], %[lo]; adcq $0, %[hi]" :
+ [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add));
+
+ asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div));
+
+ return rax;
+}
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+ return mul_u64_add_u64_div_u64(a, mul, 0, div);
+}
+#define mul_u64_u32_div mul_u64_u32_div
+
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_DIV64_H */
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
deleted file mode 100644
index 1a19251eaac9..000000000000
--- a/arch/x86/include/asm/dma-direct.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_DMA_DIRECT_H
-#define ASM_X86_DMA_DIRECT_H 1
-
-bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-
-#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ce4d176b3d13..d1dac96ee30b 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,35 +2,11 @@
#ifndef _ASM_X86_DMA_MAPPING_H
#define _ASM_X86_DMA_MAPPING_H
-/*
- * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and
- * Documentation/DMA-API.txt for documentation.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/dma-debug.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-#include <linux/dma-contiguous.h>
-
-#ifdef CONFIG_ISA
-# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
-#else
-# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
-#endif
-
-extern int iommu_merge;
-extern struct device x86_dma_fallback_dev;
-extern int panic_on_overflow;
-
extern const struct dma_map_ops *dma_ops;
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+static inline const struct dma_map_ops *get_arch_dma_ops(void)
{
return dma_ops;
}
-bool arch_dma_alloc_attrs(struct device **dev);
-#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-
#endif
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 00f7cf45e699..8ae6e0e11b8b 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -74,7 +74,7 @@
#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
+#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT))
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
@@ -307,12 +307,4 @@ extern int request_dma(unsigned int dmanr, const char *device_id);
extern void free_dma(unsigned int dmanr);
#endif
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
#endif /* _ASM_X86_DMA_H */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
new file mode 100644
index 000000000000..de0e88b32207
--- /dev/null
+++ b/arch/x86/include/asm/doublefault.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#include <linux/linkage.h>
+
+#ifdef CONFIG_X86_32
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+asmlinkage void __noreturn doublefault_shim(void);
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index ae391f609840..302e11b15da8 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -2,19 +2,10 @@
#ifndef _ASM_X86_DWARF2_H
#define _ASM_X86_DWARF2_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#warning "asm/dwarf2.h should be only included in pure assembly files"
#endif
-/*
- * Macros for dwarf2 CFI unwind table entries.
- * See "as.info" for details on these pseudo ops. Unfortunately
- * they are only supported in very new binutils, so define them
- * away for older version.
- */
-
-#ifdef CONFIG_AS_CFI
-
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
#define CFI_DEF_CFA .cfi_def_cfa
@@ -30,20 +21,13 @@
#define CFI_UNDEFINED .cfi_undefined
#define CFI_ESCAPE .cfi_escape
-#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
-#else
-#define CFI_SIGNAL_FRAME
-#endif
-
-#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
#ifndef BUILD_VDSO
/*
* Emit CFI data in .debug_frame sections, not .eh_frame sections.
* The latter we currently just discard since we don't do DWARF
* unwinding at runtime. So only the offline DWARF information is
- * useful to anyone. Note we should not use this directive if
- * vmlinux.lds.S gets changed so it doesn't discard .eh_frame.
+ * useful to anyone. Note we should not use this directive if we
+ * ever decide to enable DWARF unwinding at runtime.
*/
.cfi_sections .debug_frame
#else
@@ -53,33 +37,5 @@
*/
.cfi_sections .eh_frame, .debug_frame
#endif
-#endif
-
-#else
-
-/*
- * Due to the structure of pre-exisiting code, don't use assembler line
- * comment character # to ignore the arguments. Instead, use a dummy macro.
- */
-.macro cfi_ignore a=0, b=0, c=0, d=0
-.endm
-
-#define CFI_STARTPROC cfi_ignore
-#define CFI_ENDPROC cfi_ignore
-#define CFI_DEF_CFA cfi_ignore
-#define CFI_DEF_CFA_REGISTER cfi_ignore
-#define CFI_DEF_CFA_OFFSET cfi_ignore
-#define CFI_ADJUST_CFA_OFFSET cfi_ignore
-#define CFI_OFFSET cfi_ignore
-#define CFI_REL_OFFSET cfi_ignore
-#define CFI_REGISTER cfi_ignore
-#define CFI_RESTORE cfi_ignore
-#define CFI_REMEMBER_STATE cfi_ignore
-#define CFI_RESTORE_STATE cfi_ignore
-#define CFI_UNDEFINED cfi_ignore
-#define CFI_ESCAPE cfi_ignore
-#define CFI_SIGNAL_FRAME cfi_ignore
-
-#endif
#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 62be73b23d5c..c83645d5b2a8 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -10,12 +10,14 @@ extern struct e820_table *e820_table_firmware;
extern unsigned long pci_mem_start;
+extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
@@ -27,7 +29,6 @@ extern unsigned long e820__end_of_low_ram_pfn(void);
extern u64 e820__memblock_alloc_reserved(u64 size, u64 align);
extern void e820__memblock_setup(void);
-extern void e820__reserve_setup_data(void);
extern void e820__finish_early_params(void);
extern void e820__reserve_resources(void);
extern void e820__reserve_resources_late(void);
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..80c4a7266629 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,13 +29,12 @@ enum e820_type {
E820_TYPE_PRAM = 12,
/*
- * Reserved RAM used by the kernel itself if
- * CONFIG_INTEL_TXT=y is enabled, memory of this type
- * will be included in the S3 integrity calculation
- * and so should not include any memory that the BIOS
- * might alter over the S3 transition:
+ * Special-purpose memory is indicated to the system via the
+ * EFI_MEMORY_SP attribute. Define an e820 translation of this
+ * memory type for the purpose of reserving this range and
+ * marking it with the IORES_DESC_SOFT_RESERVED designation.
*/
- E820_TYPE_RESERVED_KERN = 128,
+ E820_TYPE_SOFT_RESERVED = 0xefffffff,
};
/*
diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h
index 426fc53ff803..dfbd1ebb9f10 100644
--- a/arch/x86/include/asm/edac.h
+++ b/arch/x86/include/asm/edac.h
@@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
* are interrupt, DMA and SMP safe.
*/
for (i = 0; i < size / 4; i++, virt_addr++)
- asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
+ asm volatile("lock addl $0, %0"::"m" (*virt_addr));
}
#endif /* _ASM_X86_EDAC_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 107283b1eb1e..f227a70ac91f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,11 +3,17 @@
#define _ASM_X86_EFI_H
#include <asm/fpu/api.h>
-#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
+#include <asm/ibt.h>
+#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+extern unsigned long efi_fw_vendor, efi_config_table;
+extern unsigned long efi_mixed_mode_stack_pa;
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -18,92 +24,93 @@
*
* This is the main reason why we're doing stable VA mappings for RT
* services.
- *
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
*/
-#define EFI_OLD_MEMMAP EFI_ARCH_1
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
-#define MAX_CMDLINE_ADDRESS UINT_MAX
-
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
-#ifdef CONFIG_X86_32
-
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
-
-#define arch_efi_call_virt_setup() \
-({ \
- kernel_fpu_begin(); \
- firmware_restrict_branch_speculation_start(); \
-})
+#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE
-#define arch_efi_call_virt_teardown() \
-({ \
- firmware_restrict_branch_speculation_end(); \
- kernel_fpu_end(); \
-})
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \
+ __efi_arg_sentinel(9), __efi_arg_sentinel(8), \
+ __efi_arg_sentinel(7), __efi_arg_sentinel(6), \
+ __efi_arg_sentinel(5), __efi_arg_sentinel(4), \
+ __efi_arg_sentinel(3), __efi_arg_sentinel(2), \
+ __efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, ...) \
+ __take_second_arg(n, \
+ ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 10; }))
+#define __efi_arg_sentinel(n) , n
/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
*/
-#define arch_efi_call_virt(p, f, args...) \
-({ \
- ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \
+
+#define __efi_nargs_check(f, n, ...) \
+ __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({ \
+ BUILD_BUG_ON_MSG( \
+ (p) > (n), \
+ #f " called with too many arguments (" #p ">" #n ")"); \
})
-#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
+static inline void efi_fpu_begin(void)
+{
+ /*
+ * The UEFI calling convention (UEFI spec 2.3.2 and 2.3.4) requires
+ * that FCW and MXCSR (64-bit) must be initialized prior to calling
+ * UEFI code. (Oddly the spec does not require that the FPU stack
+ * be empty.)
+ */
+ kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
+}
-#else /* !CONFIG_X86_32 */
+static inline void efi_fpu_end(void)
+{
+ kernel_fpu_end();
+}
-#define EFI_LOADER_SIGNATURE "EL64"
+#ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
+#else /* !CONFIG_X86_32 */
+#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
-#define efi_call_phys(f, args...) efi_call((f), args)
+extern bool efi_disable_ibt_for_runtime;
-/*
- * struct efi_scratch - Scratch space used while switching to/from efi_mm
- * @phys_stack: stack used during EFI Mixed Mode
- * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm
- */
-struct efi_scratch {
- u64 phys_stack;
- struct mm_struct *prev_mm;
-} __packed;
-
-#define arch_efi_call_virt_setup() \
-({ \
- efi_sync_low_kernel_mappings(); \
- kernel_fpu_begin(); \
- firmware_restrict_branch_speculation_start(); \
- \
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
- efi_switch_mm(&efi_mm); \
+#define efi_call(...) ({ \
+ __efi_nargs_check(efi_call, 7, __VA_ARGS__); \
+ __efi_call(__VA_ARGS__); \
})
-#define arch_efi_call_virt(p, f, args...) \
- efi_call((void *)p->f, args) \
-
-#define arch_efi_call_virt_teardown() \
-({ \
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
- efi_switch_mm(efi_scratch.prev_mm); \
- \
- firmware_restrict_branch_speculation_end(); \
- kernel_fpu_end(); \
+#undef arch_efi_call_virt
+#define arch_efi_call_virt(p, f, args...) ({ \
+ u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \
+ ret = efi_call((void *)p->f, args); \
+ ibt_restore(ibt); \
+ ret; \
})
-extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
- u32 type, u64 attribute);
-
#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
@@ -118,138 +125,306 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
-extern struct efi_scratch efi_scratch;
-extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_print_memmap(void);
-extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern int __init efi_alloc_page_tables(void);
extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void __init old_map_region(efi_memory_desc_t *md);
-extern void __init runtime_code_page_mkexec(void);
extern void __init efi_runtime_update_mappings(void);
extern void __init efi_dump_pagetable(void);
extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
-extern void efi_switch_mm(struct mm_struct *mm);
-extern void efi_recover_from_page_fault(unsigned long phys_addr);
+extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
-struct efi_setup_data {
- u64 fw_vendor;
- u64 runtime;
- u64 tables;
- u64 smbios;
- u64 reserved[8];
-};
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
extern u64 efi_setup;
#ifdef CONFIG_EFI
+extern u64 __efi64_thunk(u32, ...);
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({ \
+ u64 __pad[3]; /* must have space for 3 args on the stack */ \
+ __efi_nargs_check(efi64_thunk, 9, __VA_ARGS__); \
+ __efi64_thunk(__VA_ARGS__, __pad); \
+})
+
+static inline bool efi_is_mixed(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return false;
+ return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
}
static inline bool efi_runtime_supported(void)
{
- if (efi_is_native())
+ if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
return true;
- if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
- return true;
-
- return false;
+ return IS_ENABLED(CONFIG_EFI_MIXED);
}
-extern struct console early_efi_console;
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+extern void efi_thunk_runtime_setup(void);
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys);
+
+/* arch specific definitions used by the stub code */
#ifdef CONFIG_EFI_MIXED
-extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
+
+#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
+#define ARCH_HAS_EFISTUB_WRAPPERS
+
+static inline bool efi_is_64bit(void)
{
- return EFI_SUCCESS;
+ extern const bool efi_is64;
+
+ return efi_is64;
}
-#endif /* CONFIG_EFI_MIXED */
+static inline bool efi_is_native(void)
+{
+ return efi_is_64bit();
+}
-/* arch specific definitions used by the stub code */
+#define efi_table_attr(inst, attr) \
+ (efi_is_native() ? (inst)->attr \
+ : efi_mixed_table_attr((inst), attr))
-struct efi_config {
- u64 image_handle;
- u64 table;
- u64 runtime_services;
- u64 boot_services;
- u64 text_output;
- efi_status_t (*call)(unsigned long, ...);
- bool is64;
-} __packed;
+#define efi_mixed_table_attr(inst, attr) \
+ (__typeof__(inst->attr)) \
+ _Generic(inst->mixed_mode.attr, \
+ u32: (unsigned long)(inst->mixed_mode.attr), \
+ default: (inst->mixed_mode.attr))
-__pure const struct efi_config *__efi_early(void);
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ * free_pages(addr, size)
+ * must be translated to
+ * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
-static inline bool efi_is_64bit(void)
+static inline void *efi64_zero_upper(void *p)
{
- if (!IS_ENABLED(CONFIG_X86_64))
- return false;
+ if (p)
+ ((u32 *)p)[1] = 0;
+ return p;
+}
- if (!IS_ENABLED(CONFIG_EFI_MIXED))
- return true;
+static inline u32 efi64_convert_status(efi_status_t status)
+{
+ return (u32)(status | (u64)status >> 32);
+}
- return __efi_early()->is64;
+#define __efi64_split(val) (val) & U32_MAX, (u64)(val) >> 32
+
+#define __efi64_argmap_free_pages(addr, size) \
+ ((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \
+ ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer) \
+ ((type), (size), efi64_zero_upper(buffer))
+
+#define __efi64_argmap_locate_handle_buffer(type, proto, key, num, buf) \
+ ((type), (proto), (key), efi64_zero_upper(num), efi64_zero_upper(buf))
+
+#define __efi64_argmap_create_event(type, tpl, f, c, event) \
+ ((type), (tpl), (f), (c), efi64_zero_upper(event))
+
+#define __efi64_argmap_set_timer(event, type, time) \
+ ((event), (type), lower_32_bits(time), upper_32_bits(time))
+
+#define __efi64_argmap_wait_for_event(num, event, index) \
+ ((num), (event), efi64_zero_upper(index))
+
+#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
+ ((handle), (protocol), efi64_zero_upper(interface))
+
+#define __efi64_argmap_locate_protocol(protocol, reg, interface) \
+ ((protocol), (reg), efi64_zero_upper(interface))
+
+#define __efi64_argmap_locate_device_path(protocol, path, handle) \
+ ((protocol), (path), efi64_zero_upper(handle))
+
+#define __efi64_argmap_exit(handle, status, size, data) \
+ ((handle), efi64_convert_status(status), (size), (data))
+
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \
+ ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \
+ efi64_zero_upper(dev), efi64_zero_upper(func))
+
+/* LoadFile */
+#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \
+ ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf))
+
+/* Graphics Output Protocol */
+#define __efi64_argmap_query_mode(gop, mode, size, info) \
+ ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info))
+
+/* TCG2 protocol */
+#define __efi64_argmap_hash_log_extend_event(prot, fl, addr, size, ev) \
+ ((prot), (fl), 0ULL, (u64)(addr), 0ULL, (u64)(size), 0ULL, ev)
+
+/* DXE services */
+#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \
+ (__efi64_split(phys), (desc))
+
+#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
+ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
+/* file protocol */
+#define __efi64_argmap_open(prot, newh, fname, mode, attr) \
+ ((prot), efi64_zero_upper(newh), (fname), __efi64_split(mode), \
+ __efi64_split(attr))
+
+#define __efi64_argmap_set_position(pos) (__efi64_split(pos))
+
+/* file system protocol */
+#define __efi64_argmap_open_volume(prot, file) \
+ ((prot), efi64_zero_upper(file))
+
+/* Memory Attribute Protocol */
+#define __efi64_argmap_get_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), (flags))
+
+#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
+#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
+ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
+/* EFI SMBIOS protocol */
+#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \
+ ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \
+ efi64_zero_upper(phandle))
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
+
+#define __efi64_thunk_map(inst, func, ...) \
+ efi64_thunk(inst->mixed_mode.func, \
+ __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \
+ (__VA_ARGS__)))
+
+#define __efi64_argmap(mapped, args) \
+ __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
+
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
+
+static inline efi_status_t __efi64_widen_efi_status(u64 status)
+{
+ /* use rotate to move the value of bit #31 into position #63 */
+ return ror64(rol32(status, 1), 1);
}
-#define efi_table_attr(table, attr, instance) \
- (efi_is_64bit() ? \
- ((table##_64_t *)(unsigned long)instance)->attr : \
- ((table##_32_t *)(unsigned long)instance)->attr)
+/* The macro below handles dispatching via the thunk if needed */
-#define efi_call_proto(protocol, f, instance, ...) \
- __efi_early()->call(efi_table_attr(protocol, f, instance), \
- instance, ##__VA_ARGS__)
+#define efi_fn_call(inst, func, ...) \
+ (efi_is_native() ? (inst)->func(__VA_ARGS__) \
+ : efi_mixed_call((inst), func, ##__VA_ARGS__))
-#define efi_call_early(f, ...) \
- __efi_early()->call(efi_table_attr(efi_boot_services, f, \
- __efi_early()->boot_services), __VA_ARGS__)
+#define efi_mixed_call(inst, func, ...) \
+ _Generic(inst->func(__VA_ARGS__), \
+ efi_status_t: \
+ __efi64_widen_efi_status( \
+ __efi64_thunk_map(inst, func, ##__VA_ARGS__)), \
+ u64: ({ BUILD_BUG(); ULONG_MAX; }), \
+ default: \
+ (__typeof__(inst->func(__VA_ARGS__))) \
+ __efi64_thunk_map(inst, func, ##__VA_ARGS__))
-#define __efi_call_early(f, ...) \
- __efi_early()->call((unsigned long)f, __VA_ARGS__);
+#else /* CONFIG_EFI_MIXED */
+
+static inline bool efi_is_64bit(void)
+{
+ return IS_ENABLED(CONFIG_X86_64);
+}
-#define efi_call_runtime(f, ...) \
- __efi_early()->call(efi_table_attr(efi_runtime_services, f, \
- __efi_early()->runtime_services), __VA_ARGS__)
+#endif /* CONFIG_EFI_MIXED */
extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
+extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void)
{
return false;
}
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+ return false;
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
#endif /* CONFIG_EFI */
+extern int __init efi_memmap_alloc(unsigned int num_entries,
+ struct efi_memory_map_data *data);
+
+extern int __init efi_memmap_install(struct efi_memory_map_data *data);
+extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
+ struct range *range);
+extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
+ void *buf, struct efi_mem_range *mem);
+
+extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void);
+
+#define arch_ima_efi_boot_mode __x86_ima_efi_boot_mode()
+
+#ifdef CONFIG_EFI_RUNTIME_MAP
+int efi_get_runtime_map_size(void);
+int efi_get_runtime_map_desc_size(void);
+int efi_runtime_map_copy(void *buf, size_t bufsz);
+#else
+static inline int efi_get_runtime_map_size(void)
+{
+ return 0;
+}
+
+static inline int efi_get_runtime_map_desc_size(void)
+{
+ return 0;
+}
+
+static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ return 0;
+}
+
+#endif
+
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 69c0f892e310..6c8fdc96be7e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -7,6 +7,7 @@
*/
#include <linux/thread_info.h>
+#include <asm/ia32.h>
#include <asm/ptrace.h>
#include <asm/user.h>
#include <asm/auxvec.h>
@@ -21,8 +22,6 @@ typedef struct user_i387_struct elf_fpregset_t;
#ifdef __i386__
-typedef struct user_fxsr_struct elf_fpxregset_t;
-
#define R_386_NONE 0
#define R_386_32 1
#define R_386_PC32 2
@@ -55,8 +54,9 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
#define R_X86_64_RELATIVE 8 /* Adjust by program base */
-#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative
- offset to GOT */
+#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */
+#define R_X86_64_GOTPCRELX 41
+#define R_X86_64_REX_GOTPCRELX 42
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
#define R_X86_64_16 12 /* Direct 16 bit zero extended */
@@ -76,12 +76,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#include <asm/vdso.h>
-#ifdef CONFIG_X86_64
extern unsigned int vdso64_enabled;
-#endif
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
extern unsigned int vdso32_enabled;
-#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
@@ -118,7 +114,7 @@ extern unsigned int vdso32_enabled;
* now struct_user_regs, they are different)
*/
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \
@@ -130,6 +126,7 @@ do { \
pr_reg[7] = regs->ds; \
pr_reg[8] = regs->es; \
pr_reg[9] = regs->fs; \
+ savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs; \
@@ -138,18 +135,6 @@ do { \
pr_reg[16] = regs->ss; \
} while (0);
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- pr_reg[10] = get_user_gs(regs); \
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- savesegment(gs, pr_reg[10]); \
-} while (0);
-
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
@@ -162,13 +147,9 @@ do { \
((x)->e_machine == EM_X86_64)
#define compat_elf_check_arch(x) \
- (elf_check_arch_ia32(x) || \
+ ((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
(IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
-#if __USER32_DS != __USER_DS
-# error "The following code assumes __USER32_DS == __USER_DS"
-#endif
-
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
@@ -188,8 +169,9 @@ static inline void elf_common_init(struct thread_struct *t,
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
elf_common_init(&current->thread, regs, __USER_DS)
-void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
-#define compat_start_thread compat_start_thread
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32);
+#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \
+ compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64)
void set_personality_ia32(bool);
#define COMPAT_SET_PERSONALITY(ex) \
@@ -238,7 +220,6 @@ do { \
/* I'm not sure if we can use '-' here */
#define ELF_PLATFORM ("x86_64")
extern void set_personality_64bit(void);
-extern unsigned int sysctl_vsyscall32;
extern int force_personality32;
#endif /* !CONFIG_X86_32 */
@@ -281,9 +262,29 @@ extern u32 elf_hwcap2;
/*
* An executable for which elf_read_implies_exec() returns TRUE will
* have the READ_IMPLIES_EXEC personality flag set automatically.
+ *
+ * The decision process for determining the results are:
+ *
+ * CPU: | lacks NX* | has NX, ia32 | has NX, x86_64 |
+ * ELF: | | | |
+ * ---------------------|------------|------------------|----------------|
+ * missing PT_GNU_STACK | exec-all | exec-all | exec-none |
+ * PT_GNU_STACK == RWX | exec-stack | exec-stack | exec-stack |
+ * PT_GNU_STACK == RW | exec-none | exec-none | exec-none |
+ *
+ * exec-all : all PROT_READ user mappings are executable, except when
+ * backed by files on a noexec-filesystem.
+ * exec-none : only PROT_EXEC user mappings are executable.
+ * exec-stack: only the stack and PROT_EXEC user mappings are executable.
+ *
+ * *this column has no architectural effect: NX markings are ignored by
+ * hardware, but may have behavioral effects when "wants X" collides with
+ * "cannot be X" constraints in memory permission flags, as in
+ * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com
+ *
*/
#define elf_read_implies_exec(ex, executable_stack) \
- (executable_stack != EXSTACK_DISABLE_X)
+ (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT)
struct task_struct;
@@ -293,6 +294,7 @@ do { \
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
} \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/*
@@ -309,6 +311,7 @@ extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
+extern unsigned long get_sigframe_size(void);
#ifdef CONFIG_X86_32
@@ -330,6 +333,7 @@ do { \
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
@@ -338,14 +342,15 @@ do { \
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
#define AT_SYSINFO 32
#define COMPAT_ARCH_DLINFO \
-if (test_thread_flag(TIF_X32)) \
+if (exec->e_machine == EM_X86_64) \
ARCH_DLINFO_X32; \
-else \
+else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \
ARCH_DLINFO_IA32
#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
@@ -364,8 +369,12 @@ struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp);
-#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
+ int uses_interp, bool x32);
+#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \
+ compat_arch_setup_additional_pages(bprm, interpreter, \
+ (ex->e_machine == EM_X86_64))
+
+extern bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs);
/* Do not change the values. See get_align_mask() */
enum align_flags {
@@ -380,5 +389,4 @@ struct va_alignment {
} ____cacheline_aligned;
extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/elfcore-compat.h b/arch/x86/include/asm/elfcore-compat.h
new file mode 100644
index 000000000000..f1b6c7a8d8fc
--- /dev/null
+++ b/arch/x86/include/asm/elfcore-compat.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_X86_ELFCORE_COMPAT_H
+#define _ASM_X86_ELFCORE_COMPAT_H
+
+#include <asm/user32.h>
+
+/*
+ * On amd64 we have two 32bit ABIs - i386 and x32. The latter
+ * has bigger registers, so we use it for compat_elf_regset_t.
+ * The former uses i386_elf_prstatus and PRSTATUS_SIZE/SET_PR_FPVALID
+ * are used to choose the size and location of ->pr_fpvalid of
+ * the layout actually used.
+ */
+typedef struct user_regs_struct compat_elf_gregset_t;
+
+struct i386_elf_prstatus
+{
+ struct compat_elf_prstatus_common common;
+ struct user_regs_struct32 pr_reg;
+ compat_int_t pr_fpvalid;
+};
+
+#define PRSTATUS_SIZE \
+ (user_64bit_mode(task_pt_regs(current)) \
+ ? sizeof(struct compat_elf_prstatus) \
+ : sizeof(struct i386_elf_prstatus))
+#define SET_PR_FPVALID(S) \
+ (*(user_64bit_mode(task_pt_regs(current)) \
+ ? &(S)->pr_fpvalid \
+ : &((struct i386_elf_prstatus *)(S))->pr_fpvalid) = 1)
+
+#endif
diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h
new file mode 100644
index 000000000000..70f5b98a5286
--- /dev/null
+++ b/arch/x86/include/asm/emulate_prefix.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+
+#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
+
+#endif
diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h
new file mode 100644
index 000000000000..b1314e41a744
--- /dev/null
+++ b/arch/x86/include/asm/enclu.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ENCLU_H
+#define _ASM_X86_ENCLU_H
+
+#define EENTER 0x02
+#define ERESUME 0x03
+#define EEXIT 0x04
+
+#endif /* _ASM_X86_ENCLU_H */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
new file mode 100644
index 000000000000..ce3eb6d5fdf9
--- /dev/null
+++ b/arch/x86/include/asm/entry-common.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_X86_ENTRY_COMMON_H
+#define _ASM_X86_ENTRY_COMMON_H
+
+#include <linux/randomize_kstack.h>
+#include <linux/user-return-notifier.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/io_bitmap.h>
+#include <asm/fpu/api.h>
+#include <asm/fred.h>
+
+/* Check that the stack and regs on entry from user mode are sane. */
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
+ /*
+ * Make sure that the entry code gave us a sensible EFLAGS
+ * register. Native because we want to check the actual CPU
+ * state, not the interrupt state as imagined by Xen.
+ */
+ unsigned long flags = native_save_fl();
+ unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
+
+ /*
+ * For !SMAP hardware we patch out CLAC on entry.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SMAP) ||
+ cpu_feature_enabled(X86_FEATURE_XENPV))
+ mask |= X86_EFLAGS_AC;
+
+ WARN_ON_ONCE(flags & mask);
+
+ /* We think we came from user mode. Make sure pt_regs agrees. */
+ WARN_ON_ONCE(!user_mode(regs));
+
+ /*
+ * All entries from user mode (except #DF) should be on the
+ * normal thread stack and should have user pt_regs in the
+ * correct location.
+ */
+ WARN_ON_ONCE(!on_thread_stack());
+ WARN_ON_ONCE(regs != task_pt_regs(current));
+ }
+}
+#define arch_enter_from_user_mode arch_enter_from_user_mode
+
+static inline void arch_exit_work(unsigned long ti_work)
+{
+ if (ti_work & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
+
+ if (unlikely(ti_work & _TIF_IO_BITMAP))
+ tss_update_io_bitmap();
+
+ if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ fpregs_assert_state_consistent();
+
+ if (unlikely(ti_work))
+ arch_exit_work(ti_work);
+
+ fred_update_rsp0();
+
+#ifdef CONFIG_COMPAT
+ /*
+ * Compat syscalls set TS_COMPAT. Make sure we clear it before
+ * returning to user mode. We need to clear it *after* signal
+ * handling, because syscall restart has a fixup for compat
+ * syscalls. The fixup is exercised by the ptrace_syscall_32
+ * selftest.
+ *
+ * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
+ */
+ current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
+#endif
+
+ /*
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
+ * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
+ * low bits from any entropy chosen here.
+ *
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
+ */
+ choose_random_kstack_offset(rdtsc());
+
+ /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
+ if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
+ this_cpu_read(x86_ibpb_exit_to_user)) {
+ indirect_branch_prediction_barrier();
+ this_cpu_write(x86_ibpb_exit_to_user, false);
+ }
+}
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
+static __always_inline void arch_exit_to_user_mode(void)
+{
+ amd_clear_divider();
+}
+#define arch_exit_to_user_mode arch_exit_to_user_mode
+
+#endif
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
deleted file mode 100644
index 416422762845..000000000000
--- a/arch/x86/include/asm/entry_arch.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This file is designed to contain the BUILD_INTERRUPT specifications for
- * all of the extra named interrupt vectors used by the architecture.
- * Usually this is the Inter Process Interrupts (IPIs)
- */
-
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
-#ifdef CONFIG_SMP
-BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR)
-BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)
-#endif
-
-#ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
-BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR)
-BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR)
-#endif
-
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-
-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
-BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
-
-#ifdef CONFIG_IRQ_WORK
-BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_MCE_THRESHOLD
-BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
-#endif
-#endif
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
deleted file mode 100644
index 47b7a1296245..000000000000
--- a/arch/x86/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-asmlinkage void just_return_func(void);
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198d543b..a0e0c6b50155 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -1,12 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EXTABLE_H
#define _ASM_X86_EXTABLE_H
+
+#include <asm/extable_fixup_types.h>
+
/*
- * The exception table consists of triples of addresses relative to the
- * exception table entry itself. The first address is of an instruction
- * that is allowed to fault, the second is the target at which the program
- * should continue. The third is a handler function to deal with the fault
- * caused by the instruction in the first field.
+ * The exception table consists of two addresses relative to the
+ * exception table entry itself and a type selector field.
+ *
+ * The first address is of an instruction that is allowed to fault, the
+ * second is the target at which the program should continue.
+ *
+ * The type entry is used by fixup_exception() to select the handler to
+ * deal with the fault caused by the instruction in the first field.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -15,7 +21,7 @@
*/
struct exception_table_entry {
- int insn, fixup, handler;
+ int insn, fixup, data;
};
struct pt_regs;
@@ -25,14 +31,30 @@ struct pt_regs;
do { \
(a)->fixup = (b)->fixup + (delta); \
(b)->fixup = (tmp).fixup - (delta); \
- (a)->handler = (b)->handler + (delta); \
- (b)->handler = (tmp).handler - (delta); \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
} while (0)
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
-extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
+#ifdef CONFIG_X86_MCE
+extern void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
+#else
+static inline void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
+{
+ for (;;)
+ cpu_relax();
+}
+#endif
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64)
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs);
+#else
+static inline bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs) { return false; }
+#endif
+
#endif
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
new file mode 100644
index 000000000000..906b0d5541e8
--- /dev/null
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H
+#define _ASM_X86_EXTABLE_FIXUP_TYPES_H
+
+/*
+ * Our IMM is signed, as such it must live at the top end of the word. Also,
+ * since C99 hex constants are of ambiguous type, force cast the mask to 'int'
+ * so that FIELD_GET() will DTRT and sign extend the value when it extracts it.
+ */
+#define EX_DATA_TYPE_MASK ((int)0x000000FF)
+#define EX_DATA_REG_MASK ((int)0x00000F00)
+#define EX_DATA_FLAG_MASK ((int)0x0000F000)
+#define EX_DATA_IMM_MASK ((int)0xFFFF0000)
+
+#define EX_DATA_REG_SHIFT 8
+#define EX_DATA_FLAG_SHIFT 12
+#define EX_DATA_IMM_SHIFT 16
+
+#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT)
+#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT)
+#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT)
+
+/* segment regs */
+#define EX_REG_DS EX_DATA_REG(8)
+#define EX_REG_ES EX_DATA_REG(9)
+#define EX_REG_FS EX_DATA_REG(10)
+#define EX_REG_GS EX_DATA_REG(11)
+
+/* flags */
+#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1)
+#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2)
+#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3)
+
+/* types */
+#define EX_TYPE_NONE 0
+#define EX_TYPE_DEFAULT 1
+#define EX_TYPE_FAULT 2
+#define EX_TYPE_UACCESS 3
+/* unused, was: #define EX_TYPE_COPY 4 */
+#define EX_TYPE_CLEAR_FS 5
+#define EX_TYPE_FPU_RESTORE 6
+#define EX_TYPE_BPF 7
+#define EX_TYPE_WRMSR 8
+#define EX_TYPE_RDMSR 9
+#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */
+#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */
+#define EX_TYPE_WRMSR_IN_MCE 12
+#define EX_TYPE_RDMSR_IN_MCE 13
+#define EX_TYPE_DEFAULT_MCE_SAFE 14
+#define EX_TYPE_FAULT_MCE_SAFE 15
+
+#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */
+#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0))
+
+#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */
+#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT))
+#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0))
+#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1))
+
+#define EX_TYPE_FAULT_SGX 18
+
+#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */
+#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1))
+#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
+#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
+
+#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
+
+#define EX_TYPE_ERETU 21
+
+#endif
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
deleted file mode 100644
index ab4c960146e3..000000000000
--- a/arch/x86/include/asm/fb.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- unsigned long prot;
-
- prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK;
- if (boot_cpu_data.x86 > 3)
- pgprot_val(vma->vm_page_prot) =
- prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
-}
-
-extern int fb_is_primary_device(struct fb_info *info);
-
-#endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 50ba74a34a37..4519c9f35ba0 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,24 +14,30 @@
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
+#include <asm/kmap_size.h>
+
/*
* Exposed to assembly code for setting up initial page tables. Cannot be
* calculated in assembly code (fixmap entries are an enum), but is sanity
* checked in the actual fixmap C code to make sure that the fixmap is
* covered fully.
*/
-#define FIXMAP_PMD_NUM 2
+#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+# define FIXMAP_PMD_NUM 2
+#else
+# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512))
+# define FIXMAP_PMD_NUM (KM_PMDS + 2)
+#endif
/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
#define FIXMAP_PMD_TOP 507
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
-#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm/pgtable_types.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
-#include <asm/kmap_types.h>
#else
#include <uapi/asm/vsyscall.h>
#endif
@@ -42,8 +48,7 @@
* Because of this, FIXADDR_TOP x86 integration was left as later work.
*/
#ifdef CONFIG_X86_32
-/* used by vmalloc.c, vsyscall.lds.S.
- *
+/*
* Leave one empty page between vmalloc'ed areas and
* the start of the fixmap.
*/
@@ -93,21 +98,16 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_KMAP_LOCAL
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+ FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
#ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
#endif
#endif
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
FIX_PARAVIRT_BOOTMAP,
#endif
- FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
- FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
-#ifdef CONFIG_X86_INTEL_MID
- FIX_LNW_VRTC,
-#endif
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
@@ -122,7 +122,7 @@ enum fixed_addresses {
* before ioremap() is functional.
*
* If necessary we round it up to the next 512 pages boundary so
- * that we can have a single pgd entry and a single pte table:
+ * that we can have a single pmd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 8
@@ -154,12 +154,10 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;
-extern pte_t *kmap_pte;
-#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
-void native_set_fixmap(enum fixed_addresses idx,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags);
#ifndef CONFIG_PARAVIRT_XXL
@@ -198,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
void __early_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 7ec59edde154..e7a244051c62 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -10,6 +10,7 @@
#ifndef _ASM_X86_FLOPPY_H
#define _ASM_X86_FLOPPY_H
+#include <linux/sizes.h>
#include <linux/vmalloc.h>
/*
@@ -22,17 +23,14 @@
*/
#define _CROSS_64KB(a, s, vdma) \
(!(vdma) && \
- ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
-
-#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1)
-
+ ((unsigned long)(a) / SZ_64K != ((unsigned long)(a) + (s) - 1) / SZ_64K))
#define SW fd_routine[use_virtual_dma & 1]
#define CSW fd_routine[can_use_virtual_dma & 1]
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value, port) outb_p(value, port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@@ -74,28 +72,28 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
int lcount;
char *lptr;
- st = 1;
for (lcount = virtual_dma_count, lptr = virtual_dma_addr;
lcount; lcount--, lptr++) {
- st = inb(virtual_dma_port + 4) & 0xa0;
- if (st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port + 5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port + 5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port + 4);
+ st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
- if (st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT
@@ -206,7 +204,7 @@ static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
{
#ifdef FLOPPY_SANITY_CHECK
- if (CROSS_64KB(addr, size)) {
+ if (_CROSS_64KB(addr, size, use_virtual_dma & 1)) {
printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
return -1;
}
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 000000000000..b2743fe19339
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b56d504af654..cd6f194a912b 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -10,17 +10,94 @@
#ifndef _ASM_X86_FPU_API_H
#define _ASM_X86_FPU_API_H
+#include <linux/bottom_half.h>
+
+#include <asm/fpu/types.h>
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
- * disables preemption so be careful if you intend to use it for long periods
- * of time.
- * If you intend to use the FPU in softirq you need to check first with
- * irq_fpu_usable() if it is possible.
+ * disables preemption and softirq processing, so be careful if you intend to
+ * use it for long periods of time. Kernel-mode FPU cannot be used in all
+ * contexts -- see irq_fpu_usable() for details.
*/
-extern void kernel_fpu_begin(void);
+
+/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */
+#define KFPU_387 _BITUL(0) /* 387 state will be initialized */
+#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */
+
+extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
+extern void fpregs_mark_activate(void);
+
+/* Code that is unaware of kernel_fpu_begin_mask() can use this */
+static inline void kernel_fpu_begin(void)
+{
+#ifdef CONFIG_X86_64
+ /*
+ * Any 64-bit code that uses 387 instructions must explicitly request
+ * KFPU_387.
+ */
+ kernel_fpu_begin_mask(KFPU_MXCSR);
+#else
+ /*
+ * 32-bit kernel code may use 387 operations as well as SSE2, etc,
+ * as long as it checks that the CPU has the required capability.
+ */
+ kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
+#endif
+}
+
+/*
+ * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate, or while
+ * using the FPU in kernel mode. A context switch will (and softirq might) save
+ * CPU's FPU registers to fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving
+ * CPU's FPU registers in a random state.
+ *
+ * local_bh_disable() protects against both preemption and soft interrupts
+ * on !RT kernels.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
+ */
+static inline void fpregs_lock(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
+}
+
+static inline void fpregs_unlock(void)
+{
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
+}
+
+/*
+ * FPU state gets lazily restored before returning to userspace. So when in the
+ * kernel, the valid FPU state may be kept in the buffer. This function will force
+ * restore all the fpu state to the registers early if needed, and lock them from
+ * being automatically saved/restored. Then FPU state can be modified safely in the
+ * registers, before unlocking with fpregs_unlock().
+ */
+void fpregs_lock_and_load(void);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+extern void fpregs_assert_state_consistent(void);
+#else
+static inline void fpregs_assert_state_consistent(void) { }
+#endif
+
+/*
+ * Load the task FPU state before returning to userspace.
+ */
+extern void switch_fpu_return(void);
/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
@@ -31,4 +108,73 @@ extern bool irq_fpu_usable(void);
*/
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
+/* Trap handling */
+extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
+extern void fpu_sync_fpstate(struct fpu *fpu);
+extern void fpu_reset_from_exception_fixup(void);
+
+/* Boot, hotplug and resume */
+extern void fpu__init_cpu(void);
+extern void fpu__init_system(void);
+extern void fpu__init_check_bugs(void);
+extern void fpu__resume_cpu(void);
+
+#ifdef CONFIG_MATH_EMULATION
+extern void fpstate_init_soft(struct swregs_state *soft);
+#else
+static inline void fpstate_init_soft(struct swregs_state *soft) {}
+#endif
+
+/* State tracking */
+DECLARE_PER_CPU(bool, kernel_fpu_allowed);
+DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+/* Process cleanup */
+#ifdef CONFIG_X86_64
+extern void fpstate_free(struct fpu *fpu);
+#else
+static inline void fpstate_free(struct fpu *fpu) { }
+#endif
+
+/* fpstate-related functions which are exported to KVM */
+extern void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature);
+
+extern u64 xstate_get_guest_group_perm(void);
+
+extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+
+
+/* KVM specific functions */
+extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
+extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
+extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
+extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);
+
+#ifdef CONFIG_X86_64
+extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
+extern void fpu_sync_guest_vmexit_xfd_state(void);
+#else
+static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
+static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+#endif
+
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u64 xfeatures, u32 pkru);
+extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+
+static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+{
+ gfpu->fpstate->is_confidential = true;
+}
+
+static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
+{
+ return gfpu->fpstate->is_confidential;
+}
+
+/* prctl */
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
+
+extern void fpu_idle_fpregs(void);
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
deleted file mode 100644
index fa2c93cb42a2..000000000000
--- a/arch/x86/include/asm/fpu/internal.h
+++ /dev/null
@@ -1,610 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _ASM_X86_FPU_INTERNAL_H
-#define _ASM_X86_FPU_INTERNAL_H
-
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-
-#include <asm/user.h>
-#include <asm/fpu/api.h>
-#include <asm/fpu/xstate.h>
-#include <asm/cpufeature.h>
-#include <asm/trace/fpu.h>
-
-/*
- * High level FPU state handling functions:
- */
-extern void fpu__initialize(struct fpu *fpu);
-extern void fpu__prepare_read(struct fpu *fpu);
-extern void fpu__prepare_write(struct fpu *fpu);
-extern void fpu__save(struct fpu *fpu);
-extern void fpu__restore(struct fpu *fpu);
-extern int fpu__restore_sig(void __user *buf, int ia32_frame);
-extern void fpu__drop(struct fpu *fpu);
-extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
-extern void fpu__clear(struct fpu *fpu);
-extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
-extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
-
-/*
- * Boot time FPU initialization functions:
- */
-extern void fpu__init_cpu(void);
-extern void fpu__init_system_xstate(void);
-extern void fpu__init_cpu_xstate(void);
-extern void fpu__init_system(struct cpuinfo_x86 *c);
-extern void fpu__init_check_bugs(void);
-extern void fpu__resume_cpu(void);
-extern u64 fpu__get_supported_xfeatures_mask(void);
-
-/*
- * Debugging facility:
- */
-#ifdef CONFIG_X86_DEBUG_FPU
-# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
-#else
-# define WARN_ON_FPU(x) ({ (void)(x); 0; })
-#endif
-
-/*
- * FPU related CPU feature flag helper routines:
- */
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has(X86_FEATURE_FXSR);
-}
-
-/*
- * fpstate handling functions:
- */
-
-extern union fpregs_state init_fpstate;
-
-extern void fpstate_init(union fpregs_state *state);
-#ifdef CONFIG_MATH_EMULATION
-extern void fpstate_init_soft(struct swregs_state *soft);
-#else
-static inline void fpstate_init_soft(struct swregs_state *soft) {}
-#endif
-
-static inline void fpstate_init_xstate(struct xregs_state *xsave)
-{
- /*
- * XRSTORS requires these bits set in xcomp_bv, or it will
- * trigger #GP:
- */
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
-}
-
-static inline void fpstate_init_fxstate(struct fxregs_state *fx)
-{
- fx->cwd = 0x37f;
- fx->mxcsr = MXCSR_DEFAULT;
-}
-extern void fpstate_sanitize_xstate(struct fpu *fpu);
-
-#define user_insn(insn, output, input...) \
-({ \
- int err; \
- \
- might_fault(); \
- \
- asm volatile(ASM_STAC "\n" \
- "1:" #insn "\n\t" \
- "2: " ASM_CLAC "\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-#define kernel_insn(insn, output, input...) \
- asm volatile("1:" #insn "\n\t" \
- "2:\n" \
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
- : output : input)
-
-static inline int copy_fregs_to_user(struct fregs_state __user *fx)
-{
- return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
-}
-
-static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
- return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
-
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
-}
-
-static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32)) {
- kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- /* See comment in copy_fxregs_to_kernel() below. */
- kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
- }
- }
-}
-
-static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
- return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
-}
-
-static inline void copy_kernel_to_fregs(struct fregs_state *fx)
-{
- kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int copy_user_to_fregs(struct fregs_state __user *fx)
-{
- return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void copy_fxregs_to_kernel(struct fpu *fpu)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
- asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else {
- /* Using "rex64; fxsave %0" is broken because, if the memory
- * operand uses any extended registers for addressing, a second
- * REX prefix will be generated (to the assembler, rex64
- * followed by semicolon is a separate instruction), and hence
- * the 64-bitness is lost.
- *
- * Using "fxsaveq %0" would be the ideal choice, but is only
- * supported starting with gas 2.16.
- *
- * Using, as a workaround, the properly prefixed form below
- * isn't accepted by any binutils version so far released,
- * complaining that the same type of prefix is used twice if
- * an extended register is needed for addressing (fix submitted
- * to mainline 2005-11-21).
- *
- * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
- *
- * This, however, we can work around by forcing the compiler to
- * select an addressing mode that doesn't require extended
- * registers.
- */
- asm volatile( "rex64/fxsave (%[fx])"
- : "=m" (fpu->state.fxsave)
- : [fx] "R" (&fpu->state.fxsave));
- }
-}
-
-/* These macros all use (%edi)/(%rdi) as the single memory argument. */
-#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
-#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
-#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
-#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-
-#define XSTATE_OP(op, st, lmask, hmask, err) \
- asm volatile("1:" op "\n\t" \
- "xor %[err], %[err]\n" \
- "2:\n\t" \
- ".pushsection .fixup,\"ax\"\n\t" \
- "3: movl $-2,%[err]\n\t" \
- "jmp 2b\n\t" \
- ".popsection\n\t" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
- *
- * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
- * supports modified optimization which is not supported by XSAVE.
- *
- * We use XSAVE as a fallback.
- *
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
- * original instruction which gets replaced. We need to use it here as the
- * address of the instruction where we might get an exception at.
- */
-#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile(ALTERNATIVE_2(XSAVE, \
- XSAVEOPT, X86_FEATURE_XSAVEOPT, \
- XSAVES, X86_FEATURE_XSAVES) \
- "\n" \
- "xor %[err], %[err]\n" \
- "3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
- : [err] "=r" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
- * XSAVE area format.
- */
-#define XSTATE_XRESTORE(st, lmask, hmask) \
- asm volatile(ALTERNATIVE(XRSTOR, \
- XRSTORS, X86_FEATURE_XSAVES) \
- "\n" \
- "3:\n" \
- _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
- : \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
-{
- u64 mask = -1;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (static_cpu_has(X86_FEATURE_XSAVES))
- XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
- else
- XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
-
- /* We should never fault when copying to a kernel buffer: */
- WARN_ON_FPU(err);
-}
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
-{
- u64 mask = -1;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (static_cpu_has(X86_FEATURE_XSAVES))
- XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
- else
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
-
- /*
- * We should never fault when copying from a kernel buffer, and the FPU
- * state we set at boot time should be valid.
- */
- WARN_ON_FPU(err);
-}
-
-/*
- * Save processor xstate to xsave area.
- */
-static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
-{
- u64 mask = -1;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON_FPU(!alternatives_patched);
-
- XSTATE_XSAVE(xstate, lmask, hmask, err);
-
- /* We should never fault when copying to a kernel buffer: */
- WARN_ON_FPU(err);
-}
-
-/*
- * Restore processor xstate from xsave area.
- */
-static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- XSTATE_XRESTORE(xstate, lmask, hmask);
-}
-
-/*
- * Save xstate to user space xsave area.
- *
- * We don't use modified optimization because xrstor/xrstors might track
- * a different application.
- *
- * We don't use compacted format xsave area for
- * backward compatibility for old applications which don't understand
- * compacted format of xsave area.
- */
-static inline int copy_xregs_to_user(struct xregs_state __user *buf)
-{
- int err;
-
- /*
- * Clear the xsave header first, so that reserved fields are
- * initialized to zero.
- */
- err = __clear_user(&buf->header, sizeof(buf->header));
- if (unlikely(err))
- return -EFAULT;
-
- stac();
- XSTATE_OP(XSAVE, buf, -1, -1, err);
- clac();
-
- return err;
-}
-
-/*
- * Restore xstate from user space xsave area.
- */
-static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
-{
- struct xregs_state *xstate = ((__force struct xregs_state *)buf);
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- stac();
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
- clac();
-
- return err;
-}
-
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact and we can
- * keep registers active.
- *
- * The legacy FNSAVE instruction cleared all FPU state
- * unconditionally, so registers are essentially destroyed.
- * Modern FPU state can be kept in registers, if there are
- * no pending FP exceptions.
- */
-static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
-{
- if (likely(use_xsave())) {
- copy_xregs_to_kernel(&fpu->state.xsave);
- return 1;
- }
-
- if (likely(use_fxsr())) {
- copy_fxregs_to_kernel(fpu);
- return 1;
- }
-
- /*
- * Legacy FPU register saving, FNSAVE always clears FPU registers,
- * so we have to mark them inactive:
- */
- asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
-
- return 0;
-}
-
-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
-{
- if (use_xsave()) {
- copy_kernel_to_xregs(&fpstate->xsave, mask);
- } else {
- if (use_fxsr())
- copy_kernel_to_fxregs(&fpstate->fxsave);
- else
- copy_kernel_to_fregs(&fpstate->fsave);
- }
-}
-
-static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
-{
- /*
- * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
- * pending. Clear the x87 state here by setting it to fixed values.
- * "m" is a random variable that should be in L1.
- */
- if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
- asm volatile(
- "fnclex\n\t"
- "emms\n\t"
- "fildl %P[addr]" /* set F?P to defined value */
- : : [addr] "m" (fpstate));
- }
-
- __copy_kernel_to_fpregs(fpstate, -1);
-}
-
-extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
-
-/*
- * FPU context switch related helper methods:
- */
-
-DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-
-/*
- * The in-register FPU state for an FPU context on a CPU is assumed to be
- * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
- * matches the FPU.
- *
- * If the FPU register state is valid, the kernel can skip restoring the
- * FPU state from memory.
- *
- * Any code that clobbers the FPU registers or updates the in-memory
- * FPU state for a task MUST let the rest of the kernel know that the
- * FPU registers are no longer valid for this task.
- *
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
- * (with preemption disabled), FPU for the current task, or a task that
- * is prevented from running by the current task.
- */
-static inline void __cpu_invalidate_fpregs_state(void)
-{
- __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-}
-
-static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
-{
- fpu->last_cpu = -1;
-}
-
-static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-/*
- * These generally need preemption protection to work,
- * do try to avoid using these on their own:
- */
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- trace_x86_fpu_regs_deactivated(fpu);
-}
-
-static inline void fpregs_activate(struct fpu *fpu)
-{
- this_cpu_write(fpu_fpregs_owner_ctx, fpu);
- trace_x86_fpu_regs_activated(fpu);
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- * - switch_fpu_prepare() saves the old state.
- * This is done within the context of the old process.
- *
- * - switch_fpu_finish() restores the new state as
- * necessary.
- */
-static inline void
-switch_fpu_prepare(struct fpu *old_fpu, int cpu)
-{
- if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
- if (!copy_fpregs_to_fpstate(old_fpu))
- old_fpu->last_cpu = -1;
- else
- old_fpu->last_cpu = cpu;
-
- /* But leave fpu_fpregs_owner_ctx! */
- trace_x86_fpu_regs_deactivated(old_fpu);
- } else
- old_fpu->last_cpu = -1;
-}
-
-/*
- * Misc helper functions:
- */
-
-/*
- * Set up the userspace FPU context for the new task, if the task
- * has used the FPU.
- */
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
-{
- bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->initialized;
-
- if (preload) {
- if (!fpregs_state_valid(new_fpu, cpu))
- copy_kernel_to_fpregs(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
- struct fpu *fpu = &current->thread.fpu;
-
- preempt_disable();
- fpregs_activate(fpu);
- preempt_enable();
-}
-
-/*
- * MXCSR and XCR definitions:
- */
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK 0x00000000
-
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
-}
-
-#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
index d5bdffb9d27f..697b77e96025 100644
--- a/arch/x86/include/asm/fpu/regset.h
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -7,11 +7,12 @@
#include <linux/regset.h>
-extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
+ ssp_active;
+extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+ xstateregs_get, ssp_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
+ xstateregs_set, ssp_set;
/*
* xstateregs_active == regset_fpregs_active. Please refer to the comment
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
new file mode 100644
index 000000000000..89004f4ca208
--- /dev/null
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_SCHED_H
+#define _ASM_X86_FPU_SCHED_H
+
+#include <linux/sched.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/types.h>
+
+#include <asm/trace/fpu.h>
+
+extern void save_fpregs_to_fpstate(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
+extern int fpu_clone(struct task_struct *dst, u64 clone_flags, bool minimal,
+ unsigned long shstk_addr);
+extern void fpu_flush_thread(void);
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * switch_fpu() saves the old state and sets TIF_NEED_FPU_LOAD if
+ * TIF_NEED_FPU_LOAD is not set. This is done within the context
+ * of the old process.
+ *
+ * Once TIF_NEED_FPU_LOAD is set, it is required to load the
+ * registers before returning to userland or using the content
+ * otherwise.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
+ */
+static inline void switch_fpu(struct task_struct *old, int cpu)
+{
+ if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
+ cpu_feature_enabled(X86_FEATURE_FPU) &&
+ !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct fpu *old_fpu = x86_task_fpu(old);
+
+ set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
+ save_fpregs_to_fpstate(old_fpu);
+ /*
+ * The save operation preserved register state, so the
+ * fpu_fpregs_owner_ctx is still @old_fpu. Store the
+ * current CPU number in @old_fpu, so the next return
+ * to user space can avoid the FPU register restore
+ * when is returns on the same CPU and still owns the
+ * context. See fpregs_restore_userregs().
+ */
+ old_fpu->last_cpu = cpu;
+
+ trace_x86_fpu_regs_deactivated(old_fpu);
+ }
+}
+
+#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 44bbc39a57b3..eccc75bc9c4f 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -5,30 +5,33 @@
#ifndef _ASM_X86_FPU_SIGNAL_H
#define _ASM_X86_FPU_SIGNAL_H
+#include <linux/compat.h>
+#include <linux/user.h>
+
+#include <asm/fpu/types.h>
+
#ifdef CONFIG_X86_64
# include <uapi/asm/sigcontext.h>
# include <asm/user32.h>
-struct ksignal;
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
#else
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
-# define ia32_setup_frame __setup_frame
-# define ia32_setup_rt_frame __setup_rt_frame
#endif
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
+extern void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env);
unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size);
-extern void fpu__init_prepare_fx_sw_frame(void);
+unsigned long fpu__get_fpstate_size(void);
+
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru);
+extern void fpu__clear_user_states(struct fpu *fpu);
+extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
+extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask);
#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c53918ecf..93e99d2583d6 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -2,8 +2,10 @@
/*
* FPU data structures:
*/
-#ifndef _ASM_X86_FPU_H
-#define _ASM_X86_FPU_H
+#ifndef _ASM_X86_FPU_TYPES_H
+#define _ASM_X86_FPU_TYPES_H
+
+#include <asm/page_types.h>
/*
* The legacy x87 FPU state format, as saved by FSAVE and
@@ -114,6 +116,16 @@ enum xfeature {
XFEATURE_Hi16_ZMM,
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
+ XFEATURE_PASID,
+ XFEATURE_CET_USER,
+ XFEATURE_CET_KERNEL,
+ XFEATURE_RSRVD_COMP_13,
+ XFEATURE_RSRVD_COMP_14,
+ XFEATURE_LBR,
+ XFEATURE_RSRVD_COMP_16,
+ XFEATURE_XTILE_CFG,
+ XFEATURE_XTILE_DATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -128,12 +140,26 @@ enum xfeature {
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
+#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER)
+#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL)
+#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
+#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
+#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
+#define XFEATURE_MASK_APX (1 << XFEATURE_APX)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
| XFEATURE_MASK_ZMM_Hi256 \
| XFEATURE_MASK_Hi16_ZMM)
+#ifdef CONFIG_X86_64
+# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \
+ | XFEATURE_MASK_XTILE_CFG)
+#else
+# define XFEATURE_MASK_XTILE (0)
+#endif
+
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
struct reg_128_bit {
@@ -145,6 +171,9 @@ struct reg_256_bit {
struct reg_512_bit {
u8 regbytes[512/8];
};
+struct reg_1024_byte {
+ u8 regbytes[1024];
+};
/*
* State component 2:
@@ -229,6 +258,78 @@ struct pkru_state {
u32 pad;
} __packed;
+/*
+ * State component 11 is Control-flow Enforcement user states
+ */
+struct cet_user_state {
+ /* user control-flow settings */
+ u64 user_cet;
+ /* user shadow stack pointer */
+ u64 user_ssp;
+};
+
+/*
+ * State component 12 is Control-flow Enforcement supervisor states.
+ * This state includes SSP pointers for privilege levels 0 through 2.
+ */
+struct cet_supervisor_state {
+ u64 pl0_ssp;
+ u64 pl1_ssp;
+ u64 pl2_ssp;
+} __packed;
+
+/*
+ * State component 15: Architectural LBR configuration state.
+ * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
+ */
+
+struct lbr_entry {
+ u64 from;
+ u64 to;
+ u64 info;
+};
+
+struct arch_lbr_state {
+ u64 lbr_ctl;
+ u64 lbr_depth;
+ u64 ler_from;
+ u64 ler_to;
+ u64 ler_info;
+ struct lbr_entry entries[];
+};
+
+/*
+ * State component 17: 64-byte tile configuration register.
+ */
+struct xtile_cfg {
+ u64 tcfg[8];
+} __packed;
+
+/*
+ * State component 18: 1KB tile data register.
+ * Each register represents 16 64-byte rows of the matrix
+ * data. But the number of registers depends on the actual
+ * implementation.
+ */
+struct xtile_data {
+ struct reg_1024_byte tmm;
+} __packed;
+
+/*
+ * State component 19: 8B extended general purpose register.
+ */
+struct apx_state {
+ u64 egpr[16];
+} __packed;
+
+/*
+ * State component 10 is supervisor state used for context-switching the
+ * PASID state.
+ */
+struct ia32_pasid_state {
+ u64 pasid;
+} __packed;
+
struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
@@ -253,7 +354,7 @@ struct xstate_header {
struct xregs_state {
struct fxregs_state i387;
struct xstate_header header;
- u8 extended_state_area[0];
+ u8 extended_state_area[];
} __attribute__ ((packed, aligned (64)));
/*
@@ -273,6 +374,95 @@ union fpregs_state {
u8 __padding[PAGE_SIZE];
};
+struct fpstate {
+ /* @kernel_size: The size of the kernel register image */
+ unsigned int size;
+
+ /* @user_size: The size in non-compacted UABI format */
+ unsigned int user_size;
+
+ /* @xfeatures: xfeatures for which the storage is sized */
+ u64 xfeatures;
+
+ /* @user_xfeatures: xfeatures valid in UABI buffers */
+ u64 user_xfeatures;
+
+ /* @xfd: xfeatures disabled to trap userspace use. */
+ u64 xfd;
+
+ /* @is_valloc: Indicator for dynamically allocated state */
+ unsigned int is_valloc : 1;
+
+ /* @is_guest: Indicator for guest state (KVM) */
+ unsigned int is_guest : 1;
+
+ /*
+ * @is_confidential: Indicator for KVM confidential mode.
+ * The FPU registers are restored by the
+ * vmentry firmware from encrypted guest
+ * memory. On vmexit the FPU registers are
+ * saved by firmware to encrypted guest memory
+ * and the registers are scrubbed before
+ * returning to the host. So there is no
+ * content which is worth saving and restoring.
+ * The fpstate has to be there so that
+ * preemption and softirq FPU usage works
+ * without special casing.
+ */
+ unsigned int is_confidential : 1;
+
+ /* @in_use: State is in use */
+ unsigned int in_use : 1;
+
+ /* @regs: The register state union for all supported formats */
+ union fpregs_state regs;
+
+ /* @regs is dynamically sized! Don't add anything after @regs! */
+} __aligned(64);
+
+#define FPU_GUEST_PERM_LOCKED BIT_ULL(63)
+
+struct fpu_state_perm {
+ /*
+ * @__state_perm:
+ *
+ * This bitmap indicates the permission for state components
+ * available to a thread group, including both user and supervisor
+ * components and software-defined bits like FPU_GUEST_PERM_LOCKED.
+ * The permission prctl() sets the enabled state bits in
+ * thread_group_leader()->thread.fpu.
+ *
+ * All run time operations use the per thread information in the
+ * currently active fpu.fpstate which contains the xfeature masks
+ * and sizes for kernel and user space.
+ *
+ * This master permission field is only to be used when
+ * task.fpu.fpstate based checks fail to validate whether the task
+ * is allowed to expand its xfeatures set which requires to
+ * allocate a larger sized fpstate buffer.
+ *
+ * Do not access this field directly. Use the provided helper
+ * function. Unlocked access is possible for quick checks.
+ */
+ u64 __state_perm;
+
+ /*
+ * @__state_size:
+ *
+ * The size required for @__state_perm. Only valid to access
+ * with sighand locked.
+ */
+ unsigned int __state_size;
+
+ /*
+ * @__user_state_size:
+ *
+ * The size required for @__state_perm user part. Only valid to
+ * access with sighand locked.
+ */
+ unsigned int __user_state_size;
+};
+
/*
* Highest level per task FPU state data structure that
* contains the FPU register state plus various FPU
@@ -294,28 +484,164 @@ struct fpu {
unsigned int last_cpu;
/*
- * @initialized:
+ * @avx512_timestamp:
*
- * This flag indicates whether this context is initialized: if the task
- * is not running then we can restore from this context, if the task
- * is running then we should save into this context.
+ * Records the timestamp of AVX512 use during last context switch.
*/
- unsigned char initialized;
+ unsigned long avx512_timestamp;
/*
- * @state:
+ * @fpstate:
*
- * In-memory copy of all FPU registers that we save/restore
- * over context switches. If the task is using the FPU then
- * the registers in the FPU are more recent than this state
- * copy. If the task context-switches away then they get
- * saved here and represent the FPU state.
+ * Pointer to the active struct fpstate. Initialized to
+ * point at @__fpstate below.
*/
- union fpregs_state state;
+ struct fpstate *fpstate;
+
/*
- * WARNING: 'state' is dynamically-sized. Do not put
+ * @__task_fpstate:
+ *
+ * Pointer to an inactive struct fpstate. Initialized to NULL. Is
+ * used only for KVM support to swap out the regular task fpstate.
+ */
+ struct fpstate *__task_fpstate;
+
+ /*
+ * @perm:
+ *
+ * Permission related information
+ */
+ struct fpu_state_perm perm;
+
+ /*
+ * @guest_perm:
+ *
+ * Permission related information for guest pseudo FPUs
+ */
+ struct fpu_state_perm guest_perm;
+
+ /*
+ * @__fpstate:
+ *
+ * Initial in-memory storage for FPU registers which are saved in
+ * context switch and when the kernel uses the FPU. The registers
+ * are restored from this storage on return to user space if they
+ * are not longer containing the tasks FPU register state.
+ */
+ struct fpstate __fpstate;
+ /*
+ * WARNING: '__fpstate' is dynamically-sized. Do not put
* anything after it here.
*/
};
-#endif /* _ASM_X86_FPU_H */
+/*
+ * Guest pseudo FPU container
+ */
+struct fpu_guest {
+ /*
+ * @xfeatures: xfeature bitmap of features which are
+ * currently enabled for the guest vCPU.
+ */
+ u64 xfeatures;
+
+ /*
+ * @xfd_err: Save the guest value.
+ */
+ u64 xfd_err;
+
+ /*
+ * @uabi_size: Size required for save/restore
+ */
+ unsigned int uabi_size;
+
+ /*
+ * @fpstate: Pointer to the allocated guest fpstate
+ */
+ struct fpstate *fpstate;
+};
+
+/*
+ * FPU state configuration data for fpu_guest.
+ * Initialized at boot time. Read only after init.
+ */
+struct vcpu_fpu_config {
+ /*
+ * @size:
+ *
+ * The default size of the register state buffer in guest FPUs.
+ * Includes all supported features except independent managed
+ * features and features which have to be requested by user space
+ * before usage.
+ */
+ unsigned int size;
+
+ /*
+ * @features:
+ *
+ * The default supported features bitmap in guest FPUs. Does not
+ * include independent managed features and features which have to
+ * be requested by user space before usage.
+ */
+ u64 features;
+};
+
+/*
+ * FPU state configuration data. Initialized at boot time. Read only after init.
+ */
+struct fpu_state_config {
+ /*
+ * @max_size:
+ *
+ * The maximum size of the register state buffer. Includes all
+ * supported features except independent managed features.
+ */
+ unsigned int max_size;
+
+ /*
+ * @default_size:
+ *
+ * The default size of the register state buffer. Includes all
+ * supported features except independent managed features,
+ * guest-only features and features which have to be requested by
+ * user space before usage.
+ */
+ unsigned int default_size;
+
+ /*
+ * @max_features:
+ *
+ * The maximum supported features bitmap. Does not include
+ * independent managed features.
+ */
+ u64 max_features;
+
+ /*
+ * @default_features:
+ *
+ * The default supported features bitmap. Does not include
+ * independent managed features, guest-only features and features
+ * which have to be requested by user space before usage.
+ */
+ u64 default_features;
+ /*
+ * @legacy_features:
+ *
+ * Features which can be reported back to user space
+ * even without XSAVE support, i.e. legacy features FP + SSE
+ */
+ u64 legacy_features;
+ /*
+ * @independent_features:
+ *
+ * Features that are supported by XSAVES, but not managed as part of
+ * the FPU core, such as LBR
+ */
+ u64 independent_features;
+};
+
+/* FPU state configuration information */
+extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
+extern struct vcpu_fpu_config guest_default_cfg;
+
+#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
new file mode 100644
index 000000000000..9a710c060445
--- /dev/null
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_XCR_H
+#define _ASM_X86_FPU_XCR_H
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+#define XCR_XFEATURE_IN_USE_MASK 0x00000001
+
+static __always_inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+/*
+ * Return a mask of xfeatures which are currently being tracked
+ * by the processor as being in the initial configuration.
+ *
+ * Callers should check X86_FEATURE_XGETBV1.
+ */
+static __always_inline u64 xfeatures_in_use(void)
+{
+ return xgetbv(XCR_XFEATURE_IN_USE_MASK);
+}
+
+#endif /* _ASM_X86_FPU_XCR_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c..7a7dc9d56027 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -2,15 +2,16 @@
#ifndef __ASM_X86_XSAVE_H
#define __ASM_X86_XSAVE_H
+#include <linux/uaccess.h>
#include <linux/types.h>
+
#include <asm/processor.h>
-#include <linux/uaccess.h>
+#include <asm/fpu/api.h>
+#include <asm/user.h>
/* Bit 63 of XCR0 is reserved for future expansion */
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
-#define XSTATE_CPUID 0x0000000d
-
#define FXSAVE_SIZE 512
#define XSAVE_HDR_SIZE 64
@@ -19,42 +20,115 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-/* Supervisor features */
-#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
+#define XSAVE_ALIGNMENT 64
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_FP | \
- XFEATURE_MASK_SSE | \
- XFEATURE_MASK_YMM | \
- XFEATURE_MASK_OPMASK | \
- XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM | \
- XFEATURE_MASK_PKRU | \
- XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+/* All currently supported user features */
+#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
+/*
+ * Features which are restored when returning to user space.
+ * PKRU is not restored on return to user space because PKRU
+ * is switched eagerly in switch_to() and flush_thread()
+ */
+#define XFEATURE_MASK_USER_RESTORE \
+ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU)
+
+/* Features which are dynamically enabled for a process on request */
+#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
+
+/* Supervisor features which are enabled only in guest FPUs */
+#define XFEATURE_MASK_GUEST_SUPERVISOR XFEATURE_MASK_CET_KERNEL
+
+/* All currently supported supervisor features */
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
+ XFEATURE_MASK_CET_USER | \
+ XFEATURE_MASK_GUEST_SUPERVISOR)
+
+/*
+ * A supervisor state component may not always contain valuable information,
+ * and its size may be huge. Saving/restoring such supervisor state components
+ * at each context switch can cause high CPU and space overhead, which should
+ * be avoided. Such supervisor state components should only be saved/restored
+ * on demand. The on-demand supervisor features are set in this mask.
+ *
+ * Unlike the existing supported supervisor features, an independent supervisor
+ * feature does not allocate a buffer in task->fpu, and the corresponding
+ * supervisor state component cannot be saved/restored at each context switch.
+ *
+ * To support an independent supervisor feature, a developer should follow the
+ * dos and don'ts as below:
+ * - Do dynamically allocate a buffer for the supervisor state component.
+ * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
+ * state component to/from the buffer.
+ * - Don't set the bit corresponding to the independent supervisor feature in
+ * IA32_XSS at run time, since it has been set at boot time.
+ */
+#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR)
+
+/*
+ * Unsupported supervisor features. When a supervisor feature in this mask is
+ * supported in the future, move it to the supported supervisor feature mask.
+ */
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
+
+/* All supervisor states including supported and unsupported states. */
+#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+ XFEATURE_MASK_INDEPENDENT | \
+ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
+
+/*
+ * The feature mask required to restore FPU state:
+ * - All user states which are not eagerly switched in switch_to()/exec()
+ * - The suporvisor states
+ */
+#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED)
+
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_USER_DYNAMIC)
-extern u64 xfeatures_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
-void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
-int using_compacted_format(void);
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
-int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-extern int validate_xstate_header(const struct xstate_header *hdr);
+int xfeature_size(int xfeature_nr);
+
+void xsaves(struct xregs_state *xsave, u64 mask);
+void xrstors(struct xregs_state *xsave, u64 mask);
+
+int xfd_enable_feature(u64 xfd_err);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+#endif
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+ return static_branch_unlikely(&__fpu_state_size_dynamic);
+}
+#else
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+ return false;
+}
+#endif
#endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..0ab65073c1cc 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -11,7 +11,7 @@
#ifdef CONFIG_FRAME_POINTER
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro FRAME_BEGIN
push %_ASM_BP
@@ -22,7 +22,36 @@
pop %_ASM_BP
.endm
-#else /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+ leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just clearing the MSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original ebp.
+ */
+.macro ENCODE_FRAME_POINTER
+ mov %esp, %ebp
+ andl $0x7fffffff, %ebp
+.endm
+#endif /* CONFIG_X86_64 */
+
+#else /* !__ASSEMBLER__ */
#define FRAME_BEGIN \
"push %" _ASM_BP "\n" \
@@ -30,12 +59,51 @@
#define FRAME_END "pop %" _ASM_BP "\n"
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+
+#define ENCODE_FRAME_POINTER \
+ "lea 1(%rsp), %rbp\n\t"
+
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return (unsigned long)regs + 1;
+}
+
+#else /* !CONFIG_X86_64 */
+
+#define ENCODE_FRAME_POINTER \
+ "movl %esp, %ebp\n\t" \
+ "andl $0x7fffffff, %ebp\n\t"
+
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return (unsigned long)regs & 0x7fffffff;
+}
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* __ASSEMBLER__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
+#ifdef __ASSEMBLER__
+
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+.endm
+
+#else /* !__ASSEMBLER__ */
+
+#define ENCODE_FRAME_POINTER
+
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return 0;
+}
+
+#endif
+
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
new file mode 100644
index 000000000000..2bb65677c079
--- /dev/null
+++ b/arch/x86/include/asm/fred.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Macros for Flexible Return and Event Delivery (FRED)
+ */
+
+#ifndef ASM_X86_FRED_H
+#define ASM_X86_FRED_H
+
+#include <linux/const.h>
+
+#include <asm/asm.h>
+#include <asm/msr.h>
+#include <asm/trapnr.h>
+
+/*
+ * FRED event return instruction opcodes for ERET{S,U}; supported in
+ * binutils >= 2.41.
+ */
+#define ERETS _ASM_BYTES(0xf2,0x0f,0x01,0xca)
+#define ERETU _ASM_BYTES(0xf3,0x0f,0x01,0xca)
+
+/*
+ * RSP is aligned to a 64-byte boundary before used to push a new stack frame
+ */
+#define FRED_STACK_FRAME_RSP_MASK _AT(unsigned long, (~0x3f))
+
+/*
+ * Used for the return address for call emulation during code patching,
+ * and measured in 64-byte cache lines.
+ */
+#define FRED_CONFIG_REDZONE_AMOUNT 1
+#define FRED_CONFIG_REDZONE (_AT(unsigned long, FRED_CONFIG_REDZONE_AMOUNT) << 6)
+#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9)
+#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p))
+
+#ifndef __ASSEMBLER__
+
+#ifdef CONFIG_X86_FRED
+#include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/ptrace.h>
+
+struct fred_info {
+ /* Event data: CR2, DR6, ... */
+ unsigned long edata;
+ unsigned long resv;
+};
+
+/* Full format of the FRED stack frame */
+struct fred_frame {
+ struct pt_regs regs;
+ struct fred_info info;
+};
+
+static __always_inline struct fred_info *fred_info(struct pt_regs *regs)
+{
+ return &container_of(regs, struct fred_frame, regs)->info;
+}
+
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
+{
+ return fred_info(regs)->edata;
+}
+
+void asm_fred_entrypoint_user(void);
+void asm_fred_entrypoint_kernel(void);
+void asm_fred_entry_from_kvm(struct fred_ss);
+
+__visible void fred_entry_from_user(struct pt_regs *regs);
+__visible void fred_entry_from_kernel(struct pt_regs *regs);
+__visible void __fred_entry_from_kvm(struct pt_regs *regs);
+
+/* Can be called from noinstr code, thus __always_inline */
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector)
+{
+ struct fred_ss ss = {
+ .ss =__KERNEL_DS,
+ .type = type,
+ .vector = vector,
+ .nmi = type == EVENT_TYPE_NMI,
+ .l = 1,
+ };
+
+ asm_fred_entry_from_kvm(ss);
+}
+
+void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
+void fred_complete_exception_setup(void);
+
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+ __this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+ unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+ if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+ wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+ __this_cpu_write(fred_rsp0, rsp0);
+ }
+}
+#else /* CONFIG_X86_FRED */
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
+static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
+static inline void fred_complete_exception_setup(void) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
+#endif /* CONFIG_X86_FRED */
+#endif /* !__ASSEMBLER__ */
+
+#endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index bca4c743de77..ab2547f97c2c 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -2,11 +2,11 @@
#ifndef _ASM_FSGSBASE_H
#define _ASM_FSGSBASE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_64
-#include <asm/msr-index.h>
+#include <asm/msr.h>
/*
* Read/write a task's FSBASE or GSBASE. This returns the value that
@@ -19,38 +19,67 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task);
extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
-/* Helper functions for reading/writing FS/GS base */
+/* Must be protected by X86_FEATURE_FSGSBASE check. */
-static inline unsigned long x86_fsbase_read_cpu(void)
+static __always_inline unsigned long rdfsbase(void)
{
unsigned long fsbase;
- rdmsrl(MSR_FS_BASE, fsbase);
+ asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
return fsbase;
}
-static inline unsigned long x86_gsbase_read_cpu_inactive(void)
+static __always_inline unsigned long rdgsbase(void)
{
unsigned long gsbase;
- rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
return gsbase;
}
-static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+static __always_inline void wrfsbase(unsigned long fsbase)
{
- wrmsrl(MSR_FS_BASE, fsbase);
+ asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
}
-static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+static __always_inline void wrgsbase(unsigned long gsbase)
{
- wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+ asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
}
+#include <asm/cpufeature.h>
+
+/* Helper functions for reading/writing FS/GS base */
+
+static inline unsigned long x86_fsbase_read_cpu(void)
+{
+ unsigned long fsbase;
+
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE))
+ fsbase = rdfsbase();
+ else
+ rdmsrq(MSR_FS_BASE, fsbase);
+
+ return fsbase;
+}
+
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE))
+ wrfsbase(fsbase);
+ else
+ wrmsrq(MSR_FS_BASE, fsbase);
+}
+
+extern unsigned long x86_gsbase_read_cpu_inactive(void);
+extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+extern unsigned long x86_fsgsbase_read_task(struct task_struct *task,
+ unsigned short selector);
+
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_FSGSBASE_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index cf350639e76d..b08c95872eed 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -2,24 +2,27 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
+#include <asm/ptrace.h>
+
#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_FENTRY
-# define MCOUNT_ADDR ((unsigned long)(__fentry__))
-#else
-# define MCOUNT_ADDR ((unsigned long)(mcount))
-# define HAVE_FUNCTION_GRAPH_FP_TEST
+#ifndef CC_USING_FENTRY
+# error Compiler does not support fentry?
#endif
+# define MCOUNT_ADDR ((unsigned long)(__fentry__))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
+/* Ignore unused weak functions which will have non zero offsets */
+#ifdef CONFIG_HAVE_FENTRY
+# include <asm/ibt.h>
+/* Add offset for endbr64 if IBT enabled */
+# define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
-#ifndef __ASSEMBLY__
-extern void mcount(void);
-extern atomic_t modifying_ftrace_code;
+#ifndef __ASSEMBLER__
extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
@@ -31,33 +34,109 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE)))
+ fentry_ip -= ENDBR_INSN_SIZE;
+
+ return fentry_ip;
+}
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
+
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+#include <linux/ftrace_regs.h>
+
+static __always_inline struct pt_regs *
+arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+ /* Only when FL_SAVE_REGS is set, cs will be non zero */
+ if (!arch_ftrace_regs(fregs)->regs.cs)
+ return NULL;
+ return &arch_ftrace_regs(fregs)->regs;
+}
+
+#define arch_ftrace_partial_regs(regs) do { \
+ regs->flags &= ~X86_EFLAGS_FIXED; \
+ regs->cs = __KERNEL_CS; \
+} while (0)
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
+ (_regs)->cs = __KERNEL_CS; \
+ (_regs)->flags = 0; \
+ } while (0)
+
+#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
+ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0)
+
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+ return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
+
+struct ftrace_ops;
+#define ftrace_graph_func ftrace_graph_func
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#else
+#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When a ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this, we
+ * place the direct caller in the ORIG_AX part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void
+__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ /* Emulate a call */
+ regs->orig_ax = addr;
+}
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+ __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#ifdef CONFIG_DYNAMIC_FTRACE
struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
-int ftrace_int3_handler(struct pt_regs *regs);
-
-#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
-
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
+ unsigned long frame_pointer);
+
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+extern void set_ftrace_ops_ro(void);
+#else
+static inline void set_ftrace_ops_ro(void) { }
+#endif
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
/*
* Compare the symbol name with the system call name. Skip the
- * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
- (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+ (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
#ifndef COMPILE_OFFSETS
@@ -80,6 +159,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
#endif /* !COMPILE_OFFSETS */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 13c83fe97988..fe5d9a10d900 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -12,76 +12,86 @@
#include <asm/processor.h>
#include <asm/smap.h>
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- asm volatile("\t" ASM_STAC "\n" \
- "1:\t" insn "\n" \
- "2:\t" ASM_CLAC "\n" \
- "\t.section .fixup,\"ax\"\n" \
- "3:\tmov\t%3, %1\n" \
- "\tjmp\t2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
+#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \
+do { \
+ int oldval = 0, ret; \
+ asm volatile("1:\t" insn "\n" \
+ "2:\n" \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
- : "i" (-EFAULT), "0" (oparg), "1" (0))
+ : "0" (oparg), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+} while(0)
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- asm volatile("\t" ASM_STAC "\n" \
- "1:\tmovl %2, %0\n" \
- "\tmovl\t%0, %3\n" \
+
+#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \
+do { \
+ int oldval = 0, ret, tem; \
+ asm volatile("1:\tmovl %2, %0\n" \
+ "2:\tmovl\t%0, %3\n" \
"\t" insn "\n" \
- "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
- "\tjnz\t1b\n" \
- "3:\t" ASM_CLAC "\n" \
- "\t.section .fixup,\"ax\"\n" \
- "4:\tmov\t%5, %1\n" \
- "\tjmp\t3b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 4b) \
- _ASM_EXTABLE_UA(2b, 4b) \
+ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
+ "\tjnz\t2b\n" \
+ "4:\n" \
+ _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \
+ _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "1" (0))
+ : "r" (oparg), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+} while(0)
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int oldval = 0, ret, tem;
-
- pagefault_disable();
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
- uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
+ scoped_user_rw_access(uaddr, Efault) {
+ switch (op) {
+ case FUTEX_OP_SET:
+ unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_ADD:
+ unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_OR:
+ unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_ANDN:
+ unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault);
+ break;
+ case FUTEX_OP_XOR:
+ unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault);
+ break;
+ default:
+ return -ENOSYS;
+ }
}
-
- pagefault_enable();
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ return 0;
+Efault:
+ return -EFAULT;
}
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
- return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval);
+ int ret = 0;
+
+ scoped_user_rw_access(uaddr, Efault) {
+ asm_inline volatile("\n"
+ "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
+ "2:\n"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "r" (newval), "1" (oldval)
+ : "memory");
+ *uval = oldval;
+ }
+ return ret;
+Efault:
+ return -EFAULT;
}
#endif
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 318556574345..5af8088a10df 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled;
extern void early_gart_iommu_check(void);
extern int gart_iommu_init(void);
extern void __init gart_parse_options(char *);
-extern int gart_iommu_hole_init(void);
+void gart_iommu_hole_init(void);
#else
#define gart_iommu_aperture 0
@@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void)
static inline void gart_parse_options(char *options)
{
}
-static inline int gart_iommu_hole_init(void)
+static inline void gart_iommu_hole_init(void)
{
- return -ENODEV;
}
#endif
diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index 7cd73552a4e8..3c7267ef4a9e 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Geode definitions
* Copyright (C) 2006, Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
*/
#ifndef _ASM_X86_GEODE_H
diff --git a/arch/x86/include/asm/gsseg.h b/arch/x86/include/asm/gsseg.h
new file mode 100644
index 000000000000..ab6a595cea70
--- /dev/null
+++ b/arch/x86/include/asm/gsseg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_X86_GSSEG_H
+#define _ASM_X86_GSSEG_H
+
+#include <linux/types.h>
+
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/processor.h>
+#include <asm/nops.h>
+
+#ifdef CONFIG_X86_64
+
+extern asmlinkage void asm_load_gs_index(u16 selector);
+
+/* Replace with "lkgs %di" once binutils support LKGS instruction */
+#define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7)
+
+static inline void native_lkgs(unsigned int selector)
+{
+ u16 sel = selector;
+ asm_inline volatile("1: " LKGS_DI
+ _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel])
+ : [sel] "+D" (sel));
+}
+
+static inline void native_load_gs_index(unsigned int selector)
+{
+ if (cpu_feature_enabled(X86_FEATURE_LKGS)) {
+ native_lkgs(selector);
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ asm_load_gs_index(selector);
+ local_irq_restore(flags);
+ }
+}
+
+#endif /* CONFIG_X86_64 */
+
+static inline void __init lkgs_init(void)
+{
+#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_X86_64
+ if (cpu_feature_enabled(X86_FEATURE_LKGS))
+ pv_ops.cpu.load_gs_index = native_lkgs;
+#endif
+#endif
+}
+
+#ifndef CONFIG_PARAVIRT_XXL
+
+static inline void load_gs_index(unsigned int selector)
+{
+#ifdef CONFIG_X86_64
+ native_load_gs_index(selector);
+#else
+ loadsegment(gs, selector);
+#endif
+}
+
+#endif /* CONFIG_PARAVIRT_XXL */
+
+#endif /* _ASM_X86_GSSEG_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d9069bb26c7f..6b6d472baa0b 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -5,8 +5,7 @@
#include <linux/threads.h>
typedef struct {
- u16 __softirq_pending;
-#if IS_ENABLED(CONFIG_KVM_INTEL)
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
u8 kvm_cpu_l1tf_flush_l1d;
#endif
unsigned int __nmi_count; /* arch dependent */
@@ -15,7 +14,7 @@ typedef struct {
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
unsigned int kvm_posted_intr_nested_ipis;
@@ -37,17 +36,23 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
unsigned int irq_hv_callback_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV)
unsigned int irq_hv_reenlightenment_count;
unsigned int hyperv_stimer0_count;
#endif
+#ifdef CONFIG_X86_POSTED_MSI
+ unsigned int posted_msi_notification_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+#ifdef CONFIG_X86_POSTED_MSI
+DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+#endif
#define __ARCH_IRQ_STAT
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
@@ -60,24 +65,30 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
+DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
+#define local_softirq_pending_ref __softirq_pending
-#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
-static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
+static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
}
-static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
+static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
{
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index a8059930056d..585bdadba47d 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -23,10 +23,10 @@
#include <linux/interrupt.h>
#include <linux/threads.h>
-#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/fixmap.h>
+#include <asm/pgtable_areas.h>
/* declarations for highmem.c */
extern unsigned long highstart_pfn, highend_pfn;
@@ -58,22 +58,16 @@ extern unsigned long highstart_pfn, highend_pfn;
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
-extern void *kmap_high(struct page *page);
-extern void kunmap_high(struct page *page);
-
-void *kmap(struct page *page);
-void kunmap(struct page *page);
-
-void *kmap_atomic_prot(struct page *page, pgprot_t prot);
-void *kmap_atomic(struct page *page);
-void __kunmap_atomic(void *kvaddr);
-void *kmap_atomic_pfn(unsigned long pfn);
-void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
-
#define flush_cache_kmaps() do { } while (0)
-extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn);
+#define arch_kmap_local_post_map(vaddr, pteval) \
+ arch_flush_lazy_mmu_mode()
+
+#define arch_kmap_local_post_unmap(vaddr) \
+ do { \
+ flush_tlb_one_kernel((vaddr)); \
+ arch_flush_lazy_mmu_mode(); \
+ } while (0)
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..ab0c78855ecb 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -74,18 +74,6 @@ extern void hpet_disable(void);
extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
-struct irq_data;
-struct hpet_dev;
-struct irq_domain;
-
-extern void hpet_msi_unmask(struct irq_data *data);
-extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
-extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
-extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
-extern int hpet_assign_irq(struct irq_domain *domain,
- struct hpet_dev *dev, int dev_num);
-
#ifdef CONFIG_HPET_EMULATE_RTC
#include <linux/interrupt.h>
@@ -96,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
unsigned char sec);
extern int hpet_set_periodic_freq(unsigned long freq);
-extern int hpet_rtc_dropped_irq(void);
extern int hpet_rtc_timer_init(void);
extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
extern int hpet_register_irq_handler(rtc_irq_handler handler);
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 7469d321f072..1721b1aadeb1 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -7,18 +7,4 @@
#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
-static inline int is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr,
- unsigned long len) {
- return 0;
-}
-
-static inline void arch_clear_hugepage_flags(struct page *page)
-{
-}
-
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void) { return true; }
-#endif
-
#endif /* _ASM_X86_HUGETLB_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index a1f0e90d0818..0bc931cd0698 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
/* Total number of available HW breakpoint registers */
#define HBP_NUM 4
-static inline int hw_breakpoint_slots(int type)
-{
- return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
struct perf_event_attr;
struct perf_event;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..cbe19e669080 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,9 +16,7 @@
#include <asm/irq_vectors.h>
-#define IRQ_MATRIX_BITS NR_VECTORS
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/percpu.h>
#include <linux/profile.h>
@@ -28,29 +26,7 @@
#include <asm/irq.h>
#include <asm/sections.h>
-/* Interrupt handlers registered during init_IRQ */
-extern asmlinkage void apic_timer_interrupt(void);
-extern asmlinkage void x86_platform_ipi(void);
-extern asmlinkage void kvm_posted_intr_ipi(void);
-extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
-extern asmlinkage void kvm_posted_intr_nested_ipi(void);
-extern asmlinkage void error_interrupt(void);
-extern asmlinkage void irq_work_interrupt(void);
-extern asmlinkage void uv_bau_message_intr1(void);
-
-extern asmlinkage void spurious_interrupt(void);
-extern asmlinkage void thermal_interrupt(void);
-extern asmlinkage void reschedule_interrupt(void);
-
-extern asmlinkage void irq_move_cleanup_interrupt(void);
-extern asmlinkage void reboot_interrupt(void);
-extern asmlinkage void threshold_interrupt(void);
-extern asmlinkage void deferred_error_interrupt(void);
-
-extern asmlinkage void call_function_interrupt(void);
-extern asmlinkage void call_function_single_interrupt(void);
-
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
struct irq_data;
struct pci_dev;
struct msi_desc;
@@ -58,61 +34,54 @@ struct msi_desc;
enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
X86_IRQ_ALLOC_TYPE_HPET,
- X86_IRQ_ALLOC_TYPE_MSI,
- X86_IRQ_ALLOC_TYPE_MSIX,
+ X86_IRQ_ALLOC_TYPE_PCI_MSI,
+ X86_IRQ_ALLOC_TYPE_PCI_MSIX,
X86_IRQ_ALLOC_TYPE_DMAR,
+ X86_IRQ_ALLOC_TYPE_AMDVI,
X86_IRQ_ALLOC_TYPE_UV,
};
+struct ioapic_alloc_info {
+ int pin;
+ int node;
+ u32 is_level : 1;
+ u32 active_low : 1;
+ u32 valid : 1;
+};
+
+struct uv_alloc_info {
+ int limit;
+ int blade;
+ unsigned long offset;
+ char *name;
+
+};
+
+/**
+ * irq_alloc_info - X86 specific interrupt allocation info
+ * @type: X86 specific allocation type
+ * @flags: Flags for allocation tweaks
+ * @devid: Device ID for allocations
+ * @hwirq: Associated hw interrupt number in the domain
+ * @mask: CPU mask for vector allocation
+ * @desc: Pointer to msi descriptor
+ * @data: Allocation specific data
+ *
+ * @ioapic: IOAPIC specific allocation data
+ * @uv: UV specific allocation data
+*/
struct irq_alloc_info {
enum irq_alloc_type type;
u32 flags;
- const struct cpumask *mask; /* CPU mask for vector allocation */
+ u32 devid;
+ irq_hw_number_t hwirq;
+ const struct cpumask *mask;
+ struct msi_desc *desc;
+ void *data;
+
union {
- int unused;
-#ifdef CONFIG_HPET_TIMER
- struct {
- int hpet_id;
- int hpet_index;
- void *hpet_data;
- };
-#endif
-#ifdef CONFIG_PCI_MSI
- struct {
- struct pci_dev *msi_dev;
- irq_hw_number_t msi_hwirq;
- };
-#endif
-#ifdef CONFIG_X86_IO_APIC
- struct {
- int ioapic_id;
- int ioapic_pin;
- int ioapic_node;
- u32 ioapic_trigger : 1;
- u32 ioapic_polarity : 1;
- u32 ioapic_valid : 1;
- struct IO_APIC_route_entry *ioapic_entry;
- };
-#endif
-#ifdef CONFIG_DMAR_TABLE
- struct {
- int dmar_id;
- void *dmar_data;
- };
-#endif
-#ifdef CONFIG_X86_UV
- struct {
- int uv_limit;
- int uv_blade;
- unsigned long uv_offset;
- char *uv_name;
- };
-#endif
-#if IS_ENABLED(CONFIG_VMD)
- struct {
- struct msi_desc *desc;
- };
-#endif
+ struct ioapic_alloc_info ioapic;
+ struct uv_alloc_info uv;
};
};
@@ -123,21 +92,23 @@ struct irq_cfg {
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
#ifdef CONFIG_SMP
-extern void send_cleanup_vector(struct irq_cfg *);
+extern void vector_schedule_cleanup(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
#else
-static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
-
extern void apic_ack_edge(struct irq_data *data);
-#else /* CONFIG_X86_LOCAL_APIC */
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
+#else
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
-#endif /* CONFIG_X86_LOCAL_APIC */
+#endif
/* Statistics */
extern atomic_t irq_err_count;
@@ -150,12 +121,15 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
+extern char spurious_entries_start[];
+
#define VECTOR_UNUSED NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN ((void *)-1L)
+#define VECTOR_RETRIGGERED ((void *)-2L)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif /* !ASSEMBLY_ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_HW_IRQ_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
deleted file mode 100644
index 705dafc2d11a..000000000000
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ /dev/null
@@ -1,869 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-
-/*
- * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
- * Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- */
-
-#ifndef _ASM_X86_HYPERV_TLFS_H
-#define _ASM_X86_HYPERV_TLFS_H
-
-#include <linux/types.h>
-#include <asm/page.h>
-
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
- */
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
-#define HYPERV_CPUID_INTERFACE 0x40000001
-#define HYPERV_CPUID_VERSION 0x40000002
-#define HYPERV_CPUID_FEATURES 0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
-#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
-
-#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
-#define HYPERV_CPUID_MIN 0x40000005
-#define HYPERV_CPUID_MAX 0x4000ffff
-
-/*
- * Feature identification. EAX indicates which features are available
- * to the partition based upon the current partition privileges.
- * These are HYPERV_CPUID_FEATURES.EAX bits.
- */
-
-/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
-#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0)
-/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
-/*
- * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
- * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
- */
-#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2)
-/*
- * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
- * HV_X64_MSR_STIMER3_COUNT) available
- */
-#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
-/*
- * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
- * are available
- */
-#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4)
-/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
-#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5)
-/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
-#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6)
-/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
-#define HV_X64_MSR_RESET_AVAILABLE BIT(7)
-/*
- * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
- * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
- * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
- */
-#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8)
-/* Partition reference TSC MSR is available */
-#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
-/* Partition Guest IDLE MSR is available */
-#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10)
-/*
- * There is a single feature flag that signifies if the partition has access
- * to MSRs with local APIC and TSC frequencies.
- */
-#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
-/* AccessReenlightenmentControls privilege */
-#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
-
-/*
- * Feature identification: indicates which flags were specified at partition
- * creation. The format is the same as the partition creation flag structure
- * defined in section Partition Creation Flags.
- * These are HYPERV_CPUID_FEATURES.EBX bits.
- */
-#define HV_X64_CREATE_PARTITIONS BIT(0)
-#define HV_X64_ACCESS_PARTITION_ID BIT(1)
-#define HV_X64_ACCESS_MEMORY_POOL BIT(2)
-#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3)
-#define HV_X64_POST_MESSAGES BIT(4)
-#define HV_X64_SIGNAL_EVENTS BIT(5)
-#define HV_X64_CREATE_PORT BIT(6)
-#define HV_X64_CONNECT_PORT BIT(7)
-#define HV_X64_ACCESS_STATS BIT(8)
-#define HV_X64_DEBUGGING BIT(11)
-#define HV_X64_CPU_POWER_MANAGEMENT BIT(12)
-
-/*
- * Feature identification. EDX indicates which miscellaneous features
- * are available to the partition.
- * These are HYPERV_CPUID_FEATURES.EDX bits.
- */
-/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
-#define HV_X64_MWAIT_AVAILABLE BIT(0)
-/* Guest debugging support is available */
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
-/* Performance Monitor support is available*/
-#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
-/* Support for physical CPU dynamic partitioning events is available*/
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
-/*
- * Support for passing hypercall input parameter block via XMM
- * registers is available
- */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
-/* Support for a virtual guest idle state is available */
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
-/* Frequency MSRs available */
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
-/* Crash MSR available */
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
-/* stimer Direct Mode is available */
-#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
-
-/*
- * Implementation recommendations. Indicates which behaviors the hypervisor
- * recommends the OS implement for optimal performance.
- * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits.
- */
-/*
- * Recommend using hypercall for address space switches rather
- * than MOV to CR3 instruction
- */
-#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
-/* Recommend using hypercall for local TLB flushes rather
- * than INVLPG or MOV to CR3 instructions */
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
-/*
- * Recommend using hypercall for remote TLB flushes rather
- * than inter-processor interrupts
- */
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
-/*
- * Recommend using MSRs for accessing APIC registers
- * EOI, ICR and TPR rather than their memory-mapped counterparts
- */
-#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
-/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
-#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
-/*
- * Recommend using relaxed timing for this partition. If used,
- * the VM should disable any watchdog timeouts that rely on the
- * timely delivery of external interrupts
- */
-#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
-
-/*
- * Recommend not using Auto End-Of-Interrupt feature
- */
-#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
-
-/*
- * Recommend using cluster IPI hypercalls.
- */
-#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
-
-/* Recommend using the newer ExProcessorMasks interface */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
-
-/* Recommend using enlightened VMCS */
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
-
-/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
-#define HV_X64_NESTED_MSR_BITMAP BIT(19)
-
-/* Hyper-V specific model specific registers (MSRs) */
-
-/* MSR used to identify the guest OS. */
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-
-/* MSR used to setup pages used to communicate with the hypervisor. */
-#define HV_X64_MSR_HYPERCALL 0x40000001
-
-/* MSR used to provide vcpu index */
-#define HV_X64_MSR_VP_INDEX 0x40000002
-
-/* MSR used to reset the guest OS. */
-#define HV_X64_MSR_RESET 0x40000003
-
-/* MSR used to provide vcpu runtime in 100ns units */
-#define HV_X64_MSR_VP_RUNTIME 0x40000010
-
-/* MSR used to read the per-partition time reference counter */
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-
-/* A partition's reference time stamp counter (TSC) page */
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-
-/* MSR used to retrieve the TSC frequency */
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-
-/* MSR used to retrieve the local APIC timer frequency */
-#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
-
-/* Define the virtual APIC registers */
-#define HV_X64_MSR_EOI 0x40000070
-#define HV_X64_MSR_ICR 0x40000071
-#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-
-/* Define synthetic interrupt controller model specific registers. */
-#define HV_X64_MSR_SCONTROL 0x40000080
-#define HV_X64_MSR_SVERSION 0x40000081
-#define HV_X64_MSR_SIEFP 0x40000082
-#define HV_X64_MSR_SIMP 0x40000083
-#define HV_X64_MSR_EOM 0x40000084
-#define HV_X64_MSR_SINT0 0x40000090
-#define HV_X64_MSR_SINT1 0x40000091
-#define HV_X64_MSR_SINT2 0x40000092
-#define HV_X64_MSR_SINT3 0x40000093
-#define HV_X64_MSR_SINT4 0x40000094
-#define HV_X64_MSR_SINT5 0x40000095
-#define HV_X64_MSR_SINT6 0x40000096
-#define HV_X64_MSR_SINT7 0x40000097
-#define HV_X64_MSR_SINT8 0x40000098
-#define HV_X64_MSR_SINT9 0x40000099
-#define HV_X64_MSR_SINT10 0x4000009A
-#define HV_X64_MSR_SINT11 0x4000009B
-#define HV_X64_MSR_SINT12 0x4000009C
-#define HV_X64_MSR_SINT13 0x4000009D
-#define HV_X64_MSR_SINT14 0x4000009E
-#define HV_X64_MSR_SINT15 0x4000009F
-
-/*
- * Synthetic Timer MSRs. Four timers per vcpu.
- */
-#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
-
-/* Hyper-V guest idle MSR */
-#define HV_X64_MSR_GUEST_IDLE 0x400000F0
-
-/* Hyper-V guest crash notification MSR's */
-#define HV_X64_MSR_CRASH_P0 0x40000100
-#define HV_X64_MSR_CRASH_P1 0x40000101
-#define HV_X64_MSR_CRASH_P2 0x40000102
-#define HV_X64_MSR_CRASH_P3 0x40000103
-#define HV_X64_MSR_CRASH_P4 0x40000104
-#define HV_X64_MSR_CRASH_CTL 0x40000105
-
-/* TSC emulation after migration */
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
-
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 guest_physical_address:52;
- } __packed;
-};
-
-/*
- * TSC page layout.
- */
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
- u64 reserved2[509];
-} __packed;
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
-struct hv_reenlightenment_control {
- __u64 vector:8;
- __u64 reserved1:8;
- __u64 enabled:1;
- __u64 reserved2:15;
- __u64 target_vp:32;
-} __packed;
-
-struct hv_tsc_emulation_control {
- __u64 enabled:1;
- __u64 reserved:63;
-} __packed;
-
-struct hv_tsc_emulation_status {
- __u64 inprogress:1;
- __u64 reserved:63;
-} __packed;
-
-#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-
-/*
- * Crash notification (HV_X64_MSR_CRASH_CTL) flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
-#define HV_X64_MSR_CRASH_PARAMS \
- (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
-
-#define HV_IPI_LOW_VECTOR 0x10
-#define HV_IPI_HIGH_VECTOR 0xff
-
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
-#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
-
-#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
-#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-#define HV_PROCESSOR_POWER_STATE_C0 0
-#define HV_PROCESSOR_POWER_STATE_C1 1
-#define HV_PROCESSOR_POWER_STATE_C2 2
-#define HV_PROCESSOR_POWER_STATE_C3 3
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-typedef struct _HV_REFERENCE_TSC_PAGE {
- __u32 tsc_sequence;
- __u32 res1;
- __u64 tsc_scale;
- __s64 tsc_offset;
-} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
-
-/* Define the number of synthetic interrupt sources. */
-#define HV_SYNIC_SINT_COUNT (16)
-/* Define the expected SynIC version. */
-#define HV_SYNIC_VERSION_1 (0x1)
-/* Valid SynIC vectors are 16-255. */
-#define HV_SYNIC_FIRST_VALID_VECTOR (16)
-
-#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SINT_MASKED (1ULL << 16)
-#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
-#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
-
-#define HV_SYNIC_STIMER_COUNT (4)
-
-/* Define synthetic interrupt controller message constants. */
-#define HV_MESSAGE_SIZE (256)
-#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
-#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
-
-/* Define hypervisor message types. */
-enum hv_message_type {
- HVMSG_NONE = 0x00000000,
-
- /* Memory access messages. */
- HVMSG_UNMAPPED_GPA = 0x80000000,
- HVMSG_GPA_INTERCEPT = 0x80000001,
-
- /* Timer notification messages. */
- HVMSG_TIMER_EXPIRED = 0x80000010,
-
- /* Error messages. */
- HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
- HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
- HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
-
- /* Trace buffer complete messages. */
- HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
-
- /* Platform-specific processor intercept messages. */
- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
- HVMSG_X64_MSR_INTERCEPT = 0x80010001,
- HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
- HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
- HVMSG_X64_APIC_EOI = 0x80010004,
- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
-};
-
-/* Define synthetic interrupt controller message flags. */
-union hv_message_flags {
- __u8 asu8;
- struct {
- __u8 msg_pending:1;
- __u8 reserved:7;
- } __packed;
-};
-
-/* Define port identifier type. */
-union hv_port_id {
- __u32 asu32;
- struct {
- __u32 id:24;
- __u32 reserved:8;
- } __packed u;
-};
-
-/* Define synthetic interrupt controller message header. */
-struct hv_message_header {
- __u32 message_type;
- __u8 payload_size;
- union hv_message_flags message_flags;
- __u8 reserved[2];
- union {
- __u64 sender;
- union hv_port_id port;
- };
-} __packed;
-
-/* Define synthetic interrupt controller message format. */
-struct hv_message {
- struct hv_message_header header;
- union {
- __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
- } u;
-} __packed;
-
-/* Define the synthetic interrupt message page layout. */
-struct hv_message_page {
- struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
-} __packed;
-
-/* Define timer message payload structure. */
-struct hv_timer_message_payload {
- __u32 timer_index;
- __u32 reserved;
- __u64 expiration_time; /* When the timer expired */
- __u64 delivery_time; /* When the message was delivered */
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved;
- __u64 vtl_control[2];
- __u64 nested_enlightenments_control[2];
- __u32 enlighten_vmentry;
- __u32 padding;
- __u64 current_nested_vmcs;
-} __packed;
-
-struct hv_enlightened_vmcs {
- u32 revision_id;
- u32 abort;
-
- u16 host_es_selector;
- u16 host_cs_selector;
- u16 host_ss_selector;
- u16 host_ds_selector;
- u16 host_fs_selector;
- u16 host_gs_selector;
- u16 host_tr_selector;
-
- u16 padding16_1;
-
- u64 host_ia32_pat;
- u64 host_ia32_efer;
-
- u64 host_cr0;
- u64 host_cr3;
- u64 host_cr4;
-
- u64 host_ia32_sysenter_esp;
- u64 host_ia32_sysenter_eip;
- u64 host_rip;
- u32 host_ia32_sysenter_cs;
-
- u32 pin_based_vm_exec_control;
- u32 vm_exit_controls;
- u32 secondary_vm_exec_control;
-
- u64 io_bitmap_a;
- u64 io_bitmap_b;
- u64 msr_bitmap;
-
- u16 guest_es_selector;
- u16 guest_cs_selector;
- u16 guest_ss_selector;
- u16 guest_ds_selector;
- u16 guest_fs_selector;
- u16 guest_gs_selector;
- u16 guest_ldtr_selector;
- u16 guest_tr_selector;
-
- u32 guest_es_limit;
- u32 guest_cs_limit;
- u32 guest_ss_limit;
- u32 guest_ds_limit;
- u32 guest_fs_limit;
- u32 guest_gs_limit;
- u32 guest_ldtr_limit;
- u32 guest_tr_limit;
- u32 guest_gdtr_limit;
- u32 guest_idtr_limit;
-
- u32 guest_es_ar_bytes;
- u32 guest_cs_ar_bytes;
- u32 guest_ss_ar_bytes;
- u32 guest_ds_ar_bytes;
- u32 guest_fs_ar_bytes;
- u32 guest_gs_ar_bytes;
- u32 guest_ldtr_ar_bytes;
- u32 guest_tr_ar_bytes;
-
- u64 guest_es_base;
- u64 guest_cs_base;
- u64 guest_ss_base;
- u64 guest_ds_base;
- u64 guest_fs_base;
- u64 guest_gs_base;
- u64 guest_ldtr_base;
- u64 guest_tr_base;
- u64 guest_gdtr_base;
- u64 guest_idtr_base;
-
- u64 padding64_1[3];
-
- u64 vm_exit_msr_store_addr;
- u64 vm_exit_msr_load_addr;
- u64 vm_entry_msr_load_addr;
-
- u64 cr3_target_value0;
- u64 cr3_target_value1;
- u64 cr3_target_value2;
- u64 cr3_target_value3;
-
- u32 page_fault_error_code_mask;
- u32 page_fault_error_code_match;
-
- u32 cr3_target_count;
- u32 vm_exit_msr_store_count;
- u32 vm_exit_msr_load_count;
- u32 vm_entry_msr_load_count;
-
- u64 tsc_offset;
- u64 virtual_apic_page_addr;
- u64 vmcs_link_pointer;
-
- u64 guest_ia32_debugctl;
- u64 guest_ia32_pat;
- u64 guest_ia32_efer;
-
- u64 guest_pdptr0;
- u64 guest_pdptr1;
- u64 guest_pdptr2;
- u64 guest_pdptr3;
-
- u64 guest_pending_dbg_exceptions;
- u64 guest_sysenter_esp;
- u64 guest_sysenter_eip;
-
- u32 guest_activity_state;
- u32 guest_sysenter_cs;
-
- u64 cr0_guest_host_mask;
- u64 cr4_guest_host_mask;
- u64 cr0_read_shadow;
- u64 cr4_read_shadow;
- u64 guest_cr0;
- u64 guest_cr3;
- u64 guest_cr4;
- u64 guest_dr7;
-
- u64 host_fs_base;
- u64 host_gs_base;
- u64 host_tr_base;
- u64 host_gdtr_base;
- u64 host_idtr_base;
- u64 host_rsp;
-
- u64 ept_pointer;
-
- u16 virtual_processor_id;
- u16 padding16_2[3];
-
- u64 padding64_2[5];
- u64 guest_physical_address;
-
- u32 vm_instruction_error;
- u32 vm_exit_reason;
- u32 vm_exit_intr_info;
- u32 vm_exit_intr_error_code;
- u32 idt_vectoring_info_field;
- u32 idt_vectoring_error_code;
- u32 vm_exit_instruction_len;
- u32 vmx_instruction_info;
-
- u64 exit_qualification;
- u64 exit_io_instruction_ecx;
- u64 exit_io_instruction_esi;
- u64 exit_io_instruction_edi;
- u64 exit_io_instruction_eip;
-
- u64 guest_linear_address;
- u64 guest_rsp;
- u64 guest_rflags;
-
- u32 guest_interruptibility_info;
- u32 cpu_based_vm_exec_control;
- u32 exception_bitmap;
- u32 vm_entry_controls;
- u32 vm_entry_intr_info_field;
- u32 vm_entry_exception_error_code;
- u32 vm_entry_instruction_len;
- u32 tpr_threshold;
-
- u64 guest_rip;
-
- u32 hv_clean_fields;
- u32 hv_padding_32;
- u32 hv_synthetic_controls;
- struct {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 reserved:30;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
-
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 padding64_4[4];
- u64 guest_bndcfgs;
- u64 padding64_5[7];
- u64 xss_exit_bitmap;
- u64 padding64_6[7];
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-
-/* Define synthetic interrupt controller flag constants. */
-#define HV_EVENT_FLAGS_COUNT (256 * 8)
-#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
-
-/*
- * Synthetic timer configuration.
- */
-union hv_stimer_config {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 periodic:1;
- u64 lazy:1;
- u64 auto_enable:1;
- u64 apic_vector:8;
- u64 direct_mode:1;
- u64 reserved_z0:3;
- u64 sintx:4;
- u64 reserved_z1:44;
- } __packed;
-};
-
-
-/* Define the synthetic interrupt controller event flags format. */
-union hv_synic_event_flags {
- unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
-};
-
-/* Define SynIC control register. */
-union hv_synic_scontrol {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:63;
- } __packed;
-};
-
-/* Define synthetic interrupt source. */
-union hv_synic_sint {
- u64 as_uint64;
- struct {
- u64 vector:8;
- u64 reserved1:8;
- u64 masked:1;
- u64 auto_eoi:1;
- u64 reserved2:46;
- } __packed;
-};
-
-/* Define the format of the SIMP register */
-union hv_synic_simp {
- u64 as_uint64;
- struct {
- u64 simp_enabled:1;
- u64 preserved:11;
- u64 base_simp_gpa:52;
- } __packed;
-};
-
-/* Define the format of the SIEFP register */
-union hv_synic_siefp {
- u64 as_uint64;
- struct {
- u64 siefp_enabled:1;
- u64 preserved:11;
- u64 base_siefp_gpa:52;
- } __packed;
-};
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
-} __packed;
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-} __packed;
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
- u32 vector;
- u32 reserved;
- struct hv_vpset vp_set;
-} __packed;
-
-/* HvFlushGuestPhysicalAddressSpace hypercalls */
-struct hv_guest_mapping_flush {
- u64 address_space;
- u64 flags;
-} __packed;
-
-/*
- * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
- * by the bitwidth of "additional_pages" in union hv_gpa_page_range.
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-
-/* HvFlushGuestPhysicalAddressList hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
-};
-
-/*
- * All input flush parameters should be in single page. The max flush
- * count is equal with how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
-#endif
diff --git a/arch/x86/include/asm/hyperv_timer.h b/arch/x86/include/asm/hyperv_timer.h
new file mode 100644
index 000000000000..388fa81b8f38
--- /dev/null
+++ b/arch/x86/include/asm/hyperv_timer.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HYPERV_TIMER_H
+#define _ASM_X86_HYPERV_TIMER_H
+
+#include <asm/msr.h>
+
+#define hv_get_raw_timer() rdtsc_ordered()
+
+#endif
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..9ad86a7d13f6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,8 @@ enum x86_hypervisor_type {
X86_HYPER_XEN_HVM,
X86_HYPER_KVM,
X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
+ X86_HYPER_BHYVE,
};
#ifdef CONFIG_HYPERVISOR_GUEST
@@ -52,8 +54,21 @@ struct hypervisor_x86 {
/* runtime callbacks */
struct x86_hyper_runtime runtime;
+
+ /* ignore nopv parameter */
+ bool ignore_nopv;
};
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
+extern const struct hypervisor_x86 x86_hyper_bhyve;
+extern struct hypervisor_x86 x86_hyper_xen_hvm;
+
+extern bool nopv;
extern enum x86_hypervisor_type x86_hyper_type;
extern void init_hypervisor_platform(void);
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 89789e8c80f6..c715097e92fd 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -19,6 +19,8 @@ extern unsigned int cached_irq_mask;
#define PIC_MASTER_OCW3 PIC_MASTER_ISR
#define PIC_SLAVE_CMD 0xa0
#define PIC_SLAVE_IMR 0xa1
+#define PIC_ELCR1 0x4d0
+#define PIC_ELCR2 0x4d1
/* i8259A PIC related value */
#define PIC_CASCADE_IR 2
@@ -67,6 +69,8 @@ struct legacy_pic {
void (*make_irq)(unsigned int irq);
};
+void legacy_pic_pcat_compat(void);
+
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 2c5f7861d373..9d69f3f8dbab 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_IA32_H
#define _ASM_X86_IA32_H
-
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
@@ -57,17 +56,37 @@ struct stat64 {
unsigned long long st_ino;
} __attribute__((packed));
-#define IA32_STACK_TOP IA32_PAGE_OFFSET
+extern bool __ia32_enabled;
+
+static __always_inline bool ia32_enabled(void)
+{
+ return __ia32_enabled;
+}
+
+static inline void ia32_disable(void)
+{
+ __ia32_enabled = false;
+}
+
+#else /* !CONFIG_IA32_EMULATION */
-#ifdef __KERNEL__
-struct linux_binprm;
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
- unsigned long stack_top, int exec_stack);
-struct mm_struct;
-extern void ia32_pick_mmap_layout(struct mm_struct *mm);
+static __always_inline bool ia32_enabled(void)
+{
+ return IS_ENABLED(CONFIG_X86_32);
+}
+
+static inline void ia32_disable(void) {}
#endif
-#endif /* !CONFIG_IA32_SUPPORT */
+static inline bool ia32_enabled_verbose(void)
+{
+ bool enabled = ia32_enabled();
+
+ if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled)
+ pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n");
+
+ return enabled;
+}
#endif /* _ASM_X86_IA32_H */
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h
deleted file mode 100644
index aa065c94ccf5..000000000000
--- a/arch/x86/include/asm/ia32_unistd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IA32_UNISTD_H
-#define _ASM_X86_IA32_UNISTD_H
-
-/*
- * This file contains the system call numbers of the ia32 compat ABI,
- * this is for the kernel only.
- */
-#define __SYSCALL_ia32_NR(x) (x)
-#include <asm/unistd_32_ia32.h>
-
-#endif /* _ASM_X86_IA32_UNISTD_H */
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
new file mode 100644
index 000000000000..5e45d6424722
--- /dev/null
+++ b/arch/x86/include/asm/ibt.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IBT_H
+#define _ASM_X86_IBT_H
+
+#include <linux/types.h>
+
+/*
+ * The rules for enabling IBT are:
+ *
+ * - CC_HAS_IBT: the toolchain supports it
+ * - X86_KERNEL_IBT: it is selected in Kconfig
+ * - !__DISABLE_EXPORTS: this is regular kernel code
+ *
+ * Esp. that latter one is a bit non-obvious, but some code like compressed,
+ * purgatory, realmode etc.. is built with custom CFLAGS that do not include
+ * -fcf-protection=branch and things will go *bang*.
+ *
+ * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0.
+ */
+#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS)
+
+#define HAS_KERNEL_IBT 1
+
+#ifndef __ASSEMBLER__
+
+#ifdef CONFIG_X86_64
+#define ASM_ENDBR "endbr64\n\t"
+#else
+#define ASM_ENDBR "endbr32\n\t"
+#endif
+
+#define __noendbr __attribute__((nocf_check))
+
+/*
+ * Create a dummy function pointer reference to prevent objtool from marking
+ * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by
+ * apply_seal_endbr()).
+ */
+#define IBT_NOSEAL(fname) \
+ ".pushsection .discard.ibt_endbr_noseal\n\t" \
+ _ASM_PTR fname "\n\t" \
+ ".popsection\n\t"
+
+static __always_inline __attribute_const__ u32 gen_endbr(void)
+{
+ u32 endbr;
+
+ /*
+ * Generate ENDBR64 in a way that is sure to not result in
+ * an ENDBR64 instruction as immediate.
+ */
+ asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t"
+ "not %[endbr]\n\t"
+ : [endbr] "=&r" (endbr) );
+
+ return endbr;
+}
+
+static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
+{
+ /*
+ * 4 byte NOP that isn't NOP4, such that it will be unique to (former)
+ * ENDBR sites. Additionally it carries UDB as immediate.
+ */
+ return 0xd6401f0f; /* nopl -42(%rax) */
+}
+
+static inline bool __is_endbr(u32 val)
+{
+ if (val == gen_endbr_poison())
+ return true;
+
+ val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
+ return val == gen_endbr();
+}
+
+extern __noendbr bool is_endbr(u32 *val);
+extern __noendbr u64 ibt_save(bool disable);
+extern __noendbr void ibt_restore(u64 save);
+
+#else /* __ASSEMBLER__ */
+
+#ifdef CONFIG_X86_64
+#define ENDBR endbr64
+#else
+#define ENDBR endbr32
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#else /* !IBT */
+
+#define HAS_KERNEL_IBT 0
+
+#ifndef __ASSEMBLER__
+
+#define ASM_ENDBR
+#define IBT_NOSEAL(name)
+
+#define __noendbr
+
+static inline bool is_endbr(u32 *val) { return false; }
+
+static inline u64 ibt_save(bool disable) { return 0; }
+static inline void ibt_restore(u64 save) { }
+
+#else /* __ASSEMBLER__ */
+
+#define ENDBR
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
+#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT)
+
+#endif /* _ASM_X86_IBT_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
new file mode 100644
index 000000000000..3218770670d3
--- /dev/null
+++ b/arch/x86/include/asm/idtentry.h
@@ -0,0 +1,775 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IDTENTRY_H
+#define _ASM_X86_IDTENTRY_H
+
+/* Interrupts/Exceptions */
+#include <asm/trapnr.h>
+
+#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT))
+
+#ifndef __ASSEMBLER__
+#include <linux/entry-common.h>
+#include <linux/hardirq.h>
+
+#include <asm/irq_stack.h>
+
+typedef void (*idtentry_t)(struct pt_regs *regs);
+
+/**
+ * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
+ * No error code pushed by hardware
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares four functions:
+ * - The ASM entry point: asm_##func
+ * - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the FRED event dispatcher (maybe unused)
+ * - The C handler called from the ASM entry point
+ *
+ * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it
+ * declares the entry points for usage in C code. There is an ASM variant
+ * as well which is used to emit the entry stubs in entry_32/64.S.
+ */
+#define DECLARE_IDTENTRY(vector, func) \
+ asmlinkage void asm_##func(void); \
+ asmlinkage void xen_asm_##func(void); \
+ void fred_##func(struct pt_regs *regs); \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_IDTENTRY - Emit code for simple IDT entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code with interrupts disabled.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * irqentry_enter() contains common code which has to be invoked before
+ * arbitrary code in the body. irqentry_exit() contains common code
+ * which has to run before returning to the low level assembly code.
+ */
+#define DEFINE_IDTENTRY(func) \
+static __always_inline void __##func(struct pt_regs *regs); \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ \
+ instrumentation_begin(); \
+ __##func (regs); \
+ instrumentation_end(); \
+ irqentry_exit(regs, state); \
+} \
+ \
+static __always_inline void __##func(struct pt_regs *regs)
+
+/* Special case for 32bit IRET 'trap' */
+#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY
+#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY
+
+/**
+ * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points
+ * Error code pushed by hardware
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares three functions:
+ * - The ASM entry point: asm_##func
+ * - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the ASM entry point
+ *
+ * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the
+ * C-handler.
+ */
+#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
+ asmlinkage void asm_##func(void); \
+ asmlinkage void xen_asm_##func(void); \
+ __visible void func(struct pt_regs *regs, unsigned long error_code)
+
+/**
+ * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points
+ * Error code pushed by hardware
+ * @func: Function name of the entry point
+ *
+ * Same as DEFINE_IDTENTRY, but has an extra error_code argument
+ */
+#define DEFINE_IDTENTRY_ERRORCODE(func) \
+static __always_inline void __##func(struct pt_regs *regs, \
+ unsigned long error_code); \
+ \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ \
+ instrumentation_begin(); \
+ __##func (regs, error_code); \
+ instrumentation_end(); \
+ irqentry_exit(regs, state); \
+} \
+ \
+static __always_inline void __##func(struct pt_regs *regs, \
+ unsigned long error_code)
+
+/**
+ * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points
+ * No error code pushed by hardware
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY().
+ */
+#define DECLARE_IDTENTRY_RAW(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code with interrupts disabled.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * Contrary to DEFINE_IDTENTRY() this does not invoke the
+ * idtentry_enter/exit() helpers before and after the body invocation. This
+ * needs to be done in the body itself if applicable. Use if extra work
+ * is required before the enter/exit() helpers are invoked.
+ */
+#define DEFINE_IDTENTRY_RAW(func) \
+__visible noinstr void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from the FRED event dispatcher with interrupts disabled.
+ *
+ * See @DEFINE_IDTENTRY_RAW for further details.
+ */
+#define DEFINE_FREDENTRY_RAW(func) \
+noinstr void fred_##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points
+ * Error code pushed by hardware
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_ERRORCODE()
+ */
+#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code with interrupts disabled.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the
+ * irqentry_enter/exit() helpers before and after the body invocation. This
+ * needs to be done in the body itself if applicable. Use if extra work
+ * is required before the enter/exit() helpers are invoked.
+ */
+#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \
+__visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
+
+/**
+ * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry
+ * points (common/spurious)
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_ERRORCODE()
+ */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points
+ * @func: Function name of the entry point
+ *
+ * The vector number is pushed by the low level entry stub and handed
+ * to the function as error_code argument which needs to be truncated
+ * to an u8 because the push is sign extending.
+ *
+ * irq_enter/exit_rcu() are invoked before the function body and the
+ * KVM L1D flush request is set. Stack switching to the interrupt stack
+ * has to be done in the function body if necessary.
+ */
+#define DEFINE_IDTENTRY_IRQ(func) \
+static void __##func(struct pt_regs *regs, u32 vector); \
+ \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ u32 vector = (u32)(u8)error_code; \
+ \
+ kvm_set_cpu_l1tf_flush_l1d(); \
+ instrumentation_begin(); \
+ run_irq_on_irqstack_cond(__##func, regs, vector); \
+ instrumentation_end(); \
+ irqentry_exit(regs, state); \
+} \
+ \
+static noinline void __##func(struct pt_regs *regs, u32 vector)
+
+/**
+ * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares three functions:
+ * - The ASM entry point: asm_##func
+ * - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the ASM entry point
+ *
+ * Maps to DECLARE_IDTENTRY().
+ */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points
+ * @func: Function name of the entry point
+ *
+ * irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the
+ * function body. KVM L1D flush request is set.
+ *
+ * Runs the function on the interrupt stack if the entry hit kernel mode
+ */
+#define DEFINE_IDTENTRY_SYSVEC(func) \
+static void __##func(struct pt_regs *regs); \
+ \
+static __always_inline void instr_##func(struct pt_regs *regs) \
+{ \
+ run_sysvec_on_irqstack_cond(__##func, regs); \
+} \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ \
+ kvm_set_cpu_l1tf_flush_l1d(); \
+ instrumentation_begin(); \
+ instr_##func (regs); \
+ instrumentation_end(); \
+ irqentry_exit(regs, state); \
+} \
+ \
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
+static noinline void __##func(struct pt_regs *regs)
+
+/**
+ * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT
+ * entry points
+ * @func: Function name of the entry point
+ *
+ * Runs the function on the interrupted stack. No switch to IRQ stack and
+ * only the minimal __irq_enter/exit() handling.
+ *
+ * Only use for 'empty' vectors like reschedule IPI and KVM posted
+ * interrupt vectors.
+ */
+#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \
+static __always_inline void __##func(struct pt_regs *regs); \
+ \
+static __always_inline void instr_##func(struct pt_regs *regs) \
+{ \
+ __irq_enter_raw(); \
+ __##func (regs); \
+ __irq_exit_raw(); \
+} \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ irqentry_state_t state = irqentry_enter(regs); \
+ \
+ kvm_set_cpu_l1tf_flush_l1d(); \
+ instrumentation_begin(); \
+ instr_##func (regs); \
+ instrumentation_end(); \
+ irqentry_exit(regs, state); \
+} \
+ \
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
+static __always_inline void __##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares three functions:
+ * - The ASM entry point: asm_##func
+ * - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the ASM entry point
+ *
+ * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit
+ * difference
+ */
+#define DECLARE_IDTENTRY_XENCB(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#ifdef CONFIG_X86_64
+/**
+ * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler
+ * which is called from the ASM entry point on user mode entry
+ */
+#define DECLARE_IDTENTRY_IST(vector, func) \
+ DECLARE_IDTENTRY_RAW(vector, func); \
+ __visible void noist_##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the
+ * safe_stack C handler.
+ */
+#define DECLARE_IDTENTRY_VC(vector, func) \
+ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
+ __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
+ __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
+
+/**
+ * DEFINE_IDTENTRY_IST - Emit code for IST entry points
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW
+ */
+#define DEFINE_IDTENTRY_IST(func) \
+ DEFINE_IDTENTRY_RAW(func)
+
+/**
+ * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which
+ * belong to a IST entry point (MCE, DB)
+ * @func: Function name of the entry point. Must be the same as
+ * the function name of the corresponding IST variant
+ *
+ * Maps to DEFINE_IDTENTRY_RAW().
+ */
+#define DEFINE_IDTENTRY_NOIST(func) \
+ DEFINE_IDTENTRY_RAW(noist_##func)
+
+/**
+ * DECLARE_IDTENTRY_DF - Declare functions for double fault
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DECLARE_IDTENTRY_DF(vector, func) \
+ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_DF - Emit code for double fault
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_DF(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+
+/**
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+ * when raised from kernel mode
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_KERNEL(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
+
+/**
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+ * when raised from user mode
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_USER(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
+
+#else /* CONFIG_X86_64 */
+
+/**
+ * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares two functions:
+ * - The ASM entry point: asm_##func
+ * - The C handler called from the C shim
+ */
+#define DECLARE_IDTENTRY_DF(vector, func) \
+ asmlinkage void asm_##func(void); \
+ __visible void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
+/**
+ * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit
+ * @func: Function name of the entry point
+ *
+ * This is called through the doublefault shim which already provides
+ * cr2 in the address argument.
+ */
+#define DEFINE_IDTENTRY_DF(func) \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
+#endif /* !CONFIG_X86_64 */
+
+/* C-Code mapping */
+#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
+#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW
+
+#ifdef CONFIG_X86_64
+#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW
+
+#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW
+#endif
+
+void idt_install_sysvec(unsigned int n, const void *function);
+void fred_install_sysvec(unsigned int vector, const idtentry_t function);
+
+#define sysvec_install(vector, function) { \
+ if (IS_ENABLED(CONFIG_X86_FRED)) \
+ fred_install_sysvec(vector, function); \
+ if (!cpu_feature_enabled(X86_FEATURE_FRED)) \
+ idt_install_sysvec(vector, asm_##function); \
+}
+
+#else /* !__ASSEMBLER__ */
+
+/*
+ * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
+ */
+#define DECLARE_IDTENTRY(vector, func) \
+ idtentry vector asm_##func func has_error_code=0
+
+#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
+ idtentry vector asm_##func func has_error_code=1
+
+/* Special case for 32bit IRET 'trap'. Do not emit ASM code */
+#define DECLARE_IDTENTRY_SW(vector, func)
+
+#define DECLARE_IDTENTRY_RAW(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/* Entries for common/spurious (device) interrupts */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ idtentry_irq vector func
+
+/* System vector entries */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#ifdef CONFIG_X86_64
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DEBUG(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DF(vector, func) \
+ idtentry_df vector asm_##func func
+
+# define DECLARE_IDTENTRY_XENCB(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+# define DECLARE_IDTENTRY_VC(vector, func) \
+ idtentry_vc vector asm_##func func
+
+#else
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+/* No ASM emitted for DF as this goes through a C shim */
+# define DECLARE_IDTENTRY_DF(vector, func)
+
+/* No ASM emitted for XEN hypervisor callback */
+# define DECLARE_IDTENTRY_XENCB(vector, func)
+
+#endif
+
+/* No ASM code emitted for NMI */
+#define DECLARE_IDTENTRY_NMI(vector, func)
+
+/*
+ * ASM code to emit the common vector entry stubs where each stub is
+ * packed into IDT_ALIGN bytes.
+ *
+ * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
+ * GCC treats the local vector variable as unsigned int and would expand
+ * all vectors above 0x7F to a 5 byte push. The original code did an
+ * adjustment of the vector number to be in the signed byte range to avoid
+ * this. While clever it's mindboggling counterintuitive and requires the
+ * odd conversion back to a real vector number in the C entry points. Using
+ * .byte achieves the same thing and the only fixup needed in the C entry
+ * point is to mask off the bits above bit 7 because the push is sign
+ * extending.
+ */
+ .align IDT_ALIGN
+SYM_CODE_START(irq_entries_start)
+ vector=FIRST_EXTERNAL_VECTOR
+ .rept NR_EXTERNAL_VECTORS
+ UNWIND_HINT_IRET_REGS
+0 :
+ ENDBR
+ .byte 0x6a, vector
+ jmp asm_common_interrupt
+ /* Ensure that the above is IDT_ALIGN bytes max */
+ .fill 0b + IDT_ALIGN - ., 1, 0xcc
+ vector = vector+1
+ .endr
+SYM_CODE_END(irq_entries_start)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align IDT_ALIGN
+SYM_CODE_START(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept NR_SYSTEM_VECTORS
+ UNWIND_HINT_IRET_REGS
+0 :
+ ENDBR
+ .byte 0x6a, vector
+ jmp asm_spurious_interrupt
+ /* Ensure that the above is IDT_ALIGN bytes max */
+ .fill 0b + IDT_ALIGN - ., 1, 0xcc
+ vector = vector+1
+ .endr
+SYM_CODE_END(spurious_entries_start)
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * The actual entry points. Note that DECLARE_IDTENTRY*() serves two
+ * purposes:
+ * - provide the function declarations when included from C-Code
+ * - emit the ASM stubs when included from entry_32/64.S
+ *
+ * This avoids duplicate defines and ensures that everything is consistent.
+ */
+
+/*
+ * Dummy trap number so the low level ASM macro vector number checks do not
+ * match which results in emitting plain IDTENTRY stubs without bells and
+ * whistles.
+ */
+#define X86_TRAP_OTHER 0xFFFF
+
+/* Simple exception entry points. No hardware error code */
+DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error);
+DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow);
+DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds);
+DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available);
+DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun);
+DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug);
+DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error);
+DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error);
+
+/* 32bit software IRET trap. Do not emit ASM code */
+DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error);
+
+/* Simple exception entries with error code pushed by hardware */
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss);
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present);
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment);
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection);
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check);
+
+/* Raw exception entries which need extra work */
+DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
+
+#if defined(CONFIG_IA32_EMULATION)
+DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation);
+#endif
+
+#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_64
+DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check);
+#endif
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);
+#endif
+#endif
+
+/* NMI */
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special entry point for VMX which invokes this on the kernel stack, even for
+ * 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to work
+ * correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as well
+ * to avoid more ifdeffery.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm_vmx);
+#endif
+
+DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
+#endif
+
+/* #DB */
+#ifdef CONFIG_X86_64
+DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug);
+#endif
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug);
+#endif
+
+/* #DF */
+DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
+#endif
+
+/* #CP */
+#ifdef CONFIG_X86_CET
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
+#endif
+
+/* #VC */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
+#endif
+
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
+DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap);
+#endif
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception);
+#endif
+
+/* Device interrupts common/spurious */
+DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt);
+#ifdef CONFIG_X86_LOCAL_APIC
+DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt);
+#endif
+
+/* System vector entry points */
+#ifdef CONFIG_X86_LOCAL_APIC
+DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt);
+DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt);
+DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt);
+DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi);
+#endif
+
+#ifdef CONFIG_SMP
+DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi);
+DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
+DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
+DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
+#else
+# define fred_sysvec_reschedule_ipi NULL
+# define fred_sysvec_reboot NULL
+# define fred_sysvec_call_function_single NULL
+# define fred_sysvec_call_function NULL
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+# ifdef CONFIG_X86_MCE_THRESHOLD
+DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
+# else
+# define fred_sysvec_threshold NULL
+# endif
+
+# ifdef CONFIG_X86_MCE_AMD
+DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error);
+# else
+# define fred_sysvec_deferred_error NULL
+# endif
+
+# ifdef CONFIG_X86_THERMAL_VECTOR
+DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal);
+# else
+# define fred_sysvec_thermal NULL
+# endif
+
+# ifdef CONFIG_IRQ_WORK
+DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
+# else
+# define fred_sysvec_irq_work NULL
+# endif
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi);
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi);
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
+#else
+# define fred_sysvec_kvm_posted_intr_ipi NULL
+# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL
+# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
+#endif
+
+# ifdef CONFIG_X86_POSTED_MSI
+DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification);
+#else
+# define fred_sysvec_posted_msi_notification NULL
+# endif
+
+#if IS_ENABLED(CONFIG_HYPERV)
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+#endif
+
+#if IS_ENABLED(CONFIG_ACRN_GUEST)
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
+#endif
+
+#ifdef CONFIG_XEN_PVHVM
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
+#endif
+
+#ifdef CONFIG_KVM_GUEST
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);
+#endif
+
+#undef X86_TRAP_OTHER
+
+#endif
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index ebea2c9d2cdc..0d1dbf235679 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* imr.h: Isolated Memory Region API
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _IMR_H
#define _IMR_H
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 1c78580e58be..1b3060a3425c 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -1,26 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INAT_H
#define _ASM_X86_INAT_H
/*
* x86 instruction attributes
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
-#include <asm/inat_types.h>
+#include <asm/inat_types.h> /* __ignore_sync_check__ */
/*
* Internal bits. Don't use bitmasks directly, because these bits are
@@ -49,6 +35,10 @@
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
#define INAT_PFX_EVEX 15 /* EVEX prefix */
+/* x86-64 REX2 prefix */
+#define INAT_PFX_REX2 16 /* 0xD5 */
+/* AMD XOP prefix */
+#define INAT_PFX_XOP 17 /* 0x8F */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -64,7 +54,7 @@
/* Legacy prefix */
#define INAT_PFX_OFFS 0
-#define INAT_PFX_BITS 4
+#define INAT_PFX_BITS 5
#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
/* Escape opcodes */
@@ -89,8 +79,13 @@
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_XOPOK INAT_VEXOK
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
+#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
+#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
+#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -119,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
+extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode,
+ insn_byte_t map_select);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
@@ -142,6 +139,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
}
+static inline int inat_is_rex2_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX2;
+}
+
static inline int inat_last_prefix_id(insn_attr_t attr)
{
if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
@@ -167,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
+static inline int inat_is_xop_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_XOP;
+}
+
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
@@ -232,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
return attr & INAT_VEXOK;
}
+static inline int inat_accept_xop(insn_attr_t attr)
+{
+ return attr & INAT_XOPOK;
+}
+
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
@@ -241,4 +253,14 @@ static inline int inat_must_evex(insn_attr_t attr)
{
return attr & INAT_EVEXONLY;
}
+
+static inline int inat_evex_scalable(insn_attr_t attr)
+{
+ return attr & INAT_EVEX_SCALABLE;
+}
+
+static inline int inat_is_invalid64(insn_attr_t attr)
+{
+ return attr & INAT_INV64;
+}
#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
index cb3c20ce39cf..b047efa9ddc2 100644
--- a/arch/x86/include/asm/inat_types.h
+++ b/arch/x86/include/asm/inat_types.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INAT_TYPES_H
#define _ASM_X86_INAT_TYPES_H
/*
* x86 instruction attributes
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
/* Instruction attributes */
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 5f1d3c421f68..01ccdd168df0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,6 +4,7 @@
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void (*free_pgt_page)(void *, void *); /* free buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
@@ -14,4 +15,6 @@ struct x86_mapping_info {
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend);
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
+
#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 2b6ccf2c49f1..4733e9064ee5 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -15,9 +15,35 @@
#define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf)
#define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4))
+int pt_regs_offset(struct pt_regs *regs, int regno);
+
+bool insn_has_rep_prefix(struct insn *insn);
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
+int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
+unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
+int insn_fetch_from_user(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
+bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size);
+
+enum insn_mmio_type {
+ INSN_MMIO_DECODE_FAILED,
+ INSN_MMIO_WRITE,
+ INSN_MMIO_WRITE_IMM,
+ INSN_MMIO_READ,
+ INSN_MMIO_READ_ZERO_EXTEND,
+ INSN_MMIO_READ_SIGN_EXTEND,
+ INSN_MMIO_MOVS,
+};
+
+enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+
+bool insn_is_nop(struct insn *insn);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index c2c01f84df75..846d21c1a7f8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -1,27 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INSN_H
#define _ASM_X86_INSN_H
/*
* x86 instruction analysis
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2009
*/
+#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
-#include <asm/inat.h>
+#include <asm/inat.h> /* __ignore_sync_check__ */
+
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
struct insn_field {
union {
@@ -33,13 +23,58 @@ struct insn_field {
unsigned char nbytes;
};
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+}
+
+#else
+
+struct insn_field {
+ insn_value_t value;
+ union {
+ insn_value_t little;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+ p->value = v;
+ p->little = __cpu_to_le32(v);
+ p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+ insn_byte_t v)
+{
+ p->bytes[n] = v;
+ p->value = __le32_to_cpu(p->little);
+}
+#endif
+
struct insn {
struct insn_field prefixes; /*
* Prefixes
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
- struct insn_field vex_prefix; /* VEX prefix */
+ union {
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field xop_prefix; /* XOP prefix */
+ };
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
@@ -58,6 +93,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
+ int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@@ -79,10 +115,15 @@ struct insn {
#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
#define X86_SIB_BASE(sib) ((sib) & 0x07)
-#define X86_REX_W(rex) ((rex) & 8)
-#define X86_REX_R(rex) ((rex) & 4)
-#define X86_REX_X(rex) ((rex) & 2)
-#define X86_REX_B(rex) ((rex) & 1)
+#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */
+#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */
+#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */
+#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */
+
+#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */
+#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */
+#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */
+#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */
/* VEX bit flags */
#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
@@ -91,21 +132,44 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+/* XOP bit fields */
+#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */
+#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */
+#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */
+#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */
+#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */
+#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */
+#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */
+#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */
+#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */
+#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+ INSN_MODE_32,
+ INSN_MODE_64,
+ /* Mode is determined by the current kernel build. */
+ INSN_MODE_KERN,
+ INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
@@ -116,18 +180,19 @@ static inline void insn_get_attribute(struct insn *insn)
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
- const void *kaddr, int buf_len)
+static inline int insn_is_rex2(struct insn *insn)
{
-#ifdef CONFIG_X86_64
- insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
- insn_init(insn, kaddr, buf_len, 0);
-#endif
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return insn->rex_prefix.nbytes == 2;
}
-static inline int insn_is_avx(struct insn *insn)
+static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
+{
+ return X86_REX2_M(insn->rex_prefix.bytes[1]);
+}
+
+static inline int insn_is_avx_or_xop(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
@@ -141,11 +206,25 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
+/* If we already know this is AVX/XOP encoded */
+static inline int avx_insn_is_xop(struct insn *insn)
+{
+ insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]);
+
+ return inat_is_xop_prefix(attr);
+}
+
+static inline int insn_is_xop(struct insn *insn)
+{
+ if (!insn_is_avx_or_xop(insn))
+ return 0;
+
+ return avx_insn_is_xop(insn);
+}
+
+static inline int insn_has_emulate_prefix(struct insn *insn)
{
- return insn->opcode.got && insn->modrm.got && insn->sib.got &&
- insn->displacement.got && insn->immediate.got;
+ return !!insn->emulate_prefix_size;
}
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
@@ -166,11 +245,33 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes < 3)
+ return 0;
+ return X86_VEX_W(insn->vex_prefix.bytes[2]);
+}
+
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+ if (insn->xop_prefix.nbytes < 3) /* XOP is 3 bytes */
+ return 0;
+ return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+ return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
- if (insn_is_avx(insn))
+ if (insn_is_avx_or_xop(insn)) {
+ if (avx_insn_is_xop(insn))
+ return insn_xop_p_bits(insn);
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+ }
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -208,6 +309,20 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, prefix) \
+ for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796da07f8..e48a00b3311d 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -6,13 +6,12 @@
#ifndef X86_ASM_INST_H
#define X86_ASM_INST_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define REG_NUM_INVALID 100
#define REG_TYPE_R32 0
#define REG_TYPE_R64 1
-#define REG_TYPE_XMM 2
#define REG_TYPE_INVALID 100
.macro R32_NUM opd r32
@@ -123,77 +122,18 @@
#endif
.endm
- .macro XMM_NUM opd xmm
- \opd = REG_NUM_INVALID
- .ifc \xmm,%xmm0
- \opd = 0
- .endif
- .ifc \xmm,%xmm1
- \opd = 1
- .endif
- .ifc \xmm,%xmm2
- \opd = 2
- .endif
- .ifc \xmm,%xmm3
- \opd = 3
- .endif
- .ifc \xmm,%xmm4
- \opd = 4
- .endif
- .ifc \xmm,%xmm5
- \opd = 5
- .endif
- .ifc \xmm,%xmm6
- \opd = 6
- .endif
- .ifc \xmm,%xmm7
- \opd = 7
- .endif
- .ifc \xmm,%xmm8
- \opd = 8
- .endif
- .ifc \xmm,%xmm9
- \opd = 9
- .endif
- .ifc \xmm,%xmm10
- \opd = 10
- .endif
- .ifc \xmm,%xmm11
- \opd = 11
- .endif
- .ifc \xmm,%xmm12
- \opd = 12
- .endif
- .ifc \xmm,%xmm13
- \opd = 13
- .endif
- .ifc \xmm,%xmm14
- \opd = 14
- .endif
- .ifc \xmm,%xmm15
- \opd = 15
- .endif
- .endm
-
.macro REG_TYPE type reg
R32_NUM reg_type_r32 \reg
R64_NUM reg_type_r64 \reg
- XMM_NUM reg_type_xmm \reg
.if reg_type_r64 <> REG_NUM_INVALID
\type = REG_TYPE_R64
.elseif reg_type_r32 <> REG_NUM_INVALID
\type = REG_TYPE_R32
- .elseif reg_type_xmm <> REG_NUM_INVALID
- \type = REG_TYPE_XMM
.else
\type = REG_TYPE_INVALID
.endif
.endm
- .macro PFX_OPD_SIZE
- .byte 0x66
- .endm
-
.macro PFX_REX opd1 opd2 W=0
.if ((\opd1 | \opd2) & 8) || \W
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
@@ -203,109 +143,6 @@
.macro MODRM mod opd1 opd2
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
.endm
-
- .macro PSHUFB_XMM xmm1 xmm2
- XMM_NUM pshufb_opd1 \xmm1
- XMM_NUM pshufb_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX pshufb_opd1 pshufb_opd2
- .byte 0x0f, 0x38, 0x00
- MODRM 0xc0 pshufb_opd1 pshufb_opd2
- .endm
-
- .macro PCLMULQDQ imm8 xmm1 xmm2
- XMM_NUM clmul_opd1 \xmm1
- XMM_NUM clmul_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX clmul_opd1 clmul_opd2
- .byte 0x0f, 0x3a, 0x44
- MODRM 0xc0 clmul_opd1 clmul_opd2
- .byte \imm8
- .endm
-
- .macro PEXTRD imm8 xmm gpr
- R32_NUM extrd_opd1 \gpr
- XMM_NUM extrd_opd2 \xmm
- PFX_OPD_SIZE
- PFX_REX extrd_opd1 extrd_opd2
- .byte 0x0f, 0x3a, 0x16
- MODRM 0xc0 extrd_opd1 extrd_opd2
- .byte \imm8
- .endm
-
- .macro AESKEYGENASSIST rcon xmm1 xmm2
- XMM_NUM aeskeygen_opd1 \xmm1
- XMM_NUM aeskeygen_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aeskeygen_opd1 aeskeygen_opd2
- .byte 0x0f, 0x3a, 0xdf
- MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
- .byte \rcon
- .endm
-
- .macro AESIMC xmm1 xmm2
- XMM_NUM aesimc_opd1 \xmm1
- XMM_NUM aesimc_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesimc_opd1 aesimc_opd2
- .byte 0x0f, 0x38, 0xdb
- MODRM 0xc0 aesimc_opd1 aesimc_opd2
- .endm
-
- .macro AESENC xmm1 xmm2
- XMM_NUM aesenc_opd1 \xmm1
- XMM_NUM aesenc_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesenc_opd1 aesenc_opd2
- .byte 0x0f, 0x38, 0xdc
- MODRM 0xc0 aesenc_opd1 aesenc_opd2
- .endm
-
- .macro AESENCLAST xmm1 xmm2
- XMM_NUM aesenclast_opd1 \xmm1
- XMM_NUM aesenclast_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesenclast_opd1 aesenclast_opd2
- .byte 0x0f, 0x38, 0xdd
- MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
- .endm
-
- .macro AESDEC xmm1 xmm2
- XMM_NUM aesdec_opd1 \xmm1
- XMM_NUM aesdec_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesdec_opd1 aesdec_opd2
- .byte 0x0f, 0x38, 0xde
- MODRM 0xc0 aesdec_opd1 aesdec_opd2
- .endm
-
- .macro AESDECLAST xmm1 xmm2
- XMM_NUM aesdeclast_opd1 \xmm1
- XMM_NUM aesdeclast_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesdeclast_opd1 aesdeclast_opd2
- .byte 0x0f, 0x38, 0xdf
- MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
- .endm
-
- .macro MOVQ_R64_XMM opd1 opd2
- REG_TYPE movq_r64_xmm_opd1_type \opd1
- .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
- XMM_NUM movq_r64_xmm_opd1 \opd1
- R64_NUM movq_r64_xmm_opd2 \opd2
- .else
- R64_NUM movq_r64_xmm_opd1 \opd1
- XMM_NUM movq_r64_xmm_opd2 \opd2
- .endif
- PFX_OPD_SIZE
- PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
- .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
- .byte 0x0f, 0x7e
- .else
- .byte 0x0f, 0x6e
- .endif
- MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
- .endm
#endif
#endif
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 0dd6b0f4000e..950bfd006905 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -5,89 +5,223 @@
/*
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
- * The "_X" parts are generally the EP and EX Xeons, or the
- * "Extreme" ones, like Broadwell-E.
- *
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
+ *
+ * The defined symbol names have the following form:
+ * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * where:
+ * OPTFAMILY Describes the family of CPUs that this belongs to. Default
+ * is assumed to be "_CORE" (and should be omitted). Other values
+ * currently in use are _ATOM and _XEON_PHI
+ * MICROARCH Is the code name for the micro-architecture for this core.
+ * N.B. Not the platform name.
+ * OPTDIFF If needed, a short string to differentiate by market segment.
+ *
+ * Common OPTDIFFs:
+ *
+ * - regular client parts
+ * _L - regular mobile parts
+ * _G - parts with extra graphics on
+ * _X - regular server parts
+ * _D - micro server parts
+ * _N,_P - other mobile parts
+ * _H - premium mobile parts
+ * _S - other client parts
+ *
+ * Historical OPTDIFFs:
+ *
+ * _EP - 2 socket server parts
+ * _EX - 4+ socket server parts
+ *
+ * The #define line may optionally include a comment including platform or core
+ * names. An exception is made for skylake/kabylake where steppings seem to have gotten
+ * their own names :-(
*/
-#define INTEL_FAM6_CORE_YONAH 0x0E
+#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model)
+
+/* Wildcard match so X86_MATCH_VFM(ANY) works */
+#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
+
+/* Family 5 */
+#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */
+#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */
+#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */
+#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+
+/* Family 6, 18, 19 */
+#define INTEL_PENTIUM_PRO IFM(6, 0x01)
+#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03)
+#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05)
+#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B)
+#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D)
+
+#define INTEL_CORE_YONAH IFM(6, 0x0E)
+
+#define INTEL_CORE2_MEROM IFM(6, 0x0F)
+#define INTEL_CORE2_MEROM_L IFM(6, 0x16)
+#define INTEL_CORE2_PENRYN IFM(6, 0x17)
+#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D)
+
+#define INTEL_NEHALEM IFM(6, 0x1E)
+#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */
+#define INTEL_NEHALEM_EP IFM(6, 0x1A)
+#define INTEL_NEHALEM_EX IFM(6, 0x2E)
+
+#define INTEL_WESTMERE IFM(6, 0x25)
+#define INTEL_WESTMERE_EP IFM(6, 0x2C)
+#define INTEL_WESTMERE_EX IFM(6, 0x2F)
+
+#define INTEL_SANDYBRIDGE IFM(6, 0x2A)
+#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D)
+#define INTEL_IVYBRIDGE IFM(6, 0x3A)
+#define INTEL_IVYBRIDGE_X IFM(6, 0x3E)
+
+#define INTEL_HASWELL IFM(6, 0x3C)
+#define INTEL_HASWELL_X IFM(6, 0x3F)
+#define INTEL_HASWELL_L IFM(6, 0x45)
+#define INTEL_HASWELL_G IFM(6, 0x46)
+
+#define INTEL_BROADWELL IFM(6, 0x3D)
+#define INTEL_BROADWELL_G IFM(6, 0x47)
+#define INTEL_BROADWELL_X IFM(6, 0x4F)
+#define INTEL_BROADWELL_D IFM(6, 0x56)
+
+#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */
+#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */
+#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */
+/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
+/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
+
+#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */
+/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
+/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
+/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
+
+#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */
+/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
+
+#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */
+#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */
+
+#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */
-#define INTEL_FAM6_CORE2_MEROM 0x0F
-#define INTEL_FAM6_CORE2_MEROM_L 0x16
-#define INTEL_FAM6_CORE2_PENRYN 0x17
-#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
+#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */
+#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */
+#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */
+#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */
+#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */
-#define INTEL_FAM6_NEHALEM 0x1E
-#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
-#define INTEL_FAM6_NEHALEM_EP 0x1A
-#define INTEL_FAM6_NEHALEM_EX 0x2E
+#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */
-#define INTEL_FAM6_WESTMERE 0x25
-#define INTEL_FAM6_WESTMERE_EP 0x2C
-#define INTEL_FAM6_WESTMERE_EX 0x2F
+#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */
+#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */
-#define INTEL_FAM6_SANDYBRIDGE 0x2A
-#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
-#define INTEL_FAM6_IVYBRIDGE 0x3A
-#define INTEL_FAM6_IVYBRIDGE_X 0x3E
+#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
-#define INTEL_FAM6_HASWELL_CORE 0x3C
-#define INTEL_FAM6_HASWELL_X 0x3F
-#define INTEL_FAM6_HASWELL_ULT 0x45
-#define INTEL_FAM6_HASWELL_GT3E 0x46
+#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */
-#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_GT3E 0x47
-#define INTEL_FAM6_BROADWELL_X 0x4F
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */
+#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
-#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
-#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
-#define INTEL_FAM6_SKYLAKE_X 0x55
-#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
-#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
+#define INTEL_DIAMONDRAPIDS_X IFM(19, 0x01) /* Panther Cove */
-#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */
-/* "Small Core" Processors (Atom) */
+/* "Hybrid" Processors (P-Core/E-Core) */
-#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
-#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
-#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
-#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
-#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */
+#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
-#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
+#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */
+#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
+#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
-#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */
+#define INTEL_METEORLAKE_L IFM(6, 0xAA)
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
-#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */
+#define INTEL_ARROWLAKE IFM(6, 0xC6)
+#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
+
+#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */
+
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */
+
+#define INTEL_WILDCATLAKE_L IFM(6, 0xD5)
+
+#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */
+#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */
+
+/* "Small Core" Processors (Atom/E-Core) */
+
+#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
+#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */
+
+#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */
+#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */
+#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */
+
+#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
+#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
+#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
+#define INTEL_ATOM_SILVERMONT_MID2 IFM(6, 0x5A) /* Anniedale */
+
+#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
+#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
+
+#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
+#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */
+
+/* Note: the micro-architecture is "Goldmont Plus" */
+#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */
+
+#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */
+#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */
+#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */
+
+#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */
+
+#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */
+#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */
+
+#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */
/* Xeon Phi */
-#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
-
-/* Useful macros */
-#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
-{ \
- .vendor = X86_VENDOR_INTEL, \
- .family = _family, \
- .model = _model, \
- .feature = X86_FEATURE_ANY, \
- .driver_data = (kernel_ulong_t)&_driver_data \
-}
-
-#define INTEL_CPU_FAM6(_model, _driver_data) \
- INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
+#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
+#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
+
+/* Notational marker denoting the last Family 6 model */
+#define INTEL_FAM6_LAST IFM(6, 0xFF)
+
+/* Family 15 - NetBurst */
+#define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */
+#define INTEL_P4_PRESCOTT IFM(15, 0x03)
+#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04)
+#define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */
+
+/*
+ * Intel CPU core types
+ *
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits 31-24 indicates its core type (Core or Atom)
+ * and Bits [23:0] indicates the native model ID of the core.
+ * Core type and native model ID are defined in below enumerations.
+ */
+enum intel_cpu_type {
+ INTEL_CPU_TYPE_UNKNOWN,
+ INTEL_CPU_TYPE_ATOM = 0x20,
+ INTEL_CPU_TYPE_CORE = 0x40,
+};
+
+enum intel_native_id {
+ INTEL_ATOM_CMT_NATIVE_ID = 0x2, /* Crestmont */
+ INTEL_ATOM_SKT_NATIVE_ID = 0x3, /* Skymont */
+};
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 52f815a80539..a3abdcd89a32 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -1,19 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * intel-mid.h: Intel MID specific setup code
+ * Intel MID specific setup code
*
- * (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
+ * (C) Copyright 2009, 2021 Intel Corporation
*/
#ifndef _ASM_X86_INTEL_MID_H
#define _ASM_X86_INTEL_MID_H
-#include <linux/sfi.h>
#include <linux/pci.h>
-#include <linux/platform_device.h>
extern int intel_mid_pci_init(void);
extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
@@ -26,109 +20,4 @@ extern void intel_mid_pwr_power_off(void);
extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev);
-extern int get_gpio_by_name(const char *name);
-extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
-extern int __init sfi_parse_mtmr(struct sfi_table_header *table);
-extern int sfi_mrtc_num;
-extern struct sfi_rtc_table_entry sfi_mrtc_array[];
-
-/*
- * Here defines the array of devices platform data that IAFW would export
- * through SFI "DEVS" table, we use name and type to match the device and
- * its platform data.
- */
-struct devs_id {
- char name[SFI_NAME_LEN + 1];
- u8 type;
- u8 delay;
- u8 msic;
- void *(*get_platform_data)(void *info);
-};
-
-#define sfi_device(i) \
- static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \
- __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i
-
-/**
-* struct mid_sd_board_info - template for SD device creation
-* @name: identifies the driver
-* @bus_num: board-specific identifier for a given SD controller
-* @max_clk: the maximum frequency device supports
-* @platform_data: the particular data stored there is driver-specific
-*/
-struct mid_sd_board_info {
- char name[SFI_NAME_LEN];
- int bus_num;
- unsigned short addr;
- u32 max_clk;
- void *platform_data;
-};
-
-/*
- * Medfield is the follow-up of Moorestown, it combines two chip solution into
- * one. Other than that it also added always-on and constant tsc and lapic
- * timers. Medfield is the platform name, and the chip name is called Penwell
- * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be
- * identified via MSRs.
- */
-enum intel_mid_cpu_type {
- /* 1 was Moorestown */
- INTEL_MID_CPU_CHIP_PENWELL = 2,
- INTEL_MID_CPU_CHIP_CLOVERVIEW,
- INTEL_MID_CPU_CHIP_TANGIER,
-};
-
-extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
-
-#ifdef CONFIG_X86_INTEL_MID
-
-static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
-{
- return __intel_mid_cpu_chip;
-}
-
-static inline bool intel_mid_has_msic(void)
-{
- return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
-}
-
-#else /* !CONFIG_X86_INTEL_MID */
-
-#define intel_mid_identify_cpu() 0
-#define intel_mid_has_msic() 0
-
-#endif /* !CONFIG_X86_INTEL_MID */
-
-enum intel_mid_timer_options {
- INTEL_MID_TIMER_DEFAULT,
- INTEL_MID_TIMER_APBT_ONLY,
- INTEL_MID_TIMER_LAPIC_APBT,
-};
-
-extern enum intel_mid_timer_options intel_mid_timer_options;
-
-/* Bus Select SoC Fuse value */
-#define BSEL_SOC_FUSE_MASK 0x7
-/* FSB 133MHz */
-#define BSEL_SOC_FUSE_001 0x1
-/* FSB 100MHz */
-#define BSEL_SOC_FUSE_101 0x5
-/* FSB 83MHz */
-#define BSEL_SOC_FUSE_111 0x7
-
-#define SFI_MTMR_MAX_NUM 8
-#define SFI_MRTC_MAX 8
-
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
-/* VRTC timer */
-#define MRST_VRTC_MAP_SZ 1024
-/* #define MRST_VRTC_PGOFFSET 0xc00 */
-
-extern void intel_mid_rtc_init(void);
-
-/* The offset for the mapping of global gpio pin to irq */
-#define INTEL_MID_IRQ_OFFSET 0x100
-
#endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index ae26df1c2789..695f87efbeb8 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -4,11 +4,21 @@
#include <linux/percpu-defs.h>
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT 4
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT)
+
+/*
+ * The largest PEBS record could consume a page, ensure
+ * a record at least can be written after triggering PMI.
+ */
+#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE 1
/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
-#define MAX_FIXED_PEBS_EVENTS 3
+#define MAX_PEBS_EVENTS_FMT4 8
+#define MAX_PEBS_EVENTS 32
+#define MAX_PEBS_EVENTS_MASK GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
+#define MAX_FIXED_PEBS_EVENTS 16
/*
* A debug store configuration.
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
deleted file mode 100644
index 0b44b1abe4d9..000000000000
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _INTEL_MID_VRTC_H
-#define _INTEL_MID_VRTC_H
-
-extern unsigned char vrtc_cmos_read(unsigned char reg);
-extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec64 *now);
-extern int vrtc_set_mmss(const struct timespec64 *now);
-
-#endif
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 3cb002b1d0f9..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
- INVALID_TARGET = 0,
- MKTME_TARGET = 1,
- PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
- MKTME_KEY_PROGRAM = 0,
- PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT 0
-#define MKTME_KEYID_SET_KEY_RANDOM 1
-#define MKTME_KEYID_CLEAR_KEY 2
-#define MKTME_KEYID_NO_ENCRYPT 3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128 (1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS 0
-#define MKTME_INVALID_PROG_CMD 1
-#define MKTME_ENTROPY_ERROR 2
-#define MKTME_INVALID_KEYID 3
-#define MKTME_INVALID_ENC_ALG 4
-#define MKTME_DEVICE_BUSY 5
-
-/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */
-struct mktme_key_program {
- u16 keyid;
- u32 keyid_ctrl;
- u8 __rsvd[58];
- u8 key_field_1[64];
- u8 key_field_2[64];
-} __packed __aligned(256);
-
-static inline int mktme_key_program(struct mktme_key_program *key_program)
-{
- unsigned long rax = MKTME_KEY_PROGRAM;
-
- if (!pconfig_target_supported(MKTME_TARGET))
- return -ENXIO;
-
- asm volatile(PCONFIG
- : "=a" (rax), "=b" (key_program)
- : "0" (rax), "1" (key_program)
- : "memory", "cc");
-
- return rax;
-}
-
-#endif /* _ASM_X86_INTEL_PCONFIG_H */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
deleted file mode 100644
index 9e7adcdbe031..000000000000
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_PMC_IPC_H_
-#define _ASM_X86_INTEL_PMC_IPC_H_
-
-/* Commands */
-#define PMC_IPC_PMIC_ACCESS 0xFF
-#define PMC_IPC_PMIC_ACCESS_READ 0x0
-#define PMC_IPC_PMIC_ACCESS_WRITE 0x1
-#define PMC_IPC_USB_PWR_CTRL 0xF0
-#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF
-#define PMC_IPC_PHY_CONFIG 0xEE
-#define PMC_IPC_NORTHPEAK_CTRL 0xED
-#define PMC_IPC_PM_DEBUG 0xEC
-#define PMC_IPC_PMC_TELEMTRY 0xEB
-#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA
-
-/* IPC return code */
-#define IPC_ERR_NONE 0
-#define IPC_ERR_CMD_NOT_SUPPORTED 1
-#define IPC_ERR_CMD_NOT_SERVICED 2
-#define IPC_ERR_UNABLE_TO_SERVICE 3
-#define IPC_ERR_CMD_INVALID 4
-#define IPC_ERR_CMD_FAILED 5
-#define IPC_ERR_EMSECURITY 6
-#define IPC_ERR_UNSIGNEDKERNEL 7
-
-/* GCR reg offsets from gcr base*/
-#define PMC_GCR_PMC_CFG_REG 0x08
-#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78
-#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80
-
-#if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
-
-int intel_pmc_ipc_simple_command(int cmd, int sub);
-int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen, u32 dptr, u32 sptr);
-int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen);
-int intel_pmc_s0ix_counter_read(u64 *data);
-int intel_pmc_gcr_read(u32 offset, u32 *data);
-int intel_pmc_gcr_read64(u32 offset, u64 *data);
-int intel_pmc_gcr_write(u32 offset, u32 data);
-int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val);
-
-#else
-
-static inline int intel_pmc_ipc_simple_command(int cmd, int sub)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen, u32 dptr, u32 sptr)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_s0ix_counter_read(u64 *data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_read(u32 offset, u32 *data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_write(u32 offset, u32 data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val)
-{
- return -EINVAL;
-}
-
-#endif /*CONFIG_INTEL_PMC_IPC*/
-
-#endif
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 634f99b1dc22..c796e9bc98b6 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -3,7 +3,7 @@
#define _ASM_X86_INTEL_PT_H
#define PT_CPUID_LEAVES 2
-#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
+#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
@@ -13,6 +13,8 @@ enum pt_capabilities {
PT_CAP_mtc,
PT_CAP_ptwrite,
PT_CAP_power_event_trace,
+ PT_CAP_event_trace,
+ PT_CAP_tnt_disable,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
@@ -28,10 +30,12 @@ enum pt_capabilities {
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap);
+extern int is_intel_pt_event(struct perf_event *event);
#else
static inline void cpu_emergency_stop_pt(void) {}
static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; }
static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; }
+static inline int is_intel_pt_event(struct perf_event *event) { return 0; }
#endif
#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h
index ce16da719596..1f9b5d225912 100644
--- a/arch/x86/include/asm/intel_punit_ipc.h
+++ b/arch/x86/include/asm/intel_punit_ipc.h
@@ -80,17 +80,10 @@ typedef enum {
#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC)
-int intel_punit_ipc_simple_command(int cmd, int para1, int para2);
int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out);
#else
-static inline int intel_punit_ipc_simple_command(int cmd,
- int para1, int para2)
-{
- return -ENODEV;
-}
-
static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2,
u32 *in, u32 *out)
{
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
deleted file mode 100644
index 4a8c6e817398..000000000000
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_SCU_IPC_H_
-#define _ASM_X86_INTEL_SCU_IPC_H_
-
-#include <linux/notifier.h>
-
-#define IPCMSG_INDIRECT_READ 0x02
-#define IPCMSG_INDIRECT_WRITE 0x05
-
-#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
-
-#define IPCMSG_WARM_RESET 0xF0
-#define IPCMSG_COLD_RESET 0xF1
-#define IPCMSG_SOFT_RESET 0xF2
-#define IPCMSG_COLD_BOOT 0xF3
-
-#define IPCMSG_VRTC 0xFA /* Set vRTC device */
- /* Command id associated with message IPCMSG_VRTC */
- #define IPC_CMD_VRTC_SETTIME 1 /* Set time */
- #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */
-
-/* Read single register */
-int intel_scu_ipc_ioread8(u16 addr, u8 *data);
-
-/* Read two sequential registers */
-int intel_scu_ipc_ioread16(u16 addr, u16 *data);
-
-/* Read four sequential registers */
-int intel_scu_ipc_ioread32(u16 addr, u32 *data);
-
-/* Read a vector */
-int intel_scu_ipc_readv(u16 *addr, u8 *data, int len);
-
-/* Write single register */
-int intel_scu_ipc_iowrite8(u16 addr, u8 data);
-
-/* Write two sequential registers */
-int intel_scu_ipc_iowrite16(u16 addr, u16 data);
-
-/* Write four sequential registers */
-int intel_scu_ipc_iowrite32(u16 addr, u32 data);
-
-/* Write a vector */
-int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);
-
-/* Update single register based on the mask */
-int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
-
-/* Issue commands to the SCU with or without data */
-int intel_scu_ipc_simple_command(int cmd, int sub);
-int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
- u32 *out, int outlen);
-int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen,
- u32 *out, int outlen, u32 dptr, u32 sptr);
-
-/* I2C control api */
-int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data);
-
-/* Update FW version */
-int intel_scu_ipc_fw_update(u8 *buffer, u32 length);
-
-extern struct blocking_notifier_head intel_scu_notifier;
-
-static inline void intel_scu_notifier_add(struct notifier_block *nb)
-{
- blocking_notifier_chain_register(&intel_scu_notifier, nb);
-}
-
-static inline void intel_scu_notifier_remove(struct notifier_block *nb)
-{
- blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
-}
-
-static inline int intel_scu_notifier_post(unsigned long v, void *p)
-{
- return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
-}
-
-#define SCU_AVAILABLE 1
-#define SCU_DOWN 2
-
-#endif
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 85029b58d0cd..944637a4e6de 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -1,17 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Intel SOC Telemetry Driver Header File
* Copyright (C) 2015, Intel Corporation.
* All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#ifndef INTEL_TELEMETRY_H
#define INTEL_TELEMETRY_H
@@ -19,6 +10,8 @@
#define TELEM_MAX_EVENTS_SRAM 28
#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
+#include <linux/platform_data/x86/intel_scu_ipc.h>
+
enum telemetry_unit {
TELEM_PSS = 0,
TELEM_IOSS,
@@ -49,13 +42,10 @@ struct telemetry_evtmap {
struct telemetry_unit_config {
struct telemetry_evtmap *telem_evts;
void __iomem *regmap;
- u32 ssram_base_addr;
u8 ssram_evts_used;
u8 curr_period;
u8 max_period;
u8 min_period;
- u32 ssram_size;
-
};
struct telemetry_plt_config {
@@ -63,22 +53,12 @@ struct telemetry_plt_config {
struct telemetry_unit_config ioss_config;
struct mutex telem_trace_lock;
struct mutex telem_lock;
+ struct intel_pmc_dev *pmc;
+ struct intel_scu_ipc_dev *scu;
bool telem_in_use;
};
struct telemetry_core_ops {
- int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period,
- u8 *ioss_min_period, u8 *ioss_max_period);
-
- int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig,
- struct telemetry_evtconfig *ioss_evtconfig,
- int pss_len, int ioss_len);
-
- int (*update_events)(struct telemetry_evtconfig pss_evtconfig,
- struct telemetry_evtconfig ioss_evtconfig);
-
- int (*set_sampling_period)(u8 pss_period, u8 ioss_period);
-
int (*get_trace_verbosity)(enum telemetry_unit telem_unit,
u32 *verbosity);
@@ -92,11 +72,6 @@ struct telemetry_core_ops {
int (*read_eventlog)(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog,
int len, int log_all_evts);
-
- int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts,
- u32 *pss_evtmap, u32 *ioss_evtmap);
-
- int (*reset_events)(void);
};
int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
@@ -104,40 +79,20 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
int telemetry_clear_pltdata(void);
-int telemetry_pltconfig_valid(void);
+struct telemetry_plt_config *telemetry_get_pltdata(void);
int telemetry_get_evtname(enum telemetry_unit telem_unit,
const char **name, int len);
-int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig,
- struct telemetry_evtconfig ioss_evtconfig);
-
-int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts,
- u32 *pss_evtmap, u32 *ioss_evtmap);
-
-int telemetry_reset_events(void);
-
-int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config,
- struct telemetry_evtconfig *ioss_config,
- int pss_len, int ioss_len);
-
int telemetry_read_events(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
-int telemetry_raw_read_events(enum telemetry_unit telem_unit,
- struct telemetry_evtlog *evtlog, int len);
-
int telemetry_read_eventlog(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
-int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period,
- u8 *ioss_min_period, u8 *ioss_max_period);
-
-int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period);
-
int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit,
u32 verbosity);
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
index 989cfa86de85..734482afbf81 100644
--- a/arch/x86/include/asm/invpcid.h
+++ b/arch/x86/include/asm/invpcid.h
@@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
* stale TLB entries and, especially if we're flushing global
* mappings, we don't want the compiler to reorder any subsequent
* memory accesses before the TLB flush.
- *
- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
- * invpcid (%rcx), %rax in long mode.
*/
- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+ asm volatile("invpcid %[desc], %[type]"
+ :: [desc] "m" (desc), [type] "r" (type) : "memory");
}
#define INVPCID_TYPE_INDIV_ADDR 0
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 686247db3106..ca309a3227c7 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -35,14 +35,14 @@
* - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#define ARCH_HAS_IOREMAP_WC
-#define ARCH_HAS_IOREMAP_WT
-
#include <linux/string.h>
#include <linux/compiler.h>
+#include <linux/cc_platform.h>
#include <asm/page.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
+#include <asm/shared/io.h>
+#include <asm/special_insns.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -90,8 +90,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define __raw_writew __writew
#define __raw_writel __writel
-#define mmiowb() barrier()
-
#ifdef CONFIG_X86_64
build_mmio_read(readq, "q", u64, "=r", :"memory")
@@ -154,46 +152,32 @@ static inline void *phys_to_virt(phys_addr_t address)
#define phys_to_virt phys_to_virt
/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
- * promitions in legacy drivers.
+ * promotions in legacy drivers.
*/
static inline unsigned int isa_virt_to_bus(volatile void *address)
{
return (unsigned int)virt_to_phys(address);
}
-#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page))
#define isa_bus_to_virt phys_to_virt
/*
- * However PCI ones are not necessarily 1:1 and therefore these interfaces
- * are forbidden in portable PCI drivers.
- *
- * Allow them on x86 for legacy drivers, though.
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
-/*
* The default ioremap() behavior is non-cached; if you need something
* else, you probably want one of the following.
*/
-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
-#define ioremap_nocache ioremap_nocache
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
#define ioremap_cache ioremap_cache
-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot);
#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
+#define arch_memremap_wb arch_memremap_wb
+
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
@@ -208,17 +192,12 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long
* If the area you are trying to map is a PCI BAR you should have a
* look at pci_iomap().
*/
-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
-{
- return ioremap_nocache(offset, size);
-}
+void __iomem *ioremap(resource_size_t offset, unsigned long size);
#define ioremap ioremap
extern void iounmap(volatile void __iomem *addr);
#define iounmap iounmap
-extern void set_iounmap_nonlazy(void);
-
#ifdef __KERNEL__
void memcpy_fromio(void *, const volatile void __iomem *, size_t);
@@ -229,7 +208,22 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
-#include <asm-generic/iomap.h>
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ asm volatile("rep movsl"
+ : "=&c"(count), "=&D"(to), "=&S"(from)
+ : "0"(count), "1"(to), "2"(from)
+ : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
@@ -264,88 +258,57 @@ static inline void slow_down_io(void)
#endif
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-#include <linux/jump_label.h>
-
-extern struct static_key_false sev_enable_key;
-static inline bool sev_key_active(void)
-{
- return static_branch_unlikely(&sev_enable_key);
-}
-
-#else /* !CONFIG_AMD_MEM_ENCRYPT */
-
-static inline bool sev_key_active(void) { return false; }
-
-#endif /* CONFIG_AMD_MEM_ENCRYPT */
-
-#define BUILDIO(bwl, bw, type) \
-static inline void out##bwl(unsigned type value, int port) \
-{ \
- asm volatile("out" #bwl " %" #bw "0, %w1" \
- : : "a"(value), "Nd"(port)); \
-} \
- \
-static inline unsigned type in##bwl(int port) \
-{ \
- unsigned type value; \
- asm volatile("in" #bwl " %w1, %" #bw "0" \
- : "=a"(value) : "Nd"(port)); \
- return value; \
-} \
- \
-static inline void out##bwl##_p(unsigned type value, int port) \
+#define BUILDIO(bwl, type) \
+static inline void out##bwl##_p(type value, u16 port) \
{ \
out##bwl(value, port); \
slow_down_io(); \
} \
\
-static inline unsigned type in##bwl##_p(int port) \
+static inline type in##bwl##_p(u16 port) \
{ \
- unsigned type value = in##bwl(port); \
+ type value = in##bwl(port); \
slow_down_io(); \
return value; \
} \
\
-static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
- unsigned type *value = (unsigned type *)addr; \
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
+ type *value = (type *)addr; \
while (count) { \
out##bwl(*value, port); \
value++; \
count--; \
} \
} else { \
- asm volatile("rep; outs" #bwl \
+ asm volatile("rep outs" #bwl \
: "+S"(addr), "+c"(count) \
: "d"(port) : "memory"); \
} \
} \
\
-static inline void ins##bwl(int port, void *addr, unsigned long count) \
+static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
- unsigned type *value = (unsigned type *)addr; \
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
+ type *value = (type *)addr; \
while (count) { \
*value = in##bwl(port); \
value++; \
count--; \
} \
} else { \
- asm volatile("rep; ins" #bwl \
+ asm volatile("rep ins" #bwl \
: "+D"(addr), "+c"(count) \
: "d"(port) : "memory"); \
} \
}
-BUILDIO(b, b, char)
-BUILDIO(w, w, short)
-BUILDIO(l, , int)
+BUILDIO(b, u8)
+BUILDIO(w, u16)
+BUILDIO(l, u32)
+#undef BUILDIO
-#define inb inb
-#define inw inw
-#define inl inl
#define inb_p inb_p
#define inw_p inw_p
#define inl_p inl_p
@@ -353,9 +316,6 @@ BUILDIO(l, , int)
#define insw insw
#define insl insl
-#define outb outb
-#define outw outw
-#define outl outl
#define outb_p outb_p
#define outw_p outw_p
#define outl_p outl_p
@@ -399,6 +359,7 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
extern bool arch_memremap_can_ram_remap(resource_size_t offset,
unsigned long size,
unsigned long flags);
@@ -406,5 +367,37 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset,
extern bool phys_mem_access_encrypted(unsigned long phys_addr,
unsigned long size);
+#else
+static inline bool phys_mem_access_encrypted(unsigned long phys_addr,
+ unsigned long size)
+{
+ return true;
+}
+#endif
+
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512 bits quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
+ size_t count)
+{
+ const u8 *from = src;
+ const u8 *end = from + count * 64;
+
+ while (from < end) {
+ movdir64b_io(dst, from);
+ from += 64;
+ }
+}
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index fd20a2334885..0d806513c4b3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -13,15 +13,6 @@
* Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
*/
-/* I/O Unit Redirection Table */
-#define IO_APIC_REDIR_VECTOR_MASK 0x000FF
-#define IO_APIC_REDIR_DEST_LOGICAL 0x00800
-#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
-#define IO_APIC_REDIR_SEND_PENDING (1 << 12)
-#define IO_APIC_REDIR_REMOTE_IRR (1 << 14)
-#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
-#define IO_APIC_REDIR_MASKED (1 << 16)
-
/*
* The structure of the IO-APIC:
*/
@@ -65,53 +56,40 @@ union IO_APIC_reg_03 {
};
struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- __u32 __reserved_3 : 24,
- dest : 8;
-} __attribute__ ((packed));
-
-struct IR_IO_APIC_route_entry {
- __u64 vector : 8,
- zero : 3,
- index2 : 1,
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1,
- mask : 1,
- reserved : 31,
- format : 1,
- index : 15;
+ union {
+ struct {
+ u64 vector : 8,
+ delivery_mode : 3,
+ dest_mode_logical : 1,
+ delivery_status : 1,
+ active_low : 1,
+ irr : 1,
+ is_level : 1,
+ masked : 1,
+ reserved_0 : 15,
+ reserved_1 : 17,
+ virt_destid_8_14 : 7,
+ destid_0_7 : 8;
+ };
+ struct {
+ u64 ir_shared_0 : 8,
+ ir_zero : 3,
+ ir_index_15 : 1,
+ ir_shared_1 : 5,
+ ir_reserved_0 : 31,
+ ir_format : 1,
+ ir_index_0_14 : 15;
+ };
+ struct {
+ u64 w1 : 32,
+ w2 : 32;
+ };
+ };
} __attribute__ ((packed));
struct irq_alloc_info;
struct ioapic_domain_cfg;
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
-#define IOAPIC_MASKED 1
-#define IOAPIC_UNMASKED 0
-
-#define IOAPIC_POL_HIGH 0
-#define IOAPIC_POL_LOW 1
-
-#define IOAPIC_DEST_MODE_PHYSICAL 0
-#define IOAPIC_DEST_MODE_LOGICAL 1
-
#define IOAPIC_MAP_ALLOC 0x1
#define IOAPIC_MAP_CHECK 0x2
@@ -131,8 +109,8 @@ extern int mp_irq_entries;
/* MP IRQ source entries */
extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
+/* True if "noapic" boot option passed */
+extern bool ioapic_is_disabled;
/* 1 if "noapic" boot option passed */
extern int noioapicquirk;
@@ -151,7 +129,7 @@ extern unsigned long io_apic_irqs;
* assignment of PCI IRQ's.
*/
#define io_apic_assign_pci_irqs \
- (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
+ (mp_irq_entries && !ioapic_is_disabled && io_apic_irqs)
struct irq_cfg;
extern void ioapic_insert_resources(void);
@@ -162,7 +140,6 @@ extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
-extern void setup_ioapic_ids_from_mpc_nocheck(void);
extern int mp_find_ioapic(u32 gsi);
extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
@@ -201,6 +178,7 @@ extern void print_IO_APICs(void);
#define IO_APIC_IRQ(x) 0
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
+#define nr_ioapics (0)
static inline void ioapic_insert_resources(void) { }
static inline int arch_early_ioapic_init(void) { return 0; }
static inline void print_IO_APICs(void) {}
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
new file mode 100644
index 000000000000..7f080f5c7def
--- /dev/null
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IOBITMAP_H
+#define _ASM_X86_IOBITMAP_H
+
+#include <linux/refcount.h>
+#include <asm/processor.h>
+
+struct io_bitmap {
+ u64 sequence;
+ refcount_t refcnt;
+ /* The maximum number of bytes to copy so all zero bits are covered */
+ unsigned int max;
+ unsigned long bitmap[IO_BITMAP_LONGS];
+};
+
+struct task_struct;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+void io_bitmap_share(struct task_struct *tsk);
+void io_bitmap_exit(struct task_struct *tsk);
+
+static inline void native_tss_invalidate_io_bitmap(void)
+{
+ /*
+ * Invalidate the I/O bitmap by moving io_bitmap_base outside the
+ * TSS limit so any subsequent I/O access from user space will
+ * trigger a #GP.
+ *
+ * This is correct even when VMEXIT rewrites the TSS limit
+ * to 0x67 as the only requirement is that the base points
+ * outside the limit.
+ */
+ this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base,
+ IO_BITMAP_OFFSET_INVALID);
+}
+
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap
+#endif
+
+#else
+static inline void io_bitmap_share(struct task_struct *tsk) { }
+static inline void io_bitmap_exit(struct task_struct *tsk) { }
+static inline void tss_update_io_bitmap(void) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 363e33eb6ec1..e2de092fc38c 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,41 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_IOMAP_H
#define _ASM_X86_IOMAP_H
/*
* Copyright © 2008 Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <linux/highmem.h>
#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
-void __iomem *
-iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
+void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
-void
-iounmap_atomic(void __iomem *kvaddr);
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
-int
-iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
-
-void
-iomap_free(resource_size_t base, unsigned long size);
+void iomap_free(resource_size_t base, unsigned long size);
#endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index baedab8ac538..3be2451e7bc8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,11 +2,39 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
+#include <linux/acpi.h>
+
+#include <asm/e820/api.h>
+
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int iommu_pass_through;
+extern int iommu_merge;
+extern int panic_on_overflow;
+extern bool amd_iommu_snp_en;
+
+#ifdef CONFIG_SWIOTLB
+extern bool x86_swiotlb_enable;
+#else
+#define x86_swiotlb_enable false
+#endif
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+static inline int __init
+arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+ u64 start = rmrr->base_address;
+ u64 end = rmrr->end_address + 1;
+ int entry_type;
+
+ entry_type = e820__get_entry_type(start, end);
+ if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS)
+ return 0;
+
+ pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
+ start, end - 1);
+ return -EINVAL;
+}
+
#endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
deleted file mode 100644
index 1fb3fd1a83c2..000000000000
--- a/arch/x86/include/asm/iommu_table.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IOMMU_TABLE_H
-#define _ASM_X86_IOMMU_TABLE_H
-
-#include <asm/swiotlb.h>
-
-/*
- * History lesson:
- * The execution chain of IOMMUs in 2.6.36 looks as so:
- *
- * [xen-swiotlb]
- * |
- * +----[swiotlb *]--+
- * / | \
- * / | \
- * [GART] [Calgary] [Intel VT-d]
- * /
- * /
- * [AMD-Vi]
- *
- * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip
- * over the rest of IOMMUs and unconditionally initialize the SWIOTLB.
- * Also it would surreptitiously initialize set the swiotlb=1 if there were
- * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb
- * flag would be turned off by all IOMMUs except the Calgary one.
- *
- * The IOMMU_INIT* macros allow a similar tree (or more complex if desired)
- * to be built by defining who we depend on.
- *
- * And all that needs to be done is to use one of the macros in the IOMMU
- * and the pci-dma.c will take care of the rest.
- */
-
-struct iommu_table_entry {
- initcall_t detect;
- initcall_t depend;
- void (*early_init)(void); /* No memory allocate available. */
- void (*late_init)(void); /* Yes, can allocate memory. */
-#define IOMMU_FINISH_IF_DETECTED (1<<0)
-#define IOMMU_DETECTED (1<<1)
- int flags;
-};
-/*
- * Macro fills out an entry in the .iommu_table that is equivalent
- * to the fields that 'struct iommu_table_entry' has. The entries
- * that are put in the .iommu_table section are not put in any order
- * hence during boot-time we will have to resort them based on
- * dependency. */
-
-
-#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
- static const struct iommu_table_entry \
- __iommu_entry_##_detect __used \
- __attribute__ ((unused, __section__(".iommu_table"), \
- aligned((sizeof(void *))))) \
- = {_detect, _depend, _early_init, _late_init, \
- _finish ? IOMMU_FINISH_IF_DETECTED : 0}
-/*
- * The simplest IOMMU definition. Provide the detection routine
- * and it will be run after the SWIOTLB and the other IOMMUs
- * that utilize this macro. If the IOMMU is detected (ie, the
- * detect routine returns a positive value), the other IOMMUs
- * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer
- * to stop detecting the other IOMMUs after yours has been detected.
- */
-#define IOMMU_INIT_POST(_detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0)
-
-#define IOMMU_INIT_POST_FINISH(detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1)
-
-/*
- * A more sophisticated version of IOMMU_INIT. This variant requires:
- * a). A detection routine function.
- * b). The name of the detection routine we depend on to get called
- * before us.
- * c). The init routine which gets called if the detection routine
- * returns a positive value from the pci_iommu_alloc. This means
- * no presence of a memory allocator.
- * d). Similar to the 'init', except that this gets called from pci_iommu_init
- * where we do have a memory allocator.
- *
- * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
- * in that the former will continue detecting other IOMMUs in the call
- * list after the detection routine returns a positive number, while the
- * latter will stop the execution chain upon first successful detection.
- * Both variants will still call the 'init' and 'late_init' functions if
- * they are set.
- */
-#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
- __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
-
-#define IOMMU_INIT(_detect, _depend, _init, _late_init) \
- __IOMMU_INIT(_detect, _depend, _init, _late_init, 0)
-
-void sort_iommu_table(struct iommu_table_entry *start,
- struct iommu_table_entry *finish);
-
-void check_iommu_entries(struct iommu_table_entry *start,
- struct iommu_table_entry *finish);
-
-#endif /* _ASM_X86_IOMMU_TABLE_H */
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index 5270ff39b9af..8ace6559d399 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -39,6 +39,7 @@
#define BT_MBI_UNIT_PMC 0x04
#define BT_MBI_UNIT_GFX 0x06
#define BT_MBI_UNIT_SMI 0x0C
+#define BT_MBI_UNIT_CCK 0x14
#define BT_MBI_UNIT_USB 0x43
#define BT_MBI_UNIT_SATA 0xA3
#define BT_MBI_UNIT_PCIE 0xA6
@@ -110,7 +111,7 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
* This function will block all kernel access to the PMIC I2C bus, so that the
* P-Unit can safely access the PMIC over the shared I2C bus.
*
- * Note on these systems the i2c-bus driver will request a sempahore from the
+ * Note on these systems the i2c-bus driver will request a semaphore from the
* P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing
* it, but this does not appear to be sufficient, we still need to avoid making
* certain P-Unit requests during the access window to avoid problems.
@@ -167,13 +168,6 @@ void iosf_mbi_unblock_punit_i2c_access(void);
int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb);
/**
- * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier
- *
- * @nb: notifier_block to unregister
- */
-int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb);
-
-/**
* iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus
* notifier, unlocked
*
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
deleted file mode 100644
index a4fe16e42b7b..000000000000
--- a/arch/x86/include/asm/ipi.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef _ASM_X86_IPI_H
-#define _ASM_X86_IPI_H
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC InterProcessor Interrupt code.
- *
- * Moved to include file by James Cleverdon from
- * arch/x86-64/kernel/smp.c
- *
- * Copyrights from kernel/smp.c:
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
- */
-
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/smp.h>
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
- unsigned int dest)
-{
- unsigned int icr = shortcut | dest;
-
- switch (vector) {
- default:
- icr |= APIC_DM_FIXED | vector;
- break;
- case NMI_VECTOR:
- icr |= APIC_DM_NMI;
- break;
- }
- return icr;
-}
-
-static inline int __prepare_ICR2(unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-static inline void __xapic_wait_icr_idle(void)
-{
- while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
-}
-
-void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
-
-/*
- * This is used to send an IPI with no shorthand notation (the destination is
- * specified in bits 56 to 63 of the ICR).
- */
-void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
-
-extern void default_send_IPI_single(int cpu, int vector);
-extern void default_send_IPI_single_phys(int cpu, int vector);
-extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
- int vector);
-
-/* Avoid include hell */
-#define NMI_VECTOR 0x02
-
-extern int no_broadcast;
-
-static inline void __default_local_send_IPI_allbutself(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
- else
- __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);
-}
-
-static inline void __default_local_send_IPI_all(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- apic->send_IPI_mask(cpu_online_mask, vector);
- else
- __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical);
-}
-
-#ifdef CONFIG_X86_32
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_allbutself(int vector);
-extern void default_send_IPI_all(int vector);
-extern void default_send_IPI_self(int vector);
-#endif
-
-#endif
-
-#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index fbb16e6b6c18..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -11,46 +11,39 @@
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
+/*
+ * The irq entry code is in the noinstr section and the start/end of
+ * __irqentry_text is emitted via labels. Make the build fail if
+ * something moves a C function into the __irq_entry section.
+ */
+#define __irq_entry __invalid_section
+
static inline int irq_canonicalize(int irq)
{
return ((irq == 2) ? 9 : irq);
}
-#ifdef CONFIG_X86_32
-extern void irq_ctx_init(int cpu);
-#else
-# define irq_ctx_init(cpu) do { } while (0)
-#endif
-
-#define __ARCH_HAS_DO_SOFTIRQ
+extern int irq_init_percpu_irqstack(unsigned int cpu);
struct irq_desc;
extern void fixup_irqs(void);
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
-extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs);
-extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs);
-extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs);
#endif
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
-extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
-
-extern __visible unsigned int do_IRQ(struct pt_regs *regs);
+extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern void init_ISA_irqs(void);
-extern void __init init_IRQ(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
- bool exclude_self);
+ int exclude_cpu);
-extern __visible void smp_x86_platform_ipi(struct pt_regs *regs);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
deleted file mode 100644
index 8f3bee821e6c..000000000000
--- a/arch/x86/include/asm/irq_regs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Per-cpu current frame pointer - the location of the last exception frame on
- * the stack, stored in the per-cpu area.
- *
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-#ifndef _ASM_X86_IRQ_REGS_H
-#define _ASM_X86_IRQ_REGS_H
-
-#include <asm/percpu.h>
-
-#define ARCH_HAS_OWN_IRQ_REGS
-
-DECLARE_PER_CPU(struct pt_regs *, irq_regs);
-
-static inline struct pt_regs *get_irq_regs(void)
-{
- return this_cpu_read(irq_regs);
-}
-
-static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
-{
- struct pt_regs *old_regs;
-
- old_regs = get_irq_regs();
- this_cpu_write(irq_regs, new_regs);
-
- return old_regs;
-}
-
-#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 67ed72f31cc2..5a0d42464d44 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* This header file contains the interface of the interrupt remapping code to
* the x86 interrupt management code.
*/
@@ -38,7 +26,22 @@ enum {
IRQ_REMAP_X2APIC_MODE,
};
-struct vcpu_data {
+/*
+ * This is mainly used to communicate information back-and-forth
+ * between SVM and IOMMU for setting up and tearing down posted
+ * interrupt
+ */
+struct amd_iommu_pi_data {
+ u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */
+ u32 ga_tag;
+ u32 vector; /* Guest vector of the interrupt */
+ int cpu;
+ bool ga_log_intr;
+ bool is_guest_mode;
+ void *ir_data;
+};
+
+struct intel_iommu_pi_data {
u64 pi_desc_addr; /* Physical address of PI Descriptor */
u32 vector; /* Guest vector of the interrupt */
};
@@ -56,21 +59,19 @@ extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
extern void panic_if_irq_remap(const char *msg);
-extern struct irq_domain *
-irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info);
-extern struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info);
-
-/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
-extern struct irq_domain *
-arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
-
/* Get parent irqdomain for interrupt remapping irqdomain */
static inline struct irq_domain *arch_get_ir_parent_domain(void)
{
return x86_vector_domain;
}
+extern bool enable_posted_msi;
+
+static inline bool posted_msi_supported(void)
+{
+ return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
#else /* CONFIG_IRQ_REMAP */
static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
@@ -85,17 +86,5 @@ static inline void panic_if_irq_remap(const char *msg)
{
}
-static inline struct irq_domain *
-irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
-{
- return NULL;
-}
-
-static inline struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info)
-{
- return NULL;
-}
-
#endif /* CONFIG_IRQ_REMAP */
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
new file mode 100644
index 000000000000..8325b79f2ac6
--- /dev/null
+++ b/arch/x86/include/asm/irq_stack.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IRQ_STACK_H
+#define _ASM_X86_IRQ_STACK_H
+
+#include <linux/ptrace.h>
+#include <linux/objtool.h>
+
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Macro to inline switching to an interrupt stack and invoking function
+ * calls from there. The following rules apply:
+ *
+ * - Ordering:
+ *
+ * 1. Write the stack pointer into the top most place of the irq
+ * stack. This ensures that the various unwinders can link back to the
+ * original stack.
+ *
+ * 2. Switch the stack pointer to the top of the irq stack.
+ *
+ * 3. Invoke whatever needs to be done (@asm_call argument)
+ *
+ * 4. Pop the original stack pointer from the top of the irq stack
+ * which brings it back to the original stack where it left off.
+ *
+ * - Function invocation:
+ *
+ * To allow flexible usage of the macro, the actual function code including
+ * the store of the arguments in the call ABI registers is handed in via
+ * the @asm_call argument.
+ *
+ * - Local variables:
+ *
+ * @tos:
+ * The @tos variable holds a pointer to the top of the irq stack and
+ * _must_ be allocated in a non-callee saved register as this is a
+ * restriction coming from objtool.
+ *
+ * Note, that (tos) is both in input and output constraints to ensure
+ * that the compiler does not assume that R11 is left untouched in
+ * case this macro is used in some place where the per cpu interrupt
+ * stack pointer is used again afterwards
+ *
+ * - Function arguments:
+ * The function argument(s), if any, have to be defined in register
+ * variables at the place where this is invoked. Storing the
+ * argument(s) in the proper register(s) is part of the @asm_call
+ *
+ * - Constraints:
+ *
+ * The constraints have to be done very carefully because the compiler
+ * does not know about the assembly call.
+ *
+ * output:
+ * As documented already above the @tos variable is required to be in
+ * the output constraints to make the compiler aware that R11 cannot be
+ * reused after the asm() statement.
+ *
+ * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is
+ * required as well as this prevents certain creative GCC variants from
+ * misplacing the ASM code.
+ *
+ * input:
+ * - func:
+ * Immediate, which tells the compiler that the function is referenced.
+ *
+ * - tos:
+ * Register. The actual register is defined by the variable declaration.
+ *
+ * - function arguments:
+ * The constraints are handed in via the 'argconstr' argument list. They
+ * describe the register arguments which are used in @asm_call.
+ *
+ * clobbers:
+ * Function calls can clobber anything except the callee-saved
+ * registers. Tell the compiler.
+ */
+#define call_on_stack(stack, func, asm_call, argconstr...) \
+{ \
+ register void *tos asm("r11"); \
+ \
+ tos = ((void *)(stack)); \
+ \
+ asm_inline volatile( \
+ "movq %%rsp, (%[tos]) \n" \
+ "movq %[tos], %%rsp \n" \
+ \
+ asm_call \
+ \
+ "popq %%rsp \n" \
+ \
+ : "+r" (tos), ASM_CALL_CONSTRAINT \
+ : [__func] "i" (func), [tos] "r" (tos) argconstr \
+ : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", \
+ "memory" \
+ ); \
+}
+
+#define ASM_CALL_ARG0 \
+ "1: call %c[__func] \n" \
+ ANNOTATE_REACHABLE(1b) " \n"
+
+#define ASM_CALL_ARG1 \
+ "movq %[arg1], %%rdi \n" \
+ ASM_CALL_ARG0
+
+#define ASM_CALL_ARG2 \
+ "movq %[arg2], %%rsi \n" \
+ ASM_CALL_ARG1
+
+#define ASM_CALL_ARG3 \
+ "movq %[arg3], %%rdx \n" \
+ ASM_CALL_ARG2
+
+#define call_on_irqstack(func, asm_call, argconstr...) \
+ call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
+ func, asm_call, argconstr)
+
+/* Macros to assert type correctness for run_*_on_irqstack macros */
+#define assert_function_type(func, proto) \
+ static_assert(__builtin_types_compatible_p(typeof(&func), proto))
+
+#define assert_arg_type(arg, proto) \
+ static_assert(__builtin_types_compatible_p(typeof(arg), proto))
+
+/*
+ * Macro to invoke system vector and device interrupt C handlers.
+ */
+#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \
+{ \
+ /* \
+ * User mode entry and interrupt on the irq stack do not \
+ * switch stacks. If from user mode the task stack is empty. \
+ */ \
+ if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \
+ irq_enter_rcu(); \
+ func(c_args); \
+ irq_exit_rcu(); \
+ } else { \
+ /* \
+ * Mark the irq stack inuse _before_ and unmark _after_ \
+ * switching stacks. Interrupts are disabled in both \
+ * places. Invoke the stack switch macro with the call \
+ * sequence which matches the above direct invocation. \
+ */ \
+ __this_cpu_write(hardirq_stack_inuse, true); \
+ call_on_irqstack(func, asm_call, constr); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
+ } \
+}
+
+/*
+ * Function call sequence for __call_on_irqstack() for system vectors.
+ *
+ * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input
+ * mechanism because these functions are global and cannot be optimized out
+ * when compiling a particular source file which uses one of these macros.
+ *
+ * The argument (regs) does not need to be pushed or stashed in a callee
+ * saved register to be safe vs. the irq_enter_rcu() call because the
+ * clobbers already prevent the compiler from storing it in a callee
+ * clobbered register. As the compiler has to preserve @regs for the final
+ * call to idtentry_exit() anyway, it's likely that it does not cause extra
+ * effort for this asm magic.
+ */
+#define ASM_CALL_SYSVEC \
+ "call irq_enter_rcu \n" \
+ ASM_CALL_ARG1 \
+ "call irq_exit_rcu \n"
+
+#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs)
+
+#define run_sysvec_on_irqstack_cond(func, regs) \
+{ \
+ assert_function_type(func, void (*)(struct pt_regs *)); \
+ assert_arg_type(regs, struct pt_regs *); \
+ \
+ call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC, \
+ SYSVEC_CONSTRAINTS, regs); \
+}
+
+/*
+ * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store
+ * @regs and @vector in callee saved registers.
+ */
+#define ASM_CALL_IRQ \
+ "call irq_enter_rcu \n" \
+ ASM_CALL_ARG2 \
+ "call irq_exit_rcu \n"
+
+#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector)
+
+#define run_irq_on_irqstack_cond(func, regs, vector) \
+{ \
+ assert_function_type(func, void (*)(struct pt_regs *, u32)); \
+ assert_arg_type(regs, struct pt_regs *); \
+ assert_arg_type(vector, u32); \
+ \
+ call_on_irqstack_cond(func, regs, ASM_CALL_IRQ, \
+ IRQ_CONSTRAINTS, regs, vector); \
+}
+
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+/*
+ * Macro to invoke __do_softirq on the irq stack. This is only called from
+ * task context when bottom halves are about to be reenabled and soft
+ * interrupts are pending to be processed. The interrupt stack cannot be in
+ * use here.
+ */
+#define do_softirq_own_stack() \
+{ \
+ __this_cpu_write(hardirq_stack_inuse, true); \
+ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
+}
+
+#endif
+
+#else /* CONFIG_X86_64 */
+/* System vector handlers always run on the stack they interrupted. */
+#define run_sysvec_on_irqstack_cond(func, regs) \
+{ \
+ irq_enter_rcu(); \
+ func(regs); \
+ irq_exit_rcu(); \
+}
+
+/* Switches to the irq stack within func() */
+#define run_irq_on_irqstack_cond(func, regs, vector) \
+{ \
+ irq_enter_rcu(); \
+ func(regs, vector); \
+ irq_exit_rcu(); \
+}
+
+#endif /* !CONFIG_X86_64 */
+
+#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 548d90bbf919..47051871b436 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,16 +18,16 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
- * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
+ * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts
+ * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
* This file enumerates the exact layout of them:
*/
+/* This is used as an interrupt vector when programming the APIC. */
#define NMI_VECTOR 0x02
-#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
@@ -35,13 +35,6 @@
*/
#define FIRST_EXTERNAL_VECTOR 0x20
-/*
- * Reserve the lowest usable vector (and hence lowest priority) 0x20 for
- * triggering cleanup after irq migration. 0x21-0x2f will still be used
- * for device interrupts.
- */
-#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
-
#define IA32_SYSCALL_VECTOR 0x80
/*
@@ -84,18 +77,16 @@
*/
#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
-#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
@@ -106,14 +97,23 @@
#define LOCAL_TIMER_VECTOR 0xec
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
+
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
-#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
+#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR
#else
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
+#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
/*
* Size the maximum number of interrupts.
*
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 80b35e3adf03..6b4d36c95165 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -9,8 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return boot_cpu_has(X86_FEATURE_APIC);
}
-extern void arch_irq_work_raise(void);
-extern __visible void smp_irq_work_interrupt(struct pt_regs *regs);
#else
static inline bool arch_irq_work_has_interrupt(void)
{
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index c066ffae222b..30c325c235c0 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -7,11 +7,12 @@
#ifdef CONFIG_X86_LOCAL_APIC
enum {
- /* Allocate contiguous CPU vectors */
- X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
- X86_IRQ_ALLOC_LEGACY = 0x2,
+ X86_IRQ_ALLOC_LEGACY = 0x1,
};
+extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec);
+extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec);
+
extern struct irq_domain *x86_vector_domain;
extern void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -51,9 +52,13 @@ extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
#endif /* CONFIG_X86_IO_APIC */
#ifdef CONFIG_PCI_MSI
-extern void arch_init_msi_domain(struct irq_domain *domain);
+void x86_create_pci_msi_domain(void);
+struct irq_domain *native_create_pci_msi_domain(void);
+extern struct irq_domain *x86_pci_msi_default_domain;
#else
-static inline void arch_init_msi_domain(struct irq_domain *domain) { }
+static inline void x86_create_pci_msi_domain(void) { }
+#define native_create_pci_msi_domain NULL
+#define x86_pci_msi_default_domain NULL
#endif
#endif
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 058e40fed167..b30e5474c18e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -4,10 +4,9 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
-/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
-#define __cpuidle __attribute__((__section__(".cpuidle.text")))
+#include <asm/nospec-branch.h>
/*
* Interrupt control:
@@ -15,7 +14,7 @@
/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
extern inline unsigned long native_save_fl(void);
-extern inline unsigned long native_save_fl(void)
+extern __always_inline unsigned long native_save_fl(void)
{
unsigned long flags;
@@ -33,68 +32,57 @@ extern inline unsigned long native_save_fl(void)
return flags;
}
-extern inline void native_restore_fl(unsigned long flags);
-extern inline void native_restore_fl(unsigned long flags)
-{
- asm volatile("push %0 ; popf"
- : /* no output */
- :"g" (flags)
- :"memory", "cc");
-}
-
-static inline void native_irq_disable(void)
+static __always_inline void native_irq_disable(void)
{
asm volatile("cli": : :"memory");
}
-static inline void native_irq_enable(void)
+static __always_inline void native_irq_enable(void)
{
asm volatile("sti": : :"memory");
}
-static inline __cpuidle void native_safe_halt(void)
+static __always_inline void native_safe_halt(void)
{
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
-static inline __cpuidle void native_halt(void)
+static __always_inline void native_halt(void)
{
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
-#endif
-
-#ifdef CONFIG_PARAVIRT_XXL
-#include <asm/paravirt.h>
-#else
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-
-static inline notrace unsigned long arch_local_save_flags(void)
+static __always_inline int native_irqs_disabled_flags(unsigned long flags)
{
- return native_save_fl();
+ return !(flags & X86_EFLAGS_IF);
}
-static inline notrace void arch_local_irq_restore(unsigned long flags)
+static __always_inline unsigned long native_local_irq_save(void)
{
- native_restore_fl(flags);
-}
+ unsigned long flags = native_save_fl();
-static inline notrace void arch_local_irq_disable(void)
-{
native_irq_disable();
+
+ return flags;
}
-static inline notrace void arch_local_irq_enable(void)
+static __always_inline void native_local_irq_restore(unsigned long flags)
{
- native_irq_enable();
+ if (!native_irqs_disabled_flags(flags))
+ native_irq_enable();
}
+#endif
+
+#ifndef CONFIG_PARAVIRT
+#ifndef __ASSEMBLY__
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
-static inline __cpuidle void arch_safe_halt(void)
+static __always_inline void arch_safe_halt(void)
{
native_safe_halt();
}
@@ -103,15 +91,38 @@ static inline __cpuidle void arch_safe_halt(void)
* Used when interrupts are already enabled or to
* shutdown the processor:
*/
-static inline __cpuidle void halt(void)
+static __always_inline void halt(void)
{
native_halt();
}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+
+static __always_inline unsigned long arch_local_save_flags(void)
+{
+ return native_save_fl();
+}
+
+static __always_inline void arch_local_irq_disable(void)
+{
+ native_irq_disable();
+}
+
+static __always_inline void arch_local_irq_enable(void)
+{
+ native_irq_enable();
+}
/*
* For spinlocks, etc:
*/
-static inline notrace unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
arch_local_irq_disable();
@@ -119,87 +130,34 @@ static inline notrace unsigned long arch_local_irq_save(void)
}
#else
-#define ENABLE_INTERRUPTS(x) sti
-#define DISABLE_INTERRUPTS(x) cli
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x) pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
#endif
-#define SWAPGS swapgs
-/*
- * Currently paravirt can't handle swapgs nicely when we
- * don't have a stack we can rely on (such as a user space
- * stack). So we either find a way around these or just fault
- * and emulate if a guest tries to call swapgs directly.
- *
- * Either way, this is a good way to document that we don't
- * have a reliable stack. x86_64 only.
- */
-#define SWAPGS_UNSAFE_STACK swapgs
-
-#define INTERRUPT_RETURN jmp native_iret
-#define USERGS_SYSRET64 \
- swapgs; \
- sysretq;
-#define USERGS_SYSRET32 \
- swapgs; \
- sysretl
-
-#else
-#define INTERRUPT_RETURN iret
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PARAVIRT_XXL */
-#ifndef __ASSEMBLY__
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+#ifndef __ASSEMBLER__
+static __always_inline int arch_irqs_disabled_flags(unsigned long flags)
{
return !(flags & X86_EFLAGS_IF);
}
-static inline int arch_irqs_disabled(void)
+static __always_inline int arch_irqs_disabled(void)
{
unsigned long flags = arch_local_save_flags();
return arch_irqs_disabled_flags(flags);
}
-#endif /* !__ASSEMBLY__ */
-#ifdef __ASSEMBLY__
-#ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
-# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
-#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_X86_64
-# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
-# define LOCKDEP_SYS_EXIT_IRQ \
- TRACE_IRQS_ON; \
- sti; \
- call lockdep_sys_exit_thunk; \
- cli; \
- TRACE_IRQS_OFF;
-# else
-# define LOCKDEP_SYS_EXIT \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call lockdep_sys_exit; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-# define LOCKDEP_SYS_EXIT_IRQ
-# endif
-#else
-# define LOCKDEP_SYS_EXIT
-# define LOCKDEP_SYS_EXIT_IRQ
-#endif
-#endif /* __ASSEMBLY__ */
+static __always_inline void arch_local_irq_restore(unsigned long flags)
+{
+ if (!arch_irqs_disabled_flags(flags))
+ arch_local_irq_enable();
+}
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h
index c9803f1a2033..7ede2731dc92 100644
--- a/arch/x86/include/asm/ist.h
+++ b/arch/x86/include/asm/ist.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Include file for the interface to IST BIOS
* Copyright 2002 Andy Grover <andrew.grover@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#ifndef _ASM_X86_IST_H
#define _ASM_X86_IST_H
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..05b16299588d 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,31 +2,39 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#define JUMP_LABEL_NOP_SIZE 5
-
-#ifdef CONFIG_X86_64
-# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
-#else
-# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
-#endif
+#define HAVE_JUMP_LABEL_BATCH
#include <asm/asm.h>
#include <asm/nops.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
#include <linux/types.h>
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+#define JUMP_TABLE_ENTRY(key, label) \
+ ".pushsection __jump_table, \"aw\" \n\t" \
+ _ASM_ALIGN "\n\t" \
+ ANNOTATE_DATA_SPECIAL "\n" \
+ ".long 1b - . \n\t" \
+ ".long " label " - . \n\t" \
+ _ASM_PTR " " key " - . \n\t" \
+ ".popsection \n\t"
+
+/* This macro is also expanded on the Rust side. */
+#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: jmp " label " # objtool NOPs this \n\t" \
+ JUMP_TABLE_ENTRY(key " + 2", label)
+#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: .byte " __stringify(BYTES_NOP5) "\n\t" \
+ JUMP_TABLE_ENTRY(key, label)
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
+
+static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
- ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ asm goto(ARCH_STATIC_BRANCH_ASM("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -34,16 +42,11 @@ l_yes:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
- ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
- "2:\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ asm goto("1:"
+ "jmp %l[l_yes]\n\t"
+ JUMP_TABLE_ENTRY("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -51,42 +54,8 @@ l_yes:
return true;
}
-#else /* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
- .if \def
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .else
- .byte STATIC_KEY_INIT_NOP
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key - .
- .popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
- .if \def
- .byte STATIC_KEY_INIT_NOP
- .else
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key + 1 - .
- .popsection
-.endm
+extern int arch_jump_entry_size(struct jump_entry *entry);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 13e70da38bed..d7e33c7f096b 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -23,14 +23,17 @@
(1ULL << (__VIRTUAL_MASK_SHIFT - \
KASAN_SHADOW_SCALE_SHIFT)))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_KASAN
void __init kasan_early_init(void);
void __init kasan_init(void);
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
#else
static inline void kasan_early_init(void) { }
static inline void kasan_init(void) { }
+static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
+ int nid) { }
#endif
#endif
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index db7ba2feb947..0648190467ba 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose);
#ifdef CONFIG_RANDOMIZE_MEMORY
void kernel_randomize_memory(void);
+void init_trampoline_kaslr(void);
#else
static inline void kernel_randomize_memory(void) { }
+static inline void init_trampoline_kaslr(void) {}
#endif /* CONFIG_RANDOMIZE_MEMORY */
#endif
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..d1514e70477b 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -33,10 +33,12 @@ enum show_regs_mode {
};
extern void die(const char *, struct pt_regs *,long);
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
-extern void show_iret_regs(struct pt_regs *regs);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode,
+ const char *log_lvl);
+extern void show_iret_regs(struct pt_regs *regs, const char *log_lvl);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5cfb27f26583 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,23 +9,30 @@
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#else
-# define PA_CONTROL_PAGE 0
-# define VA_CONTROL_PAGE 1
-# define PA_TABLE_PAGE 2
-# define PA_SWAP_PAGE 3
-# define PAGES_NR 4
+/* Size of each exception handler referenced by the IDT */
+# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */
+#endif
+
+#ifdef CONFIG_X86_64
+
+#include <linux/bits.h>
+
+#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0)
+#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1)
+
#endif
+# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/string.h>
#include <linux/kernel.h>
+#include <asm/asm.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/bootparam.h>
struct kimage;
@@ -44,7 +51,6 @@ struct kimage;
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-# define KEXEC_CONTROL_PAGE_SIZE 4096
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
@@ -59,32 +65,17 @@ struct kimage;
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
-/* Allocate one page for the pdp and the second for the code */
-# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
-
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
-#endif
-/* Memory to backup during crash kdump */
-#define KEXEC_BACKUP_SRC_START (0UL)
-#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
-
-/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
- newregs->sp = (unsigned long)&(oldregs->sp);
- asm volatile("xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->ss));
+extern unsigned long kexec_va_control_page;
+extern unsigned long kexec_pa_table_page;
+extern unsigned long kexec_pa_swap_page;
+extern gate_desc kexec_debug_idt[];
+extern unsigned char kexec_debug_exc_vectors[];
+extern uint16_t kexec_debug_8250_port;
+extern unsigned long kexec_debug_8250_mmio32;
#endif
-}
/*
* This function is responsible for capturing register states if coming
@@ -96,63 +87,52 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
- crash_fixup_ss_esp(newregs, oldregs);
} else {
+ asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx));
+ asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx));
+ asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx));
+ asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si));
+ asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di));
+ asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp));
+ asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax));
+ asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp));
+#ifdef CONFIG_X86_64
+ asm volatile("mov %%r8,%0" : "=m"(newregs->r8));
+ asm volatile("mov %%r9,%0" : "=m"(newregs->r9));
+ asm volatile("mov %%r10,%0" : "=m"(newregs->r10));
+ asm volatile("mov %%r11,%0" : "=m"(newregs->r11));
+ asm volatile("mov %%r12,%0" : "=m"(newregs->r12));
+ asm volatile("mov %%r13,%0" : "=m"(newregs->r13));
+ asm volatile("mov %%r14,%0" : "=m"(newregs->r14));
+ asm volatile("mov %%r15,%0" : "=m"(newregs->r15));
+#endif
+ asm volatile("mov %%ss,%k0" : "=a"(newregs->ss));
+ asm volatile("mov %%cs,%k0" : "=a"(newregs->cs));
#ifdef CONFIG_X86_32
- asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
- asm volatile("movl %%ecx,%0" : "=m"(newregs->cx));
- asm volatile("movl %%edx,%0" : "=m"(newregs->dx));
- asm volatile("movl %%esi,%0" : "=m"(newregs->si));
- asm volatile("movl %%edi,%0" : "=m"(newregs->di));
- asm volatile("movl %%ebp,%0" : "=m"(newregs->bp));
- asm volatile("movl %%eax,%0" : "=m"(newregs->ax));
- asm volatile("movl %%esp,%0" : "=m"(newregs->sp));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds));
- asm volatile("movl %%es, %%eax;" :"=a"(newregs->es));
- asm volatile("pushfl; popl %0" :"=m"(newregs->flags));
-#else
- asm volatile("movq %%rbx,%0" : "=m"(newregs->bx));
- asm volatile("movq %%rcx,%0" : "=m"(newregs->cx));
- asm volatile("movq %%rdx,%0" : "=m"(newregs->dx));
- asm volatile("movq %%rsi,%0" : "=m"(newregs->si));
- asm volatile("movq %%rdi,%0" : "=m"(newregs->di));
- asm volatile("movq %%rbp,%0" : "=m"(newregs->bp));
- asm volatile("movq %%rax,%0" : "=m"(newregs->ax));
- asm volatile("movq %%rsp,%0" : "=m"(newregs->sp));
- asm volatile("movq %%r8,%0" : "=m"(newregs->r8));
- asm volatile("movq %%r9,%0" : "=m"(newregs->r9));
- asm volatile("movq %%r10,%0" : "=m"(newregs->r10));
- asm volatile("movq %%r11,%0" : "=m"(newregs->r11));
- asm volatile("movq %%r12,%0" : "=m"(newregs->r12));
- asm volatile("movq %%r13,%0" : "=m"(newregs->r13));
- asm volatile("movq %%r14,%0" : "=m"(newregs->r14));
- asm volatile("movq %%r15,%0" : "=m"(newregs->r15));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("pushfq; popq %0" :"=m"(newregs->flags));
+ asm volatile("mov %%ds,%k0" : "=a"(newregs->ds));
+ asm volatile("mov %%es,%k0" : "=a"(newregs->es));
#endif
+ asm volatile("pushf\n\t"
+ "pop %0" : "=m"(newregs->flags));
newregs->ip = _THIS_IP_;
}
}
#ifdef CONFIG_X86_32
-asmlinkage unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long control_page,
- unsigned long start_address,
- unsigned int has_pae,
- unsigned int preserve_context);
+typedef asmlinkage unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
#else
-unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long page_list,
- unsigned long start_address,
- unsigned int preserve_context,
- unsigned int sme_active);
+typedef unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long pa_control_page,
+ unsigned long start_address,
+ unsigned int flags);
#endif
-
+extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH
#ifdef CONFIG_X86_32
@@ -167,21 +147,23 @@ struct kimage_arch {
};
#else
struct kimage_arch {
+ /*
+ * This is a kimage control page, as it must not overlap with either
+ * source or destination address ranges.
+ */
+ pgd_t *pgd;
+ /*
+ * The virtual mapping of the control code page itself is used only
+ * during the transition, while the current kernel's pages are all
+ * in place. Thus the intermediate page table pages used to map it
+ * are not control pages, but instead just normal pages obtained
+ * with get_zeroed_page(). And have to be tracked (below) so that
+ * they can be freed.
+ */
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- /* Details of backup region */
- unsigned long backup_src_start;
- unsigned long backup_src_sz;
-
- /* Physical address of backup segment */
- unsigned long backup_load_addr;
-
- /* Core ELF header buffer */
- void *elf_headers;
- unsigned long elf_headers_sz;
- unsigned long elf_load_addr;
};
#endif /* CONFIG_X86_32 */
@@ -218,12 +200,38 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+
+#ifdef CONFIG_KEXEC_FILE
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+#endif
#endif
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_CRASH_HOTPLUG
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
+#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
+#endif
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
new file mode 100644
index 000000000000..ff5c7134a37a
--- /dev/null
+++ b/arch/x86/include/asm/kfence.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef _ASM_X86_KFENCE_H
+#define _ASM_X86_KFENCE_H
+
+#ifndef MODULE
+
+#include <linux/bug.h>
+#include <linux/kfence.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/set_memory.h>
+#include <asm/tlbflush.h>
+
+/* Force 4K pages for __kfence_pool. */
+static inline bool arch_kfence_init_pool(void)
+{
+ unsigned long addr;
+
+ for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+ addr += PAGE_SIZE) {
+ unsigned int level;
+
+ if (!lookup_address(addr, &level))
+ return false;
+
+ if (level != PG_LEVEL_4K)
+ set_memory_4k(addr, 1);
+ }
+
+ return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ unsigned int level;
+ pte_t *pte = lookup_address(addr, &level);
+
+ if (WARN_ON(!pte || level != PG_LEVEL_4K))
+ return false;
+
+ /*
+ * We need to avoid IPIs, as we may get KFENCE allocations or faults
+ * with interrupts disabled. Therefore, the below is best-effort, and
+ * does not flush TLBs on all CPUs. We can tolerate some inaccuracy;
+ * lazy fault handling takes care of faults after the page is PRESENT.
+ */
+
+ if (protect)
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ else
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+
+ /*
+ * Flush this CPU's TLB, assuming whoever did the allocation/free is
+ * likely to continue running on this CPU.
+ */
+ preempt_disable();
+ flush_tlb_one_kernel(addr);
+ preempt_enable();
+ return true;
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_X86_KFENCE_H */
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h
deleted file mode 100644
index 04ab8266e347..000000000000
--- a/arch/x86/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_KMAP_TYPES_H
-#define _ASM_X86_KMAP_TYPES_H
-
-#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
-#define __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif /* _ASM_X86_KMAP_TYPES_H */
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
new file mode 100644
index 000000000000..d91b37f5b4bb
--- /dev/null
+++ b/arch/x86/include/asm/kmsan.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KMSAN support.
+ *
+ * Copyright (C) 2022, Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#ifndef _ASM_X86_KMSAN_H
+#define _ASM_X86_KMSAN_H
+
+#ifndef MODULE
+
+#include <asm/cpu_entry_area.h>
+#include <asm/processor.h>
+#include <linux/mmzone.h>
+
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
+
+/*
+ * Functions below are declared in the header to make sure they are inlined.
+ * They all are called from kmsan_get_metadata() for every memory access in
+ * the kernel, so speed is important here.
+ */
+
+/*
+ * Compute metadata addresses for the CPU entry area on x86.
+ */
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+ unsigned long addr64 = (unsigned long)addr;
+ char *metadata_array;
+ unsigned long off;
+ int cpu;
+
+ if ((addr64 < CPU_ENTRY_AREA_BASE) ||
+ (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE)))
+ return NULL;
+ cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE;
+ off = addr64 - (unsigned long)get_cpu_entry_area(cpu);
+ if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE))
+ return NULL;
+ metadata_array = is_origin ? cpu_entry_area_origin :
+ cpu_entry_area_shadow;
+ return &per_cpu(metadata_array[off], cpu);
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version.
+ */
+static inline bool kmsan_phys_addr_valid(unsigned long addr)
+{
+ if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return !(addr >> boot_cpu_data.x86_phys_bits);
+ else
+ return true;
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version.
+ */
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+ unsigned long x = (unsigned long)addr;
+ unsigned long y = x - __START_KERNEL_map;
+ bool ret;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ if (unlikely(x > y)) {
+ x = y + phys_base;
+
+ if (y >= KERNEL_IMAGE_SIZE)
+ return false;
+ } else {
+ x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+ /* carry flag will be set if starting x was >= PAGE_OFFSET */
+ if ((x > y) || !kmsan_phys_addr_valid(x))
+ return false;
+ }
+
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this sacrifices occasionally breaking scheduling guarantees.
+ * Although, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = pfn_valid(x >> PAGE_SHIFT);
+ preempt_enable_no_resched();
+
+ return ret;
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_X86_KMSAN_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index c8cec1b39b88..5939694dfb28 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_KPROBES_H
#define _ASM_X86_KPROBES_H
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* See arch/x86/kernel/kprobes.c for x86 kprobes history.
@@ -24,12 +11,11 @@
#include <asm-generic/kprobes.h>
-#define BREAKPOINT_INSTRUCTION 0xcc
-
#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#include <asm/text-patching.h>
#include <asm/insn.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
@@ -38,10 +24,7 @@ struct pt_regs;
struct kprobe;
typedef u8 kprobe_opcode_t;
-#define RELATIVEJUMP_OPCODE 0xe9
-#define RELATIVEJUMP_SIZE 5
-#define RELATIVECALL_OPCODE 0xe8
-#define RELATIVE_ADDR_SIZE 4
+
#define MAX_STACK_SIZE 64
#define CUR_STACK_SIZE(ADDR) \
(current_top_of_stack() - (unsigned long)(ADDR))
@@ -53,40 +36,55 @@ typedef u8 kprobe_opcode_t;
/* optinsn template addresses */
extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_clac[];
extern __visible kprobe_opcode_t optprobe_template_val[];
extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
-#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
#define MAX_OPTINSN_SIZE \
(((unsigned long)optprobe_template_end - \
(unsigned long)optprobe_template_entry) + \
- MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+ MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
-asmlinkage void kretprobe_trampoline(void);
-
-extern void arch_kprobe_override_function(struct pt_regs *regs);
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the original instruction */
kprobe_opcode_t *insn;
/*
- * boostable = false: This instruction type is not boostable.
- * boostable = true: This instruction has been boosted: we have
+ * boostable = 0: This instruction type is not boostable.
+ * boostable = 1: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
* a post_handler).
*/
- bool boostable;
- bool if_modifier;
+ unsigned boostable:1;
+ unsigned char size; /* The size of insn */
+ union {
+ unsigned char opcode;
+ struct {
+ unsigned char type;
+ } jcc;
+ struct {
+ unsigned char type;
+ unsigned char asize;
+ } loop;
+ struct {
+ unsigned char reg;
+ } indirect;
+ };
+ s32 rel32; /* relative offset must be s32, s16, or s8 */
+ void (*emulate_op)(struct kprobe *p, struct pt_regs *regs);
+ /* Number of bytes of text poked */
+ int tp_len;
};
struct arch_optimized_insn {
/* copy of the original instructions */
- kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ kprobe_opcode_t copied_insn[DISP32_SIZE];
/* detour code buffer */
kprobe_opcode_t *insn;
/* the size of instructions copied to detour code buffer */
@@ -115,10 +113,11 @@ struct kprobe_ctlblk {
};
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-extern int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
extern int kprobe_int3_handler(struct pt_regs *regs);
-extern int kprobe_debug_handler(struct pt_regs *regs);
+
+#else
+
+static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
#endif /* CONFIG_KPROBES */
#endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
new file mode 100644
index 000000000000..de709fb5bd76
--- /dev/null
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL)
+BUILD_BUG_ON(1)
+#endif
+
+/*
+ * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate
+ * both DECLARE/DEFINE_STATIC_CALL() invocations and
+ * "static_call_update()" calls.
+ *
+ * KVM_X86_OP_OPTIONAL() can be used for those functions that can have
+ * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
+ * to make a definition optional, but in this case the default will
+ * be __static_call_return0.
+ */
+KVM_X86_OP(check_processor_compatibility)
+KVM_X86_OP(enable_virtualization_cpu)
+KVM_X86_OP(disable_virtualization_cpu)
+KVM_X86_OP(hardware_unsetup)
+KVM_X86_OP(has_emulated_msr)
+KVM_X86_OP(vcpu_after_set_cpuid)
+KVM_X86_OP(vm_init)
+KVM_X86_OP_OPTIONAL(vm_destroy)
+KVM_X86_OP_OPTIONAL(vm_pre_destroy)
+KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
+KVM_X86_OP(vcpu_create)
+KVM_X86_OP(vcpu_free)
+KVM_X86_OP(vcpu_reset)
+KVM_X86_OP(prepare_switch_to_guest)
+KVM_X86_OP(vcpu_load)
+KVM_X86_OP(vcpu_put)
+KVM_X86_OP(update_exception_bitmap)
+KVM_X86_OP(get_msr)
+KVM_X86_OP(set_msr)
+KVM_X86_OP(get_segment_base)
+KVM_X86_OP(get_segment)
+KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
+KVM_X86_OP(set_segment)
+KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
+KVM_X86_OP(set_cr0)
+KVM_X86_OP_OPTIONAL(post_set_cr3)
+KVM_X86_OP(is_valid_cr4)
+KVM_X86_OP(set_cr4)
+KVM_X86_OP(set_efer)
+KVM_X86_OP(get_idt)
+KVM_X86_OP(set_idt)
+KVM_X86_OP(get_gdt)
+KVM_X86_OP(set_gdt)
+KVM_X86_OP(sync_dirty_debug_regs)
+KVM_X86_OP(set_dr7)
+KVM_X86_OP(cache_reg)
+KVM_X86_OP(get_rflags)
+KVM_X86_OP(set_rflags)
+KVM_X86_OP(get_if_flag)
+KVM_X86_OP(flush_tlb_all)
+KVM_X86_OP(flush_tlb_current)
+#if IS_ENABLED(CONFIG_HYPERV)
+KVM_X86_OP_OPTIONAL(flush_remote_tlbs)
+KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range)
+#endif
+KVM_X86_OP(flush_tlb_gva)
+KVM_X86_OP(flush_tlb_guest)
+KVM_X86_OP(vcpu_pre_run)
+KVM_X86_OP(vcpu_run)
+KVM_X86_OP(handle_exit)
+KVM_X86_OP(skip_emulated_instruction)
+KVM_X86_OP_OPTIONAL(update_emulated_instruction)
+KVM_X86_OP(set_interrupt_shadow)
+KVM_X86_OP(get_interrupt_shadow)
+KVM_X86_OP(patch_hypercall)
+KVM_X86_OP(inject_irq)
+KVM_X86_OP(inject_nmi)
+KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
+KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
+KVM_X86_OP(inject_exception)
+KVM_X86_OP(cancel_injection)
+KVM_X86_OP(interrupt_allowed)
+KVM_X86_OP(nmi_allowed)
+KVM_X86_OP(get_nmi_mask)
+KVM_X86_OP(set_nmi_mask)
+KVM_X86_OP(enable_nmi_window)
+KVM_X86_OP(enable_irq_window)
+KVM_X86_OP_OPTIONAL(update_cr8_intercept)
+KVM_X86_OP(refresh_apicv_exec_ctrl)
+KVM_X86_OP_OPTIONAL(hwapic_isr_update)
+KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
+KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
+KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
+KVM_X86_OP(deliver_interrupt)
+KVM_X86_OP_OPTIONAL(sync_pir_to_irr)
+KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
+KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
+KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
+KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP_OPTIONAL(link_external_spt)
+KVM_X86_OP_OPTIONAL(set_external_spte)
+KVM_X86_OP_OPTIONAL(free_external_spt)
+KVM_X86_OP_OPTIONAL(remove_external_spte)
+KVM_X86_OP(has_wbinvd_exit)
+KVM_X86_OP(get_l2_tsc_offset)
+KVM_X86_OP(get_l2_tsc_multiplier)
+KVM_X86_OP(write_tsc_offset)
+KVM_X86_OP(write_tsc_multiplier)
+KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
+KVM_X86_OP(check_intercept)
+KVM_X86_OP(handle_exit_irqoff)
+KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
+KVM_X86_OP_OPTIONAL(vcpu_blocking)
+KVM_X86_OP_OPTIONAL(vcpu_unblocking)
+KVM_X86_OP_OPTIONAL(pi_update_irte)
+KVM_X86_OP_OPTIONAL(pi_start_bypass)
+KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
+KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
+KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
+KVM_X86_OP_OPTIONAL(protected_apic_has_interrupt)
+KVM_X86_OP_OPTIONAL(set_hv_timer)
+KVM_X86_OP_OPTIONAL(cancel_hv_timer)
+KVM_X86_OP(setup_mce)
+#ifdef CONFIG_KVM_SMM
+KVM_X86_OP(smi_allowed)
+KVM_X86_OP(enter_smm)
+KVM_X86_OP(leave_smm)
+KVM_X86_OP(enable_smi_window)
+#endif
+KVM_X86_OP_OPTIONAL(dev_get_attr)
+KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(vcpu_mem_enc_unlocked_ioctl)
+KVM_X86_OP_OPTIONAL(mem_enc_register_region)
+KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
+KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
+KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
+KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
+KVM_X86_OP(get_feature_msr)
+KVM_X86_OP(check_emulate_instruction)
+KVM_X86_OP(apic_init_signal_blocked)
+KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
+KVM_X86_OP_OPTIONAL(migrate_timers)
+KVM_X86_OP(recalc_intercepts)
+KVM_X86_OP(complete_emulated_msr)
+KVM_X86_OP(vcpu_deliver_sipi_vector)
+KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
+KVM_X86_OP_OPTIONAL(get_untagged_addr)
+KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
+KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)
+
+#undef KVM_X86_OP
+#undef KVM_X86_OP_OPTIONAL
+#undef KVM_X86_OP_OPTIONAL_RET0
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
new file mode 100644
index 000000000000..9159bf1a4730
--- /dev/null
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL)
+BUILD_BUG_ON(1)
+#endif
+
+/*
+ * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate
+ * both DECLARE/DEFINE_STATIC_CALL() invocations and
+ * "static_call_update()" calls.
+ *
+ * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
+ * a NULL definition.
+ */
+KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
+KVM_X86_PMU_OP(msr_idx_to_pmc)
+KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early)
+KVM_X86_PMU_OP(is_valid_msr)
+KVM_X86_PMU_OP(get_msr)
+KVM_X86_PMU_OP(set_msr)
+KVM_X86_PMU_OP(refresh)
+KVM_X86_PMU_OP(init)
+KVM_X86_PMU_OP_OPTIONAL(reset)
+KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
+KVM_X86_PMU_OP_OPTIONAL(cleanup)
+
+#undef KVM_X86_PMU_OP
+#undef KVM_X86_PMU_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
deleted file mode 100644
index 93c4bf598fb0..000000000000
--- a/arch/x86/include/asm/kvm_emulate.h
+++ /dev/null
@@ -1,457 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/******************************************************************************
- * x86_emulate.h
- *
- * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
- *
- * Copyright (c) 2005 Keir Fraser
- *
- * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
- */
-
-#ifndef _ASM_X86_KVM_X86_EMULATE_H
-#define _ASM_X86_KVM_X86_EMULATE_H
-
-#include <asm/desc_defs.h>
-
-struct x86_emulate_ctxt;
-enum x86_intercept;
-enum x86_intercept_stage;
-
-struct x86_exception {
- u8 vector;
- bool error_code_valid;
- u16 error_code;
- bool nested_page_fault;
- u64 address; /* cr2 or nested page fault gpa */
- u8 async_page_fault;
-};
-
-/*
- * This struct is used to carry enough information from the instruction
- * decoder to main KVM so that a decision can be made whether the
- * instruction needs to be intercepted or not.
- */
-struct x86_instruction_info {
- u8 intercept; /* which intercept */
- u8 rep_prefix; /* rep prefix? */
- u8 modrm_mod; /* mod part of modrm */
- u8 modrm_reg; /* index of register used */
- u8 modrm_rm; /* rm part of modrm */
- u64 src_val; /* value of source operand */
- u64 dst_val; /* value of destination operand */
- u8 src_bytes; /* size of source operand */
- u8 dst_bytes; /* size of destination operand */
- u8 ad_bytes; /* size of src/dst address */
- u64 next_rip; /* rip following the instruction */
-};
-
-/*
- * x86_emulate_ops:
- *
- * These operations represent the instruction emulator's interface to memory.
- * There are two categories of operation: those that act on ordinary memory
- * regions (*_std), and those that act on memory regions known to require
- * special treatment or emulation (*_emulated).
- *
- * The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, that this location is the given linear faulting address (cr2), and
- * that this is one of the instruction's data operands. Instruction fetches and
- * stack operations are assumed never to access emulated memory. The emulator
- * automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and assumes that the other operand accesses normal memory.
- *
- * NOTES:
- * 1. The emulator isn't very smart about emulated vs. standard memory.
- * 'Emulated memory' access addresses should be checked for sanity.
- * 'Normal memory' accesses may fault, and the caller must arrange to
- * detect and handle reentrancy into the emulator via recursive faults.
- * Accesses may be unaligned and may cross page boundaries.
- * 2. If the access fails (cannot emulate, or a standard access faults) then
- * it is up to the memop to propagate the fault to the guest VM via
- * some out-of-band mechanism, unknown to the emulator. The memop signals
- * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
- * then immediately bail.
- * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
- * cmpxchg8b_emulated need support 8-byte accesses.
- * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
- */
-/* Access completed successfully: continue emulation as normal. */
-#define X86EMUL_CONTINUE 0
-/* Access is unhandleable: bail from emulation and return error to caller. */
-#define X86EMUL_UNHANDLEABLE 1
-/* Terminate emulation but return success to the caller. */
-#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
-#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */
-#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
-#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
-#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
-
-struct x86_emulate_ops {
- /*
- * read_gpr: read a general purpose register (rax - r15)
- *
- * @reg: gpr number.
- */
- ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
- /*
- * write_gpr: write a general purpose register (rax - r15)
- *
- * @reg: gpr number.
- * @val: value to write.
- */
- void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
- /*
- * read_std: Read bytes of standard (non-emulated/special) memory.
- * Used for descriptor reading.
- * @addr: [IN ] Linear address from which to read.
- * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
- * @bytes: [IN ] Number of bytes to read from memory.
- * @system:[IN ] Whether the access is forced to be at CPL0.
- */
- int (*read_std)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val,
- unsigned int bytes,
- struct x86_exception *fault, bool system);
-
- /*
- * read_phys: Read bytes of standard (non-emulated/special) memory.
- * Used for descriptor reading.
- * @addr: [IN ] Physical address from which to read.
- * @val: [OUT] Value read from memory.
- * @bytes: [IN ] Number of bytes to read from memory.
- */
- int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
- void *val, unsigned int bytes);
-
- /*
- * write_std: Write bytes of standard (non-emulated/special) memory.
- * Used for descriptor writing.
- * @addr: [IN ] Linear address to which to write.
- * @val: [OUT] Value write to memory, zero-extended to 'u_long'.
- * @bytes: [IN ] Number of bytes to write to memory.
- * @system:[IN ] Whether the access is forced to be at CPL0.
- */
- int (*write_std)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes,
- struct x86_exception *fault, bool system);
- /*
- * fetch: Read bytes of standard (non-emulated/special) memory.
- * Used for instruction fetch.
- * @addr: [IN ] Linear address from which to read.
- * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
- * @bytes: [IN ] Number of bytes to read from memory.
- */
- int (*fetch)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes,
- struct x86_exception *fault);
-
- /*
- * read_emulated: Read bytes from emulated/special memory area.
- * @addr: [IN ] Linear address from which to read.
- * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
- * @bytes: [IN ] Number of bytes to read from memory.
- */
- int (*read_emulated)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes,
- struct x86_exception *fault);
-
- /*
- * write_emulated: Write bytes to emulated/special memory area.
- * @addr: [IN ] Linear address to which to write.
- * @val: [IN ] Value to write to memory (low-order bytes used as
- * required).
- * @bytes: [IN ] Number of bytes to write to memory.
- */
- int (*write_emulated)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, const void *val,
- unsigned int bytes,
- struct x86_exception *fault);
-
- /*
- * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
- * emulated/special memory area.
- * @addr: [IN ] Linear address to access.
- * @old: [IN ] Value expected to be current at @addr.
- * @new: [IN ] Value to write to @addr.
- * @bytes: [IN ] Number of bytes to access using CMPXCHG.
- */
- int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt,
- unsigned long addr,
- const void *old,
- const void *new,
- unsigned int bytes,
- struct x86_exception *fault);
- void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr);
-
- int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt,
- int size, unsigned short port, void *val,
- unsigned int count);
-
- int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt,
- int size, unsigned short port, const void *val,
- unsigned int count);
-
- bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector,
- struct desc_struct *desc, u32 *base3, int seg);
- void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector,
- struct desc_struct *desc, u32 base3, int seg);
- unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt,
- int seg);
- void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
- void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
- void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
- void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
- ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
- int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
- int (*cpl)(struct x86_emulate_ctxt *ctxt);
- int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
- int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
- u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
- void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
- int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
- int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
- int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
- int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
- void (*halt)(struct x86_emulate_ctxt *ctxt);
- void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
- int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
- int (*intercept)(struct x86_emulate_ctxt *ctxt,
- struct x86_instruction_info *info,
- enum x86_intercept_stage stage);
-
- bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
- u32 *ecx, u32 *edx, bool check_limit);
- void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
-
- unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
- void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
- int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
-
-};
-
-typedef u32 __attribute__((vector_size(16))) sse128_t;
-
-/* Type, address-of, and value of an instruction's operand. */
-struct operand {
- enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
- unsigned int bytes;
- unsigned int count;
- union {
- unsigned long orig_val;
- u64 orig_val64;
- };
- union {
- unsigned long *reg;
- struct segmented_address {
- ulong ea;
- unsigned seg;
- } mem;
- unsigned xmm;
- unsigned mm;
- } addr;
- union {
- unsigned long val;
- u64 val64;
- char valptr[sizeof(sse128_t)];
- sse128_t vec_val;
- u64 mm_val;
- void *data;
- };
-};
-
-struct fetch_cache {
- u8 data[15];
- u8 *ptr;
- u8 *end;
-};
-
-struct read_cache {
- u8 data[1024];
- unsigned long pos;
- unsigned long end;
-};
-
-/* Execution mode, passed to the emulator. */
-enum x86emul_mode {
- X86EMUL_MODE_REAL, /* Real mode. */
- X86EMUL_MODE_VM86, /* Virtual 8086 mode. */
- X86EMUL_MODE_PROT16, /* 16-bit protected mode. */
- X86EMUL_MODE_PROT32, /* 32-bit protected mode. */
- X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */
-};
-
-/* These match some of the HF_* flags defined in kvm_host.h */
-#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
-#define X86EMUL_SMM_MASK (1 << 6)
-#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
-
-struct x86_emulate_ctxt {
- const struct x86_emulate_ops *ops;
-
- /* Register state before/after emulation. */
- unsigned long eflags;
- unsigned long eip; /* eip before instruction emulation */
- /* Emulated execution mode, represented by an X86EMUL_MODE value. */
- enum x86emul_mode mode;
-
- /* interruptibility state, as a result of execution of STI or MOV SS */
- int interruptibility;
-
- bool perm_ok; /* do not check permissions if true */
- bool ud; /* inject an #UD if host doesn't support insn */
- bool tf; /* TF value before instruction (after for syscall/sysret) */
-
- bool have_exception;
- struct x86_exception exception;
-
- /*
- * decode cache
- */
-
- /* current opcode length in bytes */
- u8 opcode_len;
- u8 b;
- u8 intercept;
- u8 op_bytes;
- u8 ad_bytes;
- struct operand src;
- struct operand src2;
- struct operand dst;
- int (*execute)(struct x86_emulate_ctxt *ctxt);
- int (*check_perm)(struct x86_emulate_ctxt *ctxt);
- /*
- * The following six fields are cleared together,
- * the rest are initialized unconditionally in x86_decode_insn
- * or elsewhere
- */
- bool rip_relative;
- u8 rex_prefix;
- u8 lock_prefix;
- u8 rep_prefix;
- /* bitmaps of registers in _regs[] that can be read */
- u32 regs_valid;
- /* bitmaps of registers in _regs[] that have been written */
- u32 regs_dirty;
- /* modrm */
- u8 modrm;
- u8 modrm_mod;
- u8 modrm_reg;
- u8 modrm_rm;
- u8 modrm_seg;
- u8 seg_override;
- u64 d;
- unsigned long _eip;
- struct operand memop;
- /* Fields above regs are cleared together. */
- unsigned long _regs[NR_VCPU_REGS];
- struct operand *memopp;
- struct fetch_cache fetch;
- struct read_cache io_read;
- struct read_cache mem_read;
-};
-
-/* Repeat String Operation Prefix */
-#define REPE_PREFIX 0xf3
-#define REPNE_PREFIX 0xf2
-
-/* CPUID vendors */
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
-#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
-
-#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
-#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
-#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
-
-#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948
-#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975
-#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e
-
-#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
-#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
-#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
-
-enum x86_intercept_stage {
- X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
- X86_ICPT_PRE_EXCEPT,
- X86_ICPT_POST_EXCEPT,
- X86_ICPT_POST_MEMACCESS,
-};
-
-enum x86_intercept {
- x86_intercept_none,
- x86_intercept_cr_read,
- x86_intercept_cr_write,
- x86_intercept_clts,
- x86_intercept_lmsw,
- x86_intercept_smsw,
- x86_intercept_dr_read,
- x86_intercept_dr_write,
- x86_intercept_lidt,
- x86_intercept_sidt,
- x86_intercept_lgdt,
- x86_intercept_sgdt,
- x86_intercept_lldt,
- x86_intercept_sldt,
- x86_intercept_ltr,
- x86_intercept_str,
- x86_intercept_rdtsc,
- x86_intercept_rdpmc,
- x86_intercept_pushf,
- x86_intercept_popf,
- x86_intercept_cpuid,
- x86_intercept_rsm,
- x86_intercept_iret,
- x86_intercept_intn,
- x86_intercept_invd,
- x86_intercept_pause,
- x86_intercept_hlt,
- x86_intercept_invlpg,
- x86_intercept_invlpga,
- x86_intercept_vmrun,
- x86_intercept_vmload,
- x86_intercept_vmsave,
- x86_intercept_vmmcall,
- x86_intercept_stgi,
- x86_intercept_clgi,
- x86_intercept_skinit,
- x86_intercept_rdtscp,
- x86_intercept_icebp,
- x86_intercept_wbinvd,
- x86_intercept_monitor,
- x86_intercept_mwait,
- x86_intercept_rdmsr,
- x86_intercept_wrmsr,
- x86_intercept_in,
- x86_intercept_ins,
- x86_intercept_out,
- x86_intercept_outs,
-
- nr_x86_intercepts
-};
-
-/* Host execution mode. */
-#if defined(CONFIG_X86_32)
-#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
-#elif defined(CONFIG_X86_64)
-#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
-#endif
-
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
-bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
-#define EMULATION_FAILED -1
-#define EMULATION_OK 0
-#define EMULATION_RESTART 1
-#define EMULATION_INTERCEPTED 2
-void init_decode_cache(struct x86_emulate_ctxt *ctxt);
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
-int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
- u16 tss_selector, int idt_index, int reason,
- bool has_error_code, u32 error_code);
-int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
-void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
-void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
-bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);
-
-#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4660ce90de7f..5a3bfa293e8b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Kernel-based Virtual Machine driver for Linux
*
* This header defines architecture specific interfaces, x86 version
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#ifndef _ASM_X86_KVM_HOST_H
@@ -18,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
+#include <linux/workqueue.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -26,43 +24,81 @@
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
-#include <linux/hyperv.h>
+#include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
+#include <linux/atomic.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
+#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/asm.h>
+#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm/kvm_vcpu_regs.h>
+#include <asm/reboot.h>
+#include <hyperv/hvhdk.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+
+/*
+ * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if
+ * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS).
+ */
+#ifdef CONFIG_KVM_MAX_NR_VCPUS
+#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS
+#else
+#define KVM_MAX_VCPUS 1024
+#endif
+
+/*
+ * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
+ * might be larger than the actual number of VCPUs because the
+ * APIC ID encodes CPU topology information.
+ *
+ * In the worst case, we'll need less than one extra bit for the
+ * Core ID, and less than one extra bit for the Package (Die) ID,
+ * so ratio of 4 should be enough.
+ */
+#define KVM_VCPU_ID_RATIO 4
+#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
-#define KVM_MAX_VCPUS 288
-#define KVM_SOFT_MAX_VCPUS 240
-#define KVM_MAX_VCPU_ID 1023
-#define KVM_USER_MEM_SLOTS 509
/* memory slots that are not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 3
-#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+#define KVM_INTERNAL_MEM_SLOTS 3
#define KVM_HALT_POLL_NS_DEFAULT 200000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
+#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \
+ KVM_BUS_LOCK_DETECTION_EXIT)
+
+#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
+ KVM_X86_NOTIFY_VMEXIT_USER)
+
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
-#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
+#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
#define KVM_REQ_NMI KVM_ARCH_REQ(9)
#define KVM_REQ_PMU KVM_ARCH_REQ(10)
#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#endif
#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -77,7 +113,22 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_APICV_UPDATE \
+ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+#define KVM_REQ_TLB_FLUSH_GUEST \
+ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29)
+#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
+ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_TLB_FLUSH \
+ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \
+ KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -90,7 +141,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_LAM_SUP | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -99,71 +151,56 @@
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
-#define UNMAPPED_GVA (~(gpa_t)0)
-
/* KVM Hugepage definitions for x86 */
-enum {
- PT_PAGE_TABLE_LEVEL = 1,
- PT_DIRECTORY_LEVEL = 2,
- PT_PDPE_LEVEL = 3,
- /* set max level to the biggest one */
- PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL,
-};
-#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \
- PT_PAGE_TABLE_LEVEL + 1)
+#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
+#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
-#define KVM_PERMILLE_MMU_PAGES 20
-#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
+#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
-#define KVM_NR_FIXED_MTRR_REGION 88
+#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_VAR_MTRR 8
#define ASYNC_PF_PER_VCPU 64
enum kvm_reg {
- VCPU_REGS_RAX = 0,
- VCPU_REGS_RCX = 1,
- VCPU_REGS_RDX = 2,
- VCPU_REGS_RBX = 3,
- VCPU_REGS_RSP = 4,
- VCPU_REGS_RBP = 5,
- VCPU_REGS_RSI = 6,
- VCPU_REGS_RDI = 7,
+ VCPU_REGS_RAX = __VCPU_REGS_RAX,
+ VCPU_REGS_RCX = __VCPU_REGS_RCX,
+ VCPU_REGS_RDX = __VCPU_REGS_RDX,
+ VCPU_REGS_RBX = __VCPU_REGS_RBX,
+ VCPU_REGS_RSP = __VCPU_REGS_RSP,
+ VCPU_REGS_RBP = __VCPU_REGS_RBP,
+ VCPU_REGS_RSI = __VCPU_REGS_RSI,
+ VCPU_REGS_RDI = __VCPU_REGS_RDI,
#ifdef CONFIG_X86_64
- VCPU_REGS_R8 = 8,
- VCPU_REGS_R9 = 9,
- VCPU_REGS_R10 = 10,
- VCPU_REGS_R11 = 11,
- VCPU_REGS_R12 = 12,
- VCPU_REGS_R13 = 13,
- VCPU_REGS_R14 = 14,
- VCPU_REGS_R15 = 15,
+ VCPU_REGS_R8 = __VCPU_REGS_R8,
+ VCPU_REGS_R9 = __VCPU_REGS_R9,
+ VCPU_REGS_R10 = __VCPU_REGS_R10,
+ VCPU_REGS_R11 = __VCPU_REGS_R11,
+ VCPU_REGS_R12 = __VCPU_REGS_R12,
+ VCPU_REGS_R13 = __VCPU_REGS_R13,
+ VCPU_REGS_R14 = __VCPU_REGS_R14,
+ VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
-enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_EXREG_CR0,
VCPU_EXREG_CR3,
+ VCPU_EXREG_CR4,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_SEGMENTS,
+ VCPU_EXREG_EXIT_INFO_1,
+ VCPU_EXREG_EXIT_INFO_2,
};
enum {
@@ -177,54 +214,79 @@ enum {
VCPU_SREG_LDTR,
};
-#include <asm/kvm_emulate.h>
+enum exit_fastpath_completion {
+ EXIT_FASTPATH_NONE,
+ EXIT_FASTPATH_REENTER_GUEST,
+ EXIT_FASTPATH_EXIT_HANDLED,
+ EXIT_FASTPATH_EXIT_USERSPACE,
+};
+typedef enum exit_fastpath_completion fastpath_t;
-#define KVM_NR_MEM_OBJS 40
+struct x86_emulate_ctxt;
+struct x86_exception;
+union kvm_smram;
+enum x86_intercept;
+enum x86_intercept_stage;
#define KVM_NR_DB_REGS 4
+#define DR6_BUS_LOCK (1 << 11)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
#define DR6_RTM (1 << 16)
-#define DR6_FIXED_1 0xfffe0ff0
-#define DR6_INIT 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+/*
+ * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
+ * We can regard all the bits in DR6_FIXED_1 as active_low bits;
+ * they will never be 0 for now, but when they are defined
+ * in the future it will require no code change.
+ *
+ * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
+ */
+#define DR6_ACTIVE_LOW 0xffff0ff0
+#define DR6_VOLATILE 0x0001e80f
+#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
#define DR7_BP_EN_MASK 0x000000ff
#define DR7_GE (1 << 9)
#define DR7_GD (1 << 13)
-#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
-#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
-
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
- PFERR_WRITE_MASK | \
- PFERR_PRESENT_MASK)
+#define KVM_GUESTDBG_VALID_MASK \
+ (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_SINGLESTEP | \
+ KVM_GUESTDBG_USE_HW_BP | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_INJECT_BP | \
+ KVM_GUESTDBG_INJECT_DB | \
+ KVM_GUESTDBG_BLOCKIRQ)
+
+#define PFERR_PRESENT_MASK BIT(0)
+#define PFERR_WRITE_MASK BIT(1)
+#define PFERR_USER_MASK BIT(2)
+#define PFERR_RSVD_MASK BIT(3)
+#define PFERR_FETCH_MASK BIT(4)
+#define PFERR_PK_MASK BIT(5)
+#define PFERR_SS_MASK BIT(6)
+#define PFERR_SGX_MASK BIT(15)
+#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK BIT_ULL(36)
/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that triggers implicit access.
*/
-#define SPTE_SPECIAL_MASK (1ULL << 62)
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -236,41 +298,66 @@ enum {
*/
#define KVM_APIC_PV_EOI_PENDING 1
+struct kvm_kernel_irqfd;
struct kvm_kernel_irq_routing_entry;
/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context. This is a subset of the overall kvm_cpu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
+ *
+ * Upper-level shadow pages having gptes are tracked for write-protection via
+ * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
*
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
- * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * A unique shadow page (SP) for a gfn is created if and only if an existing SP
+ * cannot be reused. The ability to reuse a SP is tracked by its role, which
+ * incorporates various mode bits and properties of the SP. Roughly speaking,
+ * the number of unique SPs that can theoretically be created is 2^n, where n
+ * is the number of bits that are used to compute the role.
+ *
+ * But, even though there are 20 bits in the mask below, not all combinations
+ * of modes and flags are possible:
+ *
+ * - invalid shadow pages are not accounted, mirror pages are not shadowed,
+ * so the bits are effectively 18.
+ *
+ * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
+ * execonly and ad_disabled are only used for nested EPT which has
+ * has_4_byte_gpte=0. Therefore, 2 bits are always unused.
+ *
+ * - the 4 bits of level are effectively limited to the values 2/3/4/5,
+ * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
+ * paging has exactly one upper level, making level completely redundant
+ * when has_4_byte_gpte=1.
+ *
+ * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
+ * cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
+ *
+ * Therefore, the maximum number of possible upper-level shadow pages for a
+ * single gfn is a bit less than 2^13.
*/
union kvm_mmu_page_role {
u32 word;
struct {
unsigned level:4;
- unsigned cr4_pae:1;
+ unsigned has_4_byte_gpte:1;
unsigned quadrant:2;
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
- unsigned nxe:1;
+ unsigned efer_nx:1;
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
unsigned ad_disabled:1;
unsigned guest_mode:1;
- unsigned :6;
+ unsigned passthrough:1;
+ unsigned is_mirror:1;
+ unsigned :4;
/*
* This is left at the top of the word so that
@@ -282,27 +369,40 @@ union kvm_mmu_page_role {
};
};
-union kvm_mmu_extended_role {
/*
- * This structure complements kvm_mmu_page_role caching everything needed for
- * MMU configuration. If nothing in both these structures changed, MMU
- * re-configuration can be skipped. @valid bit is set on first usage so we don't
- * treat all-zero structure as valid data.
+ * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
+ * relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
+ * including on nested transitions, if nothing in the full role changes then
+ * MMU re-configuration can be skipped. @valid bit is set on first usage so we
+ * don't treat all-zero structure as valid data.
+ *
+ * The properties that are tracked in the extended role but not the page role
+ * are for things that either (a) do not affect the validity of the shadow page
+ * or (b) are indirectly reflected in the shadow page's role. For example,
+ * CR4.PKE only affects permission checks for software walks of the guest page
+ * tables (because KVM doesn't support Protection Keys with shadow paging), and
+ * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
+ *
+ * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
+ * SMAP, but the MMU's permission checks for software walks need to be SMEP and
+ * SMAP aware regardless of CR0.WP.
*/
+union kvm_mmu_extended_role {
u32 word;
struct {
unsigned int valid:1;
unsigned int execonly:1;
- unsigned int cr0_pg:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
};
};
-union kvm_mmu_role {
+union kvm_cpu_role {
u64 as_u64;
struct {
union kvm_mmu_page_role base;
@@ -311,43 +411,7 @@ union kvm_mmu_role {
};
struct kvm_rmap_head {
- unsigned long val;
-};
-
-struct kvm_mmu_page {
- struct list_head link;
- struct hlist_node hash_link;
- bool unsync;
-
- /*
- * The following two entries are used to key the shadow page in the
- * hash table.
- */
- union kvm_mmu_page_role role;
- gfn_t gfn;
-
- u64 *spt;
- /* hold the gfn of each spte inside spt */
- gfn_t *gfns;
- int root_count; /* Currently serving as active root */
- unsigned int unsync_children;
- struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-
- /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
- unsigned long mmu_valid_gen;
-
- DECLARE_BITMAP(unsync_child_bitmap, 512);
-
-#ifdef CONFIG_X86_32
- /*
- * Used out of the mmu-lock to avoid reading spte values while an
- * update is in progress; see the comments in __get_spte_lockless().
- */
- int clear_spte_count;
-#endif
-
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
+ atomic_long_t val;
};
struct kvm_pio_request {
@@ -365,51 +429,44 @@ struct rsvd_bits_validate {
};
struct kvm_mmu_root_info {
- gpa_t cr3;
+ gpa_t pgd;
hpa_t hpa;
};
#define KVM_MMU_ROOT_INFO_INVALID \
- ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
#define KVM_MMU_NUM_PREV_ROOTS 3
+#define KVM_MMU_ROOT_CURRENT BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
+#define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1)
+
+#define KVM_HAVE_MMU_RWLOCK
+
+struct kvm_mmu_page;
+struct kvm_page_fault;
+
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
* current mmu mode.
*/
struct kvm_mmu {
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
- unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+ unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
- bool prefault);
+ int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gpa_t gva_or_gpa, u64 access,
struct x86_exception *exception);
- gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
- int (*sync_page)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp);
- void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
- void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte);
- hpa_t root_hpa;
- union kvm_mmu_role mmu_role;
- u8 root_level;
- u8 shadow_root_level;
- u8 ept_ad;
- bool direct_map;
- struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
-
- /*
- * Bitmap; bit set = permission fault
- * Byte index: page fault error code [4:1]
- * Bit index: pte permissions in ACC_* format
- */
- u8 permissions[16];
+ int (*sync_spte)(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, int i);
+ struct kvm_mmu_root_info root;
+ hpa_t mirror_root_hpa;
+ union kvm_cpu_role cpu_role;
+ union kvm_mmu_page_role root_role;
/*
* The pkru_mask indicates if protection key checks are needed. It
@@ -419,8 +476,18 @@ struct kvm_mmu {
*/
u32 pkru_mask;
+ struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
+
+ /*
+ * Bitmap; bit set = permission fault
+ * Byte index: page fault error code [4:1]
+ * Bit index: pte permissions in ACC_* format
+ */
+ u8 permissions[16];
+
u64 *pae_root;
- u64 *lm_root;
+ u64 *pml4_root;
+ u64 *pml5_root;
/*
* check zero bits on shadow page table entries, these
@@ -431,19 +498,9 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
- /* Can have large pages at levels 2..last_nonleaf_level-1. */
- u8 last_nonleaf_level;
-
- bool nx;
-
u64 pdptrs[4]; /* pae */
};
-struct kvm_tlb_range {
- u64 start_gfn;
- u64 pages;
-};
-
enum pmc_type {
KVM_PMC_GP = 0,
KVM_PMC_FIXED,
@@ -452,50 +509,128 @@ enum pmc_type {
struct kvm_pmc {
enum pmc_type type;
u8 idx;
+ bool is_paused;
+ bool intr;
+ /*
+ * Base value of the PMC counter, relative to the *consumed* count in
+ * the associated perf_event. This value includes counter updates from
+ * the perf_event and emulated_count since the last time the counter
+ * was reprogrammed, but it is *not* the current value as seen by the
+ * guest or userspace.
+ *
+ * The count is relative to the associated perf_event so that KVM
+ * doesn't need to reprogram the perf_event every time the guest writes
+ * to the counter.
+ */
u64 counter;
+ /*
+ * PMC events triggered by KVM emulation that haven't been fully
+ * processed, i.e. haven't undergone overflow detection.
+ */
+ u64 emulated_counter;
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
+ /*
+ * only for creating or reusing perf_event,
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
};
+/* More counters may conflict with other existing Architectural MSRs */
+#define KVM_MAX(a, b) ((a) >= (b) ? (a) : (b))
+#define KVM_MAX_NR_INTEL_GP_COUNTERS 8
+#define KVM_MAX_NR_AMD_GP_COUNTERS 6
+#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
+ KVM_MAX_NR_AMD_GP_COUNTERS)
+
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+ KVM_MAX_NR_AMD_FIXED_COUNTERS)
+
struct kvm_pmu {
+ u8 version;
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
u64 fixed_ctr_ctrl;
+ u64 fixed_ctr_ctrl_rsvd;
u64 global_ctrl;
u64 global_status;
- u64 global_ovf_ctrl;
u64 counter_bitmask[2];
- u64 global_ctrl_mask;
+ u64 global_ctrl_rsvd;
+ u64 global_status_rsvd;
u64 reserved_bits;
- u8 version;
- struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
- struct irq_work irq_work;
- u64 reprogram_pmi;
+ u64 raw_event_mask;
+ struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
+ struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
+
+ /*
+ * Overlay the bitmap with a 64-bit atomic so that all bits can be
+ * set in a single access, e.g. to reprogram all counters when the PMU
+ * filter changes.
+ */
+ union {
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ atomic64_t __reprogram_pmi;
+ };
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
+ u64 ds_area;
+ u64 pebs_enable;
+ u64 pebs_enable_rsvd;
+ u64 pebs_data_cfg;
+ u64 pebs_data_cfg_rsvd;
+
+ /*
+ * If a guest counter is cross-mapped to host counter with different
+ * index, its PEBS capability will be temporarily disabled.
+ *
+ * The user should make sure that this mask is updated
+ * after disabling interrupts and before perf_guest_get_msrs();
+ */
+ u64 host_cross_mapped_mask;
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events and it helps to avoid
+ * redundant check before cleanup if guest don't use vPMU at all.
+ */
+ u8 event_count;
};
struct kvm_pmu_ops;
enum {
- KVM_DEBUGREG_BP_ENABLED = 1,
- KVM_DEBUGREG_WONT_EXIT = 2,
- KVM_DEBUGREG_RELOAD = 4,
-};
-
-struct kvm_mtrr_range {
- u64 base;
- u64 mask;
- struct list_head node;
+ KVM_DEBUGREG_BP_ENABLED = BIT(0),
+ KVM_DEBUGREG_WONT_EXIT = BIT(1),
+ /*
+ * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
+ * hardware on exit from or enter to guest. KVM needn't switch them.
+ * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
+ * exit, host values need to be restored.
+ */
+ KVM_DEBUGREG_AUTO_SWITCH = BIT(2),
};
struct kvm_mtrr {
- struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
- mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+ u64 var[KVM_NR_VAR_MTRR * 2];
+ u64 fixed_64k;
+ u64 fixed_16k[2];
+ u64 fixed_4k[8];
u64 deftype;
-
- struct list_head head;
};
/* Hyper-V SynIC timer */
@@ -523,8 +658,32 @@ struct kvm_vcpu_hv_synic {
bool dont_zero_synic_pages;
};
+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical. KVM's action to 'flush everything' instead of
+ * flushing these particular addresses is, however, fully legitimate as
+ * flushing more than requested is always OK.
+ */
+#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1)
+
+enum hv_tlb_flush_fifos {
+ HV_L1_TLB_FLUSH_FIFO,
+ HV_L2_TLB_FLUSH_FIFO,
+ HV_NR_TLB_FLUSH_FIFOS,
+};
+
+struct kvm_vcpu_hv_tlb_flush_fifo {
+ spinlock_t write_lock;
+ DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
+};
+
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
+ struct kvm_vcpu *vcpu;
u32 vp_index;
u64 hv_vapic;
s64 runtime_offset;
@@ -532,7 +691,94 @@ struct kvm_vcpu_hv {
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
- cpumask_t tlb_flush;
+ bool enforce_cpuid;
+ struct {
+ u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
+ u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
+ u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
+ u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+ u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
+ u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+ u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
+ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
+ } cpuid_cache;
+
+ struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
+
+ /*
+ * Preallocated buffers for handling hypercalls that pass sparse vCPU
+ * sets (for high vCPU counts, they're too large to comfortably fit on
+ * the stack).
+ */
+ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+
+ struct hv_vp_assist_page vp_assist_page;
+
+ struct {
+ u64 pa_page_gpa;
+ u64 vm_id;
+ u32 vp_id;
+ } nested;
+};
+
+struct kvm_hypervisor_cpuid {
+ u32 base;
+ u32 limit;
+};
+
+#ifdef CONFIG_KVM_XEN
+/* Xen HVM per vcpu emulation context */
+struct kvm_vcpu_xen {
+ u64 hypercall_rip;
+ u32 current_runstate;
+ u8 upcall_vector;
+ struct gfn_to_pfn_cache vcpu_info_cache;
+ struct gfn_to_pfn_cache vcpu_time_info_cache;
+ struct gfn_to_pfn_cache runstate_cache;
+ struct gfn_to_pfn_cache runstate2_cache;
+ u64 last_steal;
+ u64 runstate_entry_time;
+ u64 runstate_times[4];
+ unsigned long evtchn_pending_sel;
+ u32 vcpu_id; /* The Xen / ACPI vCPU ID */
+ u32 timer_virq;
+ u64 timer_expires; /* In guest epoch */
+ atomic_t timer_pending;
+ struct hrtimer timer;
+ int poll_evtchn;
+ struct timer_list poll_timer;
+ struct kvm_hypervisor_cpuid cpuid;
+};
+#endif
+
+struct kvm_queued_exception {
+ bool pending;
+ bool injected;
+ bool has_error_code;
+ u8 vector;
+ u32 error_code;
+ unsigned long payload;
+ bool has_payload;
+};
+
+/*
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM. Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+ CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
+ CPUID_8000_0007_EDX,
+ CPUID_8000_0022_EAX,
+ CPUID_7_2_EDX,
+ CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
+ CPUID_7_1_ECX,
+ NR_KVM_CPU_CAPS,
+
+ NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};
struct kvm_vcpu_arch {
@@ -550,13 +796,15 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
+ unsigned long cr4_guest_rsvd_bits;
unsigned long cr8;
+ u32 host_pkru;
u32 pkru;
u32 hflags;
u64 efer;
+ u64 host_debugctl;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
- bool apicv_active;
bool load_eoi_exitmap_pending;
DECLARE_BITMAP(ioapic_handled_vectors, 256);
unsigned long apic_attention;
@@ -565,9 +813,12 @@ struct kvm_vcpu_arch {
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+ bool at_instruction_boundary;
bool tpr_access_reporting;
- u64 ia32_xss;
+ bool xfd_no_write_intercept;
u64 microcode_version;
+ u64 arch_capabilities;
+ u64 perf_capabilities;
/*
* Paging state of the vcpu
@@ -588,7 +839,7 @@ struct kvm_vcpu_arch {
* Paging state of an L2 guest (used for nested npt)
*
* This context will save all necessary information to walk page tables
- * of the an L2 guest. This context is only initialized for page table
+ * of an L2 guest. This context is only initialized for page table
* walking and not for faulting since we never handle l2 page faults on
* the host.
*/
@@ -601,41 +852,46 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
- struct kvm_mmu_memory_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+ struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * This cache is to allocate external page table. E.g. private EPT used
+ * by the TDX module.
+ */
+ struct kvm_mmu_memory_cache mmu_external_spt_cache;
/*
* QEMU userspace and the guest each have their own FPU state.
- * In vcpu_run, we switch between the user, maintained in the
- * task_struct struct, and guest FPU contexts. While running a VCPU,
- * the VCPU thread will have the guest FPU context.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
- * "guest_fpu" state here contains the guest FPU context, with the
+ * "guest_fpstate" state here contains the guest FPU context, with the
* host PRKU bits.
*/
- struct fpu *guest_fpu;
+ struct fpu_guest guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
- u32 guest_xstate_size;
+ u64 ia32_xss;
+ u64 guest_supported_xss;
struct kvm_pio_request pio;
void *pio_data;
+ void *sev_pio_data;
+ unsigned sev_pio_count;
u8 event_exit_inst_len;
- struct kvm_queued_exception {
- bool pending;
- bool injected;
- bool has_error_code;
- u8 nr;
- u32 error_code;
- unsigned long payload;
- bool has_payload;
- u8 nested_apf;
- } exception;
+ bool exception_from_userspace;
+
+ /* Exceptions to be injected to the guest. */
+ struct kvm_queued_exception exception;
+ /* Exception VM-Exits to be synthesized to L1. */
+ struct kvm_queued_exception exception_vmexit;
struct kvm_queued_interrupt {
bool injected;
@@ -646,33 +902,55 @@ struct kvm_vcpu_arch {
int halt_request; /* real mode on Intel only */
int cpuid_nent;
- struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+ struct kvm_cpuid_entry2 *cpuid_entries;
+ bool cpuid_dynamic_bits_dirty;
+ bool is_amd_compatible;
+ /*
+ * cpu_caps holds the effective guest capabilities, i.e. the features
+ * the vCPU is allowed to use. Typically, but not always, features can
+ * be used by the guest if and only if both KVM and userspace want to
+ * expose the feature to the guest.
+ *
+ * A common exception is for virtualization holes, i.e. when KVM can't
+ * prevent the guest from using a feature, in which case the vCPU "has"
+ * the feature regardless of what KVM or userspace desires.
+ *
+ * Note, features that don't require KVM involvement in any way are
+ * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
+ * guest CPUID provided by userspace.
+ */
+ u32 cpu_caps[NR_KVM_CPU_CAPS];
+
+ u64 reserved_gpa_bits;
int maxphyaddr;
/* emulate context */
- struct x86_emulate_ctxt emulate_ctxt;
+ struct x86_emulate_ctxt *emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
+ unsigned long cui_linear_rip;
+ int cui_rdmsr_imm_reg;
gpa_t time;
- struct pvclock_vcpu_time_info hv_clock;
+ s8 pvclock_tsc_shift;
+ u32 pvclock_tsc_mul;
unsigned int hw_tsc_khz;
- struct gfn_to_hva_cache pv_time;
- bool pv_time_enabled;
+ struct gfn_to_pfn_cache pv_time;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
struct {
+ u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_hva_cache stime;
- struct kvm_steal_time steal;
+ struct gfn_to_hva_cache cache;
} st;
- u64 tsc_offset;
+ u64 l1_tsc_offset;
+ u64 tsc_offset; /* current tsc offset */
u64 last_guest_tsc;
u64 last_host_tsc;
u64 tsc_offset_adjustment;
@@ -685,12 +963,16 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
- u64 tsc_scaling_ratio;
+ u64 msr_ia32_power_ctl;
+ u64 l1_tsc_scaling_ratio;
+ u64 tsc_scaling_ratio; /* current scaling ratio */
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
- unsigned nmi_pending; /* NMI queued after currently running handler */
+ /* Number of NMIs pending injection, not including hardware vNMIs. */
+ unsigned int nmi_pending;
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
+ u8 handling_intr_from_guest;
struct kvm_mtrr mtrr_state;
u64 pat;
@@ -709,10 +991,11 @@ struct kvm_vcpu_arch {
u64 mcg_ctl;
u64 mcg_ext_ctl;
u64 *mce_banks;
+ u64 *mci_ctl2_banks;
/* Cache MMIO info */
u64 mmio_gva;
- unsigned access;
+ unsigned mmio_access;
gfn_t mmio_gfn;
u64 mmio_gen;
@@ -721,8 +1004,13 @@ struct kvm_vcpu_arch {
/* used for guest single stepping over the given code position */
unsigned long singlestep_rip;
- struct kvm_vcpu_hv hyperv;
-
+#ifdef CONFIG_KVM_HYPERV
+ bool hyperv_enabled;
+ struct kvm_vcpu_hv *hyperv;
+#endif
+#ifdef CONFIG_KVM_XEN
+ struct kvm_vcpu_xen xen;
+#endif
cpumask_var_t wbinvd_dirty_mask;
unsigned long last_retry_eip;
@@ -730,14 +1018,16 @@ struct kvm_vcpu_arch {
struct {
bool halted;
- gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+ gfn_t gfns[ASYNC_PF_PER_VCPU];
struct gfn_to_hva_cache data;
- u64 msr_val;
+ u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+ u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+ u16 vec;
u32 id;
- bool send_user_only;
- u32 host_apf_reason;
- unsigned long nested_apf_token;
+ u32 host_apf_flags;
+ bool send_always;
bool delivery_as_pf_vmexit;
+ bool pageready_pending;
} apf;
/* OSVW MSRs (AMD only) */
@@ -751,15 +1041,7 @@ struct kvm_vcpu_arch {
struct gfn_to_hva_cache data;
} pv_eoi;
- /*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
- */
- bool write_fault_to_shadow_pgtable;
-
- /* set at EPT violation at this point */
- unsigned long exit_qualification;
+ u64 msr_kvm_poll_control;
/* pv related host specific info */
struct {
@@ -768,16 +1050,45 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;
-
- /* GPA available */
- bool gpa_available;
- gpa_t gpa_val;
+ int highest_stale_pending_ioapic_eoi;
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
- /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
- bool l1tf_flush_l1d;
+ /* Host CPU on which VM-entry was most recently attempted */
+ int last_vmentry_cpu;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
+
+ /* pv related cpuid info */
+ struct {
+ /*
+ * value of the eax register in the KVM_CPUID_FEATURES CPUID
+ * leaf.
+ */
+ u32 features;
+
+ /*
+ * indicates whether pv emulation should be disabled if features
+ * are not present in the guest's cpuid
+ */
+ bool enforce;
+ } pv_cpuid;
+
+ /* Protected Guests */
+ bool guest_state_protected;
+ bool guest_tsc_protected;
+
+ /*
+ * Set when PDPTS were loaded directly by the userspace without
+ * reading the guest memory
+ */
+ bool pdptrs_from_userspace;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ hpa_t hv_root_tdp;
+#endif
};
struct kvm_lpage_info {
@@ -787,23 +1098,34 @@ struct kvm_lpage_info {
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
- unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned short *gfn_write_track;
};
/*
- * We use as the mode the number of bits allocated in the LDR for the
- * logical processor ID. It happens that these are all powers of two.
- * This makes it is very easy to detect cases where the APICs are
- * configured for multiple modes; in that case, we cannot use the map and
- * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ * Track the mode of the optimized logical map, as the rules for decoding the
+ * destination vary per mode. Enabling the optimized logical map requires all
+ * software-enabled local APIs to be in the same mode, each addressable APIC to
+ * be mapped to only one MDA, and each MDA to map to at most one APIC.
*/
-#define KVM_APIC_MODE_XAPIC_CLUSTER 4
-#define KVM_APIC_MODE_XAPIC_FLAT 8
-#define KVM_APIC_MODE_X2APIC 16
+enum kvm_apic_logical_mode {
+ /* All local APICs are software disabled. */
+ KVM_APIC_MODE_SW_DISABLED,
+ /* All software enabled local APICs in xAPIC cluster addressing mode. */
+ KVM_APIC_MODE_XAPIC_CLUSTER,
+ /* All software enabled local APICs in xAPIC flat addressing mode. */
+ KVM_APIC_MODE_XAPIC_FLAT,
+ /* All software enabled local APICs in x2APIC mode. */
+ KVM_APIC_MODE_X2APIC,
+ /*
+ * Optimized map disabled, e.g. not all local APICs in the same logical
+ * mode, same logical ID assigned to multiple APICs, etc.
+ */
+ KVM_APIC_MODE_MAP_DISABLED,
+};
struct kvm_apic_map {
struct rcu_head rcu;
- u8 mode;
+ enum kvm_apic_logical_mode logical_mode;
u32 max_apic_id;
union {
struct kvm_lapic *xapic_flat_map[8];
@@ -812,28 +1134,91 @@ struct kvm_apic_map {
struct kvm_lapic *phys_map[];
};
+/* Hyper-V synthetic debugger (SynDbg)*/
+struct kvm_hv_syndbg {
+ struct {
+ u64 control;
+ u64 status;
+ u64 send_page;
+ u64 recv_page;
+ u64 pending_page;
+ } control;
+ u64 options;
+};
+
+/* Current state of Hyper-V TSC page clocksource */
+enum hv_tsc_page_status {
+ /* TSC page was not set up or disabled */
+ HV_TSC_PAGE_UNSET = 0,
+ /* TSC page MSR was written by the guest, update pending */
+ HV_TSC_PAGE_GUEST_CHANGED,
+ /* TSC page update was triggered from the host side */
+ HV_TSC_PAGE_HOST_CHANGED,
+ /* TSC page was properly set up and is currently active */
+ HV_TSC_PAGE_SET,
+ /* TSC page was set up with an inaccessible GPA */
+ HV_TSC_PAGE_BROKEN,
+};
+
+#ifdef CONFIG_KVM_HYPERV
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
+ enum hv_tsc_page_status hv_tsc_page_status;
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
- HV_REFERENCE_TSC_PAGE tsc_ref;
+ struct ms_hyperv_tsc_page tsc_ref;
struct idr conn_to_evt;
u64 hv_reenlightenment_control;
u64 hv_tsc_emulation_control;
u64 hv_tsc_emulation_status;
+ u64 hv_invtsc_control;
/* How many vCPUs have VP index != vCPU index */
atomic_t num_mismatched_vp_indexes;
+
+ /*
+ * How many SynICs use 'AutoEOI' feature
+ * (protected by arch.apicv_update_lock)
+ */
+ unsigned int synic_auto_eoi_used;
+
+ struct kvm_hv_syndbg hv_syndbg;
+
+ bool xsaves_xsavec_checked;
};
+#endif
+
+struct msr_bitmap_range {
+ u32 flags;
+ u32 nmsrs;
+ u32 base;
+ unsigned long *bitmap;
+};
+
+#ifdef CONFIG_KVM_XEN
+/* Xen emulation context */
+struct kvm_xen {
+ struct mutex xen_lock;
+ u32 xen_version;
+ bool long_mode;
+ bool runstate_update_flag;
+ u8 upcall_vector;
+ struct gfn_to_pfn_cache shinfo_cache;
+ struct idr evtchn_ports;
+ unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+ struct kvm_xen_hvm_config hvm_config;
+};
+#endif
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
@@ -841,72 +1226,241 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
-struct kvm_arch {
- unsigned int n_used_mmu_pages;
- unsigned int n_requested_mmu_pages;
- unsigned int n_max_mmu_pages;
- unsigned int indirect_shadow_pages;
- unsigned long mmu_valid_gen;
- struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+struct kvm_x86_msr_filter {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+};
+
+struct kvm_x86_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 nr_includes;
+ __u32 nr_excludes;
+ __u64 *includes;
+ __u64 *excludes;
+ __u64 events[];
+};
+
+enum kvm_apicv_inhibit {
+
+ /********************************************************************/
+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
+ /********************************************************************/
+
+ /*
+ * APIC acceleration is disabled by a module parameter
+ * and/or not supported in hardware.
+ */
+ APICV_INHIBIT_REASON_DISABLED,
+
+ /*
+ * APIC acceleration is inhibited because AutoEOI feature is
+ * being used by a HyperV guest.
+ */
+ APICV_INHIBIT_REASON_HYPERV,
+
+ /*
+ * APIC acceleration is inhibited because the userspace didn't yet
+ * enable the kernel/split irqchip.
+ */
+ APICV_INHIBIT_REASON_ABSENT,
+
+ /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
+ * (out of band, debug measure of blocking all interrupts on this vCPU)
+ * was enabled, to avoid AVIC/APICv bypassing it.
+ */
+ APICV_INHIBIT_REASON_BLOCKIRQ,
+
+ /*
+ * APICv is disabled because not all vCPUs have a 1:1 mapping between
+ * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
+ */
+ APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,
+
/*
- * Hash table of struct kvm_mmu_page.
+ * For simplicity, the APIC acceleration is inhibited
+ * first time either APIC ID or APIC base are changed by the guest
+ * from their reset values.
*/
+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
+
+ /******************************************************/
+ /* INHIBITs that are relevant only to the AMD's AVIC. */
+ /******************************************************/
+
+ /*
+ * AVIC is inhibited on a vCPU because it runs a nested guest.
+ *
+ * This is needed because unlike APICv, the peers of this vCPU
+ * cannot use the doorbell mechanism to signal interrupts via AVIC when
+ * a vCPU runs nested.
+ */
+ APICV_INHIBIT_REASON_NESTED,
+
+ /*
+ * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
+ * which cannot be injected when the AVIC is enabled, thus AVIC
+ * is inhibited while KVM waits for IRQ window.
+ */
+ APICV_INHIBIT_REASON_IRQWIN,
+
+ /*
+ * PIT (i8254) 're-inject' mode, relies on EOI intercept,
+ * which AVIC doesn't support for edge triggered interrupts.
+ */
+ APICV_INHIBIT_REASON_PIT_REINJ,
+
+ /*
+ * AVIC is disabled because SEV doesn't support it.
+ */
+ APICV_INHIBIT_REASON_SEV,
+
+ /*
+ * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
+ * mapping between logical ID and vCPU.
+ */
+ APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+
+ /*
+ * AVIC is disabled because the vCPU's APIC ID is beyond the max
+ * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable.
+ */
+ APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG,
+
+ NR_APICV_INHIBIT_REASONS,
+};
+
+#define __APICV_INHIBIT_REASON(reason) \
+ { BIT(APICV_INHIBIT_REASON_##reason), #reason }
+
+#define APICV_INHIBIT_REASONS \
+ __APICV_INHIBIT_REASON(DISABLED), \
+ __APICV_INHIBIT_REASON(HYPERV), \
+ __APICV_INHIBIT_REASON(ABSENT), \
+ __APICV_INHIBIT_REASON(BLOCKIRQ), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
+ __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
+ __APICV_INHIBIT_REASON(NESTED), \
+ __APICV_INHIBIT_REASON(IRQWIN), \
+ __APICV_INHIBIT_REASON(PIT_REINJ), \
+ __APICV_INHIBIT_REASON(SEV), \
+ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
+
+struct kvm_possible_nx_huge_pages {
+ /*
+ * A list of kvm_mmu_page structs that, if zapped, could possibly be
+ * replaced by an NX huge page. A shadow page is on this list if its
+ * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+ * and there are no other conditions that prevent a huge page, e.g.
+ * the backing host page is huge, dirtly logging is not enabled for its
+ * memslot, etc... Note, zapping shadow pages on this list doesn't
+ * guarantee an NX huge page will be created in its stead, e.g. if the
+ * guest attempts to execute from the region then KVM obviously can't
+ * create an NX huge page (without hanging the guest).
+ */
+ struct list_head pages;
+ u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+ KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+ KVM_TDP_MMU,
+#endif
+ KVM_NR_MMU_TYPES,
+};
+
+struct kvm_arch {
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
+ unsigned int indirect_shadow_pages;
+ u8 mmu_valid_gen;
+ u8 vm_type;
+ bool has_private_mem;
+ bool has_protected_state;
+ bool has_protected_eoi;
+ bool pre_fault_allowed;
+ struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
- struct list_head zapped_obsolete_pages;
- struct kvm_page_track_notifier_node mmu_sp_tracker;
+ struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
struct kvm_page_track_notifier_head track_notifier_head;
+#endif
+ /*
+ * Protects marking pages unsync during page faults, as TDP MMU page
+ * faults only take mmu_lock for read. For simplicity, the unsync
+ * pages lock is always taken when marking pages unsync regardless of
+ * whether mmu_lock is held for read or write.
+ */
+ spinlock_t mmu_unsync_pages_lock;
+
+ u64 shadow_mmio_value;
- struct list_head assigned_dev_head;
- struct iommu_domain *iommu_domain;
- bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
-#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
- atomic_t assigned_device_count;
+ unsigned long nr_possible_bypass_irqs;
+
+#ifdef CONFIG_KVM_IOAPIC
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
+#endif
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
- struct kvm_apic_map *apic_map;
+ struct kvm_apic_map __rcu *apic_map;
+ atomic_t apic_map_dirty;
- bool apic_access_page_done;
+ bool apic_access_memslot_enabled;
+ bool apic_access_memslot_inhibited;
+
+ /* Protects apicv_inhibit_reasons */
+ struct rw_semaphore apicv_update_lock;
+ unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
- bool mwait_in_guest;
- bool hlt_in_guest;
- bool pause_in_guest;
+ u64 disabled_exits;
- unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
+
+ /*
+ * This also protects nr_vcpus_matched_tsc which is read from a
+ * preemption-disabled region, so it must be a raw spinlock.
+ */
raw_spinlock_t tsc_write_lock;
u64 last_tsc_nsec;
u64 last_tsc_write;
u32 last_tsc_khz;
+ u64 last_tsc_offset;
u64 cur_tsc_nsec;
u64 cur_tsc_write;
u64 cur_tsc_offset;
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
- spinlock_t pvclock_gtod_sync_lock;
+ u32 default_tsc_khz;
+ bool user_set_tsc;
+ u64 apic_bus_cycle_ns;
+
+ seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
- struct delayed_work kvmclock_update_work;
- struct delayed_work kvmclock_sync_work;
-
- struct kvm_xen_hvm_config xen_hvm_config;
-
- /* reads protected by irq_srcu, writes by irq_lock */
- struct hlist_head mask_notifier_list;
+#ifdef CONFIG_KVM_HYPERV
struct kvm_hv hyperv;
+#endif
- #ifdef CONFIG_KVM_MMU_AUDIT
- int audit_point;
- #endif
+#ifdef CONFIG_KVM_XEN
+ struct kvm_xen xen;
+#endif
bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock;
@@ -922,26 +1476,175 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+ bool has_mapped_host_mmio;
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+
+ bool triple_fault_event;
+
+ bool bus_lock_detection_enabled;
+ bool enable_pmu;
+
+ u32 notify_window;
+ u32 notify_vmexit_flags;
+ /*
+ * If exit_on_emulation_error is set, and the in-kernel instruction
+ * emulator fails to emulate an instruction, allow userspace
+ * the opportunity to look at it.
+ */
+ bool exit_on_emulation_error;
+
+ /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+ u32 user_space_msr_mask;
+ struct kvm_x86_msr_filter __rcu *msr_filter;
+
+ u32 hypercall_exit_enabled;
+
+ /* Guest can access the SGX PROVISIONKEY. */
+ bool sgx_provisioning_allowed;
+
+ struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
+ struct vhost_task *nx_huge_page_recovery_thread;
+ u64 nx_huge_page_last;
+ struct once nx_once;
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_KVM_PROVE_MMU
+ /*
+ * The number of TDP MMU pages across all roots. Used only to sanity
+ * check that KVM isn't leaking TDP MMU pages.
+ */
+ atomic64_t tdp_mmu_pages;
+#endif
+
+ /*
+ * List of struct kvm_mmu_pages being used as roots.
+ * All struct kvm_mmu_pages in the list should have
+ * tdp_mmu_page set.
+ *
+ * For reads, this list is protected by:
+ * RCU alone or
+ * the MMU lock in read mode + RCU or
+ * the MMU lock in write mode
+ *
+ * For writes, this list is protected by tdp_mmu_pages_lock; see
+ * below for the details.
+ *
+ * Roots will remain in the list until their tdp_mmu_root_count
+ * drops to zero, at which point the thread that decremented the
+ * count to zero should removed the root from the list and clean
+ * it up, freeing the root after an RCU grace period.
+ */
+ struct list_head tdp_mmu_roots;
+
+ /*
+ * Protects accesses to the following fields when the MMU lock
+ * is held in read mode:
+ * - tdp_mmu_roots (above)
+ * - the link field of kvm_mmu_page structs used by the TDP MMU
+ * - possible_nx_huge_pages[KVM_TDP_MMU];
+ * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
+ * by the TDP MMU
+ * Because the lock is only taken within the MMU lock, strictly
+ * speaking it is redundant to acquire this lock when the thread
+ * holds the MMU lock in write mode. However it often simplifies
+ * the code to do so.
+ */
+ spinlock_t tdp_mmu_pages_lock;
+#endif /* CONFIG_X86_64 */
+
+ /*
+ * If set, at least one shadow root has been allocated. This flag
+ * is used as one input when determining whether certain memslot
+ * related allocations are necessary.
+ */
+ bool shadow_root_allocated;
+
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+ /*
+ * If set, the VM has (or had) an external write tracking user, and
+ * thus all write tracking metadata has been allocated, even if KVM
+ * itself isn't using write tracking.
+ */
+ bool external_write_tracking_enabled;
+#endif
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ hpa_t hv_root_tdp;
+ spinlock_t hv_root_tdp_lock;
+ struct hv_partition_assist_pg *hv_pa_pg;
+#endif
+ /*
+ * VM-scope maximum vCPU ID. Used to determine the size of structures
+ * that increase along with the maximum vCPU ID, in which case, using
+ * the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
+ */
+ u32 max_vcpu_ids;
+
+ bool disable_nx_huge_pages;
+
+ /*
+ * Memory caches used to allocate shadow pages when performing eager
+ * page splitting. No need for a shadowed_info_cache since eager page
+ * splitting only allocates direct shadow pages.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_shadow_page_cache;
+ struct kvm_mmu_memory_cache split_page_header_cache;
+
+ /*
+ * Memory cache used to allocate pte_list_desc structs while splitting
+ * huge pages. In the worst case, to split one huge page, 512
+ * pte_list_desc structs are needed to add each lower level leaf sptep
+ * to the rmap plus 1 to extend the parent_ptes rmap of the lower level
+ * page table.
+ *
+ * Protected by kvm->slots_lock.
+ */
+#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
+ struct kvm_mmu_memory_cache split_desc_cache;
+
+ gfn_t gfn_direct_bits;
+
+ /*
+ * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A Zero
+ * value indicates CPU dirty logging is unsupported or disabled in
+ * current VM.
+ */
+ int cpu_dirty_log_size;
};
struct kvm_vm_stat {
- ulong mmu_shadow_zapped;
- ulong mmu_pte_write;
- ulong mmu_pte_updated;
- ulong mmu_pde_zapped;
- ulong mmu_flooded;
- ulong mmu_recycled;
- ulong mmu_cache_miss;
- ulong mmu_unsync;
- ulong remote_tlb_flush;
- ulong lpages;
- ulong max_mmu_page_hash_collisions;
+ struct kvm_vm_stat_generic generic;
+ u64 mmu_shadow_zapped;
+ u64 mmu_pte_write;
+ u64 mmu_pde_zapped;
+ u64 mmu_flooded;
+ u64 mmu_recycled;
+ u64 mmu_cache_miss;
+ u64 mmu_unsync;
+ union {
+ struct {
+ atomic64_t pages_4k;
+ atomic64_t pages_2m;
+ atomic64_t pages_1g;
+ };
+ atomic64_t pages[KVM_NR_PAGE_SIZES];
+ };
+ u64 nx_lpage_splits;
+ u64 max_mmu_page_hash_collisions;
+ u64 max_mmu_rmap_size;
};
struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
+ u64 pf_taken;
u64 pf_fixed;
+ u64 pf_emulate;
+ u64 pf_spurious;
+ u64 pf_fast;
+ u64 pf_mmio_spte_created;
u64 pf_guest;
u64 tlb_flush;
u64 invlpg;
@@ -954,10 +1657,6 @@ struct kvm_vcpu_stat {
u64 nmi_window_exits;
u64 l1d_flush;
u64 halt_exits;
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
@@ -968,6 +1667,13 @@ struct kvm_vcpu_stat {
u64 irq_injections;
u64 nmi_injections;
u64 req_event;
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
+ u64 preemption_reported;
+ u64 preemption_other;
+ u64 guest_mode;
+ u64 notify_window_exits;
};
struct x86_instruction_info;
@@ -989,65 +1695,86 @@ struct kvm_lapic_irq {
bool msi_redir_hint;
};
+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+ return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
+enum kvm_x86_run_flags {
+ KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0),
+ KVM_RUN_LOAD_GUEST_DR6 = BIT(1),
+ KVM_RUN_LOAD_DEBUGCTL = BIT(2),
+};
+
struct kvm_x86_ops {
- int (*cpu_has_kvm_support)(void); /* __init */
- int (*disabled_by_bios)(void); /* __init */
- int (*hardware_enable)(void);
- void (*hardware_disable)(void);
- void (*check_processor_compatibility)(void *rtn);
- int (*hardware_setup)(void); /* __init */
- void (*hardware_unsetup)(void); /* __exit */
- bool (*cpu_has_accelerated_tpr)(void);
- bool (*has_emulated_msr)(int index);
- void (*cpuid_update)(struct kvm_vcpu *vcpu);
-
- struct kvm *(*vm_alloc)(void);
- void (*vm_free)(struct kvm *);
+ const char *name;
+
+ int (*check_processor_compatibility)(void);
+
+ int (*enable_virtualization_cpu)(void);
+ void (*disable_virtualization_cpu)(void);
+ cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
+
+ void (*hardware_unsetup)(void);
+ bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
+ void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
+
+ unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
+ void (*vm_pre_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_precreate)(struct kvm *kvm);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
- void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
+ void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
+ /*
+ * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
+ * match the host's value even while the guest is active.
+ */
+ const u64 HOST_OWNED_DEBUGCTL;
+
+ void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
int (*get_cpl)(struct kvm_vcpu *vcpu);
+ int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
- void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
- void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
+ bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
- void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
- u64 (*get_dr6)(struct kvm_vcpu *vcpu);
- void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
-
- void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
- int (*tlb_remote_flush)(struct kvm *kvm);
- int (*tlb_remote_flush_with_range)(struct kvm *kvm,
- struct kvm_tlb_range *range);
+ bool (*get_if_flag)(struct kvm_vcpu *vcpu);
+
+ void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
+ void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
+#if IS_ENABLED(CONFIG_HYPERV)
+ int (*flush_remote_tlbs)(struct kvm *kvm);
+ int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn,
+ gfn_t nr_pages);
+#endif
/*
* Flush any TLB entries associated with the given GVA.
@@ -1055,145 +1782,194 @@ struct kvm_x86_ops {
* Can potentially get non-canonical addresses through INVLPGs, which
* the implementation may choose to ignore if appropriate.
*/
- void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
+ void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr);
- void (*run)(struct kvm_vcpu *vcpu);
- int (*handle_exit)(struct kvm_vcpu *vcpu);
- void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ /*
+ * Flush any TLB entries created by the guest. Like tlb_flush_gva(),
+ * does not need to flush GPA->HPA mappings.
+ */
+ void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
+
+ int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
+ enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+ u64 run_flags);
+ int (*handle_exit)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath);
+ int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
- void (*set_irq)(struct kvm_vcpu *vcpu);
- void (*set_nmi)(struct kvm_vcpu *vcpu);
- void (*queue_exception)(struct kvm_vcpu *vcpu);
+ void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
+ void (*inject_nmi)(struct kvm_vcpu *vcpu);
+ void (*inject_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
- int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
- int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
+ /* Whether or not a virtual NMI is pending in hardware. */
+ bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
+ /*
+ * Attempt to pend a virtual NMI in hardware. Returns %true on success
+ * to allow using static_call_ret0 as the fallback.
+ */
+ bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+
+ const bool x2apic_icr_is_split;
+ const unsigned long required_apicv_inhibits;
+ bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
- void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
- bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
- void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
+ void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
- int (*get_tdp_level)(struct kvm_vcpu *vcpu);
- u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- int (*get_lpage_level)(void);
- bool (*rdtscp_supported)(void);
- bool (*invpcid_supported)(void);
+ u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+ int root_level);
+
+ /* Update external mapping with page table link. */
+ int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
+ /* Update the external page table from spte getting set. */
+ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ u64 mirror_spte);
- void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
+ /* Update external page tables for page table about to be freed. */
+ int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
- void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+ /* Update external page table from spte getting removed, and flush TLB. */
+ void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ u64 mirror_spte);
bool (*has_wbinvd_exit)(void);
- u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
- /* Returns actual tsc_offset set in active VMCS */
- u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
+ u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+ void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+ /*
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
+ * be used only from within tracepoints or error paths.
+ */
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2,
+ u32 *intr_info, u32 *error_code);
+
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
- enum x86_intercept_stage stage);
- void (*handle_external_intr)(struct kvm_vcpu *vcpu);
- bool (*mpx_supported)(void);
- bool (*xsaves_supported)(void);
- bool (*umip_emulated)(void);
- bool (*pt_supported)(void);
-
- int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
- void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
- void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- /*
- * Arch-specific dirty logging hooks. These hooks are only supposed to
- * be valid if the specific arch has hardware-accelerated dirty logging
- * mechanism. Currently only for PML on VMX.
- *
- * - slot_enable_log_dirty:
- * called when enabling log dirty mode for the slot.
- * - slot_disable_log_dirty:
- * called when disabling log dirty mode for the slot.
- * also called when slot is created with log dirty disabled.
- * - flush_log_dirty:
- * called before reporting dirty_bitmap to userspace.
- * - enable_log_dirty_pt_masked:
- * called when reenabling log dirty for the GFNs in the mask after
- * corresponding bits are cleared in slot->dirty_bitmap.
- */
- void (*slot_enable_log_dirty)(struct kvm *kvm,
- struct kvm_memory_slot *slot);
- void (*slot_disable_log_dirty)(struct kvm *kvm,
- struct kvm_memory_slot *slot);
- void (*flush_log_dirty)(struct kvm *kvm);
- void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu);
-
- /* pmu operations of sub-arch */
- const struct kvm_pmu_ops *pmu_ops;
+ void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
- /*
- * Architecture specific hooks for vCPU blocking due to
- * HLT instruction.
- * Returns for .pre_block():
- * - 0 means continue to block the vCPU.
- * - 1 means we cannot block the vCPU since some event
- * happens during this period, such as, 'ON' bit in
- * posted-interrupts descriptor is set.
- */
- int (*pre_block)(struct kvm_vcpu *vcpu);
- void (*post_block)(struct kvm_vcpu *vcpu);
+ const struct kvm_x86_nested_ops *nested_ops;
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
- int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set);
+ int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+ unsigned int host_irq, uint32_t guest_irq,
+ struct kvm_vcpu *vcpu, u32 vector);
+ void (*pi_start_bypass)(struct kvm *kvm);
+ void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+ bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
+ bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu);
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
void (*setup_mce)(struct kvm_vcpu *vcpu);
- int (*get_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- unsigned user_data_size);
- int (*set_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM_SMM
+ int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
+ void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+#endif
+
+ int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
+ int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
+ int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
+ int (*vcpu_mem_enc_unlocked_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
+ int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ void (*guest_memory_reclaimed)(struct kvm *kvm);
+
+ int (*get_feature_msr)(u32 msr, u64 *data);
+
+ int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
+
+ bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+ int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
+
+ void (*migrate_timers)(struct kvm_vcpu *vcpu);
+ void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
+ int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
+
+ void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+
+ /*
+ * Returns vCPU specific APICv inhibit reasons
+ */
+ unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
- int (*smi_allowed)(struct kvm_vcpu *vcpu);
- int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
- int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+ gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+ void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+ int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+ void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
+ int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
+};
- int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
- int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
- int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code);
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection);
+ void (*triple_fault)(struct kvm_vcpu *vcpu);
+ int (*get_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
+
+ int (*enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
+ uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+ void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu);
+};
- int (*get_msr_feature)(struct kvm_msr_entry *entry);
+struct kvm_x86_init_ops {
+ int (*hardware_setup)(void);
+ unsigned int (*handle_intel_pt_intr)(void);
- int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
- uint16_t *vmcs_version);
- uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
+ struct kvm_x86_ops *runtime_ops;
+ struct kvm_pmu_ops *pmu_ops;
};
struct kvm_arch_async_pf {
@@ -1201,160 +1977,264 @@ struct kvm_arch_async_pf {
gfn_t gfn;
unsigned long cr3;
bool direct_map;
+ u64 error_code;
};
-extern struct kvm_x86_ops *kvm_x86_ops;
-extern struct kmem_cache *x86_fpu_cache;
+extern u32 __read_mostly kvm_nr_uret_msrs;
+extern bool __read_mostly allow_smaller_maxphyaddr;
+extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
+extern bool __read_mostly enable_device_posted_irqs;
+extern struct kvm_x86_ops kvm_x86_ops;
+
+#define kvm_x86_call(func) static_call(kvm_x86_##func)
+#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)
+
+#define KVM_X86_OP(func) \
+ DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
+#define KVM_X86_OP_OPTIONAL KVM_X86_OP
+#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
+#include <asm/kvm-x86-ops.h>
+
+int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops);
+void kvm_x86_vendor_exit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return kvm_x86_ops->vm_alloc();
+ return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT);
}
-static inline void kvm_arch_free_vm(struct kvm *kvm)
-{
- return kvm_x86_ops->vm_free(kvm);
-}
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#if IS_ENABLED(CONFIG_HYPERV)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
- if (kvm_x86_ops->tlb_remote_flush &&
- !kvm_x86_ops->tlb_remote_flush(kvm))
+ if (kvm_x86_ops.flush_remote_tlbs &&
+ !kvm_x86_call(flush_remote_tlbs)(kvm))
return 0;
else
return -ENOTSUPP;
}
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
+ u64 nr_pages)
+{
+ if (!kvm_x86_ops.flush_remote_tlbs_range)
+ return -EOPNOTSUPP;
+
+ return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages);
+}
+#endif /* CONFIG_HYPERV */
+
+enum kvm_intr_type {
+ /* Values are arbitrary, but must be non-zero. */
+ KVM_HANDLING_IRQ = 1,
+ KVM_HANDLING_NMI,
+};
+
+/* Enable perf NMI and timer modes to work, and minimise false positives. */
+#define kvm_arch_pmi_in_guest(vcpu) \
+ ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
+ (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
+
+void __init kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
-void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- u64 acc_track_mask, u64 me_mask);
+void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
+ const struct kvm_memory_slot *memslot,
+ int start_level);
+void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot,
+ int target_level);
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot,
+ u64 start, u64 end,
+ int target_level);
+void kvm_mmu_recover_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
-void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask);
-void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
-
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-bool pdptrs_changed(struct kvm_vcpu *vcpu);
+ const struct kvm_memory_slot *memslot);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
+
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
-struct kvm_irq_mask_notifier {
- void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
- int irq;
- struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
- struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
- struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
- bool mask);
-
extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
-/* control of guest tsc rate supported? */
-extern bool kvm_has_tsc_control;
-/* maximum supported tsc_khz for guests */
-extern u32 kvm_max_guest_tsc_khz;
-/* number of bits of the fractional part of the TSC scaling ratio */
-extern u8 kvm_tsc_scaling_ratio_frac_bits;
-/* maximum allowed value of TSC scaling ratio */
-extern u64 kvm_max_tsc_scaling_ratio;
-/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
-extern u64 kvm_default_tsc_scaling_ratio;
-
-extern u64 kvm_mce_cap_supported;
-
-enum emulation_result {
- EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
- EMULATE_FAIL, /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ * userspace I/O) to indicate that the emulation context
+ * should be reused as is, i.e. skip initialization of
+ * emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ * Indicates that only select instructions (tagged with
+ * EmulateOnUD) should be emulated (to minimize the emulator
+ * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ * decode the instruction length. For use *only* by
+ * kvm_x86_ops.skip_emulated_instruction() implementations if
+ * EMULTYPE_COMPLETE_USER_EXIT is not set.
+ *
+ * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
+ * retry native execution under certain conditions,
+ * Can only be set in conjunction with EMULTYPE_PF.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ * triggered by KVM's magic "force emulation" prefix,
+ * which is opt in via module param (off by default).
+ * Bypasses EmulateOnUD restriction despite emulating
+ * due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ * Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ * backdoor emulation, which is opt in via module param.
+ * VMware backdoor emulation handles select instructions
+ * and reinjects the #GP for all other cases.
+ *
+ * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
+ * the CR2/GPA value pass on the stack is valid.
+ *
+ * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
+ * state and inject single-step #DBs after skipping
+ * an instruction (after completing userspace I/O).
+ *
+ * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
+ * is attempting to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself,
+ * and the owning page table is being shadowed by KVM.
+ * If emulation of the faulting instruction fails and
+ * this flag is set, KVM will exit to userspace instead
+ * of retrying emulation as KVM cannot make forward
+ * progress.
+ *
+ * If emulation fails for a write to guest page tables,
+ * KVM unprotects (zaps) the shadow page for the target
+ * gfn and resumes the guest to retry the non-emulatable
+ * instruction (on hardware). Unprotecting the gfn
+ * doesn't allow forward progress for a self-changing
+ * access because doing so also zaps the translation for
+ * the gfn, i.e. retrying the instruction will hit a
+ * !PRESENT fault, which results in a new shadow page
+ * and sends KVM back to square one.
+ *
+ * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip
+ * an instruction if it could generate a given software
+ * interrupt, which must be encoded via
+ * EMULTYPE_SET_SOFT_INT_VECTOR().
+ */
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_ALLOW_RETRY (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
-#define EMULTYPE_VMWARE (1 << 5)
+#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP (1 << 5)
+#define EMULTYPE_PF (1 << 6)
+#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
+#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+#define EMULTYPE_SKIP_SOFT_INT (1 << 9)
+
+#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16)
+#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff)
+
+static inline bool kvm_can_emulate_event_vectoring(int emul_type)
+{
+ return !(emul_type & EMULTYPE_PF);
+}
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
+void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
+ u64 *data, u8 ndata);
+void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
+
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+void kvm_prepare_unexpected_reason_exit(struct kvm_vcpu *vcpu, u64 exit_reason);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-
-struct x86_emulate_ctxt;
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
+int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
+int kvm_emulate_invd(struct kvm_vcpu *vcpu);
+int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
+int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
+int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
+void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
+void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
-void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
-int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
+int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
-bool kvm_rdpmc(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error_code, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
-int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- gfn_t gfn, void *data, int offset, int len,
- u32 access);
+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
@@ -1370,59 +2250,68 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state);
}
-#define KVM_MMU_ROOT_CURRENT BIT(0)
-#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
-#define KVM_MMU_ROOTS_ALL (~0UL)
+void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
-int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
-void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
+void kvm_update_dr7(struct kvm_vcpu *vcpu);
-void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ bool always_retry);
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
-int kvm_mmu_load(struct kvm_vcpu *vcpu);
-void kvm_mmu_unload(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
+ gpa_t cr2_or_gpa)
+{
+ return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
+}
+
+void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
ulong roots_to_free);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
+void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
-gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
- struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
-void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+bool kvm_apicv_activated(struct kvm *kvm);
+bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
+void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason, bool set);
+void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason, bool set);
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
+static inline void kvm_set_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason)
+{
+ kvm_set_or_clear_apicv_inhibit(kvm, reason, true);
+}
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
+static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason)
+{
+ kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
+}
+
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ u64 addr, unsigned long roots);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
-void kvm_enable_tdp(void);
-void kvm_disable_tdp(void);
+void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
+ int tdp_max_root_level, int tdp_huge_page_level);
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception)
-{
- return gpa;
-}
-static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
-{
- struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
+#endif
- return (struct kvm_mmu_page *)page_private(page);
-}
+#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
static inline u16 kvm_read_ldt(void)
{
@@ -1441,16 +2330,11 @@ static inline unsigned long read_msr(unsigned long msr)
{
u64 value;
- rdmsrl(msr, value);
+ rdmsrq(msr, value);
return value;
}
#endif
-static inline u32 get_rdx_init_val(void)
-{
- return 0x600; /* P6 family */
-}
-
static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
{
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
@@ -1470,113 +2354,90 @@ enum {
TASK_SWITCH_GATE = 3,
};
-#define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
-#define HF_NMI_MASK (1 << 3)
-#define HF_IRET_MASK (1 << 4)
-#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
-#define HF_SMM_MASK (1 << 6)
-#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
+#define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
+#ifdef CONFIG_KVM_SMM
+#define HF_SMM_MASK (1 << 1)
+#define HF_SMM_INSIDE_NMI_MASK (1 << 2)
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+# define KVM_MAX_NR_ADDRESS_SPACES 2
+/* SMM is currently unsupported for guests with private memory. */
+# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2)
+# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#else
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
+#endif
-/*
- * Hardware virtualization extension instructions may fault if a
- * reboot turns off virtualization while processes are running.
- * Trap the fault and ignore the instruction if that happens.
- */
-asmlinkage void kvm_spurious_fault(void);
-
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
- "666: " insn "\n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n" \
- "667: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
- "jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
- "jmp kvm_spurious_fault \n\t" \
- ".popsection \n\t" \
- _ASM_EXTABLE(666b, 667b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
- ____kvm_handle_fault_on_reboot(insn, "")
-
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_extint(struct kvm_vcpu *v);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-u64 kvm_get_arch_capabilities(void);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
+u64 kvm_get_user_return_msr(unsigned int slot);
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+ return kvm_find_user_return_msr(msr) >= 0;
+}
+
+u64 kvm_scale_tsc(u64 tsc, u64 ratio);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
-void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
-
-int kvm_is_in_guest(void);
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+ u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu);
-
-void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
- struct kvm_lapic_irq *irq);
+static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+{
+ /* We can only post Fixed and LowPrio IRQs */
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
+}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_blocking)
- kvm_x86_ops->vcpu_blocking(vcpu);
+ kvm_x86_call(vcpu_blocking)(vcpu);
}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_unblocking)
- kvm_x86_ops->vcpu_unblocking(vcpu);
+ kvm_x86_call(vcpu_unblocking)(vcpu);
}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
-
static inline int kvm_cpu_get_apicid(int mps_cpu)
{
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1587,7 +2448,37 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}
-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
+int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
+
+#define KVM_CLOCK_VALID_FLAGS \
+ (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+#define KVM_X86_VALID_QUIRKS \
+ (KVM_X86_QUIRK_LINT0_REENABLED | \
+ KVM_X86_QUIRK_CD_NW_CLEARED | \
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \
+ KVM_X86_QUIRK_OUT_7E_INC_RIP | \
+ KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
+ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
+ KVM_X86_QUIRK_SLOT_ZAP_ALL | \
+ KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \
+ KVM_X86_QUIRK_IGNORE_GUEST_PAT)
+
+#define KVM_X86_CONDITIONAL_QUIRKS \
+ (KVM_X86_QUIRK_CD_NW_CLEARED | \
+ KVM_X86_QUIRK_IGNORE_GUEST_PAT)
+
+/*
+ * KVM previously used a u32 field in kvm_run to indicate the hypercall was
+ * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the
+ * remaining 31 lower bits must be 0 to preserve ABI.
+ */
+#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1)
+
+static inline bool kvm_arch_has_irq_bypass(void)
+{
+ return enable_device_posted_irqs;
+}
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 172f9749dbb2..3d040741044b 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -2,11 +2,9 @@
#ifndef _ASM_X86_KVM_PAGE_TRACK_H
#define _ASM_X86_KVM_PAGE_TRACK_H
-enum kvm_page_track_mode {
- KVM_PAGE_TRACK_WRITE,
- KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
/*
* The notifier represented by @kvm_page_track_notifier_node is linked into
* the head which will be notified when guest is triggering the track event.
@@ -26,50 +24,39 @@ struct kvm_page_track_notifier_node {
* It is called when guest is writing the write-tracked page
* and write emulation is finished at that time.
*
- * @vcpu: the vcpu where the write access happened.
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
* @node: this node
*/
- void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes, struct kvm_page_track_notifier_node *node);
+ void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node);
+
/*
- * It is called when memory slot is being moved or removed
- * users can drop write-protection for the pages in that memory slot
+ * Invoked when a memory region is removed from the guest. Or in KVM
+ * terms, when a memslot is deleted.
*
- * @kvm: the kvm where memory slot being moved or removed
- * @slot: the memory slot being moved or removed
- * @node: this node
+ * @gfn: base gfn of the region being removed
+ * @nr_pages: number of pages in the to-be-removed region
+ * @node: this node
*/
- void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node);
+ void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+ struct kvm_page_track_notifier_node *node);
};
-void kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
- unsigned long npages);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
-void kvm_slot_page_track_add_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
- enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..4a47c16e2df8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -4,9 +4,10 @@
#include <asm/processor.h>
#include <asm/alternative.h>
+#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
-extern void kvmclock_init(void);
+#include <asm/tdx.h>
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
@@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
#endif /* CONFIG_KVM_GUEST */
#define KVM_HYPERCALL \
- ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+ ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
@@ -33,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void)
static inline long kvm_hypercall0(unsigned int nr)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, 0, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr)
@@ -43,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr)
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1)
@@ -54,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2)
@@ -65,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
@@ -77,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p4)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, p4);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
@@ -84,15 +105,37 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ long ret;
+
+ asm volatile("vmmcall"
+ : "=a"(ret)
+ : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+ : "memory");
+ return ret;
+}
+
#ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
-void kvm_async_pf_task_wake(u32 token);
-u32 kvm_read_and_reset_pf_reason(void);
-extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void kvm_async_pf_task_wait_schedule(u32 token);
+u32 kvm_read_and_reset_apf_flags(void);
+bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+
+DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+
+static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+{
+ if (static_branch_unlikely(&kvm_async_pf_enabled))
+ return __kvm_handle_async_pf(regs, token);
+ else
+ return false;
+}
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
@@ -103,8 +146,7 @@ static inline void kvm_spinlock_init(void)
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
#else /* CONFIG_KVM_GUEST */
-#define kvm_async_pf_task_wait(T, I) do {} while(0)
-#define kvm_async_pf_task_wake(T) do {} while(0)
+#define kvm_async_pf_task_wait_schedule(T) do {} while(0)
static inline bool kvm_para_available(void)
{
@@ -121,14 +163,14 @@ static inline unsigned int kvm_arch_para_hints(void)
return 0;
}
-static inline u32 kvm_read_and_reset_pf_reason(void)
+static inline u32 kvm_read_and_reset_apf_flags(void)
{
return 0;
}
-static inline void kvm_disable_steal_time(void)
+static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
- return;
+ return false;
}
#endif
diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h
new file mode 100644
index 000000000000..d7c704ed1be9
--- /dev/null
+++ b/arch/x86/include/asm/kvm_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVM_TYPES_H
+#define _ASM_X86_KVM_TYPES_H
+
+#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-amd,kvm-intel
+#elif IS_MODULE(CONFIG_KVM_AMD)
+#define KVM_SUB_MODULES kvm-amd
+#elif IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-intel
+#else
+#undef KVM_SUB_MODULES
+/*
+ * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff
+ * at least one vendor module is enabled.
+ */
+#define EXPORT_SYMBOL_FOR_KVM(symbol)
+#endif
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_X86_KVM_TYPES_H */
diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h
new file mode 100644
index 000000000000..1af2cb59233b
--- /dev/null
+++ b/arch/x86/include/asm/kvm_vcpu_regs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVM_VCPU_REGS_H
+#define _ASM_X86_KVM_VCPU_REGS_H
+
+#define __VCPU_REGS_RAX 0
+#define __VCPU_REGS_RCX 1
+#define __VCPU_REGS_RDX 2
+#define __VCPU_REGS_RBX 3
+#define __VCPU_REGS_RSP 4
+#define __VCPU_REGS_RBP 5
+#define __VCPU_REGS_RSI 6
+#define __VCPU_REGS_RDI 7
+
+#ifdef CONFIG_X86_64
+#define __VCPU_REGS_R8 8
+#define __VCPU_REGS_R9 9
+#define __VCPU_REGS_R10 10
+#define __VCPU_REGS_R11 11
+#define __VCPU_REGS_R12 12
+#define __VCPU_REGS_R13 13
+#define __VCPU_REGS_R14 14
+#define __VCPU_REGS_R15 15
+#endif
+
+#endif /* _ASM_X86_KVM_VCPU_REGS_H */
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index eceea9299097..f163176d6f7f 100644
--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -2,6 +2,18 @@
#ifndef _ASM_X86_KVM_CLOCK_H
#define _ASM_X86_KVM_CLOCK_H
-extern struct clocksource kvm_clock;
+#include <linux/percpu.h>
+
+DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+
+static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
+{
+ return &this_cpu_read(hv_clock_per_cpu)->pvti;
+}
+
+static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
+{
+ return this_cpu_read(hv_clock_per_cpu);
+}
#endif /* _ASM_X86_KVM_CLOCK_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 14caa9d9fb7f..9d38ae744a2e 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -3,26 +3,153 @@
#define _ASM_X86_LINKAGE_H
#include <linux/stringify.h>
+#include <asm/ibt.h>
#undef notrace
#define notrace __attribute__((no_instrument_function))
+#ifdef CONFIG_64BIT
+/*
+ * The generic version tends to create spurious ENDBR instructions under
+ * certain conditions.
+ */
+#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; })
+#endif
+
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#endif /* CONFIG_X86_32 */
-#ifdef __ASSEMBLY__
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
+#define __ALIGN_STR __stringify(__ALIGN)
-#define GLOBAL(name) \
- .globl name; \
- name:
+#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
+#else
+#define FUNCTION_PADDING
+#endif
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
-#define __ALIGN .p2align 4, 0x90
-#define __ALIGN_STR __stringify(__ALIGN)
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING
+#else
+# define __FUNC_ALIGN __ALIGN
+#endif
+
+#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN)
+#define SYM_F_ALIGN __FUNC_ALIGN
+
+#ifdef __ASSEMBLER__
+
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define RET jmp __x86_return_thunk
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
+#define RET ret; int3
+#else
+#define RET ret
#endif
+#endif /* CONFIG_MITIGATION_RETPOLINE */
+
+#else /* __ASSEMBLER__ */
+
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define ASM_RET "jmp __x86_return_thunk\n\t"
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
+#define ASM_RET "ret; int3\n\t"
+#else
+#define ASM_RET "ret\n\t"
+#endif
+#endif /* CONFIG_MITIGATION_RETPOLINE */
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
+ * CFI symbol layout changes.
+ *
+ * Without CALL_THUNKS:
+ *
+ * .align FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * .skip FUNCTION_PADDING, 0x90
+ * .byte 0xb8
+ * .long __kcfi_typeid_##name
+ * name:
+ *
+ * With CALL_THUNKS:
+ *
+ * .align FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * .byte 0xb8
+ * .long __kcfi_typeid_##name
+ * .skip FUNCTION_PADDING, 0x90
+ * name:
+ *
+ * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
+ */
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_CALL_PADDING
+#define CFI_PRE_PADDING
+#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#else
+#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#define CFI_POST_PADDING
+#endif
+
+#define __CFI_TYPE(name) \
+ SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \
+ CFI_PRE_PADDING \
+ .byte 0xb8 ASM_NL \
+ .long __kcfi_typeid_##name ASM_NL \
+ CFI_POST_PADDING \
+ SYM_FUNC_END(__cfi_##name)
+
+/* UML needs to be able to override memcpy() and friends for KASAN. */
+#ifdef CONFIG_UML
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK
+#else
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS
+#endif
+
+/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
+ ENDBR
+
+/* SYM_FUNC_START -- use for global functions */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN)
+
+/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
+#define SYM_FUNC_START_NOALIGN(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)
+
+/* SYM_FUNC_START_LOCAL -- use for local functions */
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN)
+
+/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
+#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)
+
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN)
+
+/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
+#define SYM_FUNC_START_WEAK_NOALIGN(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
+
+/*
+ * Expose 'sym' to the startup code in arch/x86/boot/startup/, by emitting an
+ * alias prefixed with __pi_
+ */
+#ifdef __ASSEMBLER__
+#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_L_GLOBAL)
+#else
+#define SYM_PIC_ALIAS(sym) extern typeof(sym) __PASTE(__pi_, sym) __alias(sym)
+#endif
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
deleted file mode 100644
index ed80003ce3e2..000000000000
--- a/arch/x86/include/asm/livepatch.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * livepatch.h - x86-specific Kernel Live Patching Core
- *
- * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
- * Copyright (C) 2014 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ASM_X86_LIVEPATCH_H
-#define _ASM_X86_LIVEPATCH_H
-
-#include <asm/setup.h>
-#include <linux/ftrace.h>
-
-static inline int klp_check_compiler_support(void)
-{
-#ifndef CC_USING_FENTRY
- return 1;
-#endif
- return 0;
-}
-
-static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
-{
- regs->ip = ip;
-}
-
-#endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 349a47acaa4a..59aa966dc212 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -120,34 +120,54 @@ static inline long local_sub_return(long i, local_t *l)
#define local_inc_return(l) (local_add_return(1, l))
#define local_dec_return(l) (local_sub_return(1, l))
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+static inline long local_cmpxchg(local_t *l, long old, long new)
+{
+ return cmpxchg_local(&l->a.counter, old, new);
+}
+
+static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
+{
+ return try_cmpxchg_local(&l->a.counter,
+ (typeof(l->a.counter) *) old, new);
+}
+
+/*
+ * Implement local_xchg using CMPXCHG instruction without the LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+static __always_inline long
+local_xchg(local_t *l, long n)
+{
+ long c = local_read(l);
+
+ do { } while (!local_try_cmpxchg(l, &c, n));
+
+ return c;
+}
/**
- * local_add_unless - add unless the number is a given value
+ * local_add_unless - add unless the number is already a given value
* @l: pointer of type local_t
* @a: the amount to add to l...
* @u: ...unless l is equal to u.
*
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
+ * Atomically adds @a to @l, if @v was not already @u.
+ * Returns true if the addition was done.
*/
-#define local_add_unless(l, a, u) \
-({ \
- long c, old; \
- c = local_read((l)); \
- for (;;) { \
- if (unlikely(c == (u))) \
- break; \
- old = local_cmpxchg((l), c, c + (a)); \
- if (likely(old == c)) \
- break; \
- c = old; \
- } \
- c != (u); \
-})
+static __always_inline bool
+local_add_unless(local_t *l, long a, long u)
+{
+ long c = local_read(l);
+
+ do {
+ if (unlikely(c == u))
+ return false;
+ } while (!local_try_cmpxchg(l, &c, c + a));
+
+ return true;
+}
+
#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
/* On x86_32, these are no better than the atomic variants.
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/x86/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 97198001e567..6115bb3d5795 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,7 +95,7 @@ static inline unsigned char current_lock_cmos_reg(void)
unsigned char rtc_cmos_read(unsigned char addr);
void rtc_cmos_write(unsigned char val, unsigned char addr);
-extern int mach_set_rtc_mmss(const struct timespec64 *now);
+extern int mach_set_cmos_time(const struct timespec64 *now);
extern void mach_get_cmos_time(struct timespec64 *now);
#define RTC_IRQ 8
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c1a812bd5a27..2d98886de09a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -13,6 +13,7 @@
#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */
#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
@@ -25,6 +26,7 @@
#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */
#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */
+#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */
/* MCG_EXT_CTL register defines */
#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */
@@ -42,12 +44,15 @@
#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
+#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff)
/* AMD-specific bits */
#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
+#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
+#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -57,6 +62,8 @@
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
+#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
+#define MCI_CONFIG_PADDRV BIT_ULL(11)
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF
@@ -86,6 +93,9 @@
#define MCI_MISC_ADDR_MEM 3 /* memory address */
#define MCI_MISC_ADDR_GENERIC 7 /* generic */
+/* MCi_ADDR register defines */
+#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0)
+
/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN BIT_ULL(30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
@@ -101,7 +111,7 @@
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-#define MCE_LOG_LEN 32
+#define MCE_LOG_MIN_LEN 32U
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
@@ -115,6 +125,9 @@
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -125,6 +138,40 @@
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
+
+#define XEC(x, mask) (((x) >> 16) & mask)
+
+/* mce.kflags flag bits for logging etc. */
+#define MCE_HANDLED_CEC BIT_ULL(0)
+#define MCE_HANDLED_UC BIT_ULL(1)
+#define MCE_HANDLED_EXTLOG BIT_ULL(2)
+#define MCE_HANDLED_NFIT BIT_ULL(3)
+#define MCE_HANDLED_EDAC BIT_ULL(4)
+#define MCE_HANDLED_MCELOG BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
+#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
+
+/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+
+/*
+ * Indicates that handler should check and clear Deferred error registers
+ * rather than common ones.
+ */
+#define MCE_CHECK_DFR_REGS BIT_ULL(8)
/*
* This structure contains all data related to the MCE log. Also
@@ -134,23 +181,52 @@
*/
struct mce_log_buffer {
char signature[12]; /* "MACHINECHECK" */
- unsigned len; /* = MCE_LOG_LEN */
+ unsigned len; /* = elements in .mce_entry[] */
unsigned next;
unsigned flags;
unsigned recordlen; /* length of struct mce */
- struct mce entry[MCE_LOG_LEN];
+ struct mce entry[];
};
+/* Highest last */
enum mce_notifier_prios {
- MCE_PRIO_FIRST = INT_MAX,
- MCE_PRIO_SRAO = INT_MAX - 1,
- MCE_PRIO_EXTLOG = INT_MAX - 2,
- MCE_PRIO_NFIT = INT_MAX - 3,
- MCE_PRIO_EDAC = INT_MAX - 4,
- MCE_PRIO_MCELOG = 1,
- MCE_PRIO_LOWEST = 0,
+ MCE_PRIO_LOWEST,
+ MCE_PRIO_MCELOG,
+ MCE_PRIO_EDAC,
+ MCE_PRIO_NFIT,
+ MCE_PRIO_EXTLOG,
+ MCE_PRIO_UC,
+ MCE_PRIO_EARLY,
+ MCE_PRIO_CEC,
+ MCE_PRIO_HIGHEST = MCE_PRIO_CEC
+};
+
+/**
+ * struct mce_hw_err - Hardware Error Record.
+ * @m: Machine Check record.
+ * @vendor: Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce. Instead, vendors
+ * should export their vendor-specific data through their structure in the
+ * vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for supplemental error
+ * information. Thus, current offsets of existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
+ */
+struct mce_hw_err {
+ struct mce m;
+
+ union vendor_info {
+ struct {
+ u64 synd1; /* MCA_SYND1 MSR */
+ u64 synd2; /* MCA_SYND2 MSR */
+ } amd;
+ } vendor;
};
+#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
+
struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -160,38 +236,39 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
+struct cper_ia_proc_ctx;
+
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
+void mca_bsp_init(struct cpuinfo_x86 *c);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
void mcheck_cpu_clear(struct cpuinfo_x86 *c);
-void mcheck_vendor_init_severity(void);
+int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+ u64 lapic_id);
#else
static inline int mcheck_init(void) { return 0; }
+static inline void mca_bsp_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
-static inline void mcheck_vendor_init_severity(void) {}
-#endif
-
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void enable_p5_mce(void) {}
+static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
+ u64 lapic_id) { return -EINVAL; }
#endif
-void mce_setup(struct mce *m);
-void mce_log(struct mce *m);
+void mce_prep_record(struct mce_hw_err *err);
+void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);
-/*
- * Maximum banks number.
- * This is the limit of the current register layout on
- * Intel CPUs.
- */
-#define MAX_NR_BANKS 32
+/* Maximum number of MCA banks per CPU. */
+#define MAX_NR_BANKS 64
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
@@ -209,20 +286,10 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
#endif
-#ifdef CONFIG_X86_MCE_AMD
-void mce_amd_feature_init(struct cpuinfo_x86 *c);
-int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
-#else
-static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
-static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
-#endif
-
-static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
-
-int mce_available(struct cpuinfo_x86 *c);
+bool mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
-int mce_usable_address(struct mce *m);
+bool mce_usable_address(struct mce *m);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
@@ -233,24 +300,26 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
- MCP_DONTLOG = BIT(2), /* only clear, don't log */
+ MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
};
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-int mce_notify_irq(void);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
DECLARE_PER_CPU(struct mce, injectm);
/* Disable CMCI/polling for MCA bank claimed by firmware */
extern void mce_disable_bank(int bank);
+#ifdef CONFIG_X86_MCE_THRESHOLD
+void mce_save_apei_thr_limit(u32 thr_limit);
+#else
+static inline void mce_save_apei_thr_limit(u32 thr_limit) { }
+#endif /* CONFIG_X86_MCE_THRESHOLD */
+
/*
* Exception handler
*/
-
-/* Call the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
-void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *pt_regs);
/*
* Threshold handler
@@ -261,28 +330,6 @@ extern void (*mce_threshold_vector)(void);
extern void (*deferred_error_int_vector)(void);
/*
- * Thermal handler
- */
-
-void intel_init_thermal(struct cpuinfo_x86 *c);
-
-/* Interrupt Handler for core thermal thresholds */
-extern int (*platform_thermal_notify)(__u64 msr_val);
-
-/* Interrupt Handler for package thermal thresholds */
-extern int (*platform_thermal_package_notify)(__u64 msr_val);
-
-/* Callback support of rate control, return true, if
- * callback has rate control */
-extern bool (*platform_thermal_package_rate_control)(void);
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-extern void mcheck_intel_therm_init(void);
-#else
-static inline void mcheck_intel_therm_init(void) { }
-#endif
-
-/*
* Used by APEI to report memory error via /dev/mcelog
*/
@@ -299,6 +346,7 @@ extern void apei_mce_report_mem_error(int corrected,
/* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
+ SMCA_LS_V2,
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
@@ -307,43 +355,44 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
+ SMCA_CS_V2,
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
+ SMCA_UMC_V2,
+ SMCA_MA_LLC, /* Memory Attached Last Level Cache */
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
+ SMCA_PSP_V2,
SMCA_SMU, /* System Management Unit */
+ SMCA_SMU_V2,
+ SMCA_MP5, /* Microprocessor 5 Unit */
+ SMCA_MPDMA, /* MPDMA Unit */
+ SMCA_NBIO, /* Northbridge IO Unit */
+ SMCA_PCIE, /* PCI Express Unit */
+ SMCA_PCIE_V2,
+ SMCA_XGMI_PCS, /* xGMI PCS Unit */
+ SMCA_NBIF, /* NBIF Unit */
+ SMCA_SHUB, /* System HUB Unit */
+ SMCA_SATA, /* SATA Unit */
+ SMCA_USB, /* USB Unit */
+ SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
+ SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */
+ SMCA_GMI_PCS, /* GMI PCS Unit */
+ SMCA_XGMI_PHY, /* xGMI PHY Unit */
+ SMCA_WAFL_PHY, /* WAFL PHY Unit */
+ SMCA_GMI_PHY, /* GMI PHY Unit */
N_SMCA_BANK_TYPES
};
-#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
-
-struct smca_hwid {
- unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
- u32 hwid_mcatype; /* (hwid,mcatype) tuple */
- u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
- u8 count; /* Number of instances. */
-};
-
-struct smca_bank {
- struct smca_hwid *hwid;
- u32 id; /* Value of MCA_IPID[InstanceId]. */
- u8 sysfs_id; /* Value used for sysfs name. */
-};
-
-extern struct smca_bank smca_banks[MAX_NR_BANKS];
-
-extern const char *smca_get_long_name(enum smca_bank_types t);
extern bool amd_mce_is_memory_error(struct mce *m);
-extern int mce_threshold_create_device(unsigned int cpu);
-extern int mce_threshold_remove_device(unsigned int cpu);
-
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
#else
-
-static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
-static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
-static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
-
+static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
+unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len);
+
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201..000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
- if (addr)
- mcsafe_test_src = (unsigned long) addr;
- else
- mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
- if (addr)
- mcsafe_test_dst = (unsigned long) addr;
- else
- mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
- .pushsection .data
- .align 8
- .globl mcsafe_test_src
- mcsafe_test_src:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_src)
- .globl mcsafe_test_dst
- mcsafe_test_dst:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_dst)
- .popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_src, %r9
- ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_dst, %r9
- ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 616f8e637bc3..ea6494628cb0 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -1,28 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __X86_MEM_ENCRYPT_H__
#define __X86_MEM_ENCRYPT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/init.h>
+#include <linux/cc_platform.h>
+
+#include <asm/asm.h>
+struct boot_params;
-#include <asm/bootparam.h>
+#ifdef CONFIG_X86_MEM_ENCRYPT
+void __init mem_encrypt_init(void);
+void __init mem_encrypt_setup_arch(void);
+#else
+static inline void mem_encrypt_init(void) { }
+static inline void __init mem_encrypt_setup_arch(void) { }
+#endif
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask;
-extern bool sev_enabled;
+extern u64 sev_status;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
@@ -40,24 +47,29 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(struct boot_params *bp);
-void __init sme_enable(struct boot_params *bp);
+void sme_encrypt_kernel(struct boot_params *bp);
+void sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr,
+ unsigned long size, bool enc);
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
void __init mem_encrypt_free_decrypted_mem(void);
-bool sme_active(void);
-bool sev_active(void);
+void __init sev_es_init_vc_handling(void);
-#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+static inline u64 sme_get_me_mask(void)
+{
+ return sme_me_mask;
+}
+
+#define __bss_decrypted __section(".bss..decrypted")
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0ULL
+#define sev_status 0ULL
static inline void __init sme_early_encrypt(resource_size_t paddr,
unsigned long size) { }
@@ -69,21 +81,28 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
-static inline void __init sme_enable(struct boot_params *bp) { }
+static inline void sme_encrypt_kernel(struct boot_params *bp) { }
+static inline void sme_enable(struct boot_params *bp) { }
-static inline bool sme_active(void) { return false; }
-static inline bool sev_active(void) { return false; }
+static inline void sev_es_init_vc_handling(void) { }
static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {}
+
+static inline void mem_encrypt_free_decrypted_mem(void) { }
+
+static inline u64 sme_get_me_mask(void) { return 0; }
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
+void add_encrypt_protection_map(void);
+
/*
* The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
* writing to or comparing values from the cr3 register. Having the
@@ -95,6 +114,6 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..113b2fa51849
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_bp_init(void);
+extern void pat_cpu_init(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+ enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+ enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+bool x86_has_pat_wp(void);
+enum page_cache_mode pgprot2cachemode(pgprot_t pgprot);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 2b7cc5397f80..8b41f26f003b 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -2,18 +2,7 @@
#ifndef _ASM_X86_MICROCODE_H
#define _ASM_X86_MICROCODE_H
-#include <asm/cpu.h>
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-struct ucode_patch {
- struct list_head plist;
- void *data; /* Intel uses only this one */
- u32 patch_id;
- u16 equiv_cpu;
-};
-
-extern struct list_head microcode_cache;
+#include <asm/msr.h>
struct cpu_signature {
unsigned int sig;
@@ -21,125 +10,84 @@ struct cpu_signature {
unsigned int rev;
};
-struct device;
-
-enum ucode_state {
- UCODE_OK = 0,
- UCODE_NEW,
- UCODE_UPDATED,
- UCODE_NFOUND,
- UCODE_ERROR,
+struct ucode_cpu_info {
+ struct cpu_signature cpu_sig;
+ void *mc;
};
-struct microcode_ops {
- enum ucode_state (*request_microcode_user) (int cpu,
- const void __user *buf, size_t size);
-
- enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
- bool refresh_fw);
-
- void (*microcode_fini_cpu) (int cpu);
+#ifdef CONFIG_MICROCODE
+void load_ucode_bsp(void);
+void load_ucode_ap(void);
+void microcode_bsp_resume(void);
+bool __init microcode_loader_disabled(void);
+#else
+static inline void load_ucode_bsp(void) { }
+static inline void load_ucode_ap(void) { }
+static inline void microcode_bsp_resume(void) { }
+static inline bool __init microcode_loader_disabled(void) { return false; }
+#endif
- /*
- * The generic 'microcode_core' part guarantees that
- * the callbacks below run on a target cpu when they
- * are being called.
- * See also the "Synchronization" section in microcode_core.c.
- */
- enum ucode_state (*apply_microcode) (int cpu);
- int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
+extern unsigned long initrd_start_early;
+
+#ifdef CONFIG_CPU_SUP_INTEL
+/* Intel specific microcode defines. Public for IFS */
+struct microcode_header_intel {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int datasize;
+ unsigned int totalsize;
+ unsigned int metasize;
+ unsigned int min_req_ver;
+ unsigned int reserved;
};
-struct ucode_cpu_info {
- struct cpu_signature cpu_sig;
- int valid;
- void *mc;
+struct microcode_intel {
+ struct microcode_header_intel hdr;
+ unsigned int bits[];
};
-extern struct ucode_cpu_info ucode_cpu_info[];
-struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
-#ifdef CONFIG_MICROCODE_INTEL
-extern struct microcode_ops * __init init_intel_microcode(void);
-#else
-static inline struct microcode_ops * __init init_intel_microcode(void)
-{
- return NULL;
-}
-#endif /* CONFIG_MICROCODE_INTEL */
+#define DEFAULT_UCODE_DATASIZE (2000)
+#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
+#define MC_HEADER_TYPE_MICROCODE 1
+#define MC_HEADER_TYPE_IFS 2
-#ifdef CONFIG_MICROCODE_AMD
-extern struct microcode_ops * __init init_amd_microcode(void);
-extern void __exit exit_amd_microcode(void);
-#else
-static inline struct microcode_ops * __init init_amd_microcode(void)
+static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr)
{
- return NULL;
+ return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
}
-static inline void __exit exit_amd_microcode(void) {}
-#endif
-#define MAX_UCODE_COUNT 128
-
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx) \
- (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_cpuid_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
- *
- * x86_cpuid_vendor() gets vendor information directly from CPUID.
- */
-static inline int x86_cpuid_vendor(void)
+static inline u32 intel_get_microcode_revision(void)
{
- u32 eax = 0x00000000;
- u32 ebx, ecx = 0, edx;
+ u32 rev, dummy;
- native_cpuid(&eax, &ebx, &ecx, &edx);
+ native_wrmsrq(MSR_IA32_UCODE_REV, 0);
- if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
- return X86_VENDOR_INTEL;
+ /* As documented in the SDM: Do a CPUID 1 here */
+ native_cpuid_eax(1);
- if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
- return X86_VENDOR_AMD;
+ /* get the current revision from MSR 0x8B */
+ native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
- return X86_VENDOR_UNKNOWN;
+ return rev;
}
+#endif /* !CONFIG_CPU_SUP_INTEL */
-static inline unsigned int x86_cpuid_family(void)
-{
- u32 eax = 0x00000001;
- u32 ebx, ecx = 0, edx;
-
- native_cpuid(&eax, &ebx, &ecx, &edx);
+bool microcode_nmi_handler(void);
+void microcode_offline_nmi_handler(void);
- return x86_family(eax);
+#ifdef CONFIG_MICROCODE_LATE_LOADING
+DECLARE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
+static __always_inline bool microcode_nmi_handler_enabled(void)
+{
+ return static_branch_unlikely(&microcode_nmi_handler_enable);
}
-
-#ifdef CONFIG_MICROCODE
-int __init microcode_init(void);
-extern void __init load_ucode_bsp(void);
-extern void load_ucode_ap(void);
-void reload_early_microcode(void);
-extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
-extern bool initrd_gone;
#else
-static inline int __init microcode_init(void) { return 0; };
-static inline void __init load_ucode_bsp(void) { }
-static inline void load_ucode_ap(void) { }
-static inline void reload_early_microcode(void) { }
-static inline bool
-get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
+static __always_inline bool microcode_nmi_handler_enabled(void) { return false; }
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
deleted file mode 100644
index 209492849566..000000000000
--- a/arch/x86/include/asm/microcode_amd.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_AMD_H
-#define _ASM_X86_MICROCODE_AMD_H
-
-#include <asm/microcode.h>
-
-#define UCODE_MAGIC 0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
-#define UCODE_UCODE_TYPE 0x00000001
-
-#define SECTION_HDR_SIZE 8
-#define CONTAINER_HDR_SZ 12
-
-struct equiv_cpu_entry {
- u32 installed_cpu;
- u32 fixed_errata_mask;
- u32 fixed_errata_compare;
- u16 equiv_cpu;
- u16 res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
- u32 data_code;
- u32 patch_id;
- u16 mc_patch_data_id;
- u8 mc_patch_data_len;
- u8 init_flag;
- u32 mc_patch_data_checksum;
- u32 nb_dev_id;
- u32 sb_dev_id;
- u16 processor_rev_id;
- u8 nb_rev_id;
- u8 sb_rev_id;
- u8 bios_api_rev;
- u8 reserved1[3];
- u32 match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
- struct microcode_header_amd hdr;
- unsigned int mpb[0];
-};
-
-#define PATCH_MAX_SIZE PAGE_SIZE
-
-#ifdef CONFIG_MICROCODE_AMD
-extern void __init load_ucode_amd_bsp(unsigned int family);
-extern void load_ucode_amd_ap(unsigned int family);
-extern int __init save_microcode_in_initrd_amd(unsigned int family);
-void reload_ucode_amd(void);
-#else
-static inline void __init load_ucode_amd_bsp(unsigned int family) {}
-static inline void load_ucode_amd_ap(unsigned int family) {}
-static inline int __init
-save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-void reload_ucode_amd(void) {}
-#endif
-#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
deleted file mode 100644
index d85a07d7154f..000000000000
--- a/arch/x86/include/asm/microcode_intel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_INTEL_H
-#define _ASM_X86_MICROCODE_INTEL_H
-
-#include <asm/microcode.h>
-
-struct microcode_header_intel {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int datasize;
- unsigned int totalsize;
- unsigned int reserved[3];
-};
-
-struct microcode_intel {
- struct microcode_header_intel hdr;
- unsigned int bits[0];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
- unsigned int sig;
- unsigned int pf;
- unsigned int cksum;
-};
-
-struct extended_sigtable {
- unsigned int count;
- unsigned int cksum;
- unsigned int reserved[3];
- struct extended_signature sigs[0];
-};
-
-#define DEFAULT_UCODE_DATASIZE (2000)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
-
-#define get_totalsize(mc) \
- (((struct microcode_intel *)mc)->hdr.datasize ? \
- ((struct microcode_intel *)mc)->hdr.totalsize : \
- DEFAULT_UCODE_TOTALSIZE)
-
-#define get_datasize(mc) \
- (((struct microcode_intel *)mc)->hdr.datasize ? \
- ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
-static inline u32 intel_get_microcode_revision(void)
-{
- u32 rev, dummy;
-
- native_wrmsrl(MSR_IA32_UCODE_REV, 0);
-
- /* As documented in the SDM: Do a CPUID 1 here */
- native_cpuid_eax(1);
-
- /* get the current revision from MSR 0x8B */
- native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
-
- return rev;
-}
-
-#ifdef CONFIG_MICROCODE_INTEL
-extern void __init load_ucode_intel_bsp(void);
-extern void load_ucode_intel_ap(void);
-extern void show_ucode_info_early(void);
-extern int __init save_microcode_in_initrd_intel(void);
-void reload_ucode_intel(void);
-#else
-static inline __init void load_ucode_intel_bsp(void) {}
-static inline void load_ucode_intel_ap(void) {}
-static inline void show_ucode_info_early(void) {}
-static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
-static inline void reload_ucode_intel(void) {}
-#endif
-
-#endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
new file mode 100644
index 000000000000..12b820259b9f
--- /dev/null
+++ b/arch/x86/include/asm/mman.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#define arch_calc_vm_prot_bits(prot, key) ( \
+ ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
+ ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
+ ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
+ ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
+#endif
+
+#include <uapi/asm/mman.h>
+
+#endif /* __ASM_MMAN_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c20..0fe9c569d171 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -6,6 +6,18 @@
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
+#include <linux/bits.h>
+
+/* Uprobes on this MM assume 32-bit code */
+#define MM_CONTEXT_UPROBE_IA32 0
+/* vsyscall page is accessible on this MM */
+#define MM_CONTEXT_HAS_VSYSCALL 1
+/* Do not allow changing LAM mode */
+#define MM_CONTEXT_LOCK_LAM 2
+/* Allow LAM and SVA coexisting */
+#define MM_CONTEXT_FORCE_TAGGED_SVA 3
+/* Tracks mm_cpumask */
+#define MM_CONTEXT_NOTRACK 4
/*
* x86 has arch-specific MMU state beyond what lives in mm_struct.
@@ -27,14 +39,21 @@ typedef struct {
*/
atomic64_t tlb_gen;
+ unsigned long next_trim_cpumask;
+
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct rw_semaphore ldt_usr_sem;
struct ldt_struct *ldt;
#endif
-#ifdef CONFIG_X86_64
- /* True if mm supports a task running in 32 bit compatibility mode. */
- unsigned short ia32_compat;
+ unsigned long flags;
+
+#ifdef CONFIG_ADDRESS_MASKING
+ /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
+ unsigned long lam_cr3_mask;
+
+ /* Significant bits of the virtual address. Excludes tag bits. */
+ u64 untag_mask;
#endif
struct mutex lock;
@@ -45,22 +64,32 @@ typedef struct {
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
/*
* One bit per protection key says whether userspace can
- * use it or not. protected by mmap_sem.
+ * use it or not. protected by mmap_lock.
*/
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
-#ifdef CONFIG_X86_INTEL_MPX
- /* address of the bounds directory */
- void __user *bd_addr;
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ /*
+ * The global ASID will be a non-zero value when the process has
+ * the same ASID across all CPUs, allowing it to make use of
+ * hardware-assisted remote TLB invalidation like AMD INVLPGB.
+ */
+ u16 global_asid;
+
+ /* The process is transitioning to a new global ASID number. */
+ bool asid_transition;
#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
.context = { \
.ctx_id = 1, \
+ .lock = __MUTEX_INITIALIZER(mm.context.lock), \
}
-void leave_mm(int cpu);
+void leave_mm(void);
+#define leave_mm leave_mm
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 0ca50611e8ce..73bf3b1b44e8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -2,41 +2,24 @@
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H
-#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>
#include <trace/events/tlb.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
-#include <asm/mpx.h>
+#include <asm/debugreg.h>
+#include <asm/gsseg.h>
+#include <asm/desc.h>
extern atomic64_t last_mm_ctx_id;
-#ifndef CONFIG_PARAVIRT_XXL
-static inline void paravirt_activate_mm(struct mm_struct *prev,
- struct mm_struct *next)
-{
-}
-#endif /* !CONFIG_PARAVIRT_XXL */
-
#ifdef CONFIG_PERF_EVENTS
-
+DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
-
-static inline void load_mm_cr4(struct mm_struct *mm)
-{
- if (static_branch_unlikely(&rdpmc_always_available_key) ||
- atomic_read(&mm->context.perf_rdpmc_allowed))
- cr4_set_bits(X86_CR4_PCE);
- else
- cr4_clear_bits(X86_CR4_PCE);
-}
-#else
-static inline void load_mm_cr4(struct mm_struct *mm) {}
+void cr4_update_pce(void *ignored);
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
@@ -66,14 +49,6 @@ struct ldt_struct {
int slot;
};
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
- return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
/*
* Used for LDT copy/destruction.
*/
@@ -96,88 +71,84 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
+#else
static inline void load_mm_ldt(struct mm_struct *mm)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
+ clear_LDT();
+}
+static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+ DEBUG_LOCKS_WARN_ON(preemptible());
+}
+#endif
+#ifdef CONFIG_ADDRESS_MASKING
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
/*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
+ * When switch_mm_irqs_off() is called for a kthread, it may race with
+ * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two
+ * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it
+ * reads a single value for both.
*/
+ return READ_ONCE(mm->context.lam_cr3_mask);
+}
- if (unlikely(ldt)) {
- if (static_cpu_has(X86_FEATURE_PTI)) {
- if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
- /*
- * Whoops -- either the new LDT isn't mapped
- * (if slot == -1) or is mapped into a bogus
- * slot (if slot > 1).
- */
- clear_LDT();
- return;
- }
-
- /*
- * If page table isolation is enabled, ldt->entries
- * will not be mapped in the userspace pagetables.
- * Tell the CPU to access the LDT through the alias
- * at ldt_slot_va(ldt->slot).
- */
- set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
- } else {
- set_ldt(ldt->entries, ldt->nr_entries);
- }
- } else {
- clear_LDT();
- }
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
+ mm->context.untag_mask = oldmm->context.untag_mask;
+}
+
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return mm->context.untag_mask;
+}
+
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
+ mm->context.untag_mask = -1UL;
+}
+
+#define arch_pgtable_dma_compat arch_pgtable_dma_compat
+static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
+{
+ return !mm_lam_cr3_mask(mm) ||
+ test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags);
+}
#else
- clear_LDT();
-#endif
+
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
+ return 0;
}
-static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT if either the old or new mm had an LDT.
- *
- * An mm will never go from having an LDT to not having an LDT. Two
- * mms never share an LDT, so we don't gain anything by checking to
- * see whether the LDT changed. There's also no guarantee that
- * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
- * then prev->context.ldt will also be non-NULL.
- *
- * If we really cared, we could optimize the case where prev == next
- * and we're exiting lazy mode. Most of the time, if this happens,
- * we don't actually need to reload LDTR, but modify_ldt() is mostly
- * used by legacy code and emulators where we don't need this level of
- * performance.
- *
- * This uses | instead of || because it generates better code.
- */
- if (unlikely((unsigned long)prev->context.ldt |
- (unsigned long)next->context.ldt))
- load_mm_ldt(next);
-#endif
+}
- DEBUG_LOCKS_WARN_ON(preemptible());
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
}
+#endif
+
+#define enter_lazy_tlb enter_lazy_tlb
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+#define mm_init_global_asid mm_init_global_asid
+extern void mm_init_global_asid(struct mm_struct *mm);
+extern void mm_free_global_asid(struct mm_struct *mm);
+
+/*
+ * Init a new mm. Used on mm copies, like at fork()
+ * and on mm's that are brand-new, like at execve().
+ */
+#define init_new_context init_new_context
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
@@ -185,6 +156,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
atomic64_set(&mm->context.tlb_gen, 0);
+ mm->context.next_trim_cpumask = jiffies + HZ;
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
@@ -194,12 +166,18 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
+
+ mm_init_global_asid(mm);
+ mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
}
+
+#define destroy_context destroy_context
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+ mm_free_global_asid(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -211,26 +189,42 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
#define activate_mm(prev, next) \
do { \
- paravirt_activate_mm((prev), (next)); \
- switch_mm((prev), (next), NULL); \
+ paravirt_enter_mmap(next); \
+ switch_mm_irqs_off((prev), (next), NULL); \
} while (0);
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
- lazy_load_gs(0); \
+ loadsegment(gs, 0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
do { \
+ shstk_free(tsk); \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
#endif
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
+
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
+#endif
+}
+
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
- paravirt_arch_dup_mmap(oldmm, mm);
+ arch_dup_pkeys(oldmm, mm);
+ paravirt_enter_mmap(mm);
+ dup_lam(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}
@@ -244,7 +238,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
- !(mm->context.ia32_compat == TIF_IA32);
+ !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
@@ -253,34 +247,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
+static inline bool is_notrack_mm(struct mm_struct *mm)
{
- mpx_mm_init(mm);
+ return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
}
-static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+static inline void set_notrack_mm(struct mm_struct *mm)
{
- /*
- * mpx_notify_unmap() goes and reads a rarely-hot
- * cacheline in the mm_struct. That can be expensive
- * enough to be seen in profiles.
- *
- * The mpx_notify_unmap() call and its contents have been
- * observed to affect munmap() performance on hardware
- * where MPX is not present.
- *
- * The unlikely() optimizes for the fast case: no MPX
- * in the CPU, or no MPX use in the process. Even if
- * we get this wrong (in the unlikely event that MPX
- * is widely enabled on some system) the overhead of
- * MPX itself (reading bounds tables) is expected to
- * overwhelm the overhead of getting this unlikely()
- * consistently wrong.
- */
- if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
- mpx_notify_unmap(mm, vma, start, end);
+ set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
}
/*
@@ -292,21 +266,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
* So do not enforce things if the VMA is not from the current
* mm, or if we are in a kernel thread.
*/
-static inline bool vma_is_foreign(struct vm_area_struct *vma)
-{
- if (!current->mm)
- return true;
- /*
- * Should PKRU be enforced on the access to this VMA? If
- * the VMA is from another process, then PKRU has no
- * relevance and should not be enforced.
- */
- if (current->mm != vma->vm_mm)
- return true;
-
- return false;
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
@@ -319,23 +278,11 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
-/*
- * This can be used from process context to figure out what the value of
- * CR3 is without needing to do a (slow) __read_cr3().
- *
- * It's intended to be used for code like KVM that sneakily changes CR3
- * and needs to restore it. It needs to be used very carefully.
- */
-static inline unsigned long __get_current_cr3_fast(void)
-{
- unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
- this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+unsigned long __get_current_cr3_fast(void);
- /* For now, be very restrictive about when this can be called. */
- VM_WARN_ON(in_nmi() || preemptible());
+#include <asm-generic/mmu_context.h>
- VM_BUG_ON(cr3 != __read_cr3());
- return cr3;
-}
+extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm);
+extern void unuse_temporary_mm(struct mm_struct *prev_mm);
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h
deleted file mode 100644
index f572d0f944bb..000000000000
--- a/arch/x86/include/asm/mmx.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MMX_H
-#define _ASM_X86_MMX_H
-
-/*
- * MMX 3Dnow! helper operations
- */
-
-#include <linux/types.h>
-
-extern void *_mmx_memcpy(void *to, const void *from, size_t size);
-extern void mmx_clear_page(void *page);
-extern void mmx_copy_page(void *to, void *from);
-
-#endif /* _ASM_X86_MMX_H */
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
deleted file mode 100644
index c41b41edd691..000000000000
--- a/arch/x86/include/asm/mmzone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/mmzone_32.h>
-#else
-# include <asm/mmzone_64.h>
-#endif
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
deleted file mode 100644
index 73d8dd14dda2..000000000000
--- a/arch/x86/include/asm/mmzone_32.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
- *
- */
-
-#ifndef _ASM_X86_MMZONE_32_H
-#define _ASM_X86_MMZONE_32_H
-
-#include <asm/smp.h>
-
-#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-#endif /* CONFIG_NUMA */
-
-#ifdef CONFIG_DISCONTIGMEM
-
-/*
- * generic node memory support, the following assumptions apply:
- *
- * 1) memory comes in 64Mb contiguous chunks which are either present or not
- * 2) we will not have more than 64Gb in total
- *
- * for now assume that 64Gb is max amount of RAM for whole system
- * 64Gb / 4096bytes/page = 16777216 pages
- */
-#define MAX_NR_PAGES 16777216
-#define MAX_SECTIONS 1024
-#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS)
-
-extern s8 physnode_map[];
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-#ifdef CONFIG_NUMA
- return((int) physnode_map[(pfn) / PAGES_PER_SECTION]);
-#else
- return 0;
-#endif
-}
-
-static inline int pfn_valid(int pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- if (nid >= 0)
- return (pfn < node_end_pfn(nid));
- return 0;
-}
-
-#define early_pfn_valid(pfn) pfn_valid((pfn))
-
-#endif /* CONFIG_DISCONTIGMEM */
-
-#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
deleted file mode 100644
index 0c585046f744..000000000000
--- a/arch/x86/include/asm/mmzone_64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* K8 NUMA support */
-/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
-/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_MMZONE_64_H
-#define _ASM_X86_MMZONE_64_H
-
-#ifdef CONFIG_NUMA
-
-#include <linux/mmdebug.h>
-#include <asm/smp.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif
-#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..3c2de4ce3b10 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,70 +5,20 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+struct its_array {
+#ifdef CONFIG_MITIGATION_ITS
+ void **pages;
+ int num;
+#endif
+};
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+ struct its_array its_pages;
};
-#ifdef CONFIG_X86_64
-/* X86_64 does not define MODULE_PROC_FAMILY */
-#elif defined CONFIG_M486
-#define MODULE_PROC_FAMILY "486 "
-#elif defined CONFIG_M586
-#define MODULE_PROC_FAMILY "586 "
-#elif defined CONFIG_M586TSC
-#define MODULE_PROC_FAMILY "586TSC "
-#elif defined CONFIG_M586MMX
-#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
-#elif defined CONFIG_MATOM
-#define MODULE_PROC_FAMILY "ATOM "
-#elif defined CONFIG_M686
-#define MODULE_PROC_FAMILY "686 "
-#elif defined CONFIG_MPENTIUMII
-#define MODULE_PROC_FAMILY "PENTIUMII "
-#elif defined CONFIG_MPENTIUMIII
-#define MODULE_PROC_FAMILY "PENTIUMIII "
-#elif defined CONFIG_MPENTIUMM
-#define MODULE_PROC_FAMILY "PENTIUMM "
-#elif defined CONFIG_MPENTIUM4
-#define MODULE_PROC_FAMILY "PENTIUM4 "
-#elif defined CONFIG_MK6
-#define MODULE_PROC_FAMILY "K6 "
-#elif defined CONFIG_MK7
-#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_MELAN
-#define MODULE_PROC_FAMILY "ELAN "
-#elif defined CONFIG_MCRUSOE
-#define MODULE_PROC_FAMILY "CRUSOE "
-#elif defined CONFIG_MEFFICEON
-#define MODULE_PROC_FAMILY "EFFICEON "
-#elif defined CONFIG_MWINCHIPC6
-#define MODULE_PROC_FAMILY "WINCHIPC6 "
-#elif defined CONFIG_MWINCHIP3D
-#define MODULE_PROC_FAMILY "WINCHIP3D "
-#elif defined CONFIG_MCYRIXIII
-#define MODULE_PROC_FAMILY "CYRIXIII "
-#elif defined CONFIG_MVIAC3_2
-#define MODULE_PROC_FAMILY "VIAC3-2 "
-#elif defined CONFIG_MVIAC7
-#define MODULE_PROC_FAMILY "VIAC7 "
-#elif defined CONFIG_MGEODEGX1
-#define MODULE_PROC_FAMILY "GEODEGX1 "
-#elif defined CONFIG_MGEODE_LX
-#define MODULE_PROC_FAMILY "GEODE "
-#else
-#error unknown processor family
-#endif
-
-#ifdef CONFIG_X86_32
-# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
-#endif
-
#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 606cbaebd336..d593e52e6635 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_MPSPEC_H
#define _ASM_X86_MPSPEC_H
+#include <linux/types.h>
#include <asm/mpspec_def.h>
#include <asm/x86_init.h>
@@ -15,16 +16,14 @@ extern int pic_mode;
* Summit or generic (i.e. installer) kernels need lots of bus entries.
* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets.
*/
-#if CONFIG_BASE_SMALL == 0
-# define MAX_MP_BUSSES 260
-#else
+#ifdef CONFIG_BASE_SMALL
# define MAX_MP_BUSSES 32
+#else
+# define MAX_MP_BUSSES 260
#endif
#define MAX_IRQ_SOURCES 256
-extern unsigned int def_to_bigsmp;
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
@@ -39,9 +38,8 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-extern unsigned int boot_cpu_physical_apicid;
+extern u32 boot_cpu_physical_apicid;
extern u8 boot_cpu_apic_version;
-extern unsigned long mp_lapic_addr;
#ifdef CONFIG_X86_LOCAL_APIC
extern int smp_found_config;
@@ -49,106 +47,31 @@ extern int smp_found_config;
# define smp_found_config 0
#endif
-static inline void get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(0);
-}
-
-static inline void early_get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(1);
-}
-
-static inline void find_smp_config(void)
-{
- x86_init.mpparse.find_smp_config();
-}
-
#ifdef CONFIG_X86_MPPARSE
extern void e820__memblock_alloc_reserved_mpc_new(void);
extern int enable_update_mptable;
-extern int default_mpc_apic_id(struct mpc_cpu *m);
-extern void default_smp_read_mpc_oem(struct mpc_table *mpc);
-# ifdef CONFIG_X86_IO_APIC
-extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
-# else
-# define default_mpc_oem_bus_info NULL
-# endif
-extern void default_find_smp_config(void);
-extern void default_get_smp_config(unsigned int early);
+extern void mpparse_find_mptable(void);
+extern void mpparse_parse_early_smp_config(void);
+extern void mpparse_parse_smp_config(void);
#else
static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
-#define enable_update_mptable 0
-#define default_mpc_apic_id NULL
-#define default_smp_read_mpc_oem NULL
-#define default_mpc_oem_bus_info NULL
-#define default_find_smp_config x86_init_noop
-#define default_get_smp_config x86_init_uint_noop
+#define enable_update_mptable 0
+#define mpparse_find_mptable x86_init_noop
+#define mpparse_parse_early_smp_config x86_init_noop
+#define mpparse_parse_smp_config x86_init_noop
#endif
-int generic_processor_info(int apicid, int version);
-
-#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
-
-struct physid_mask {
- unsigned long mask[PHYSID_ARRAY_SIZE];
-};
+extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC);
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map) set_bit(physid, (map).mask)
-#define physid_clear(physid, map) clear_bit(physid, (map).mask)
-#define physid_isset(physid, map) test_bit(physid, (map).mask)
-#define physid_test_and_set(physid, map) \
- test_and_set_bit(physid, (map).mask)
-
-#define physids_and(dst, src1, src2) \
- bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
-
-#define physids_or(dst, src1, src2) \
- bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
-
-#define physids_clear(map) \
- bitmap_zero((map).mask, MAX_LOCAL_APIC)
-
-#define physids_complement(dst, src) \
- bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
-
-#define physids_empty(map) \
- bitmap_empty((map).mask, MAX_LOCAL_APIC)
-
-#define physids_equal(map1, map2) \
- bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
-
-#define physids_weight(map) \
- bitmap_weight((map).mask, MAX_LOCAL_APIC)
-
-#define physids_shift_right(d, s, n) \
- bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
-
-#define physids_shift_left(d, s, n) \
- bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
-
-static inline unsigned long physids_coerce(physid_mask_t *map)
-{
- return map->mask[0];
-}
-
-static inline void physids_promote(unsigned long physids, physid_mask_t *map)
+static inline void reset_phys_cpu_present_map(u32 apicid)
{
- physids_clear(*map);
- map->mask[0] = physids;
+ bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+ set_bit(apicid, phys_cpu_present_map);
}
-static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
+static inline void copy_phys_cpu_present_map(unsigned long *dst)
{
- physids_clear(*map);
- physid_set(physid, *map);
+ bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC);
}
-#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
#endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
deleted file mode 100644
index d0b1434fb0b6..000000000000
--- a/arch/x86/include/asm/mpx.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MPX_H
-#define _ASM_X86_MPX_H
-
-#include <linux/types.h>
-#include <linux/mm_types.h>
-
-#include <asm/ptrace.h>
-#include <asm/insn.h>
-
-/*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
-#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
-#define MPX_BNDCFG_ENABLE_FLAG 0x1
-#define MPX_BD_ENTRY_VALID_FLAG 0x1
-
-/*
- * The upper 28 bits [47:20] of the virtual address in 64-bit
- * are used to index into bounds directory (BD).
- *
- * The directory is 2G (2^31) in size, and with 8-byte entries
- * it has 2^28 entries.
- */
-#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
-#define MPX_BD_ENTRY_BYTES_64 8
-#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-
-/*
- * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
- * entries it has 2^20 entries.
- */
-#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
-#define MPX_BD_ENTRY_BYTES_32 4
-#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-
-/*
- * A 64-bit table is 4MB total in size, and an entry is
- * 4 64-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
-#define MPX_BT_ENTRY_BYTES_64 32
-#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-
-/*
- * A 32-bit table is 16kB total in size, and an entry is
- * 4 32-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
-#define MPX_BT_ENTRY_BYTES_32 16
-#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
-
-#define MPX_BNDSTA_TAIL 2
-#define MPX_BNDCFG_TAIL 12
-#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BNDSTA_ERROR_CODE 0x3
-
-struct mpx_fault_info {
- void __user *addr;
- void __user *lower;
- void __user *upper;
-};
-
-#ifdef CONFIG_X86_INTEL_MPX
-int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
-int mpx_handle_bd_fault(void);
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
- /*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
-}
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
-
-unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
- unsigned long flags);
-#else
-static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- return -EINVAL;
-}
-static inline int mpx_handle_bd_fault(void)
-{
- return -EINVAL;
-}
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return 0;
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
-}
-static inline void mpx_notify_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
- unsigned long len, unsigned long flags)
-{
- return addr;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..eef4c3a5ba28 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,162 +3,84 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/atomic.h>
#include <linux/nmi.h>
-#include <asm/io.h>
-#include <asm/hyperv-tlfs.h>
+#include <linux/msi.h>
+#include <linux/io.h>
+#include <linux/static_call.h>
#include <asm/nospec-branch.h>
+#include <asm/paravirt.h>
+#include <asm/msr.h>
+#include <hyperv/hvhdk.h>
+#include <asm/fpu/types.h>
-#define VP_INVAL U32_MAX
+/*
+ * Hyper-V always provides a single IO-APIC at this MMIO address.
+ * Ideally, the value should be looked up in ACPI tables, but it
+ * is needed for mapping the IO-APIC early in boot on Confidential
+ * VMs, before ACPI functions can be used.
+ */
+#define HV_IOAPIC_BASE_ADDRESS 0xfec00000
-struct ms_hyperv_info {
- u32 features;
- u32 misc_features;
- u32 hints;
- u32 nested_features;
- u32 max_vp_index;
- u32 max_lp_index;
-};
+#define HV_VTL_NORMAL 0x0
+#define HV_VTL_SECURE 0x1
+#define HV_VTL_MGMT 0x2
-extern struct ms_hyperv_info ms_hyperv;
+union hv_ghcb;
+DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
+DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
void *data);
-/*
- * Generate the guest ID.
- */
-
-static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
- __u64 d_info2)
-{
- __u64 guest_id = 0;
-
- guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48);
- guest_id |= (d_info1 << 48);
- guest_id |= (kernel_version << 16);
- guest_id |= d_info2;
-
- return guest_id;
-}
-
+void hyperv_vector_handler(struct pt_regs *regs);
-/* Free the message slot and signal end-of-message if required */
-static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
+static inline unsigned char hv_get_nmi_reason(void)
{
- /*
- * On crash we're reading some other CPU's message page and we need
- * to be careful: this other CPU may already had cleared the header
- * and the host may already had delivered some other message there.
- * In case we blindly write msg->header.message_type we're going
- * to lose it. We can still lose a message of the same type but
- * we count on the fact that there can only be one
- * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
- * on crash.
- */
- if (cmpxchg(&msg->header.message_type, old_msg_type,
- HVMSG_NONE) != old_msg_type)
- return;
-
- /*
- * Make sure the write to MessageType (ie set to
- * HVMSG_NONE) happens before we read the
- * MessagePending and EOMing. Otherwise, the EOMing
- * will not deliver any more messages since there is
- * no empty slot
- */
- mb();
-
- if (msg->header.message_flags.msg_pending) {
- /*
- * This will cause message queue rescan to
- * possibly deliver another msg from the
- * hypervisor
- */
- wrmsrl(HV_X64_MSR_EOM, 0);
- }
+ return 0;
}
-#define hv_init_timer(timer, tick) \
- wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
-#define hv_init_timer_config(timer, val) \
- wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val)
-
-#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val)
-#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val)
+extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
-#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val)
-#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val)
-
-#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val)
-#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val)
-
-#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
+#if IS_ENABLED(CONFIG_HYPERV)
+extern void *hv_hypercall_pg;
-#define hv_get_synint_state(int_num, val) \
- rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
-#define hv_set_synint_state(int_num, val) \
- wrmsrl(HV_X64_MSR_SINT0 + int_num, val)
+extern union hv_ghcb * __percpu *hv_ghcb_pg;
-#define hv_get_crash_ctl(val) \
- rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+bool hv_isolation_type_snp(void);
+bool hv_isolation_type_tdx(void);
-void hyperv_callback_vector(void);
-void hyperv_reenlightenment_vector(void);
-#ifdef CONFIG_TRACING
-#define trace_hyperv_callback_vector hyperv_callback_vector
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
#endif
-void hyperv_vector_handler(struct pt_regs *regs);
-void hv_setup_vmbus_irq(void (*handler)(void));
-void hv_remove_vmbus_irq(void);
-
-void hv_setup_kexec_handler(void (*handler)(void));
-void hv_remove_kexec_handler(void);
-void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
-void hv_remove_crash_handler(void);
/*
- * Routines for stimer0 Direct Mode handling.
- * On x86/x64, there are no percpu actions to take.
+ * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
+ * to start AP in enlightened SEV guest.
*/
-void hv_stimer0_vector_handler(struct pt_regs *regs);
-void hv_stimer0_callback_vector(void);
-int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
-void hv_remove_stimer0_irq(int irq);
-
-static inline void hv_enable_stimer0_percpu_irq(int irq) {}
-static inline void hv_disable_stimer0_percpu_irq(int irq) {}
-
-
-#if IS_ENABLED(CONFIG_HYPERV)
-extern struct clocksource *hyperv_cs;
-extern void *hv_hypercall_pg;
-extern void __percpu **hyperv_pcpu_input_arg;
+#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
+#define HV_AP_SEGMENT_LIMIT 0xffffffff
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
- u64 hv_status;
#ifdef CONFIG_X86_64
- if (!hv_hypercall_pg)
- return U64_MAX;
-
- __asm__ __volatile__("mov %4, %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input_address)
- : "r" (output_address),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "memory", "r8", "r9", "r10", "r11");
+ return static_call_mod(hv_hypercall)(control, input_address, output_address);
#else
u32 input_address_hi = upper_32_bits(input_address);
u32 input_address_lo = lower_32_bits(input_address);
u32 output_address_hi = upper_32_bits(output_address);
u32 output_address_lo = lower_32_bits(output_address);
+ u64 hv_status;
if (!hv_hypercall_pg)
return U64_MAX;
@@ -171,115 +93,69 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
"D"(output_address_hi), "S"(output_address_lo),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
-#endif /* !x86_64 */
return hv_status;
+#endif /* !x86_64 */
}
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-
#ifdef CONFIG_X86_64
- {
- __asm__ __volatile__(CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, 0);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo),
- ASM_CALL_CONSTRAINT
- : "A" (control),
- "b" (input1_hi),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "edi", "esi");
- }
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ ASM_CALL_CONSTRAINT
+ : "A" (control),
+ "b" (input1_hi),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc", "edi", "esi");
+ return hv_status;
#endif
- return hv_status;
}
-/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ return _hv_do_fast_hypercall8(control, input1);
+}
+
+/* Fast hypercall with 16 bytes of input */
+static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
+{
#ifdef CONFIG_X86_64
- {
- __asm__ __volatile__("mov %4, %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : "r" (input2),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, input2);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
- u32 input2_hi = upper_32_bits(input2);
- u32 input2_lo = lower_32_bits(input2);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo), ASM_CALL_CONSTRAINT
- : "A" (control), "b" (input1_hi),
- "D"(input2_hi), "S"(input2_lo),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc");
- }
-#endif
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u32 input2_hi = upper_32_bits(input2);
+ u32 input2_lo = lower_32_bits(input2);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo), ASM_CALL_CONSTRAINT
+ : "A" (control), "b" (input1_hi),
+ "D"(input2_hi), "S"(input2_lo),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc");
return hv_status;
+#endif
}
-/*
- * Rep hypercalls. Callers of this functions are supposed to ensure that
- * rep_count and varhead_size comply with Hyper-V hypercall definition.
- */
-static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
- void *input, void *output)
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
{
- u64 control = code;
- u64 status;
- u16 rep_comp;
-
- control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
- control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
-
- do {
- status = hv_do_hypercall(control, input, output);
- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
- return status;
-
- /* Bits 32-43 of status have 'Reps completed' data. */
- rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
- HV_HYPERCALL_REP_COMP_OFFSET;
-
- control &= ~HV_HYPERCALL_REP_START_MASK;
- control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
-
- touch_nmi_watchdog();
- } while (rep_comp < rep_count);
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
- return status;
+ return _hv_do_fast_hypercall16(control, input1, input2);
}
-/*
- * Hypervisor's notion of virtual processor ID is different from
- * Linux' notion of CPU ID. This information can only be retrieved
- * in the context of the calling CPU. Setup a map for easy access
- * to this information.
- */
-extern u32 *hv_vp_index;
-extern u32 hv_max_vp_index;
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -290,65 +166,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
return hv_vp_assist_page[cpu];
}
-/**
- * hv_cpu_number_to_vp_number() - Map CPU to VP.
- * @cpu_number: CPU number in Linux terms
- *
- * This function returns the mapping between the Linux processor
- * number and the hypervisor's virtual processor number, useful
- * in making hypercalls and such that talk about specific
- * processors.
- *
- * Return: Virtual processor number in Hyper-V terms
- */
-static inline int hv_cpu_number_to_vp_number(int cpu_number)
-{
- return hv_vp_index[cpu_number];
-}
-
-static inline int cpumask_to_vpset(struct hv_vpset *vpset,
- const struct cpumask *cpus)
-{
- int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (hv_max_vp_index / 64 >= 64)
- return 0;
-
- /*
- * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
- * structs are not cleared between calls, we risk flushing unneeded
- * vCPUs otherwise.
- */
- for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
- vpset->bank_contents[vcpu_bank] = 0;
-
- /*
- * Some banks may end up being empty but this is acceptable.
- */
- for_each_cpu(cpu, cpus) {
- vcpu = hv_cpu_number_to_vp_number(cpu);
- if (vcpu == VP_INVAL)
- return -1;
- vcpu_bank = vcpu / 64;
- vcpu_offset = vcpu % 64;
- __set_bit(vcpu_offset, (unsigned long *)
- &vpset->bank_contents[vcpu_bank]);
- if (vcpu_bank >= nr_bank)
- nr_bank = vcpu_bank + 1;
- }
- vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
- return nr_bank;
-}
-
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-void hyperv_report_panic(struct pt_regs *regs, long err);
-void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
-bool hv_is_hyperv_initialized(void);
-void hyperv_cleanup(void);
-
-void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
@@ -358,6 +177,8 @@ int hyperv_flush_guest_mapping_range(u64 as,
int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
u64 start_gfn, u64 end_gfn);
+void hv_sleep_notifiers_register(void);
+void hv_machine_power_off(void);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -367,10 +188,69 @@ bool hv_vcpu_is_preempted(int vcpu);
static inline void hv_apic_init(void) {}
#endif
+struct irq_domain *hv_create_pci_msi_domain(void);
+
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry);
+int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
+ struct hv_interrupt_entry *entry);
+int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+bool hv_ghcb_negotiate_protocol(void);
+void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
+int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu);
+#else
+static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
+static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
+static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip,
+ unsigned int cpu) { return 0; }
+#endif
+
+#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
+void hv_vtom_init(void);
+void hv_ivm_msr_write(u64 msr, u64 value);
+void hv_ivm_msr_read(u64 msr, u64 *value);
+#else
+static inline void hv_vtom_init(void) {}
+static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
+static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
+#endif
+
+static inline bool hv_is_synic_msr(unsigned int reg)
+{
+ return (reg >= HV_X64_MSR_SCONTROL) &&
+ (reg <= HV_X64_MSR_SINT15);
+}
+
+static inline bool hv_is_sint_msr(unsigned int reg)
+{
+ return (reg >= HV_X64_MSR_SINT0) &&
+ (reg <= HV_X64_MSR_SINT15);
+}
+
+u64 hv_get_msr(unsigned int reg);
+void hv_set_msr(unsigned int reg, u64 value);
+u64 hv_get_non_nested_msr(unsigned int reg);
+void hv_set_non_nested_msr(unsigned int reg, u64 value);
+
+static __always_inline u64 hv_raw_get_msr(unsigned int reg)
+{
+ return native_rdmsrq(reg);
+}
+int hv_apicid_to_vp_index(u32 apic_id);
+
+#if IS_ENABLED(CONFIG_MSHV_ROOT) && IS_ENABLED(CONFIG_CRASH_DUMP)
+void hv_root_crash_init(void);
+void hv_crash_asm32(void);
+void hv_crash_asm64(void);
+void hv_crash_asm_end(void);
+#else /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */
+static inline void hv_root_crash_init(void) {}
+#endif /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */
+
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
-static inline bool hv_is_hyperv_initialized(void) { return false; }
-static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
@@ -385,75 +265,55 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
{
return -1;
}
+static inline void hv_set_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_msr(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
+static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; }
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_HYPERV_TSCPAGE
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- u64 scale, offset;
- u32 sequence;
-
- /*
- * The protocol for reading Hyper-V TSC page is specified in Hypervisor
- * Top-Level Functional Specification ver. 3.0 and above. To get the
- * reference time we must do the following:
- * - READ ReferenceTscSequence
- * A special '0' value indicates the time source is unreliable and we
- * need to use something else. The currently published specification
- * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
- * instead of '0' as the special value, see commit c35b82ef0294.
- * - ReferenceTime =
- * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
- * - READ ReferenceTscSequence again. In case its value has changed
- * since our first reading we need to discard ReferenceTime and repeat
- * the whole sequence as the hypervisor was updating the page in
- * between.
- */
- do {
- sequence = READ_ONCE(tsc_pg->tsc_sequence);
- if (!sequence)
- return U64_MAX;
- /*
- * Make sure we read sequence before we read other values from
- * TSC page.
- */
- smp_rmb();
-
- scale = READ_ONCE(tsc_pg->tsc_scale);
- offset = READ_ONCE(tsc_pg->tsc_offset);
- *cur_tsc = rdtsc_ordered();
-
- /*
- * Make sure we read sequence after we read all other values
- * from TSC page.
- */
- smp_rmb();
-
- } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
-
- return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
- u64 cur_tsc;
-
- return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
-}
+struct mshv_vtl_cpu_context {
+ union {
+ struct {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 cr2;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ };
+ u64 gp_regs[16];
+ };
+
+ struct fxregs_state fx_state;
+};
+#ifdef CONFIG_HYPERV_VTL_MODE
+void __init hv_vtl_init_platform(void);
+int __init hv_vtl_early_init(void);
+void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
+void mshv_vtl_return_call_init(u64 vtl_return_offset);
+void mshv_vtl_return_hypercall(void);
+void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
#else
-static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return NULL;
-}
-
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- BUG();
- return U64_MAX;
-}
+static inline void __init hv_vtl_init_platform(void) {}
+static inline int __init hv_vtl_early_init(void) { return 0; }
+static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
+static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {}
+static inline void mshv_vtl_return_hypercall(void) {}
+static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
#endif
+
+#include <asm-generic/mshyperv.h>
+
#endif
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index 25ddd0916bb2..935c6d470341 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -9,6 +9,63 @@ typedef struct irq_alloc_info msi_alloc_info_t;
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg);
-void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
+/* Structs and defines for the X86 specific MSI message format */
+
+typedef struct x86_msi_data {
+ union {
+ struct {
+ u32 vector : 8,
+ delivery_mode : 3,
+ dest_mode_logical : 1,
+ reserved : 2,
+ active_low : 1,
+ is_level : 1;
+ };
+ u32 dmar_subhandle;
+ };
+} __attribute__ ((packed)) arch_msi_msg_data_t;
+#define arch_msi_msg_data x86_msi_data
+
+typedef struct x86_msi_addr_lo {
+ union {
+ struct {
+ u32 reserved_0 : 2,
+ dest_mode_logical : 1,
+ redirect_hint : 1,
+ reserved_1 : 1,
+ virt_destid_8_14 : 7,
+ destid_0_7 : 8,
+ base_address : 12;
+ };
+ struct {
+ u32 dmar_reserved_0 : 2,
+ dmar_index_15 : 1,
+ dmar_subhandle_valid : 1,
+ dmar_format : 1,
+ dmar_index_0_14 : 15,
+ dmar_base_address : 12;
+ };
+ };
+} __attribute__ ((packed)) arch_msi_msg_addr_lo_t;
+#define arch_msi_msg_addr_lo x86_msi_addr_lo
+
+#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20)
+
+typedef struct x86_msi_addr_hi {
+ u32 reserved : 8,
+ destid_8_31 : 24;
+} __attribute__ ((packed)) arch_msi_msg_addr_hi_t;
+#define arch_msi_msg_addr_hi x86_msi_addr_hi
+
+#define X86_MSI_BASE_ADDRESS_HIGH (0)
+
+struct msi_msg;
+u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid);
+
+#define X86_VECTOR_MSI_FLAGS_SUPPORTED \
+ (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX | MSI_FLAG_PCI_MSIX_ALLOC_DYN)
+
+#define X86_VECTOR_MSI_FLAGS_REQUIRED \
+ (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h
deleted file mode 100644
index ee2f8ccc32d0..000000000000
--- a/arch/x86/include/asm/msidef.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MSIDEF_H
-#define _ASM_X86_MSIDEF_H
-
-/*
- * Constants for Intel APIC based MSI messages.
- */
-
-/*
- * Shifts for MSI data
- */
-
-#define MSI_DATA_VECTOR_SHIFT 0
-#define MSI_DATA_VECTOR_MASK 0x000000ff
-#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \
- MSI_DATA_VECTOR_MASK)
-
-#define MSI_DATA_DELIVERY_MODE_SHIFT 8
-#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
-#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT 14
-#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
-#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT 15
-#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
-#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for msi address
- */
-
-#define MSI_ADDR_BASE_HI 0
-#define MSI_ADDR_BASE_LO 0xfee00000
-
-#define MSI_ADDR_DEST_MODE_SHIFT 2
-#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT)
-#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT 3
-#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
- /* dedicated cpu */
-#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
- /* lowest priority */
-
-#define MSI_ADDR_DEST_ID_SHIFT 12
-#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
-#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
- MSI_ADDR_DEST_ID_MASK)
-#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00)
-
-#define MSI_ADDR_IR_EXT_INT (1 << 4)
-#define MSI_ADDR_IR_SHV (1 << 3)
-#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
-#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
-#endif /* _ASM_X86_MSIDEF_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8e40c2446fd1..3d0a0950d20a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -2,12 +2,9 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
-/*
- * CPU model specific register (MSR) numbers.
- *
- * Do not add new entries to this file unless the definitions are shared
- * between multiple compilation units.
- */
+#include <linux/bits.h>
+
+/* CPU model specific register (MSR) numbers. */
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
@@ -28,6 +25,8 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
+#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
@@ -36,18 +35,59 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
+#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
+
+/*
+ * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc.
+ * Most MSRs support/allow only a subset of memory types, but the values
+ * themselves are common across all relevant MSRs.
+ */
+#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */
+#define X86_MEMTYPE_WC 1ull /* Write Combining */
+/* RESERVED 2 */
+/* RESERVED 3 */
+#define X86_MEMTYPE_WT 4ull /* Write Through */
+#define X86_MEMTYPE_WP 5ull /* Write Protected */
+#define X86_MEMTYPE_WB 6ull /* Write Back */
+#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */
+
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
+#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
+#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */
+#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */
+#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */
+#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */
+#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */
+#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */
+#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_TEST_CTRL 0x00000033
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
-#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
-#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
-#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
+
+/* A mask for bits which the kernel toggles when controlling mitigations */
+#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
-#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
@@ -59,6 +99,22 @@
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+#define MSR_IA32_UMWAIT_CONTROL 0xe1
+#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
+#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
+/*
+ * The time field is bit[31:2], but representing a 32bit value with
+ * bit[1:0] zero.
+ */
+#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+
+/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
+#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
@@ -69,24 +125,125 @@
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
-#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
-#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
-#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */
-#define ARCH_CAP_SSB_NO (1 << 4) /*
- * Not susceptible to Speculative Store Bypass
- * attack, so no Speculative Store Bypass
- * control required.
- */
+#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
+#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
+#define ARCH_CAP_SSB_NO BIT(4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
+#define ARCH_CAP_MDS_NO BIT(5) /*
+ * Not susceptible to
+ * Microarchitectural Data
+ * Sampling (MDS) vulnerabilities.
+ */
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
+ * The processor is not susceptible to a
+ * machine check error due to modifying the
+ * code page size along with either the
+ * physical address or cache type
+ * without TLB invalidation.
+ */
+#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO BIT(8) /*
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_MCU_ENUM BIT(16) /*
+ * Indicates the presence of microcode update
+ * feature enumeration and status information.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
+#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
+ * IA32_XAPIC_DISABLE_STATUS MSR
+ * supported
+ */
+#define ARCH_CAP_PBRSB_NO BIT(24) /*
+ * Not susceptible to Post-Barrier
+ * Return Stack Buffer Predictions.
+ */
+#define ARCH_CAP_GDS_CTRL BIT(25) /*
+ * CPU is vulnerable to Gather
+ * Data Sampling (GDS) and
+ * has controls for mitigation.
+ */
+#define ARCH_CAP_GDS_NO BIT(26) /*
+ * CPU is not vulnerable to Gather
+ * Data Sampling (GDS).
+ */
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
+ */
+#define ARCH_CAP_ITS_NO BIT_ULL(62) /*
+ * Not susceptible to
+ * Indirect Target Selection.
+ * This bit is not set by
+ * HW, but is synthesized by
+ * VMMs for guests to know
+ * their affected status.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
-#define L1D_FLUSH (1 << 0) /*
- * Writeback and invalidate the
- * L1 data cache.
- */
+#define L1D_FLUSH BIT(0) /*
+ * Writeback and invalidate the
+ * L1 data cache.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
+#define MSR_IA32_MCU_OPT_CTRL 0x00000123
+#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
+#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
@@ -94,6 +251,7 @@
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_ERROR_CONTROL 0x0000017f
#define MSR_IA32_MCG_EXT_CTL 0x000004d0
#define MSR_OFFCORE_RSP_0 0x000001a6
@@ -102,8 +260,25 @@
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+#define MSR_SNOOP_RSP_0 0x00001328
+#define MSR_SNOOP_RSP_1 0x00001329
+
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+#define MSR_IA32_POWER_CTL_BIT_EE 19
+
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2
+#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_SBAF_BIT 8
+#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT)
+#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9)
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -113,13 +288,69 @@
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
#define LBR_INFO_CYCLES 0xffff
+#define LBR_INFO_BR_TYPE_OFFSET 56
+#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET 32
+#define LBR_INFO_BR_CNTR_NUM 4
+#define LBR_INFO_BR_CNTR_BITS 2
+#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
+
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET 1
+#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET 3
+#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET 16
+#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH 0x000014cf
+#define MSR_ARCH_LBR_FROM_0 0x00001500
+#define MSR_ARCH_LBR_TO_0 0x00001600
+#define MSR_ARCH_LBR_INFO_0 0x00001200
#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define PERF_CAP_METRICS_IDX 15
+#define PERF_CAP_PT_IDX 16
+
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
+ PERF_CAP_PEBS_TIMING_INFO)
+
+/* Arch PEBS */
+#define MSR_IA32_PEBS_BASE 0x000003f4
+#define MSR_IA32_PEBS_INDEX 0x000003f5
+#define ARCH_PEBS_OFFSET_MASK 0x7fffff
+#define ARCH_PEBS_INDEX_WR_SHIFT 4
+
+#define ARCH_PEBS_RELOAD 0xffffffff
+#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
+#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
+#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
+#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
+#define ARCH_PEBS_LBR_SHIFT 40
+#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
+#define ARCH_PEBS_GPR BIT_ULL(61)
+#define ARCH_PEBS_AUX BIT_ULL(62)
+#define ARCH_PEBS_EN BIT_ULL(63)
+#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
+ ARCH_PEBS_CNTR_METRICS)
+
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
@@ -135,6 +366,8 @@
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_EVENT_EN BIT(31)
+#define RTIT_CTL_NOTNT BIT_ULL(55)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
@@ -185,16 +418,27 @@
#define MSR_IA32_CR_PAT 0x00000277
+#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \
+ ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \
+ (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \
+ (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \
+ (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))
+
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
+#define MSR_IA32_PASID 0x00000d93
+#define MSR_IA32_PASID_VALID BIT_ULL(31)
+
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
@@ -204,11 +448,10 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
-#define MSR_IA32_POWER_CTL 0x000001fc
-
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -238,6 +481,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@@ -259,6 +503,10 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
+#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a
+#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
+
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
@@ -267,6 +515,7 @@
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
@@ -284,11 +533,29 @@
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
-
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+/* Control-flow Enforcement Technology MSRs */
+#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */
+#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */
+#define CET_SHSTK_EN BIT_ULL(0)
+#define CET_WRSS_EN BIT_ULL(1)
+#define CET_ENDBR_EN BIT_ULL(2)
+#define CET_LEG_IW_EN BIT_ULL(3)
+#define CET_NO_TRACK_EN BIT_ULL(4)
+#define CET_SUPPRESS_DISABLE BIT_ULL(5)
+#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9))
+#define CET_SUPPRESS BIT_ULL(10)
+#define CET_WAIT_ENDBR BIT_ULL(11)
+
+#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */
+#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */
+#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */
+#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */
+#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */
+
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
@@ -296,7 +563,7 @@
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
-#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
@@ -313,16 +580,16 @@
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
-#define HWP_MIN_PERF(x) (x & 0xff)
-#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
-#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
@@ -358,17 +625,55 @@
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
-/* AMD64 MSRs. Not complete. See the architecture manual for a more
- complete list. */
+/* Auto-reload via MSR instead of DS area */
+#define MSR_RELOAD_PMC0 0x000014c1
+#define MSR_RELOAD_FIXED_CTR0 0x00001309
+
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
+#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
+#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
+#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
+#define MSR_IA32_PMC_V6_STEP 4
+/* KeyID partitioning between MKTME and TDX */
+#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
+
+/*
+ * AMD64 MSRs. Not complete. See the architecture manual for a more
+ * complete list.
+ */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD_PERF_CTL 0xc0010062
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD_PPIN_CTL 0xc00102f0
+#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
+
+#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT)
+
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_TW_CFG 0xc0011023
+
+#define MSR_AMD64_DE_CFG 0xc0011029
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
+
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
@@ -386,13 +691,111 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
+#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
+#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
+#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
+
+#define MSR_SVSM_CAA 0xc001f000
+
+/* AMD Collaborative Processor Performance Control MSRs */
+#define MSR_AMD_CPPC_CAP1 0xc00102b0
+#define MSR_AMD_CPPC_ENABLE 0xc00102b1
+#define MSR_AMD_CPPC_CAP2 0xc00102b2
+#define MSR_AMD_CPPC_REQ 0xc00102b3
+#define MSR_AMD_CPPC_STATUS 0xc00102b4
+
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
+
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
+
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
+
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501
+#define MSR_AMD_WORKLOAD_HRST 0xc0000502
+
+/* AMD Last Branch Record MSRs */
+#define MSR_AMD64_LBR_SELECT 0xc000010e
+
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+
+/* Fam 19h MSRs */
+#define MSR_F19H_UMC_PERF_CTL 0xc0010800
+#define MSR_F19H_UMC_PERF_CTR 0xc0010801
+
+/* Zen 2 */
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@@ -406,6 +809,8 @@
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
@@ -436,16 +841,20 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
-#define MSR_F10H_DECFG 0xc0011029
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
-#define MSR_K8_SYSCFG 0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG 0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
@@ -468,8 +877,13 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
+#define MSR_K7_HWCR_CPB_DIS_BIT 25
+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
@@ -514,38 +928,55 @@
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
-#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+
+/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */
+#define MSR_IA32_FEAT_CTL 0x0000003a
+#define FEAT_CTL_LOCKED BIT(0)
+#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1)
+#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2)
+#define FEAT_CTL_SGX_LC_ENABLED BIT(17)
+#define FEAT_CTL_SGX_ENABLED BIT(18)
+#define FEAT_CTL_LMCE_ENABLED BIT(20)
+
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+#define MSR_IA32_XFD 0x000001c4
+#define MSR_IA32_XFD_ERR 0x000001c5
#define MSR_IA32_XSS 0x00000da0
-#define FEATURE_CONTROL_LOCKED (1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
-#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
-#define FEATURE_CONTROL_LMCE (1<<20)
-
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define MSR_IA32_TSCDEADLINE 0x000006e0
-
#define MSR_IA32_UCODE_WRITE 0x00000079
+
+#define MSR_IA32_MCU_ENUMERATION 0x0000007b
+#define MCU_STAGING BIT(4)
+
#define MSR_IA32_UCODE_REV 0x0000008b
+/* Intel SGX Launch Enclave Public Key Hash MSRs */
+#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C
+#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D
+#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E
+#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F
+
#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
#define MSR_IA32_SMBASE 0x0000009e
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
-#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
-#define MSR_AMD_PERF_STATUS 0xc0010063
-#define MSR_AMD_PERF_CTL 0xc0010062
+
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
+#define MSR_AMD_SAMP_BR_FROM 0xc0010300
+
+#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6)
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
@@ -577,6 +1008,7 @@
#define ENERGY_PERF_BIAS_PERFORMANCE 0
#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_NORMAL_POWERSAVE 7
#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
#define ENERGY_PERF_BIAS_POWERSAVE 15
@@ -584,12 +1016,14 @@
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
@@ -666,6 +1100,16 @@
#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+#define MSR_TSX_FORCE_ABORT 0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
+#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1
+#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT)
+#define MSR_TFA_SDV_ENABLE_RTM_BIT 2
+#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT)
+
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
@@ -770,11 +1214,22 @@
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+#define MSR_PERF_METRICS 0x00000329
+
+/* PERF_GLOBAL_OVF_CTL bits */
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
@@ -797,24 +1252,48 @@
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
#define MSR_IA32_VMX_VMFUNC 0x00000491
+#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
+
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
+/* Resctrl MSRs: */
+/* - Intel: */
+#define MSR_IA32_L3_QOS_CFG 0xc81
+#define MSR_IA32_L2_QOS_CFG 0xc82
+#define MSR_IA32_QM_EVTSEL 0xc8d
+#define MSR_IA32_QM_CTR 0xc8e
+#define MSR_IA32_PQR_ASSOC 0xc8f
+#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
+#define MSR_IA32_L2_CBM_BASE 0xd10
+#define MSR_IA32_MBA_THRTL_BASE 0xd50
+
+/* - AMD: */
+#define MSR_IA32_MBA_BW_BASE 0xc0000200
+#define MSR_IA32_SMBA_BW_BASE 0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
+#define MSR_IA32_EVT_CFG_BASE 0xc0000400
-/* VMX_BASIC bits and bitmasks */
-#define VMX_BASIC_VMCS_SIZE_SHIFT 32
-#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
-#define VMX_BASIC_64 0x0001000000000000LLU
-#define VMX_BASIC_MEM_TYPE_SHIFT 50
-#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
-#define VMX_BASIC_MEM_TYPE_WB 6LLU
-#define VMX_BASIC_INOUT 0x0040000000000000LLU
-
-/* MSR_IA32_VMX_MISC bits */
-#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
-#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
-#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
-
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
+#define SVM_VM_CR_VALID_MASK 0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
+
+/* x2APIC locked status */
+#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
+#define LEGACY_XAPIC_DISABLED BIT(0) /*
+ * x2APIC mode is locked and
+ * disabling x2APIC will cause
+ * a #GP
+ */
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 91e4cf189914..9c2ea29e12a9 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -4,28 +4,22 @@
#include "msr-index.h"
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
+#include <asm/shared/msr.h>
-struct msr {
- union {
- struct {
- u32 l;
- u32 h;
- };
- u64 q;
- };
-};
+#include <linux/types.h>
+#include <linux/percpu.h>
struct msr_info {
- u32 msr_no;
- struct msr reg;
- struct msr *msrs;
- int err;
+ u32 msr_no;
+ struct msr reg;
+ struct msr __percpu *msrs;
+ int err;
};
struct msr_regs_info {
@@ -44,41 +38,22 @@ struct saved_msrs {
};
/*
- * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
- * constraint has different meanings. For i386, "A" means exactly
- * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
- * it means rax *or* rdx.
- */
-#ifdef CONFIG_X86_64
-/* Using 64-bit values saves one instruction clearing the high half of low */
-#define DECLARE_ARGS(val, low, high) unsigned long low, high
-#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
-#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
-#else
-#define DECLARE_ARGS(val, low, high) unsigned long long val
-#define EAX_EDX_VAL(val, low, high) (val)
-#define EAX_EDX_RET(val, low, high) "=A" (val)
-#endif
-
-#ifdef CONFIG_TRACEPOINTS
-/*
* Be very careful with includes. This header is prone to include loops.
*/
#include <asm/atomic.h>
#include <linux/tracepoint-defs.h>
-extern struct tracepoint __tracepoint_read_msr;
-extern struct tracepoint __tracepoint_write_msr;
-extern struct tracepoint __tracepoint_rdpmc;
-#define msr_tracepoint_active(t) static_key_false(&(t).key)
-extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
-extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
-extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
+#ifdef CONFIG_TRACEPOINTS
+DECLARE_TRACEPOINT(read_msr);
+DECLARE_TRACEPOINT(write_msr);
+DECLARE_TRACEPOINT(rdpmc);
+extern void do_trace_write_msr(u32 msr, u64 val, int failed);
+extern void do_trace_read_msr(u32 msr, u64 val, int failed);
+extern void do_trace_rdpmc(u32 msr, u64 val, int failed);
#else
-#define msr_tracepoint_active(t) false
-static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
-static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
-static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_write_msr(u32 msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
#endif
/*
@@ -88,24 +63,24 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
* think of extending them - you will be slapped with a stinking trout or a frozen
* shark will reach you, wherever you are! You've been warned.
*/
-static inline unsigned long long notrace __rdmsr(unsigned int msr)
+static __always_inline u64 __rdmsr(u32 msr)
{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
asm volatile("1: rdmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
-static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
+static __always_inline void __wrmsrq(u32 msr, u64 val)
{
asm volatile("1: wrmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
- : : "c" (msr), "a"(low), "d" (high) : "memory");
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
}
#define native_rdmsr(msr, val1, val2) \
@@ -115,129 +90,81 @@ do { \
(void)((val2) = (u32)(__val >> 32)); \
} while (0)
+static __always_inline u64 native_rdmsrq(u32 msr)
+{
+ return __rdmsr(msr);
+}
+
#define native_wrmsr(msr, low, high) \
- __wrmsr(msr, low, high)
+ __wrmsrq((msr), (u64)(high) << 32 | (low))
-#define native_wrmsrl(msr, val) \
- __wrmsr((msr), (u32)((u64)(val)), \
- (u32)((u64)(val) >> 32))
+#define native_wrmsrq(msr, val) \
+ __wrmsrq((msr), (val))
-static inline unsigned long long native_read_msr(unsigned int msr)
+static inline u64 native_read_msr(u32 msr)
{
- unsigned long long val;
+ u64 val;
val = __rdmsr(msr);
- if (msr_tracepoint_active(__tracepoint_read_msr))
+ if (tracepoint_enabled(read_msr))
do_trace_read_msr(msr, val, 0);
return val;
}
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
- int *err)
+static inline int native_read_msr_safe(u32 msr, u64 *p)
{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("2: rdmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err]\n\t"
- "xorl %%eax, %%eax\n\t"
- "xorl %%edx, %%edx\n\t"
- "jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
- : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
- : "c" (msr), [fault] "i" (-EIO));
- if (msr_tracepoint_active(__tracepoint_read_msr))
- do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
- return EAX_EDX_VAL(val, low, high);
+ int err;
+ EAX_EDX_DECLARE_ARGS(val, low, high);
+
+ asm volatile("1: rdmsr ; xor %[err],%[err]\n"
+ "2:\n\t"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
+ : [err] "=r" (err), EAX_EDX_RET(val, low, high)
+ : "c" (msr));
+ if (tracepoint_enabled(read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), err);
+
+ *p = EAX_EDX_VAL(val, low, high);
+
+ return err;
}
/* Can be uninlined because referenced by paravirt */
-static inline void notrace
-native_write_msr(unsigned int msr, u32 low, u32 high)
+static inline void notrace native_write_msr(u32 msr, u64 val)
{
- __wrmsr(msr, low, high);
+ native_wrmsrq(msr, val);
- if (msr_tracepoint_active(__tracepoint_write_msr))
- do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
+ if (tracepoint_enabled(write_msr))
+ do_trace_write_msr(msr, val, 0);
}
/* Can be uninlined because referenced by paravirt */
-static inline int notrace
-native_write_msr_safe(unsigned int msr, u32 low, u32 high)
+static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
int err;
- asm volatile("2: wrmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
+ asm volatile("1: wrmsr ; xor %[err],%[err]\n"
+ "2:\n\t"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
: [err] "=a" (err)
- : "c" (msr), "0" (low), "d" (high),
- [fault] "i" (-EIO)
+ : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
: "memory");
- if (msr_tracepoint_active(__tracepoint_write_msr))
- do_trace_write_msr(msr, ((u64)high << 32 | low), err);
+ if (tracepoint_enabled(write_msr))
+ do_trace_write_msr(msr, val, err);
return err;
}
extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);
-/**
- * rdtsc() - returns the current TSC without ordering constraints
- *
- * rdtsc() returns the result of RDTSC as a 64-bit integer. The
- * only ordering constraint it supplies is the ordering implied by
- * "asm volatile": it will put the RDTSC in the place you expect. The
- * CPU can and will speculatively execute that RDTSC, though, so the
- * results can be non-monotonic if compared on different CPUs.
- */
-static __always_inline unsigned long long rdtsc(void)
+static inline u64 native_read_pmc(int counter)
{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-
- return EAX_EDX_VAL(val, low, high);
-}
-
-/**
- * rdtsc_ordered() - read the current TSC in program order
- *
- * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
- * It is ordered like a load to a global in-memory counter. It should
- * be impossible to observe non-monotonic rdtsc_unordered() behavior
- * across multiple CPUs as long as the TSC is synced.
- */
-static __always_inline unsigned long long rdtsc_ordered(void)
-{
- /*
- * The RDTSC instruction is not ordered relative to memory
- * access. The Intel SDM and the AMD APM are both vague on this
- * point, but empirically an RDTSC instruction can be
- * speculatively executed before prior loads. An RDTSC
- * immediately after an appropriate barrier appears to be
- * ordered as a normal load, that is, it provides the same
- * ordering guarantees as reading from a global memory location
- * that some other imaginary CPU is updating continuously with a
- * time stamp.
- */
- barrier_nospec();
- return rdtsc();
-}
-
-static inline unsigned long long native_read_pmc(int counter)
-{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
- if (msr_tracepoint_active(__tracepoint_rdpmc))
+ if (tracepoint_enabled(rdpmc))
do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
@@ -259,82 +186,86 @@ do { \
(void)((high) = (u32)(__val >> 32)); \
} while (0)
-static inline void wrmsr(unsigned int msr, u32 low, u32 high)
+static inline void wrmsr(u32 msr, u32 low, u32 high)
{
- native_write_msr(msr, low, high);
+ native_write_msr(msr, (u64)high << 32 | low);
}
-#define rdmsrl(msr, val) \
+#define rdmsrq(msr, val) \
((val) = native_read_msr((msr)))
-static inline void wrmsrl(unsigned int msr, u64 val)
+static inline void wrmsrq(u32 msr, u64 val)
{
- native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
+ native_write_msr(msr, val);
}
/* wrmsr with exception handling */
-static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high)
+static inline int wrmsrq_safe(u32 msr, u64 val)
{
- return native_write_msr_safe(msr, low, high);
+ return native_write_msr_safe(msr, val);
}
/* rdmsr with exception handling */
#define rdmsr_safe(msr, low, high) \
({ \
- int __err; \
- u64 __val = native_read_msr_safe((msr), &__err); \
+ u64 __val; \
+ int __err = native_read_msr_safe((msr), &__val); \
(*low) = (u32)__val; \
(*high) = (u32)(__val >> 32); \
__err; \
})
-static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
+static inline int rdmsrq_safe(u32 msr, u64 *p)
{
- int err;
+ return native_read_msr_safe(msr, p);
+}
- *p = native_read_msr_safe(msr, &err);
- return err;
+static __always_inline u64 rdpmc(int counter)
+{
+ return native_read_pmc(counter);
}
-#define rdpmc(counter, low, high) \
-do { \
- u64 _l = native_read_pmc((counter)); \
- (low) = (u32)_l; \
- (high) = (u32)(_l >> 32); \
-} while (0)
+#endif /* !CONFIG_PARAVIRT_XXL */
-#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
-#endif /* !CONFIG_PARAVIRT_XXL */
+/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
+static __always_inline void wrmsrns(u32 msr, u64 val)
+{
+ /*
+ * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
+ * DS prefix to avoid a trailing NOP.
+ */
+ asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
+ "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
+}
/*
- * 64-bit version of wrmsr_safe():
+ * Dual u32 version of wrmsrq_safe():
*/
-static inline int wrmsrl_safe(u32 msr, u64 val)
+static inline int wrmsr_safe(u32 msr, u32 low, u32 high)
{
- return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
+ return wrmsrq_safe(msr, (u64)high << 32 | low);
}
-#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
-
-#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-
-struct msr *msrs_alloc(void);
-void msrs_free(struct msr *msrs);
+struct msr __percpu *msrs_alloc(void);
+void msrs_free(struct msr __percpu *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
-int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
-int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
+int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
#else /* CONFIG_SMP */
@@ -348,25 +279,25 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
wrmsr(msr_no, l, h);
return 0;
}
-static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+static inline int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
- rdmsrl(msr_no, *q);
+ rdmsrq(msr_no, *q);
return 0;
}
-static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+static inline int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
- wrmsrl(msr_no, q);
+ wrmsrq(msr_no, q);
return 0;
}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h));
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h));
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
@@ -377,13 +308,13 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
return wrmsr_safe(msr_no, l, h);
}
-static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+static inline int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
- return rdmsrl_safe(msr_no, q);
+ return rdmsrq_safe(msr_no, q);
}
-static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+static inline int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
{
- return wrmsrl_safe(msr_no, q);
+ return wrmsrq_safe(msr_no, q);
}
static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
@@ -394,5 +325,11 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
-#endif /* __ASSEMBLY__ */
+
+/* Compatibility wrappers: */
+#define rdmsrl(msr, val) rdmsrq(msr, val)
+#define wrmsrl(msr, val) wrmsrq(msr, val)
+#define rdmsrl_on_cpu(cpu, msr, q) rdmsrq_on_cpu(cpu, msr, q)
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..76b95bd1a405 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.0+ */
/* Generic MTRR (Memory Type Range Register) ioctls.
Copyright (C) 1997-1999 Richard Gooch
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
The postal address is:
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
@@ -23,15 +10,43 @@
#ifndef _ASM_X86_MTRR_H
#define _ASM_X86_MTRR_H
+#include <linux/bits.h>
#include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+/* Defines for hardware MTRR registers. */
+#define MTRR_CAP_VCNT GENMASK(7, 0)
+#define MTRR_CAP_FIX BIT_MASK(8)
+#define MTRR_CAP_WC BIT_MASK(10)
+
+#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0)
+#define MTRR_DEF_TYPE_FE BIT_MASK(10)
+#define MTRR_DEF_TYPE_E BIT_MASK(11)
+
+#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E)
+#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE)
+
+#define MTRR_PHYSBASE_TYPE GENMASK(7, 0)
+#define MTRR_PHYSBASE_RSVD GENMASK(11, 8)
+
+#define MTRR_PHYSMASK_RSVD GENMASK(10, 0)
+#define MTRR_PHYSMASK_V BIT_MASK(11)
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+ unsigned char enabled;
+ bool have_fixed;
+ mtrr_type def_type;
+};
/*
* The following functions are for use by other drivers that cannot use
* arch_phys_wc_add and arch_phys_wc_del.
*/
# ifdef CONFIG_MTRR
+void mtrr_bp_init(void);
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+ mtrr_type def_type);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
@@ -41,21 +56,27 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment);
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
-extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
-extern void mtrr_ap_init(void);
-extern void mtrr_bp_init(void);
-extern void set_mtrr_aps_delayed_init(void);
-extern void mtrr_aps_init(void);
-extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
+void mtrr_disable(void);
+void mtrr_enable(void);
+void mtrr_generic_set_state(void);
# else
+static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
+ unsigned int num_var,
+ mtrr_type def_type)
+{
+}
+
static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
{
/*
- * Return no-MTRRs:
+ * Return the default MTRR type, without any known other types in
+ * that range.
*/
- return MTRR_TYPE_INVALID;
+ *uniform = 1;
+
+ return MTRR_TYPE_UNCACHABLE;
}
#define mtrr_save_fixed_ranges(arg) do {} while (0)
#define mtrr_save_state() do {} while (0)
@@ -81,18 +102,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
{
return 0;
}
-static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
-{
-}
-static inline void mtrr_bp_init(void)
-{
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
-}
-
-#define mtrr_ap_init() do {} while (0)
-#define set_mtrr_aps_delayed_init() do {} while (0)
-#define mtrr_aps_init() do {} while (0)
-#define mtrr_bp_restore() do {} while (0)
+#define mtrr_bp_init() do {} while (0)
+#define mtrr_disable() do {} while (0)
+#define mtrr_enable() do {} while (0)
+#define mtrr_generic_set_state() do {} while (0)
# endif
#ifdef CONFIG_COMPAT
@@ -127,7 +140,8 @@ struct mtrr_gentry32 {
#endif /* CONFIG_COMPAT */
/* Bit fields for enabled in struct mtrr_state_type */
-#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01
-#define MTRR_STATE_MTRR_ENABLED 0x02
+#define MTRR_STATE_SHIFT 10
+#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT)
+#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT)
#endif /* _ASM_X86_MTRR_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 39a2fb29378a..e4815e15dc9a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -6,43 +6,46 @@
#include <linux/sched/idle.h>
#include <asm/cpufeature.h>
+#include <asm/nospec-branch.h>
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
+#define MWAIT_C1_SUBSTATE_MASK 0xf0
-#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
-#define MWAITX_MAX_LOOPS ((u32)-1)
-#define MWAITX_DISABLE_CSTATES 0xf
+#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
+#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
-static inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx)
{
- /* "monitor %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc8;"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx));
}
-static inline void __monitorx(const void *eax, unsigned long ecx,
- unsigned long edx)
+static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
{
- /* "monitorx %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfa;"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __mwait(u32 eax, u32 ecx)
{
- /* "mwait %eax, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+ /*
+ * Use the instruction mnemonic with implicit operands, as the LLVM
+ * assembler fails to assemble the mnemonic with explicit operands:
+ */
+ asm volatile("mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -71,20 +74,26 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
-static inline void __mwaitx(unsigned long eax, unsigned long ebx,
- unsigned long ecx)
+static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
- /* "mwaitx %eax, %ebx, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xfb;"
- :: "a" (eax), "b" (ebx), "c" (ecx));
+ /* No need for TSA buffer clearing on AMD */
+
+ asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx));
}
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+/*
+ * Re-enable interrupts right upon calling mwait in such a way that
+ * no interrupt can fire _before_ the execution of mwait, ie: no
+ * instruction must be placed between "sti" and "mwait".
+ *
+ * This is necessary because if an interrupt queues a timer before
+ * executing mwait, it would otherwise go unnoticed and the next tick
+ * would not be reprogrammed accordingly before mwait ever wakes up.
+ */
+static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
- trace_hardirqs_on();
- /* "mwait %eax, %ecx;" */
- asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
+
+ asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
/*
@@ -97,20 +106,45 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
- if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
- mb();
- clflush((void *)&current_thread_info()->flags);
- mb();
- }
+ const void *addr = &current_thread_info()->flags;
+
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
+ if (need_resched())
+ goto out;
+
+ if (ecx & 1) {
__mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
+ }
}
+
+out:
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx" */
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1"
+ :: "c" (ecx), "d" (edx), "a" (eax));
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..79d88d12c8fb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -9,21 +9,31 @@
#ifdef CONFIG_X86_LOCAL_APIC
-extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-struct ctl_table;
-extern int proc_nmi_enabled(struct ctl_table *, int ,
- void __user *, size_t *, loff_t *);
-extern int unknown_nmi_panic;
-
#endif /* CONFIG_X86_LOCAL_APIC */
+extern int unknown_nmi_panic;
+extern int panic_on_unrecovered_nmi;
+extern int panic_on_io_nmi;
+
+/* NMI handler flags */
#define NMI_FLAG_FIRST 1
+/**
+ * enum - NMI types.
+ * @NMI_LOCAL: Local NMI, CPU-specific NMI generated by the Local APIC.
+ * @NMI_UNKNOWN: Unknown NMI, the source of the NMI may not be identified.
+ * @NMI_SERR: System Error NMI, typically triggered by PCI errors.
+ * @NMI_IO_CHECK: I/O Check NMI, related to I/O errors.
+ * @NMI_MAX: Maximum value for NMI types.
+ *
+ * NMI types are used to categorize NMIs and to dispatch them to the
+ * appropriate handler.
+ */
enum {
NMI_LOCAL=0,
NMI_UNKNOWN,
@@ -32,6 +42,7 @@ enum {
NMI_MAX
};
+/* NMI handler return values */
#define NMI_DONE 0
#define NMI_HANDLED 1
@@ -41,14 +52,33 @@ struct nmiaction {
struct list_head list;
nmi_handler_t handler;
u64 max_duration;
- struct irq_work irq_work;
unsigned long flags;
const char *name;
};
+/**
+ * register_nmi_handler - Register a handler for a specific NMI type
+ * @t: NMI type (e.g. NMI_LOCAL)
+ * @fn: The NMI handler
+ * @fg: Flags associated with the NMI handler
+ * @n: Name of the NMI handler
+ * @init: Optional __init* attributes for struct nmiaction
+ *
+ * Adds the provided handler to the list of handlers for the specified
+ * NMI type. Handlers flagged with NMI_FLAG_FIRST would be executed first.
+ *
+ * Sometimes the source of an NMI can't be reliably determined which
+ * results in an NMI being tagged as "unknown". Register an additional
+ * handler using the NMI type - NMI_UNKNOWN to handle such cases. The
+ * caller would get one last chance to assume responsibility for the
+ * NMI.
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
#define register_nmi_handler(t, fn, fg, n, init...) \
({ \
static struct nmiaction init fn##_na = { \
+ .list = LIST_HEAD_INIT(fn##_na.list), \
.handler = (fn), \
.name = (n), \
.flags = (fg), \
@@ -58,7 +88,18 @@ struct nmiaction {
int __register_nmi_handler(unsigned int, struct nmiaction *);
-void unregister_nmi_handler(unsigned int, const char *);
+/**
+ * unregister_nmi_handler - Unregister a handler for a specific NMI type
+ * @type: NMI type (e.g. NMI_LOCAL)
+ * @name: Name of the NMI handler used during registration
+ *
+ * Removes the handler associated with the specified NMI type from the
+ * NMI handler list. The "name" is used as a lookup key to identify the
+ * handler.
+ */
+void unregister_nmi_handler(unsigned int type, const char *name);
+
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
void stop_nmi(void);
void restart_nmi(void);
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 12f12b5cf2ca..cd94221d8335 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -2,146 +2,88 @@
#ifndef _ASM_X86_NOPS_H
#define _ASM_X86_NOPS_H
+#include <asm/asm.h>
+
/*
* Define nops for use with alternative() and for tracing.
- *
- * *_NOP5_ATOMIC must be a single instruction.
*/
-#define NOP_DS_PREFIX 0x3e
+#ifndef CONFIG_64BIT
-/* generic versions from gas
- 1: nop
- the following instructions are NOT nops in 64-bit mode,
- for 64-bit mode use K8 or P6 nops instead
- 2: movl %esi,%esi
- 3: leal 0x00(%esi),%esi
- 4: leal 0x00(,%esi,1),%esi
- 6: leal 0x00000000(%esi),%esi
- 7: leal 0x00000000(,%esi,1),%esi
-*/
-#define GENERIC_NOP1 0x90
-#define GENERIC_NOP2 0x89,0xf6
-#define GENERIC_NOP3 0x8d,0x76,0x00
-#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
-#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
-#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
-#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
-#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
-#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
+/*
+ * Generic 32bit nops from GAS:
+ *
+ * 1: nop
+ * 2: movl %esi,%esi
+ * 3: leal 0x0(%esi),%esi
+ * 4: leal 0x0(%esi,%eiz,1),%esi
+ * 5: leal %ds:0x0(%esi,%eiz,1),%esi
+ * 6: leal 0x0(%esi),%esi
+ * 7: leal 0x0(%esi,%eiz,1),%esi
+ * 8: leal %ds:0x0(%esi,%eiz,1),%esi
+ *
+ * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2
+ * nop instructions.
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x89,0xf6
+#define BYTES_NOP3 0x8d,0x76,0x00
+#define BYTES_NOP4 0x8d,0x74,0x26,0x00
+#define BYTES_NOP5 0x3e,BYTES_NOP4
+#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x3e,BYTES_NOP7
-/* Opteron 64bit nops
- 1: nop
- 2: osp nop
- 3: osp osp nop
- 4: osp osp osp nop
-*/
-#define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2 0x66,K8_NOP1
-#define K8_NOP3 0x66,K8_NOP2
-#define K8_NOP4 0x66,K8_NOP3
-#define K8_NOP5 K8_NOP3,K8_NOP2
-#define K8_NOP6 K8_NOP3,K8_NOP3
-#define K8_NOP7 K8_NOP4,K8_NOP3
-#define K8_NOP8 K8_NOP4,K8_NOP4
-#define K8_NOP5_ATOMIC 0x66,K8_NOP4
+#define ASM_NOP_MAX 8
-/* K7 nops
- uses eax dependencies (arbitrary choice)
- 1: nop
- 2: movl %eax,%eax
- 3: leal (,%eax,1),%eax
- 4: leal 0x00(,%eax,1),%eax
- 6: leal 0x00000000(%eax),%eax
- 7: leal 0x00000000(,%eax,1),%eax
-*/
-#define K7_NOP1 GENERIC_NOP1
-#define K7_NOP2 0x8b,0xc0
-#define K7_NOP3 0x8d,0x04,0x20
-#define K7_NOP4 0x8d,0x44,0x20,0x00
-#define K7_NOP5 K7_NOP4,K7_NOP1
-#define K7_NOP6 0x8d,0x80,0,0,0,0
-#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0
-#define K7_NOP8 K7_NOP7,K7_NOP1
-#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
+#else
-/* P6 nops
- uses eax dependencies (Intel-recommended choice)
- 1: nop
- 2: osp nop
- 3: nopl (%eax)
- 4: nopl 0x00(%eax)
- 5: nopl 0x00(%eax,%eax,1)
- 6: osp nopl 0x00(%eax,%eax,1)
- 7: nopl 0x00000000(%eax)
- 8: nopl 0x00000000(%eax,%eax,1)
- Note: All the above are assumed to be a single instruction.
- There is kernel code that depends on this.
-*/
-#define P6_NOP1 GENERIC_NOP1
-#define P6_NOP2 0x66,0x90
-#define P6_NOP3 0x0f,0x1f,0x00
-#define P6_NOP4 0x0f,0x1f,0x40,0
-#define P6_NOP5 0x0f,0x1f,0x44,0x00,0
-#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0
-#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0
-#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
-#define P6_NOP5_ATOMIC P6_NOP5
+/*
+ * Generic 64bit nops from GAS:
+ *
+ * 1: nop
+ * 2: osp nop
+ * 3: nopl (%eax)
+ * 4: nopl 0x00(%eax)
+ * 5: nopl 0x00(%eax,%eax,1)
+ * 6: osp nopl 0x00(%eax,%eax,1)
+ * 7: nopl 0x00000000(%eax)
+ * 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x66,BYTES_NOP1
+#define BYTES_NOP3 0x0f,0x1f,0x00
+#define BYTES_NOP4 0x0f,0x1f,0x40,0x00
+#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00
+#define BYTES_NOP6 0x66,BYTES_NOP5
+#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9 0x2e,BYTES_NOP8
+#define BYTES_NOP10 0x66,BYTES_NOP9
+#define BYTES_NOP11 0x66,BYTES_NOP10
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
+#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
-#if defined(CONFIG_MK7)
-#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
-#elif defined(CONFIG_X86_P6_NOP)
-#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
-#elif defined(CONFIG_X86_64)
-#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
-#else
-#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
-#endif
+#define ASM_NOP_MAX 11
-#define ASM_NOP_MAX 8
-#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */
+#endif /* CONFIG_64BIT */
+
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
-extern const unsigned char * const *ideal_nops;
-extern void arch_init_ideal_nops(void);
+#ifndef __ASSEMBLER__
+extern const unsigned char * const x86_nops[];
#endif
#endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index dad12b767ba0..4f4b5e8a1574 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,11 +4,102 @@
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
+#include <linux/objtool.h>
+#include <linux/linkage.h>
#include <asm/alternative.h>
-#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
+
+/*
+ * Call depth tracking for Intel SKL CPUs to address the RSB underflow
+ * issue in software.
+ *
+ * The tracking does not use a counter. It uses uses arithmetic shift
+ * right on call entry and logical shift left on return.
+ *
+ * The depth tracking variable is initialized to 0x8000.... when the call
+ * depth is zero. The arithmetic shift right sign extends the MSB and
+ * saturates after the 12th call. The shift count is 5 for both directions
+ * so the tracking covers 12 nested calls.
+ *
+ * Call
+ * 0: 0x8000000000000000 0x0000000000000000
+ * 1: 0xfc00000000000000 0xf000000000000000
+ * ...
+ * 11: 0xfffffffffffffff8 0xfffffffffffffc00
+ * 12: 0xffffffffffffffff 0xffffffffffffffe0
+ *
+ * After a return buffer fill the depth is credited 12 calls before the
+ * next stuffing has to take place.
+ *
+ * There is a inaccuracy for situations like this:
+ *
+ * 10 calls
+ * 5 returns
+ * 3 calls
+ * 4 returns
+ * 3 calls
+ * ....
+ *
+ * The shift count might cause this to be off by one in either direction,
+ * but there is still a cushion vs. the RSB depth. The algorithm does not
+ * claim to be perfect and it can be speculated around by the CPU, but it
+ * is considered that it obfuscates the problem enough to make exploitation
+ * extremely difficult.
+ */
+#define RET_DEPTH_SHIFT 5
+#define RSB_RET_STUFF_LOOPS 16
+#define RET_DEPTH_INIT 0x8000000000000000ULL
+#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
+#define RET_DEPTH_CREDIT 0xffffffffffffffffULL
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+# define CALL_THUNKS_DEBUG_INC_CALLS \
+ incq PER_CPU_VAR(__x86_call_count);
+# define CALL_THUNKS_DEBUG_INC_RETS \
+ incq PER_CPU_VAR(__x86_ret_count);
+# define CALL_THUNKS_DEBUG_INC_STUFFS \
+ incq PER_CPU_VAR(__x86_stuffs_count);
+# define CALL_THUNKS_DEBUG_INC_CTXSW \
+ incq PER_CPU_VAR(__x86_ctxsw_count);
+#else
+# define CALL_THUNKS_DEBUG_INC_CALLS
+# define CALL_THUNKS_DEBUG_INC_RETS
+# define CALL_THUNKS_DEBUG_INC_STUFFS
+# define CALL_THUNKS_DEBUG_INC_CTXSW
+#endif
+
+#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+
+#include <asm/asm-offsets.h>
+
+#define CREDIT_CALL_DEPTH \
+ movq $-1, PER_CPU_VAR(__x86_call_depth);
+
+#define RESET_CALL_DEPTH \
+ xor %eax, %eax; \
+ bts $63, %rax; \
+ movq %rax, PER_CPU_VAR(__x86_call_depth);
+
+#define RESET_CALL_DEPTH_FROM_CALL \
+ movb $0xfc, %al; \
+ shl $56, %rax; \
+ movq %rax, PER_CPU_VAR(__x86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#define INCREMENT_CALL_DEPTH \
+ sarq $5, PER_CPU_VAR(__x86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#else
+#define CREDIT_CALL_DEPTH
+#define RESET_CALL_DEPTH
+#define RESET_CALL_DEPTH_FROM_CALL
+#define INCREMENT_CALL_DEPTH
+#endif
/*
* Fill the CPU return stack buffer.
@@ -27,112 +118,121 @@
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
*/
+#define RETPOLINE_THUNK_SIZE 32
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
+ * the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- call 772f; \
-773: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- call 774f; \
-775: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
-
-#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence; \
+ CREDIT_CALL_DEPTH \
+ CALL_THUNKS_DEBUG_INC_CTXSW
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ .rept nr; \
+ __FILL_RETURN_SLOT; \
+ .endr; \
+ add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
*/
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.nospec
- .long .Lannotate_\@ - .
- .popsection
-.endm
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
+ lfence;
+
+#ifdef __ASSEMBLER__
/*
- * This should be used immediately before an indirect jump/call. It tells
- * objtool the subsequent indirect jump/call is vouched safe for retpoline
- * builds.
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
*/
-.macro ANNOTATE_RETPOLINE_SAFE
- .Lannotate_\@:
- .pushsection .discard.retpoline_safe
- _ASM_PTR .Lannotate_\@
- .popsection
-.endm
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
/*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into its own annotation.
*/
-.macro RETPOLINE_JMP reg:req
- call .Ldo_rop_\@
-.Lspec_trap_\@:
- pause
- lfence
- jmp .Lspec_trap_\@
-.Ldo_rop_\@:
- mov \reg, (%_ASM_SP)
- ret
+.macro VALIDATE_UNRET_END
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
+ ANNOTATE_RETPOLINE_SAFE
+ nop
+#endif
.endm
/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
-.macro RETPOLINE_CALL reg:req
- jmp .Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
- RETPOLINE_JMP \reg
-.Ldo_call_\@:
- call .Ldo_retpoline_jmp_\@
+.macro __CS_PREFIX reg:req
+ .irp rs,r8,r9,r10,r11,r12,r13,r14,r15
+ .ifc \reg,\rs
+ .byte 0x2e
+ .endif
+ .endr
.endm
/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
+ *
+ * NOTE: these do not take kCFI into account and are thus not comparable to C
+ * indirect calls, take care when using. The target of these should be an ENDBR
+ * instruction irrespective of kCFI.
*/
.macro JMP_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
- __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#ifdef CONFIG_MITIGATION_RETPOLINE
+ __CS_PREFIX \reg
+ jmp __x86_indirect_thunk_\reg
#else
- jmp *\reg
+ jmp *%\reg
+ int3
#endif
.endm
.macro CALL_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
- __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#ifdef CONFIG_MITIGATION_RETPOLINE
+ __CS_PREFIX \reg
+ call __x86_indirect_thunk_\reg
#else
- call *\reg
+ call *%\reg
#endif
.endm
@@ -140,48 +240,222 @@
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
- \ftr
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
+.endm
+
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+#endif
+.endm
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
+ * write_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
+#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
+ VALIDATE_UNRET_END
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
+ "call write_ibpb", \ibpb_feature, \
+ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+#define UNTRAIN_RET \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_VM \
+ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
+
+#define UNTRAIN_RET_FROM_CALL \
+ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)
+
+
+.macro CALL_DEPTH_ACCOUNT
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+ ALTERNATIVE "", \
+ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+/*
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+#ifdef CONFIG_X86_64
+#define VERW verw x86_verw_sel(%rip)
+#else
+/*
+ * In 32bit mode, the memory operand must be a %cs reference. The data segments
+ * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32).
+ */
+#define VERW verw %cs:x86_verw_sel
#endif
+
+/*
+ * Provide a stringified VERW macro for simple usage, and a non-stringified
+ * VERW macro for use in more elaborate sequences, e.g. to encode a conditional
+ * VERW within an ALTERNATIVE.
+ */
+#define __CLEAR_CPU_BUFFERS __stringify(VERW)
+
+/* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. */
+#define CLEAR_CPU_BUFFERS \
+ ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF
+
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT
.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
+#else /* __ASSEMBLER__ */
+
+#define ITS_THUNK_SIZE 64
+
+typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+typedef u8 its_thunk_t[ITS_THUNK_SIZE];
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+extern its_thunk_t __x86_indirect_its_thunk_array[];
+
+#ifdef CONFIG_MITIGATION_RETHUNK
+extern void __x86_return_thunk(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
+extern void retbleed_return_thunk(void);
+#else
+static inline void retbleed_return_thunk(void) {}
+#endif
+
+extern void srso_alias_untrain_ret(void);
+
+#ifdef CONFIG_MITIGATION_SRSO
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+#else
+static inline void srso_return_thunk(void) {}
+static inline void srso_alias_return_thunk(void) {}
+#endif
+
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_return_thunk(void);
+#else
+static inline void its_return_thunk(void) {}
+#endif
+
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
+extern void entry_untrain_ret(void);
+extern void write_ibpb(void);
+
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
+extern void (*x86_return_thunk)(void);
+
+extern void __warn_thunk(void);
+
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+extern void call_depth_return_thunk(void);
+
+#define CALL_DEPTH_ACCOUNT \
+ ALTERNATIVE("", \
+ __stringify(INCREMENT_CALL_DEPTH), \
+ X86_FEATURE_CALL_DEPTH)
+
+DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DECLARE_PER_CPU(u64, __x86_call_count);
+DECLARE_PER_CPU(u64, __x86_ret_count);
+DECLARE_PER_CPU(u64, __x86_stuffs_count);
+DECLARE_PER_CPU(u64, __x86_ctxsw_count);
+#endif
+#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
+
+static inline void call_depth_return_thunk(void) {}
+#define CALL_DEPTH_ACCOUNT ""
+
+#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
+
+#ifdef CONFIG_MITIGATION_RETPOLINE
-#else /* __ASSEMBLY__ */
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.nospec\n\t" \
- ".long 999b - .\n\t" \
- ".popsection\n\t"
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
-#define ANNOTATE_RETPOLINE_SAFE \
- "999:\n\t" \
- ".pushsection .discard.retpoline_safe\n\t" \
- _ASM_PTR " 999b\n\t" \
- ".popsection\n\t"
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
-#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
- * which is ensured when CONFIG_RETPOLINE is defined.
+ * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
+
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
#else /* CONFIG_X86_32 */
@@ -191,9 +465,8 @@
* here, anyway.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
@@ -202,16 +475,16 @@
" lfence;\n" \
" jmp 902b;\n" \
" .align 16\n" \
- "903: addl $4, %%esp;\n" \
+ "903: lea 4(%%esp), %%esp;\n" \
" pushl %[thunk_target];\n" \
" ret;\n" \
" .align 16\n" \
"904: call 901b;\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
@@ -223,9 +496,12 @@
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_GENERIC,
- SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS_ENHANCED,
+ SPECTRE_V2_RETPOLINE,
+ SPECTRE_V2_LFENCE,
+ SPECTRE_V2_EIBRS,
+ SPECTRE_V2_EIBRS_RETPOLINE,
+ SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@@ -240,35 +516,12 @@ enum spectre_v2_user_mitigation {
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_AUTO,
SPEC_STORE_BYPASS_DISABLE,
SPEC_STORE_BYPASS_PRCTL,
SPEC_STORE_BYPASS_SECCOMP,
};
-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
-/*
- * On VMEXIT we must ensure that no RSB predictions learned in the guest
- * can be followed in the host, by overwriting the RSB completely. Both
- * retpoline and IBRS mitigations for Spectre v2 need this; only on future
- * CPUs with IBRS_ALL *might* it be avoided.
- */
-static inline void vmexit_fill_RSB(void)
-{
-#ifdef CONFIG_RETPOLINE
- unsigned long loops;
-
- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE("jmp 910f",
- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
- X86_FEATURE_RETPOLINE)
- "910:"
- : "=r" (loops), ASM_CALL_CONSTRAINT
- : : "memory" );
-#endif
-}
-
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
@@ -280,15 +533,20 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
+DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+
static inline void indirect_branch_prediction_barrier(void)
{
- u64 val = PRED_CMD_IBPB;
-
- alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+ asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
+ : ASM_CALL_CONSTRAINT
+ :: "rax", "rcx", "rdx", "memory");
}
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void update_spec_ctrl_cond(u64 val);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -298,18 +556,18 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
@@ -318,65 +576,51 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-#endif /* __ASSEMBLY__ */
+DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- * callq do_rop
- * spec_trap:
- * pause
- * lfence
- * jmp spec_trap
- * do_rop:
- * mov %rax,(%rsp) for x86_64
- * mov %edx,(%esp) for x86_32
- * retq
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
+
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
+extern u16 x86_verw_sel;
+
+#include <asm/segment.h>
+
+/**
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
- * Without retpolines configured:
+ * This uses the otherwise unused and obsolete VERW instruction in
+ * combination with microcode which triggers a CPU buffer flush when the
+ * instruction is executed.
+ */
+static __always_inline void x86_clear_cpu_buffers(void)
+{
+ static const u16 ds = __KERNEL_DS;
+
+ /*
+ * Has to be the memory-operand variant because only that
+ * guarantees the CPU buffer flush functionality according to
+ * documentation. The register-operand variant does not.
+ * Works with any segment selector, but a valid writable
+ * data segment is the fastest variant.
+ *
+ * "cc" clobber is required because VERW modifies ZF.
+ */
+ asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
+}
+
+/**
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
- * jmp *%rax for x86_64
- * jmp *%edx for x86_32
+ * Clear CPU buffers if the corresponding static key is enabled
*/
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
- EMIT1(0xC3); /* retq */ \
-} while (0)
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* call do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
- EMIT1(0xC3); /* ret */ \
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
- EMIT2(0xFF, 0xE2) /* jmp *%edx */
-# endif
-#endif
+static __always_inline void x86_idle_clear_cpu_buffers(void)
+{
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
+}
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bbfde3d2662f..53ba39ce010c 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -3,21 +3,13 @@
#define _ASM_X86_NUMA_H
#include <linux/nodemask.h>
+#include <linux/errno.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#ifdef CONFIG_NUMA
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
extern int numa_off;
/*
@@ -31,9 +23,6 @@ extern int numa_off;
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
@@ -52,32 +41,24 @@ static inline int numa_cpu_node(int cpu)
}
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_X86_32
-# include <asm/numa_32.h>
-#endif
-
#ifdef CONFIG_NUMA
extern void numa_set_node(int cpu, int node);
extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
-extern void numa_add_cpu(int cpu);
-extern void numa_remove_cpu(int cpu);
+extern void numa_add_cpu(unsigned int cpu);
+extern void numa_remove_cpu(unsigned int cpu);
+extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
-static inline void numa_add_cpu(int cpu) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
+static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-void debug_cpumask_set_cpu(int cpu, int node, bool enable);
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
#endif
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-void numa_emu_cmdline(char *);
-#endif /* CONFIG_NUMA_EMU */
-
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
deleted file mode 100644
index 9c8e9e85be77..000000000000
--- a/arch/x86/include/asm/numa_32.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_NUMA_32_H
-#define _ASM_X86_NUMA_32_H
-
-#ifdef CONFIG_HIGHMEM
-extern void set_highmem_pages_init(void);
-#else
-static inline void set_highmem_pages_init(void)
-{
-}
-#endif
-
-#endif /* _ASM_X86_NUMA_32_H */
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index c2bf1de5d901..6fe76282aceb 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -9,12 +9,10 @@
struct olpc_platform_t {
int flags;
uint32_t boardrev;
- int ecver;
};
#define OLPC_F_PRESENT 0x01
#define OLPC_F_DCON 0x02
-#define OLPC_F_EC_WIDE_SCI 0x04
#ifdef CONFIG_OLPC
@@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev)
return olpc_platform_info.boardrev >= rev;
}
-extern void olpc_ec_wakeup_set(u16 value);
-extern void olpc_ec_wakeup_clear(u16 value);
-extern bool olpc_ec_wakeup_available(void);
-
-extern int olpc_ec_mask_write(u16 bits);
-extern int olpc_ec_sci_query(u16 *sci_value);
-
#else
static inline int machine_is_olpc(void)
@@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void)
return 0;
}
-static inline void olpc_ec_wakeup_set(u16 value) { }
-static inline void olpc_ec_wakeup_clear(u16 value) { }
-
-static inline bool olpc_ec_wakeup_available(void)
-{
- return false;
-}
-
#endif
#ifdef CONFIG_OLPC_XO1_PM
@@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value);
extern int pci_olpc_init(void);
-/* SCI source values */
-
-#define EC_SCI_SRC_EMPTY 0x00
-#define EC_SCI_SRC_GAME 0x01
-#define EC_SCI_SRC_BATTERY 0x02
-#define EC_SCI_SRC_BATSOC 0x04
-#define EC_SCI_SRC_BATERR 0x08
-#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */
-#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */
-#define EC_SCI_SRC_ACPWR 0x40
-#define EC_SCI_SRC_BATCRIT 0x80
-#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */
-#define EC_SCI_SRC_ALL 0x1FF
-
/* GPIO assignments */
#define OLPC_GPIO_MIC_AC 1
diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h
new file mode 100644
index 000000000000..07bacf3e160e
--- /dev/null
+++ b/arch/x86/include/asm/orc_header.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER \
+ __used __section(".orc_header") __aligned(4) \
+ static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
index 91c8d868424d..241631282e43 100644
--- a/arch/x86/include/asm/orc_lookup.h
+++ b/arch/x86/include/asm/orc_lookup.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ORC_LOOKUP_H
#define _ORC_LOOKUP_H
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 46f516dd80ce..e0125afa53fb 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ORC_TYPES_H
@@ -51,29 +39,15 @@
#define ORC_REG_SP_INDIRECT 9
#define ORC_REG_MAX 15
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct. They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL 0
-#define ORC_TYPE_REGS 1
-#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
+#define ORC_TYPE_UNDEFINED 0
+#define ORC_TYPE_END_OF_STACK 1
+#define ORC_TYPE_CALL 2
+#define ORC_TYPE_REGS 3
+#define ORC_TYPE_REGS_PARTIAL 4
+
+#ifndef __ASSEMBLER__
+#include <asm/byteorder.h>
-#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -85,25 +59,20 @@
struct orc_entry {
s16 sp_offset;
s16 bp_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
unsigned sp_reg:4;
unsigned bp_reg:4;
- unsigned type:2;
- unsigned end:1;
+ unsigned type:3;
+ unsigned signal:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ unsigned bp_reg:4;
+ unsigned sp_reg:4;
+ unsigned unused:4;
+ unsigned signal:1;
+ unsigned type:3;
+#endif
} __packed;
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48803a8..9265f2fca99a 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -14,7 +14,7 @@
#include <asm/page_32.h>
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct page;
@@ -34,9 +34,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
copy_page(to, from);
}
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
@@ -67,11 +66,25 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* virt_addr_valid(kaddr) returns true.
*/
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr))
-#endif /* __ASSEMBLY__ */
+static __always_inline void *pfn_to_kaddr(unsigned long pfn)
+{
+ return __va(pfn << PAGE_SHIFT);
+}
+
+static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return __canonical_address(vaddr, vaddr_bits) == vaddr;
+}
+
+#endif /* __ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 94dbd51df58f..0c623706cb7e 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -4,7 +4,7 @@
#include <asm/page_32_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET)
#ifdef CONFIG_DEBUG_VIRTUAL
@@ -15,23 +15,6 @@ extern unsigned long __phys_addr(unsigned long);
#define __phys_addr_symbol(x) __phys_addr(x)
#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif /* CONFIG_FLATMEM */
-
-#ifdef CONFIG_X86_USE_3DNOW
-#include <asm/mmx.h>
-
-static inline void clear_page(void *page)
-{
- mmx_clear_page(page);
-}
-
-static inline void copy_page(void *to, void *from)
-{
- mmx_copy_page(to, from);
-}
-#else /* !CONFIG_X86_USE_3DNOW */
#include <linux/string.h>
static inline void clear_page(void *page)
@@ -43,7 +26,6 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
-#endif /* CONFIG_X86_3DNOW */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0d5c739eebd7..623f1e9f493e 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -11,8 +11,8 @@
* a virtual address space of one gigabyte, which limits the
* amount of physical memory you can use to about 950MB.
*
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G
+ * and CONFIG_HIGHMEM4G options in the kernel configuration.
*/
#define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL)
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
@@ -22,11 +22,9 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 0
-#define DEBUG_STACK 0
-#define MCE_STACK 0
-#define N_EXCEPTION_STACKS 1
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+#define N_EXCEPTION_STACKS 1
#ifdef CONFIG_X86_PAE
/*
@@ -44,11 +42,28 @@
#endif /* CONFIG_X86_PAE */
/*
- * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ * User space process size: 3GB (default).
+ */
+#define IA32_PAGE_OFFSET __PAGE_OFFSET
+#define TASK_SIZE __PAGE_OFFSET
+#define TASK_SIZE_LOW TASK_SIZE
+#define TASK_SIZE_MAX TASK_SIZE
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP TASK_SIZE
+#define STACK_TOP_MAX STACK_TOP
+
+/*
+ * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual
+ * address for the kernel image, rather than the limit on the size itself. On
+ * 32-bit, this is not a strict limit, but this value is used to limit the
+ * link-time virtual address range of the kernel, and by KASLR to limit the
+ * randomized address from which the kernel is executed. A relocatable kernel
+ * can be loaded somewhat higher than KERNEL_IMAGE_SIZE as long as enough space
+ * remains for the vmalloc area.
*/
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This much address space is reserved for vmalloc() and iomap()
@@ -58,8 +73,7 @@ extern unsigned int __VMALLOC_RESERVE;
extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
-extern void setup_bootmem_allocator(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_DEFS_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..2f0e47be79a4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -4,9 +4,13 @@
#include <asm/page_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <linux/kmsan-checks.h>
+#include <linux/mmdebug.h>
+
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;
@@ -14,8 +18,9 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
+extern unsigned long direct_map_physmem_end;
-static inline unsigned long __phys_addr_nodebug(unsigned long x)
+static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
@@ -27,36 +32,79 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x)
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
-extern unsigned long __phys_addr_symbol(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
-#define __phys_addr_symbol(x) \
- ((unsigned long)(x) - __START_KERNEL_map + phys_base)
#endif
-#define __phys_reloc_hide(x) (x)
+static inline unsigned long __phys_addr_symbol(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_pfn)
-#endif
+ /* only check upper bounds since lower bounds will trigger carry */
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+
+ return y + phys_base;
+}
+
+#define __phys_reloc_hide(x) (x)
void clear_page_orig(void *page);
void clear_page_rep(void *page);
void clear_page_erms(void *page);
+KCFI_REFERENCE(clear_page_orig);
+KCFI_REFERENCE(clear_page_rep);
+KCFI_REFERENCE(clear_page_erms);
static inline void clear_page(void *page)
{
+ /*
+ * Clean up KMSAN metadata for the page being cleared. The assembly call
+ * below clobbers @page, so we perform unpoisoning before it.
+ */
+ kmsan_unpoison_memory(page, PAGE_SIZE);
alternative_call_2(clear_page_orig,
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
- "0" (page)
- : "cc", "memory", "rax", "rcx");
+ "D" (page),
+ "cc", "memory", "rax", "rcx");
}
void copy_page(void *to, void *from);
+KCFI_REFERENCE(copy_page);
+
+/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+static __always_inline unsigned long task_size_max(void)
+{
+ unsigned long ret;
+
+ alternative_io("movq %[small],%0","movq %[large],%0",
+ X86_FEATURE_LA57,
+ "=r" (ret),
+ [small] "i" ((1ul << 47)-PAGE_SIZE),
+ [large] "i" ((1ul << 56)-PAGE_SIZE));
+
+ return ret;
+}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..7400dab373fe 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/kaslr.h>
#endif
@@ -14,22 +14,21 @@
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE - 1))
-#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
+#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 2
-#define DEBUG_STACK 3
-#define MCE_STACK 4
-#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
+/*
+ * The index for the tss.ist[] array. The hardware limit is 7 entries.
+ */
+#define IST_INDEX_DF 0
+#define IST_INDEX_NMI 1
+#define IST_INDEX_DB 2
+#define IST_INDEX_MCE 3
+#define IST_INDEX_VC 4
/*
* Set __PAGE_OFFSET to the most negative possible address +
@@ -42,27 +41,39 @@
#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL)
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
-#else
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
-#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 52
-
-#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
-#else
-#define __VIRTUAL_MASK_SHIFT 47
-#endif
+
+#define TASK_SIZE_MAX task_size_max()
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+ 0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
+ IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
+#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
/*
- * Maximum kernel image size is limited to 1 GiB, due to the fixmap living
- * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
+ * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual
+ * address for the kernel image, rather than the limit on the size itself.
+ * This can be at most 1 GiB, due to the fixmap living in the next 1 GiB (see
+ * level2_kernel_pgt in arch/x86/kernel/head_64.S).
*
* On KASLR use 1 GiB by default, leaving 1 GiB for modules once the
* page tables are fully set up.
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c85e15010f48..018a8d906ca3 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -6,25 +6,16 @@
#include <linux/types.h>
#include <linux/mem_encrypt.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
-#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
-
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+#include <vdso/page.h>
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
+/* Cast P*D_MASK to a signed type so that it is sign-extended if
virtual addresses are 32-bits but physical addresses are larger
(ie, 32-bit PAE). */
#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK)
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
@@ -35,14 +26,12 @@
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define VM_DATA_DEFAULT_FLAGS \
- (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \
- CONFIG_PHYSICAL_ALIGN)
+/* Physical address where kernel should be loaded. */
+#define LOAD_PHYSICAL_ADDR __ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1)
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR)
#ifdef CONFIG_X86_64
#include <asm/page_64_types.h>
@@ -52,7 +41,7 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
extern phys_addr_t physical_mask;
@@ -73,11 +62,8 @@ static inline phys_addr_t get_max_mapped(void)
bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-
extern void initmem_init(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a97f28d914d5..b5e59a7ba0d0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -4,22 +4,35 @@
/* Various instructions on x86 need to be replaced for
* para-virtualization: those hooks are defined here. */
+#include <asm/paravirt_types.h>
+
+#ifndef __ASSEMBLER__
+struct mm_struct;
+#endif
+
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>
-#include <asm/paravirt_types.h>
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
+#include <linux/static_call_types.h>
#include <asm/frame.h>
-static inline unsigned long long paravirt_sched_clock(void)
+u64 dummy_steal_clock(int cpu);
+u64 dummy_sched_clock(void);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void));
+
+static __always_inline u64 paravirt_sched_clock(void)
{
- return PVOP_CALL0(unsigned long long, time.sched_clock);
+ return static_call(pv_sched_clock)();
}
struct static_key;
@@ -33,21 +46,31 @@ bool pv_is_native_vcpu_is_preempted(void);
static inline u64 paravirt_steal_clock(int cpu)
{
- return PVOP_CALL1(u64, time.steal_clock, cpu);
+ return static_call(pv_steal_clock)(cpu);
}
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
#ifdef REALLY_SLOW_IO
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
#endif
}
-static inline void __flush_tlb(void)
+void native_flush_tlb_local(void);
+void native_flush_tlb_global(void);
+void native_flush_tlb_one_user(unsigned long addr);
+void native_flush_tlb_multi(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info);
+
+static inline void __flush_tlb_local(void)
{
PVOP_VCALL0(mmu.flush_tlb_user);
}
@@ -62,20 +85,31 @@ static inline void __flush_tlb_one_user(unsigned long addr)
PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
}
-static inline void flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info)
+static inline void __flush_tlb_multi(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
{
- PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
+ PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}
-static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
+static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
+ PVOP_VCALL1(mmu.exit_mmap, mm);
}
-static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+ int npages, bool enc)
{
- PVOP_VCALL1(mmu.exit_mmap, mm);
+ PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
+static __always_inline void arch_safe_halt(void)
+{
+ PVOP_VCALL0(irq.safe_halt);
+}
+
+static inline void halt(void)
+{
+ PVOP_VCALL0(irq.halt);
}
#ifdef CONFIG_PARAVIRT_XXL
@@ -94,12 +128,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
/*
* These special macros can be used to get or set a debugging register
*/
-static inline unsigned long paravirt_get_debugreg(int reg)
+static __always_inline unsigned long paravirt_get_debugreg(int reg)
{
return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
-static inline void set_debugreg(unsigned long val, int reg)
+static __always_inline void set_debugreg(unsigned long val, int reg)
{
PVOP_VCALL2(cpu.set_debugreg, reg, val);
}
@@ -114,24 +148,26 @@ static inline void write_cr0(unsigned long x)
PVOP_VCALL1(cpu.write_cr0, x);
}
-static inline unsigned long read_cr2(void)
+static __always_inline unsigned long read_cr2(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr2);
+ return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+ "mov %%cr2, %%rax;", ALT_NOT_XEN);
}
-static inline void write_cr2(unsigned long x)
+static __always_inline void write_cr2(unsigned long x)
{
PVOP_VCALL1(mmu.write_cr2, x);
}
static inline unsigned long __read_cr3(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr3);
+ return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT_XEN);
}
static inline void write_cr3(unsigned long x)
{
- PVOP_VCALL1(mmu.write_cr3, x);
+ PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
}
static inline void __write_cr4(unsigned long x)
@@ -139,55 +175,24 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-#ifdef CONFIG_X86_64
-static inline unsigned long read_cr8(void)
-{
- return PVOP_CALL0(unsigned long, cpu.read_cr8);
-}
-
-static inline void write_cr8(unsigned long x)
-{
- PVOP_VCALL1(cpu.write_cr8, x);
-}
-#endif
-
-static inline void arch_safe_halt(void)
-{
- PVOP_VCALL0(irq.safe_halt);
-}
-
-static inline void halt(void)
-{
- PVOP_VCALL0(irq.halt);
-}
-
-static inline void wbinvd(void)
-{
- PVOP_VCALL0(cpu.wbinvd);
-}
-
-#define get_kernel_rpl() (pv_info.kernel_rpl)
-
-static inline u64 paravirt_read_msr(unsigned msr)
+static inline u64 paravirt_read_msr(u32 msr)
{
return PVOP_CALL1(u64, cpu.read_msr, msr);
}
-static inline void paravirt_write_msr(unsigned msr,
- unsigned low, unsigned high)
+static inline void paravirt_write_msr(u32 msr, u64 val)
{
- PVOP_VCALL3(cpu.write_msr, msr, low, high);
+ PVOP_VCALL2(cpu.write_msr, msr, val);
}
-static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+static inline int paravirt_read_msr_safe(u32 msr, u64 *val)
{
- return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
+ return PVOP_CALL2(int, cpu.read_msr_safe, msr, val);
}
-static inline int paravirt_write_msr_safe(unsigned msr,
- unsigned low, unsigned high)
+static inline int paravirt_write_msr_safe(u32 msr, u64 val)
{
- return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high);
+ return PVOP_CALL2(int, cpu.write_msr_safe, msr, val);
}
#define rdmsr(msr, val1, val2) \
@@ -197,55 +202,46 @@ do { \
val2 = _l >> 32; \
} while (0)
-#define wrmsr(msr, val1, val2) \
-do { \
- paravirt_write_msr(msr, val1, val2); \
-} while (0)
+static __always_inline void wrmsr(u32 msr, u32 low, u32 high)
+{
+ paravirt_write_msr(msr, (u64)high << 32 | low);
+}
-#define rdmsrl(msr, val) \
+#define rdmsrq(msr, val) \
do { \
val = paravirt_read_msr(msr); \
} while (0)
-static inline void wrmsrl(unsigned msr, u64 val)
+static inline void wrmsrq(u32 msr, u64 val)
{
- wrmsr(msr, (u32)val, (u32)(val>>32));
+ paravirt_write_msr(msr, val);
}
-#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b)
+static inline int wrmsrq_safe(u32 msr, u64 val)
+{
+ return paravirt_write_msr_safe(msr, val);
+}
/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b) \
({ \
- int _err; \
- u64 _l = paravirt_read_msr_safe(msr, &_err); \
+ u64 _l; \
+ int _err = paravirt_read_msr_safe((msr), &_l); \
(*a) = (u32)_l; \
- (*b) = _l >> 32; \
+ (*b) = (u32)(_l >> 32); \
_err; \
})
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
+static __always_inline int rdmsrq_safe(u32 msr, u64 *p)
{
- int err;
-
- *p = paravirt_read_msr_safe(msr, &err);
- return err;
+ return paravirt_read_msr_safe(msr, p);
}
-static inline unsigned long long paravirt_read_pmc(int counter)
+static __always_inline u64 rdpmc(int counter)
{
return PVOP_CALL1(u64, cpu.read_pmc, counter);
}
-#define rdpmc(counter, low, high) \
-do { \
- u64 _l = paravirt_read_pmc(counter); \
- low = (u32)_l; \
- high = _l >> 32; \
-} while (0)
-
-#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
-
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(cpu.alloc_ldt, ldt, entries);
@@ -283,12 +279,10 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
PVOP_VCALL2(cpu.load_tls, t, cpu);
}
-#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
PVOP_VCALL1(cpu.load_gs_index, gs);
}
-#endif
static inline void write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
@@ -306,21 +300,22 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
-static inline void set_iopl_mask(unsigned mask)
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_invalidate_io_bitmap(void)
{
- PVOP_VCALL1(cpu.set_iopl_mask, mask);
+ PVOP_VCALL0(cpu.invalidate_io_bitmap);
}
-static inline void paravirt_activate_mm(struct mm_struct *prev,
- struct mm_struct *next)
+static inline void tss_update_io_bitmap(void)
{
- PVOP_VCALL2(mmu.activate_mm, prev, next);
+ PVOP_VCALL0(cpu.update_io_bitmap);
}
+#endif
-static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline void paravirt_enter_mmap(struct mm_struct *next)
{
- PVOP_VCALL2(mmu.dup_mmap, oldmm, mm);
+ PVOP_VCALL1(mmu.enter_mmap, next);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
@@ -373,157 +368,92 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val);
-
- return (pte_t) { .pte = ret };
+ return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pteval_t pte_val(pte_t pte)
{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t, mmu.pte_val,
- pte.pte, (u64)pte.pte >> 32);
- else
- ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
-
- return ret;
+ return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline pgd_t __pgd(pgdval_t val)
{
- pgdval_t ret;
-
- if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val);
-
- return (pgd_t) { ret };
+ return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pgdval_t pgd_val(pgd_t pgd)
{
- pgdval_t ret;
-
- if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val,
- pgd.pgd, (u64)pgd.pgd >> 32);
- else
- ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
-
- return ret;
+ return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep)
{
pteval_t ret;
- ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep);
+ ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep);
return (pte_t) { .pte = ret };
}
-static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
- if (sizeof(pteval_t) > sizeof(long))
- /* 5 arg words */
- pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte);
- else
- PVOP_VCALL4(mmu.ptep_modify_prot_commit,
- mm, addr, ptep, pte.pte);
-}
-static inline void set_pte(pte_t *ptep, pte_t pte)
-{
- if (sizeof(pteval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32);
- else
- PVOP_VCALL2(mmu.set_pte, ptep, pte.pte);
+ PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte);
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void set_pte(pte_t *ptep, pte_t pte)
{
- if (sizeof(pteval_t) > sizeof(long))
- /* 5 arg words */
- pv_ops.mmu.set_pte_at(mm, addr, ptep, pte);
- else
- PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte);
+ PVOP_VCALL2(mmu.set_pte, ptep, pte.pte);
}
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- pmdval_t val = native_pmd_val(pmd);
-
- if (sizeof(pmdval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32);
- else
- PVOP_VCALL2(mmu.set_pmd, pmdp, val);
+ PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd));
}
-#if CONFIG_PGTABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
- pmdval_t ret;
-
- if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val);
-
- return (pmd_t) { ret };
+ return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}
static inline pmdval_t pmd_val(pmd_t pmd)
{
- pmdval_t ret;
-
- if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val,
- pmd.pmd, (u64)pmd.pmd >> 32);
- else
- ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
-
- return ret;
+ return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- pudval_t val = native_pud_val(pud);
-
- if (sizeof(pudval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32);
- else
- PVOP_VCALL2(mmu.set_pud, pudp, val);
+ PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud));
}
-#if CONFIG_PGTABLE_LEVELS >= 4
+
static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
- ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val);
+ ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
return (pud_t) { ret };
}
static inline pudval_t pud_val(pud_t pud)
{
- return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud);
+ return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void pud_clear(pud_t *pudp)
{
- set_pud(pudp, __pud(0));
+ set_pud(pudp, native_make_pud(0));
}
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -533,18 +463,18 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
PVOP_VCALL2(mmu.set_p4d, p4dp, val);
}
-#if CONFIG_PGTABLE_LEVELS >= 5
-
static inline p4d_t __p4d(p4dval_t val)
{
- p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val);
+ p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
return (p4d_t) { ret };
}
static inline p4dval_t p4d_val(p4d_t p4d)
{
- return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d);
+ return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -560,40 +490,15 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
} while (0)
#define pgd_clear(pgdp) do { \
- if (pgtable_l5_enabled()) \
- set_pgd(pgdp, __pgd(0)); \
+ if (pgtable_l5_enabled()) \
+ set_pgd(pgdp, native_make_pgd(0)); \
} while (0)
-#endif /* CONFIG_PGTABLE_LEVELS == 5 */
-
static inline void p4d_clear(p4d_t *p4dp)
{
- set_p4d(p4dp, __p4d(0));
+ set_p4d(p4dp, native_make_p4d(0));
}
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
-
-#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
-
-#ifdef CONFIG_X86_PAE
-/* Special-case pte-setting operations for PAE, which can't update a
- 64-bit pte atomically */
-static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- PVOP_VCALL1(mmu.pmd_clear, pmdp);
-}
-#else /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_pte(ptep, pte);
@@ -602,14 +507,13 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- set_pte_at(mm, addr, ptep, __pte(0));
+ set_pte(ptep, native_make_pte(0));
}
static inline void pmd_clear(pmd_t *pmdp)
{
- set_pmd(pmdp, __pmd(0));
+ set_pmd(pmdp, native_make_pmd(0));
}
-#endif /* CONFIG_X86_PAE */
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
@@ -655,7 +559,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
{
- PVOP_VCALLEE1(lock.queued_spin_unlock, lock);
+ PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock,
+ "movb $0, (%%" _ASM_ARG1 ");",
+ ALT_NOT(X86_FEATURE_PVUNLOCK));
}
static __always_inline void pv_wait(u8 *ptr, u8 val)
@@ -670,7 +576,9 @@ static __always_inline void pv_kick(int cpu)
static __always_inline bool pv_vcpu_is_preempted(long cpu)
{
- return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu);
+ return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu,
+ "xor %%" _ASM_AX ", %%" _ASM_AX ";",
+ ALT_NOT(X86_FEATURE_VCPUPREEMPT));
}
void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock);
@@ -679,16 +587,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
-#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
-#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
-
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
-
-#define PV_FLAGS_ARG "0"
-#define PV_EXTRA_CLOBBERS
-#define PV_VEXTRA_CLOBBERS
#else
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS \
@@ -709,14 +610,6 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
"pop %rsi;" \
"pop %rdx;" \
"pop %rcx;"
-
-/* We save some registers, but all of them, that's too much. We clobber all
- * caller saved registers but the argument parameter */
-#define PV_SAVE_REGS "pushq %%rdi;"
-#define PV_RESTORE_REGS "popq %%rdi;"
-#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
-#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
-#define PV_FLAGS_ARG "D"
#endif
/*
@@ -732,21 +625,27 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
* functions.
*/
#define PV_THUNK_NAME(func) "__raw_callee_save_" #func
-#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \
extern typeof(func) __raw_callee_save_##func; \
\
- asm(".pushsection .text;" \
+ asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
+ ASM_FUNC_ALIGN \
PV_THUNK_NAME(func) ":" \
+ ASM_ENDBR \
FRAME_BEGIN \
PV_SAVE_ALL_CALLER_REGS \
"call " #func ";" \
PV_RESTORE_ALL_CALLER_REGS \
FRAME_END \
- "ret;" \
+ ASM_RET \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
+#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+ __PV_CALLEE_SAVE_REGS_THUNK(func, ".text")
+
/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { __raw_callee_save_##func })
@@ -756,27 +655,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
((struct paravirt_callee_save) { func })
#ifdef CONFIG_PARAVIRT_XXL
-static inline notrace unsigned long arch_local_save_flags(void)
+static __always_inline unsigned long arch_local_save_flags(void)
{
- return PVOP_CALLEE0(unsigned long, irq.save_fl);
+ return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
+ ALT_NOT_XEN);
}
-static inline notrace void arch_local_irq_restore(unsigned long f)
+static __always_inline void arch_local_irq_disable(void)
{
- PVOP_VCALLEE1(irq.restore_fl, f);
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN);
}
-static inline notrace void arch_local_irq_disable(void)
+static __always_inline void arch_local_irq_enable(void)
{
- PVOP_VCALLEE0(irq.irq_disable);
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN);
}
-static inline notrace void arch_local_irq_enable(void)
-{
- PVOP_VCALLEE0(irq.irq_enable);
-}
-
-static inline notrace unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long arch_local_irq_save(void)
{
unsigned long f;
@@ -803,144 +698,42 @@ static inline notrace unsigned long arch_local_irq_save(void)
#undef PVOP_CALL4
extern void default_banner(void);
+void native_pv_lock_init(void) __init;
-#else /* __ASSEMBLY__ */
-
-#define _PVSITE(ptype, ops, word, algn) \
-771:; \
- ops; \
-772:; \
- .pushsection .parainstructions,"a"; \
- .align algn; \
- word 771b; \
- .byte ptype; \
- .byte 772b-771b; \
- .popsection
-
-
-#define COND_PUSH(set, mask, reg) \
- .if ((~(set)) & mask); push %reg; .endif
-#define COND_POP(set, mask, reg) \
- .if ((~(set)) & mask); pop %reg; .endif
-
-#ifdef CONFIG_X86_64
-
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_RAX, rax); \
- COND_PUSH(set, CLBR_RCX, rcx); \
- COND_PUSH(set, CLBR_RDX, rdx); \
- COND_PUSH(set, CLBR_RSI, rsi); \
- COND_PUSH(set, CLBR_RDI, rdi); \
- COND_PUSH(set, CLBR_R8, r8); \
- COND_PUSH(set, CLBR_R9, r9); \
- COND_PUSH(set, CLBR_R10, r10); \
- COND_PUSH(set, CLBR_R11, r11)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_R11, r11); \
- COND_POP(set, CLBR_R10, r10); \
- COND_POP(set, CLBR_R9, r9); \
- COND_POP(set, CLBR_R8, r8); \
- COND_POP(set, CLBR_RDI, rdi); \
- COND_POP(set, CLBR_RSI, rsi); \
- COND_POP(set, CLBR_RDX, rdx); \
- COND_POP(set, CLBR_RCX, rcx); \
- COND_POP(set, CLBR_RAX, rax)
-
-#define PARA_PATCH(off) ((off) / 8)
-#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
-#define PARA_INDIRECT(addr) *addr(%rip)
-#else
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_EAX, eax); \
- COND_PUSH(set, CLBR_EDI, edi); \
- COND_PUSH(set, CLBR_ECX, ecx); \
- COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_EDX, edx); \
- COND_POP(set, CLBR_ECX, ecx); \
- COND_POP(set, CLBR_EDI, edi); \
- COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off) ((off) / 4)
-#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr) *%cs:addr
-#endif
-
-#ifdef CONFIG_PARAVIRT_XXL
-#define INTERRUPT_RETURN \
- PARA_SITE(PARA_PATCH(PV_CPU_iret), \
- ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
-
-#define DISABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#endif
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
-/*
- * If swapgs is used while the userspace stack is still current,
- * there's no way to call a pvop. The PV replacement *must* be
- * inlined, or the swapgs instruction must be trapped and emulated.
- */
-#define SWAPGS_UNSAFE_STACK \
- PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs)
-
-/*
- * Note: swapgs is very special, and in practise is either going to be
- * implemented with a single "swapgs" instruction or something very
- * special. Either way, we don't need to save any registers for
- * it.
- */
-#define SWAPGS \
- PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
- )
-#endif
+#ifdef CONFIG_DEBUG_ENTRY
-#define GET_CR2_INTO_RAX \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
+#define PARA_INDIRECT(addr) *addr(%rip)
-#ifdef CONFIG_PARAVIRT_XXL
-#define USERGS_SYSRET64 \
- PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
- ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)
+.macro PARA_IRQ_save_fl
+ ANNOTATE_RETPOLINE_SAFE;
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);
+.endm
-#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \
+ "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \
+ "pushf; pop %rax;", ALT_NOT_XEN
#endif
-#endif
-
-#endif /* CONFIG_X86_32 */
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#else /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
+
+#ifndef __ASSEMBLER__
+static inline void native_pv_lock_init(void)
+{
+}
+#endif
#endif /* !CONFIG_PARAVIRT */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef CONFIG_PARAVIRT_XXL
-static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline void paravirt_enter_mmap(struct mm_struct *mm)
{
}
#endif
@@ -950,5 +743,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
}
#endif
-#endif /* __ASSEMBLY__ */
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/x86/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 488c59686a73..3502939415ad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -2,46 +2,12 @@
#ifndef _ASM_X86_PARAVIRT_TYPES_H
#define _ASM_X86_PARAVIRT_TYPES_H
-/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0
-#define CLBR_EAX (1 << 0)
-#define CLBR_ECX (1 << 1)
-#define CLBR_EDX (1 << 2)
-#define CLBR_EDI (1 << 3)
+#ifdef CONFIG_PARAVIRT
-#ifdef CONFIG_X86_32
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY ((1 << 4) - 1)
-
-#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
-#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
-#define CLBR_SCRATCH (0)
-#else
-#define CLBR_RAX CLBR_EAX
-#define CLBR_RCX CLBR_ECX
-#define CLBR_RDX CLBR_EDX
-#define CLBR_RDI CLBR_EDI
-#define CLBR_RSI (1 << 4)
-#define CLBR_R8 (1 << 5)
-#define CLBR_R9 (1 << 6)
-#define CLBR_R10 (1 << 7)
-#define CLBR_R11 (1 << 8)
-
-#define CLBR_ANY ((1 << 9) - 1)
-
-#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
- CLBR_RCX | CLBR_R8 | CLBR_R9)
-#define CLBR_RET_REG (CLBR_RAX)
-#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
-
-#endif /* X86_64 */
-
-#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
#include <asm/desc_defs.h>
-#include <asm/kmap_types.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
@@ -55,6 +21,7 @@ struct task_struct;
struct cpumask;
struct flush_tlb_info;
struct mmu_gather;
+struct vm_area_struct;
/*
* Wrapper type for pointers to code which uses the non-standard
@@ -67,30 +34,12 @@ struct paravirt_callee_save {
/* general info */
struct pv_info {
#ifdef CONFIG_PARAVIRT_XXL
- unsigned int kernel_rpl;
- int shared_kernel_pmd;
-
-#ifdef CONFIG_X86_64
u16 extra_user_64bit_cs; /* __USER_CS if none */
#endif
-#endif
const char *name;
};
-struct pv_init_ops {
- /*
- * Patch may replace one of the defined code sequences with
- * arbitrary code, subject to the same register constraints.
- * This generally means the code is not free to clobber any
- * registers other than EAX. The patch function should return
- * the number of bytes of code generated, as we nop pad the
- * rest in generic code.
- */
- unsigned (*patch)(u8 type, void *insnbuf,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
#ifdef CONFIG_PARAVIRT_XXL
struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -100,11 +49,6 @@ struct pv_lazy_ops {
} __no_randomize_layout;
#endif
-struct pv_time_ops {
- unsigned long long (*sched_clock)(void);
- unsigned long long (*steal_clock)(int cpu);
-} __no_randomize_layout;
-
struct pv_cpu_ops {
/* hooks for various privileged instructions */
void (*io_delay)(void);
@@ -118,11 +62,6 @@ struct pv_cpu_ops {
void (*write_cr4)(unsigned long);
-#ifdef CONFIG_X86_64
- unsigned long (*read_cr8)(void);
- void (*write_cr8)(unsigned long);
-#endif
-
/* Segment descriptor handling */
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
@@ -130,9 +69,7 @@ struct pv_cpu_ops {
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
-#ifdef CONFIG_X86_64
void (*load_gs_index)(unsigned int idx);
-#endif
void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
const void *desc);
void (*write_gdt_entry)(struct desc_struct *,
@@ -144,41 +81,28 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
- void (*set_iopl_mask)(unsigned mask);
-
- void (*wbinvd)(void);
+#ifdef CONFIG_X86_IOPL_IOPERM
+ void (*invalidate_io_bitmap)(void);
+ void (*update_io_bitmap)(void);
+#endif
/* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
/* Unsafe MSR operations. These will warn or panic on failure. */
- u64 (*read_msr)(unsigned int msr);
- void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+ u64 (*read_msr)(u32 msr);
+ void (*write_msr)(u32 msr, u64 val);
/*
* Safe MSR operations.
- * read sets err to 0 or -EIO. write returns 0 or -EIO.
+ * Returns 0 or -EIO.
*/
- u64 (*read_msr_safe)(unsigned int msr, int *err);
- int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
+ int (*read_msr_safe)(u32 msr, u64 *val);
+ int (*write_msr_safe)(u32 msr, u64 val);
u64 (*read_pmc)(int counter);
- /*
- * Switch to usermode gs and return to 64-bit usermode using
- * sysret. Only used in 64-bit kernels to return to 64-bit
- * processes. Usermode register state, including %rsp, must
- * already be restored.
- */
- void (*usergs_sysret64)(void);
-
- /* Normal iret. Jump to this with the standard iret stack
- frame set up. */
- void (*iret)(void);
-
- void (*swapgs)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
#endif
@@ -187,22 +111,18 @@ struct pv_cpu_ops {
struct pv_irq_ops {
#ifdef CONFIG_PARAVIRT_XXL
/*
- * Get/set interrupt state. save_fl and restore_fl are only
- * expected to use X86_EFLAGS_IF; all other bits
- * returned from save_fl are undefined, and may be ignored by
- * restore_fl.
+ * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF;
+ * all other bits returned from save_fl are undefined.
*
* NOTE: These functions callers expect the callee to preserve
* more registers than the standard C calling convention.
*/
struct paravirt_callee_save save_fl;
- struct paravirt_callee_save restore_fl;
struct paravirt_callee_save irq_disable;
struct paravirt_callee_save irq_enable;
-
+#endif
void (*safe_halt)(void);
void (*halt)(void);
-#endif
} __no_randomize_layout;
struct pv_mmu_ops {
@@ -210,26 +130,22 @@ struct pv_mmu_ops {
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_one_user)(unsigned long addr);
- void (*flush_tlb_others)(const struct cpumask *cpus,
- const struct flush_tlb_info *info);
-
- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
+ void (*flush_tlb_multi)(const struct cpumask *cpus,
+ const struct flush_tlb_info *info);
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
+ void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
#ifdef CONFIG_PARAVIRT_XXL
- unsigned long (*read_cr2)(void);
+ struct paravirt_callee_save read_cr2;
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
void (*write_cr3)(unsigned long);
- /* Hooks for intercepting the creation/use of an mm_struct. */
- void (*activate_mm)(struct mm_struct *prev,
- struct mm_struct *next);
- void (*dup_mmap)(struct mm_struct *oldmm,
- struct mm_struct *mm);
+ /* Hook for intercepting the creation/use of an mm_struct. */
+ void (*enter_mmap)(struct mm_struct *mm);
/* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm);
@@ -250,13 +166,11 @@ struct pv_mmu_ops {
/* Pagetable manipulation functions */
void (*set_pte)(pte_t *ptep, pte_t pteval);
- void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+ pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep);
- void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
+ void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte);
struct paravirt_callee_save pte_val;
@@ -265,36 +179,20 @@ struct pv_mmu_ops {
struct paravirt_callee_save pgd_val;
struct paravirt_callee_save make_pgd;
-#if CONFIG_PGTABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
- void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
- void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
- void (*pmd_clear)(pmd_t *pmdp);
-
-#endif /* CONFIG_X86_PAE */
-
void (*set_pud)(pud_t *pudp, pud_t pudval);
struct paravirt_callee_save pmd_val;
struct paravirt_callee_save make_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
struct paravirt_callee_save pud_val;
struct paravirt_callee_save make_pud;
void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
-#if CONFIG_PGTABLE_LEVELS >= 5
struct paravirt_callee_save p4d_val;
struct paravirt_callee_save make_p4d;
void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
-
-#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
-
-#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
struct pv_lazy_ops lazy_mode;
@@ -328,8 +226,6 @@ struct pv_lock_ops {
* number for each function using the offset which we use to indicate
* what to patch. */
struct paravirt_patch_template {
- struct pv_init_ops init;
- struct pv_time_ops time;
struct pv_cpu_ops cpu;
struct pv_irq_ops irq;
struct pv_mmu_ops mmu;
@@ -339,60 +235,22 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
-#define PARAVIRT_PATCH(x) \
- (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
-
-#define paravirt_type(op) \
- [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(pv_ops.op))
-#define paravirt_clobber(clobber) \
- [paravirt_clobber] "i" (clobber)
-
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type, clobber) \
- "771:\n\t" insn_string "\n" "772:\n" \
- ".pushsection .parainstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR " 771b\n" \
- " .byte " type "\n" \
- " .byte 772b-771b\n" \
- " .short " clobber "\n" \
- ".popsection\n"
-
-/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string) \
- _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
-
-/* Simple instruction patching code. */
-#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-
-#define DEF_NATIVE(ops, name, code) \
- __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
- unsigned long addr, unsigned len);
-
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
- const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
-
-int paravirt_disable_iospace(void);
+#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
/*
* This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
+ *
+ * Since alternatives run after enabling CET/IBT -- the latter setting/clearing
+ * capabilities and the former requiring all capabilities being finalized --
+ * these indirect calls are subject to IBT and the paravirt stubs should have
+ * ENDBR on.
+ *
+ * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs
+ * don't need to bother with CFI prefixes.
*/
#define PARAVIRT_CALL \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ ANNOTATE_RETPOLINE_SAFE "\n\t" \
+ "call *%[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
@@ -405,7 +263,7 @@ int paravirt_disable_iospace(void);
* Unfortunately, this is a relatively slow operation for modern CPUs,
* because it cannot necessarily determine what the destination
* address is. In this case, the address is a runtime constant, so at
- * the very least we can patch the call to e a simple direct call, or
+ * the very least we can patch the call to a simple direct call, or,
* ideally, patch an inline implementation into the callsite. (Direct
* calls are essentially free, because the call and return addresses
* are completely predictable.)
@@ -416,18 +274,12 @@ int paravirt_disable_iospace(void);
* on the stack. All caller-save registers (eax,edx,ecx) are expected
* to be modified (either clobbered or used for return values).
* X86_64, on the other hand, already specifies a register-based calling
- * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * conventions, returning at %rax, with parameters going in %rdi, %rsi,
* %rdx, and %rcx. Note that for this reason, x86_64 does not need any
* special handling for dealing with 4 arguments, unlike i386.
- * However, x86_64 also have to clobber all caller saved registers, which
+ * However, x86_64 also has to clobber all caller saved registers, which
* unfortunately, are quite a bit (r8 - r11)
*
- * The call instruction itself is marked by placing its start address
- * and size into the .parainstructions section, so that
- * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend pv_init_ops
- * implementation.
- *
* Unfortunately there's no way to get gcc to generate the args setup
* for the call, and then allow the call itself to be generated by an
* inline asm. Because of this, we must do the complete arg setup and
@@ -437,33 +289,31 @@ int paravirt_disable_iospace(void);
* There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
* It could be extended to more arguments, but there would be little
* to be gained from that. For each number of arguments, there are
- * the two VCALL and CALL variants for void and non-void functions.
+ * two VCALL and CALL variants for void and non-void functions.
*
* When there is a return value, the invoker of the macro must specify
* the return type. The macro then uses sizeof() on that type to
- * determine whether its a 32 or 64 bit value, and places the return
+ * determine whether it's a 32 or 64 bit value and places the return
* in the right register(s) (just %eax for 32-bit, and %edx:%eax for
- * 64-bit). For x86_64 machines, it just returns at %rax regardless of
+ * 64-bit). For x86_64 machines, it just returns in %rax regardless of
* the return value size.
*
- * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
+ * 64-bit arguments are passed as a pair of adjacent 32-bit arguments;
* i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
* in low,high order
*
* Small structures are passed and returned in registers. The macro
* calling convention can't directly deal with this, so the wrapper
- * functions must do this.
+ * functions must do it.
*
* These PVOP_* macros are only defined within this header. This
* means that all uses must be wrapped in inline functions. This also
* makes sure the incoming and outgoing types are always correct.
*/
#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
@@ -479,12 +329,10 @@ int paravirt_disable_iospace(void);
#define VEXTRA_CLOBBERS
#else /* CONFIG_X86_64 */
/* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
@@ -495,8 +343,17 @@ int paravirt_disable_iospace(void);
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
-/* void functions are still allowed [re]ax for scratch */
+/*
+ * void functions are still allowed [re]ax for scratch.
+ *
+ * The ZERO_CALL_USED REGS feature may end up zeroing out callee-saved
+ * registers. Make sure we model this with the appropriate clobbers.
+ */
+#ifdef CONFIG_ZERO_CALL_USED_REGS
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS
+#else
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+#endif
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
@@ -509,183 +366,165 @@ int paravirt_disable_iospace(void);
#define PVOP_TEST_NULL(op) ((void)pv_ops.op)
#endif
-#define PVOP_RETMASK(rettype) \
+#define PVOP_RETVAL(rettype) \
({ unsigned long __mask = ~0UL; \
+ BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \
switch (sizeof(rettype)) { \
case 1: __mask = 0xffUL; break; \
case 2: __mask = 0xffffUL; break; \
case 4: __mask = 0xffffffffUL; break; \
default: break; \
} \
- __mask; \
+ __mask & __eax; \
})
-
-#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
- pre, post, ...) \
+/*
+ * Use alternative patching for paravirt calls:
+ * - For replacing an indirect call with a direct one, use the "normal"
+ * ALTERNATIVE() macro with the indirect call as the initial code sequence,
+ * which will be replaced with the related direct call by using the
+ * ALT_FLAG_DIRECT_CALL special case and the "always on" feature.
+ * - In case the replacement is either a direct call or a short code sequence
+ * depending on a feature bit, the ALTERNATIVE_2() macro is being used.
+ * The indirect call is the initial code sequence again, while the special
+ * code sequence is selected with the specified feature bit. In case the
+ * feature is not active, the direct call is used as above via the
+ * ALT_FLAG_DIRECT_CALL special case and the "always on" feature.
+ */
+#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \
({ \
- rettype __ret; \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- /* This is 32-bit specific, but is okay in 64-bit */ \
- /* since this condition will never hold */ \
- if (sizeof(rettype) > sizeof(unsigned long)) { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)((((u64)__edx) << 32) | __eax); \
- } else { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
- } \
- __ret; \
+ asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \
+ ALT_CALL_ALWAYS) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
+ : paravirt_ptr(op), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
- EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
-
-#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
- PVOP_CALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
-
-
-#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
+#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr, \
+ extra_clbr, ...) \
({ \
- PVOP_VCALL_ARGS; \
+ PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
+ asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \
+ ALT_CALL_INSTR, ALT_CALL_ALWAYS, \
+ alt, cond) \
: call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
+ : paravirt_ptr(op), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_VCALL(op, pre, post, ...) \
- ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
- VEXTRA_CLOBBERS, \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_CALL(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op, \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
-#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
- ____PVOP_VCALL(op.func, CLBR_RET_REG, \
- PVOP_VCALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
+#define __PVOP_CALLEESAVE(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, \
+ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
+#define __PVOP_VCALL(op, ...) \
+ (void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALL(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op, alt, cond, \
+ PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(op, ...) \
+ (void)____PVOP_CALL(, op.func, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op.func, alt, cond, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
#define PVOP_CALL0(rettype, op) \
- __PVOP_CALL(rettype, op, "", "")
+ __PVOP_CALL(rettype, op)
#define PVOP_VCALL0(op) \
- __PVOP_VCALL(op, "", "")
+ __PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond)
#define PVOP_CALLEE0(rettype, op) \
- __PVOP_CALLEESAVE(rettype, op, "", "")
+ __PVOP_CALLEESAVE(rettype, op)
#define PVOP_VCALLEE0(op) \
- __PVOP_VCALLEESAVE(op, "", "")
+ __PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond)
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALLEE1(rettype, op, arg1) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALLEE1(op, arg1) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
-#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALLEE2(op, arg1, arg2) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
-/* This is the only difference in x86_64. We can make it much simpler */
-#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
-#else
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", \
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#endif
-
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
- PARAVIRT_LAZY_NONE,
- PARAVIRT_LAZY_MMU,
- PARAVIRT_LAZY_CPU,
-};
-
-enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_start_context_switch(struct task_struct *prev);
-void paravirt_end_context_switch(struct task_struct *next);
-void paravirt_enter_lazy_mmu(void);
-void paravirt_leave_lazy_mmu(void);
-void paravirt_flush_lazy_mmu(void);
-
-void _paravirt_nop(void);
+unsigned long paravirt_ret0(void);
+#ifdef CONFIG_PARAVIRT_XXL
u64 _paravirt_ident_64(u64);
+unsigned long pv_native_save_fl(void);
+void pv_native_irq_disable(void);
+void pv_native_irq_enable(void);
+unsigned long pv_native_read_cr2(void);
+#endif
-#define paravirt_nop ((void *)_paravirt_nop)
-
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 instrtype; /* type of this instruction */
- u8 len; /* length of original instruction */
-};
+#define paravirt_nop ((void *)nop_func)
-extern struct paravirt_patch_site __parainstructions[],
- __parainstructions_end[];
+#endif /* __ASSEMBLER__ */
-#endif /* __ASSEMBLY__ */
+#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV)
+#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
- enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
- enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pc-conf-reg.h b/arch/x86/include/asm/pc-conf-reg.h
new file mode 100644
index 000000000000..56bceceacf5f
--- /dev/null
+++ b/arch/x86/include/asm/pc-conf-reg.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for the configuration register space at port I/O locations
+ * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec,
+ * Cyrix CPUs, numerous chipsets.
+ */
+#ifndef _ASM_X86_PC_CONF_REG_H
+#define _ASM_X86_PC_CONF_REG_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define PC_CONF_INDEX 0x22
+#define PC_CONF_DATA 0x23
+
+#define PC_CONF_MPS_IMCR 0x70
+
+extern raw_spinlock_t pc_conf_lock;
+
+static inline u8 pc_conf_get(u8 reg)
+{
+ outb(reg, PC_CONF_INDEX);
+ return inb(PC_CONF_DATA);
+}
+
+static inline void pc_conf_set(u8 reg, u8 data)
+{
+ outb(reg, PC_CONF_INDEX);
+ outb(data, PC_CONF_DATA);
+}
+
+#endif /* _ASM_X86_PC_CONF_REG_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 662963681ea6..b3ab80a03365 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -7,11 +7,9 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <linux/numa.h>
#include <asm/io.h>
-#include <asm/pat.h>
-#include <asm/x86_init.h>
-
-#ifdef __KERNEL__
+#include <asm/memtype.h>
struct pci_sysdata {
int domain; /* PCI domain */
@@ -22,11 +20,11 @@ struct pci_sysdata {
#ifdef CONFIG_X86_64
void *iommu; /* IOMMU private data */
#endif
-#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+#ifdef CONFIG_PCI_MSI
void *fwnode; /* IRQ domain for MSI assignment */
#endif
#if IS_ENABLED(CONFIG_VMD)
- bool vmd_domain; /* True if in Intel VMD domain */
+ struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */
#endif
};
@@ -34,14 +32,17 @@ extern int pci_routeirq;
extern int noioapicquirk;
extern int noioapicreroute;
+static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus)
+{
+ return bus->sysdata;
+}
+
#ifdef CONFIG_PCI
#ifdef CONFIG_PCI_DOMAINS
static inline int pci_domain_nr(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->domain;
+ return to_pci_sysdata(bus)->domain;
}
static inline int pci_proc_domain(struct pci_bus *bus)
@@ -50,27 +51,23 @@ static inline int pci_proc_domain(struct pci_bus *bus)
}
#endif
-#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+#ifdef CONFIG_PCI_MSI
static inline void *_pci_root_bus_fwnode(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->fwnode;
+ return to_pci_sysdata(bus)->fwnode;
}
#define pci_root_bus_fwnode _pci_root_bus_fwnode
#endif
+#if IS_ENABLED(CONFIG_VMD)
static inline bool is_vmd(struct pci_bus *bus)
{
-#if IS_ENABLED(CONFIG_VMD)
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->vmd_domain;
-#else
- return false;
-#endif
+ return to_pci_sysdata(bus)->vmd_dev != NULL;
}
+#else
+#define is_vmd(bus) false
+#endif /* CONFIG_VMD */
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
@@ -94,6 +91,7 @@ void pcibios_scan_root(int bus);
struct irq_routing_table *pcibios_get_irq_routing_table(void);
int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev);
#define HAVE_PCI_MMAP
#define arch_can_pci_mmap_wc() pat_enabled()
@@ -107,32 +105,11 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
-#ifdef CONFIG_PCI_MSI
-/* implemented in arch/x86/kernel/apic/io_apic. */
-struct msi_desc;
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
-void native_teardown_msi_irq(unsigned int irq);
-void native_restore_msi_irqs(struct pci_dev *dev);
-#else
-#define native_setup_msi_irqs NULL
-#define native_teardown_msi_irq NULL
-#endif
-#endif /* __KERNEL__ */
-
-#ifdef CONFIG_X86_64
-#include <asm/pci_64.h>
-#endif
-
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
static inline int __pcibus_to_node(const struct pci_bus *bus)
{
- const struct pci_sysdata *sd = bus->sysdata;
-
- return sd->node;
+ return to_pci_sysdata(bus)->node;
}
static inline const struct cpumask *
@@ -141,21 +118,9 @@ cpumask_of_pcibus(const struct pci_bus *bus)
int node;
node = __pcibus_to_node(bus);
- return (node == -1) ? cpu_online_mask :
+ return (node == NUMA_NO_NODE) ? cpu_online_mask :
cpumask_of_node(node);
}
#endif
-struct pci_setup_rom {
- struct setup_data data;
- uint16_t vendor;
- uint16_t devid;
- uint64_t pcilen;
- unsigned long segment;
- unsigned long bus;
- unsigned long device;
- unsigned long function;
- uint8_t romdata[0];
-};
-
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
deleted file mode 100644
index f5411de0ae11..000000000000
--- a/arch/x86/include/asm/pci_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PCI_64_H
-#define _ASM_X86_PCI_64_H
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_CALGARY_IOMMU
-static inline void *pci_iommu(struct pci_bus *bus)
-{
- struct pci_sysdata *sd = bus->sysdata;
- return sd->iommu;
-}
-
-static inline void set_pci_iommu(struct pci_bus *bus, void *val)
-{
- struct pci_sysdata *sd = bus->sysdata;
- sd->iommu = val;
-}
-#endif /* CONFIG_CALGARY_IOMMU */
-
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 value);
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 73bb404f4d2a..70533fdcbf02 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -5,7 +5,10 @@
* (c) 1999 Martin Mares <mj@ucw.cz>
*/
+#include <linux/errno.h>
+#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/spinlock.h>
#undef DEBUG
@@ -39,6 +42,8 @@ do { \
#define PCI_ROOT_NO_CRS 0x100000
#define PCI_NOASSIGN_BARS 0x200000
#define PCI_BIG_ROOT_WINDOW 0x400000
+#define PCI_USE_E820 0x800000
+#define PCI_NO_E820 0x1000000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
@@ -64,6 +69,8 @@ void pcibios_scan_specific_bus(int busn);
/* pci-irq.c */
+struct pci_dev;
+
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -87,7 +94,16 @@ struct irq_routing_table {
u32 miniport_data; /* Crap */
u8 rfu[11];
u8 checksum; /* Modulo 256 checksum must give 0 */
- struct irq_info slots[0];
+ struct irq_info slots[];
+} __attribute__((packed));
+
+struct irt_routing_table {
+ u32 signature; /* IRT_SIGNATURE should be here */
+ u8 size; /* Number of entries provided */
+ u8 used; /* Number of entries actually used */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to
+ PCI usage */
+ struct irq_info slots[];
} __attribute__((packed));
extern unsigned int pcibios_irq_mask;
@@ -114,9 +130,20 @@ extern const struct pci_raw_ops pci_direct_conf1;
extern bool port_cf9_safe;
/* arch_initcall level */
+#ifdef CONFIG_PCI_DIRECT
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
+#else
+static inline int pci_direct_probe(void) { return -1; }
+static inline void pci_direct_init(int type) { }
+#endif
+
+#ifdef CONFIG_PCI_BIOS
extern void pci_pcbios_init(void);
+#else
+static inline void pci_pcbios_init(void) { }
+#endif
+
extern void __init dmi_check_pciprobe(void);
extern void __init dmi_check_skip_isa_align(void);
@@ -221,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
# define x86_default_pci_init_irq NULL
# define x86_default_pci_fixup_irqs NULL
#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..725d0eff7acd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -3,550 +3,604 @@
#define _ASM_X86_PERCPU_H
#ifdef CONFIG_X86_64
-#define __percpu_seg gs
-#define __percpu_mov_op movq
+# define __percpu_seg gs
+# define __percpu_rel (%rip)
#else
-#define __percpu_seg fs
-#define __percpu_mov_op movl
+# define __percpu_seg fs
+# define __percpu_rel
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
#ifdef CONFIG_SMP
-#define PER_CPU(var, reg) \
- __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
- lea var(reg), reg
-#define PER_CPU_VAR(var) %__percpu_seg:var
-#else /* ! SMP */
-#define PER_CPU(var, reg) __percpu_mov_op $var, reg
-#define PER_CPU_VAR(var) var
-#endif /* SMP */
-
-#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
+# define __percpu %__percpu_seg:
#else
-#define INIT_PER_CPU_VAR(var) var
+# define __percpu
#endif
-#else /* ...!ASSEMBLY */
+#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
-#include <linux/kernel.h>
+#else /* !__ASSEMBLY__: */
+
+#include <linux/args.h>
+#include <linux/bits.h>
+#include <linux/build_bug.h>
#include <linux/stringify.h>
+#include <asm/asm.h>
#ifdef CONFIG_SMP
-#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset this_cpu_read(this_cpu_off)
+
+#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef __CHECKER__
+# define __seg_gs __attribute__((address_space(__seg_gs)))
+# define __seg_fs __attribute__((address_space(__seg_fs)))
+#endif
+
+#define __percpu_prefix
+#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
+
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
+
+#define __percpu_prefix __force_percpu_prefix
+#define __percpu_seg_override
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
*/
-#define arch_raw_cpu_ptr(ptr) \
-({ \
- unsigned long tcp_ptr__; \
- asm volatile("add " __percpu_arg(1) ", %0" \
- : "=r" (tcp_ptr__) \
- : "m" (this_cpu_off), "0" (ptr)); \
- (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+#define __my_cpu_offset this_cpu_read(this_cpu_off)
+
+/*
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becames 4.
+ */
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr) \
+({ \
+ unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \
+ \
+ tcp_ptr__ += (__force unsigned long)(_ptr); \
+ (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \
})
#else
-#define __percpu_prefix ""
+#define arch_raw_cpu_ptr(_ptr) \
+({ \
+ BUILD_BUG(); \
+ (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \
+})
#endif
+#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel
+
+#else /* !CONFIG_SMP: */
+
+#define __force_percpu_prefix
+#define __percpu_prefix
+#define __percpu_seg_override
+
+#define PER_CPU_VAR(var) (var)__percpu_rel
+
+#endif /* CONFIG_SMP */
+
+#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL)
+# define __my_cpu_type(var) typeof(var)
+# define __my_cpu_ptr(ptr) (ptr)
+# define __my_cpu_var(var) (var)
+
+# define __percpu_qual __percpu_seg_override
+#else
+# define __my_cpu_type(var) typeof(var) __percpu_seg_override
+# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
+# define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
+#endif
+
+#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
#define __percpu_arg(x) __percpu_prefix "%" #x
/*
- * Initialized pointers to per-cpu variables needed for the boot
- * processor need to use these macros to get the proper address
- * offset from __per_cpu_load on SMP.
- *
- * There also must be an entry in vmlinux_64.lds.S
+ * For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though).
*/
-#define DECLARE_INIT_PER_CPU(var) \
- extern typeof(var) init_per_cpu_var(var)
-#ifdef CONFIG_X86_64_SMP
-#define init_per_cpu_var(var) init_per_cpu__##var
-#else
-#define init_per_cpu_var(var) var
-#endif
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op, var, val) \
-do { \
- typedef typeof(var) pto_T__; \
- if (0) { \
- pto_T__ pto_tmp__; \
- pto_tmp__ = (val); \
- (void)pto_tmp__; \
- } \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pto_T__)(val))); \
- break; \
- case 2: \
- asm(op "w %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 4: \
- asm(op "l %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 8: \
- asm(op "q %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pto_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op_1(op) op "b "
+#define __pcpu_op_2(op) op "w "
+#define __pcpu_op_4(op) op "l "
+#define __pcpu_op_8(op) op "q "
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+
+#define __raw_cpu_read(size, qual, pcp) \
+({ \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
+})
+
+#define __raw_cpu_write(size, qual, pcp, val) \
+do { \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
+} while (0)
+
+#define __raw_cpu_read_const(pcp) __raw_cpu_read(, , pcp)
+
+#else /* !CONFIG_USE_X86_SEG_SUPPORT: */
+
+#define __raw_cpu_read(size, qual, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm qual (__pcpu_op_##size("mov") \
+ __percpu_arg([var]) ", %[val]" \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "m" (__my_cpu_var(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
+#define __raw_cpu_write(size, qual, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
+ if (0) { \
+ TYPEOF_UNQUAL(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual (__pcpu_op_##size("mov") "%[val], " \
+ __percpu_arg([var]) \
+ : [var] "=m" (__my_cpu_var(_var)) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
/*
- * Generate a percpu add to memory instruction and optimize code
+ * The generic per-CPU infrastrucutre is not suitable for
+ * reading const-qualified variables.
+ */
+#define __raw_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+
+#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+
+#define __raw_cpu_read_stable(size, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm(__pcpu_op_##size("mov") \
+ __force_percpu_arg(a[var]) ", %[val]" \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "i" (&(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
+#define percpu_unary_op(size, qual, op, _var) \
+({ \
+ asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \
+ : [var] "+m" (__my_cpu_var(_var))); \
+})
+
+#define percpu_binary_op(size, qual, op, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
+ if (0) { \
+ TYPEOF_UNQUAL(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \
+ : [var] "+m" (__my_cpu_var(_var)) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
+} while (0)
+
+/*
+ * Generate a per-CPU add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(var, val) \
+#define percpu_add_op(size, qual, var, val) \
do { \
- typedef typeof(var) pao_T__; \
- const int pao_ID__ = (__builtin_constant_p(val) && \
- ((val) == 1 || (val) == -1)) ? \
- (int)(val) : 0; \
+ const int pao_ID__ = \
+ (__builtin_constant_p(val) && \
+ ((val) == 1 || \
+ (val) == (typeof(val))-1)) ? (int)(val) : 0; \
+ \
if (0) { \
- pao_T__ pao_tmp__; \
+ TYPEOF_UNQUAL(var) pao_tmp__; \
pao_tmp__ = (val); \
(void)pao_tmp__; \
} \
- switch (sizeof(var)) { \
- case 1: \
- if (pao_ID__ == 1) \
- asm("incb "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm("decb "__percpu_arg(0) : "+m" (var)); \
- else \
- asm("addb %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pao_T__)(val))); \
- break; \
- case 2: \
- if (pao_ID__ == 1) \
- asm("incw "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm("decw "__percpu_arg(0) : "+m" (var)); \
- else \
- asm("addw %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 4: \
- if (pao_ID__ == 1) \
- asm("incl "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm("decl "__percpu_arg(0) : "+m" (var)); \
- else \
- asm("addl %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 8: \
- if (pao_ID__ == 1) \
- asm("incq "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm("decq "__percpu_arg(0) : "+m" (var)); \
- else \
- asm("addq %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pao_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+ if (pao_ID__ == 1) \
+ percpu_unary_op(size, qual, "inc", var); \
+ else if (pao_ID__ == -1) \
+ percpu_unary_op(size, qual, "dec", var); \
+ else \
+ percpu_binary_op(size, qual, "add", var, val); \
} while (0)
-#define percpu_from_op(op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm volatile(op "b "__percpu_arg(1)",%0"\
- : "=q" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 2: \
- asm volatile(op "w "__percpu_arg(1)",%0"\
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 4: \
- asm volatile(op "l "__percpu_arg(1)",%0"\
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 8: \
- asm volatile(op "q "__percpu_arg(1)",%0"\
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
-})
-
-#define percpu_stable_op(op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b "__percpu_arg(P1)",%0" \
- : "=q" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 2: \
- asm(op "w "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 4: \
- asm(op "l "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 8: \
- asm(op "q "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
-})
-
-#define percpu_unary_op(op, var) \
-({ \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 2: \
- asm(op "w "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 4: \
- asm(op "l "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 8: \
- asm(op "q "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
-})
-
/*
* Add return operation
*/
-#define percpu_add_return_op(var, val) \
+#define percpu_add_return_op(size, qual, _var, _val) \
({ \
- typeof(var) paro_ret__ = val; \
- switch (sizeof(var)) { \
- case 1: \
- asm("xaddb %0, "__percpu_arg(1) \
- : "+q" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 2: \
- asm("xaddw %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 4: \
- asm("xaddl %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 8: \
- asm("xaddq %0, "__percpu_arg(1) \
- : "+re" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- paro_ret__ += val; \
- paro_ret__; \
+ __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
+ \
+ asm qual (__pcpu_op_##size("xadd") "%[tmp], " \
+ __percpu_arg([var]) \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__), \
+ [var] "+m" (__my_cpu_var(_var)) \
+ : : "memory"); \
+ (typeof(_var))(unsigned long) (paro_tmp__ + _val); \
})
/*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix. The processor cannot prefetch
- * cachelines if xchg is used.
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
*/
-#define percpu_xchg_op(var, nval) \
+#define raw_percpu_xchg_op(_var, _nval) \
({ \
- typeof(var) pxo_ret__; \
- typeof(var) pxo_new__ = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm("\n\tmov "__percpu_arg(1)",%%al" \
- "\n1:\tcmpxchgb %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "q" (pxo_new__) \
- : "memory"); \
- break; \
- case 2: \
- asm("\n\tmov "__percpu_arg(1)",%%ax" \
- "\n1:\tcmpxchgw %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- case 4: \
- asm("\n\tmov "__percpu_arg(1)",%%eax" \
- "\n1:\tcmpxchgl %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- case 8: \
- asm("\n\tmov "__percpu_arg(1)",%%rax" \
- "\n1:\tcmpxchgq %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pxo_ret__; \
+ TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \
+ \
+ raw_cpu_write(_var, _nval); \
+ \
+ pxo_old__; \
})
/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
+ * this_cpu_xchg() is implemented using CMPXCHG without a LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
*/
-#define percpu_cmpxchg_op(var, oval, nval) \
+#define this_percpu_xchg_op(_var, _nval) \
({ \
- typeof(var) pco_ret__; \
- typeof(var) pco_old__ = (oval); \
- typeof(var) pco_new__ = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm("cmpxchgb %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "q" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 2: \
- asm("cmpxchgw %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 4: \
- asm("cmpxchgl %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 8: \
- asm("cmpxchgq %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pco_ret__; \
+ TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \
+ \
+ do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \
+ \
+ pxo_old__; \
})
/*
- * this_cpu_read() makes gcc load the percpu variable every time it is
- * accessed while this_cpu_read_stable() allows the value to be cached.
- * this_cpu_read_stable() is more efficient and can be used if its value
- * is guaranteed to be valid across cpus. The current users include
- * get_current() and get_thread_info() both of which are actually
- * per-thread variables implemented as per-cpu variables and thus
- * stable for the duration of the respective task.
+ * CMPXCHG has no such implied lock semantics as a result it is much
+ * more efficient for CPU-local operations.
*/
-#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-
-#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- asm volatile("cmpxchg8b "__percpu_arg(1) \
- CC_SET(z) \
- : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
- : "b" (__n1), "c" (__n2)); \
- __ret; \
+ __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
+ __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+ \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
+ : [oval] "+a" (pco_old__), \
+ [var] "+m" (__my_cpu_var(_var)) \
+ : [nval] __pcpu_reg_##size(, pco_new__) \
+ : "memory"); \
+ \
+ (typeof(_var))(unsigned long) pco_old__; \
})
-#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#endif /* CONFIG_X86_CMPXCHG64 */
+#define percpu_try_cmpxchg_op(size, qual, _var, _ovalp, _nval) \
+({ \
+ bool success; \
+ __pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \
+ __pcpu_type_##size pco_old__ = *pco_oval__; \
+ __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+ \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
+ : "=@ccz" (success), \
+ [oval] "+a" (pco_old__), \
+ [var] "+m" (__my_cpu_var(_var)) \
+ : [nval] __pcpu_reg_##size(, pco_new__) \
+ : "memory"); \
+ if (unlikely(!success)) \
+ *pco_oval__ = pco_old__; \
+ \
+ likely(success); \
+})
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+
+#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
+({ \
+ union { \
+ u64 var; \
+ struct { \
+ u32 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
+ \
+ old__.var; \
+})
+
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+
+#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \
+({ \
+ bool success; \
+ u64 *_oval = (u64 *)(_ovalp); \
+ union { \
+ u64 var; \
+ struct { \
+ u32 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = *_oval; \
+ new__.var = _nval; \
+ \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
+ if (unlikely(!success)) \
+ *_oval = old__.var; \
+ \
+ likely(success); \
+})
+
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
+
+#endif /* defined(CONFIG_X86_32) && !defined(CONFIG_UML) */
-/*
- * Per cpu atomic 64 bit operations are only available under 64 bit.
- * 32 bit must fall back to generic operations.
- */
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
-/*
- * Pretty complex macro to generate cmpxchg16 instruction. The instruction
- * is not supported on early AMD64 processors so we must be able to emulate
- * it in software. The address used in the cmpxchg16 instruction must be
- * aligned to a 16 byte boundary.
- */
-#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval);
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval);
+
+#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
- "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
- X86_FEATURE_CX16, \
- ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
- "+m" (pcp2), "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
- __ret; \
+ union { \
+ u128 var; \
+ struct { \
+ u64 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
+ \
+ old__.var; \
})
-#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
+#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
-#endif
+#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \
+({ \
+ bool success; \
+ u128 *_oval = (u128 *)(_ovalp); \
+ union { \
+ u128 var; \
+ struct { \
+ u64 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = *_oval; \
+ new__.var = _nval; \
+ \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
+ if (unlikely(!success)) \
+ *_oval = old__.var; \
+ \
+ likely(success); \
+})
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
- const unsigned long __percpu *addr)
-{
- unsigned long __percpu *a =
- (unsigned long __percpu *)addr + nr / BITS_PER_LONG;
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
+
+#endif /* CONFIG_X86_64 */
+
+#define raw_cpu_read_1(pcp) __raw_cpu_read(1, , pcp)
+#define raw_cpu_read_2(pcp) __raw_cpu_read(2, , pcp)
+#define raw_cpu_read_4(pcp) __raw_cpu_read(4, , pcp)
+#define raw_cpu_write_1(pcp, val) __raw_cpu_write(1, , pcp, val)
+#define raw_cpu_write_2(pcp, val) __raw_cpu_write(2, , pcp, val)
+#define raw_cpu_write_4(pcp, val) __raw_cpu_write(4, , pcp, val)
+
+#define this_cpu_read_1(pcp) __raw_cpu_read(1, volatile, pcp)
+#define this_cpu_read_2(pcp) __raw_cpu_read(2, volatile, pcp)
+#define this_cpu_read_4(pcp) __raw_cpu_read(4, volatile, pcp)
+#define this_cpu_write_1(pcp, val) __raw_cpu_write(1, volatile, pcp, val)
+#define this_cpu_write_2(pcp, val) __raw_cpu_write(2, volatile, pcp, val)
+#define this_cpu_write_4(pcp, val) __raw_cpu_write(4, volatile, pcp, val)
+
+#define this_cpu_read_stable_1(pcp) __raw_cpu_read_stable(1, pcp)
+#define this_cpu_read_stable_2(pcp) __raw_cpu_read_stable(2, pcp)
+#define this_cpu_read_stable_4(pcp) __raw_cpu_read_stable(4, pcp)
+
+#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_binary_op(1, , "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_binary_op(2, , "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_binary_op(1, , "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_binary_op(2, , "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_binary_op(4, , "or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_binary_op(1, volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_binary_op(2, volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_binary_op(4, volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_binary_op(1, volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_binary_op(2, volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_binary_op(4, volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval)
+/*
+ * Per-CPU atomic 64-bit operations are only available under 64-bit kernels.
+ * 32-bit kernels must fall back to generic operations.
+ */
#ifdef CONFIG_X86_64
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
-#else
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
-static inline bool x86_this_cpu_variable_test_bit(int nr,
- const unsigned long __percpu *addr)
-{
- bool oldbit;
+#define raw_cpu_read_8(pcp) __raw_cpu_read(8, , pcp)
+#define raw_cpu_write_8(pcp, val) __raw_cpu_write(8, , pcp, val)
+
+#define this_cpu_read_8(pcp) __raw_cpu_read(8, volatile, pcp)
+#define this_cpu_write_8(pcp, val) __raw_cpu_write(8, volatile, pcp, val)
+
+#define this_cpu_read_stable_8(pcp) __raw_cpu_read_stable(8, pcp)
+
+#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_binary_op(8, , "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_binary_op(8, , "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
+
+#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_binary_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_binary_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
+
+#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp)
+
+#else /* !CONFIG_X86_64: */
- asm volatile("btl "__percpu_arg(2)",%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
- : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
+/* There is no generic 64-bit read stable operation for 32-bit targets. */
+#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
- return oldbit;
-}
+#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp)
-#define x86_this_cpu_test_bit(nr, addr) \
- (__builtin_constant_p((nr)) \
- ? x86_this_cpu_constant_test_bit((nr), (addr)) \
- : x86_this_cpu_variable_test_bit((nr), (addr)))
+#endif /* CONFIG_X86_64 */
+
+#define this_cpu_read_const(pcp) __raw_cpu_read_const(pcp)
+
+/*
+ * this_cpu_read() makes the compiler load the per-CPU variable every time
+ * it is accessed while this_cpu_read_stable() allows the value to be cached.
+ * this_cpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across CPUs. The current users include
+ * current_task and cpu_current_top_of_stack, both of which are
+ * actually per-thread variables implemented as per-CPU variables and
+ * thus stable for the duration of the respective task.
+ */
+#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+
+#define x86_this_cpu_constant_test_bit(_nr, _var) \
+({ \
+ unsigned long __percpu *addr__ = \
+ (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \
+ \
+ !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \
+})
+
+#define x86_this_cpu_variable_test_bit(_nr, _var) \
+({ \
+ bool oldbit; \
+ \
+ asm volatile("btl %[nr], " __percpu_arg([var]) \
+ : "=@ccc" (oldbit) \
+ : [var] "m" (__my_cpu_var(_var)), \
+ [nr] "rI" (_nr)); \
+ oldbit; \
+})
+
+#define x86_this_cpu_test_bit(_nr, _var) \
+ (__builtin_constant_p(_nr) \
+ ? x86_this_cpu_constant_test_bit(_nr, _var) \
+ : x86_this_cpu_variable_test_bit(_nr, _var))
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_SMP
@@ -568,46 +622,47 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
{ [0 ... NR_CPUS-1] = _initvalue }; \
__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
- DECLARE_PER_CPU(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
+ DECLARE_PER_CPU(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
- DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+ DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define early_per_cpu_ptr(_name) (_name##_early_ptr)
-#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
-#define early_per_cpu(_name, _cpu) \
- *(early_per_cpu_ptr(_name) ? \
- &early_per_cpu_ptr(_name)[_cpu] : \
+#define early_per_cpu_ptr(_name) (_name##_early_ptr)
+#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
+
+#define early_per_cpu(_name, _cpu) \
+ *(early_per_cpu_ptr(_name) ? \
+ &early_per_cpu_ptr(_name)[_cpu] : \
&per_cpu(_name, _cpu))
-#else /* !CONFIG_SMP */
-#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
+#else /* !CONFIG_SMP: */
+#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue
#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name)
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
-#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
-#define early_per_cpu_ptr(_name) NULL
+#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
+#define early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
-#endif /* !CONFIG_SMP */
+#endif /* !CONFIG_SMP */
#endif /* _ASM_X86_PERCPU_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf74902293..7276ba70c88a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -2,12 +2,14 @@
#ifndef _ASM_X86_PERF_EVENT_H
#define _ASM_X86_PERF_EVENT_H
+#include <linux/static_call.h>
+
/*
* Performance event hw details:
*/
#define INTEL_PMC_MAX_GENERIC 32
-#define INTEL_PMC_MAX_FIXED 3
+#define INTEL_PMC_MAX_FIXED 16
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
@@ -29,9 +31,29 @@
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
+#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
+#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
+
+#define INTEL_FIXED_BITS_STRIDE 4
+#define INTEL_FIXED_0_KERNEL (1ULL << 0)
+#define INTEL_FIXED_0_USER (1ULL << 1)
+#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
+#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
+#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
+#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
+
+#define INTEL_FIXED_BITS_MASK \
+ (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \
+ INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \
+ ICL_FIXED_0_ADAPTIVE)
+
+#define intel_fixed_bits_by_idx(_idx, _bits) \
+ ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
@@ -48,11 +70,22 @@
#define AMD64_L3_SLICE_SHIFT 48
#define AMD64_L3_SLICE_MASK \
- ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+ (0xFULL << AMD64_L3_SLICE_SHIFT)
+#define AMD64_L3_SLICEID_MASK \
+ (0x7ULL << AMD64_L3_SLICE_SHIFT)
#define AMD64_L3_THREAD_SHIFT 56
#define AMD64_L3_THREAD_MASK \
- ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+ (0xFFULL << AMD64_L3_THREAD_SHIFT)
+#define AMD64_L3_F19H_THREAD_MASK \
+ (0x3ULL << AMD64_L3_THREAD_SHIFT)
+
+#define AMD64_L3_EN_ALL_CORES BIT_ULL(47)
+#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46)
+
+#define AMD64_L3_COREID_SHIFT 42
+#define AMD64_L3_COREID_MASK \
+ (0x7ULL << AMD64_L3_COREID_SHIFT)
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
@@ -74,6 +107,26 @@
#define AMD64_RAW_EVENT_MASK_NB \
(AMD64_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK)
+
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ GENMASK_ULL(37, 36))
+
+#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \
+ (ARCH_PERFMON_EVENTSEL_UMASK | \
+ GENMASK_ULL(27, 24))
+
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \
+ (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
+ AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
+
+#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31)
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0)
+#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8)
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \
+ (AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \
+ AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC)
+
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
#define AMD64_NUM_COUNTERS_NB 4
@@ -87,6 +140,21 @@
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
#define ARCH_PERFMON_EVENTS_COUNT 7
+#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
+#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_XMMS BIT_ULL(2)
+#define PEBS_DATACFG_LBRS BIT_ULL(3)
+#define PEBS_DATACFG_CNTR BIT_ULL(4)
+#define PEBS_DATACFG_METRICS BIT_ULL(5)
+#define PEBS_DATACFG_LBR_SHIFT 24
+#define PEBS_DATACFG_CNTR_SHIFT 32
+#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
+#define PEBS_DATACFG_FIX_SHIFT 48
+#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
+
+/* Steal the highest bit of pebs_data_cfg for SW usage */
+#define PEBS_UPDATE_DS_SW BIT_ULL(63)
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
@@ -118,11 +186,112 @@ union cpuid10_edx {
struct {
unsigned int num_counters_fixed:5;
unsigned int bit_width_fixed:8;
- unsigned int reserved:19;
+ unsigned int reserved1:2;
+ unsigned int anythread_deprecated:1;
+ unsigned int reserved2:16;
} split;
unsigned int full;
};
+/*
+ * Intel "Architectural Performance Monitoring extension" CPUID
+ * detection/enumeration details:
+ */
+#define ARCH_PERFMON_EXT_LEAF 0x00000023
+#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+#define ARCH_PERFMON_ACR_LEAF 0x2
+#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4
+#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5
+
+union cpuid35_eax {
+ struct {
+ unsigned int leaf0:1;
+ /* Counters Sub-Leaf */
+ unsigned int cntr_subleaf:1;
+ /* Auto Counter Reload Sub-Leaf */
+ unsigned int acr_subleaf:1;
+ /* Events Sub-Leaf */
+ unsigned int events_subleaf:1;
+ /* arch-PEBS Sub-Leaves */
+ unsigned int pebs_caps_subleaf:1;
+ unsigned int pebs_cnts_subleaf:1;
+ unsigned int reserved:26;
+ } split;
+ unsigned int full;
+};
+
+union cpuid35_ebx {
+ struct {
+ /* UnitMask2 Supported */
+ unsigned int umask2:1;
+ /* EQ-bit Supported */
+ unsigned int eq:1;
+ unsigned int reserved:30;
+ } split;
+ unsigned int full;
+};
+
+/*
+ * Intel Architectural LBR CPUID detection/enumeration details:
+ */
+union cpuid28_eax {
+ struct {
+ /* Supported LBR depth values */
+ unsigned int lbr_depth_mask:8;
+ unsigned int reserved:22;
+ /* Deep C-state Reset */
+ unsigned int lbr_deep_c_reset:1;
+ /* IP values contain LIP */
+ unsigned int lbr_lip:1;
+ } split;
+ unsigned int full;
+};
+
+union cpuid28_ebx {
+ struct {
+ /* CPL Filtering Supported */
+ unsigned int lbr_cpl:1;
+ /* Branch Filtering Supported */
+ unsigned int lbr_filter:1;
+ /* Call-stack Mode Supported */
+ unsigned int lbr_call_stack:1;
+ } split;
+ unsigned int full;
+};
+
+union cpuid28_ecx {
+ struct {
+ /* Mispredict Bit Supported */
+ unsigned int lbr_mispred:1;
+ /* Timed LBRs Supported */
+ unsigned int lbr_timed_lbr:1;
+ /* Branch Type Field Supported */
+ unsigned int lbr_br_type:1;
+ unsigned int reserved:13;
+ /* Branch counters (Event Logging) Supported */
+ unsigned int lbr_counters:4;
+ } split;
+ unsigned int full;
+};
+
+/*
+ * AMD "Extended Performance Monitoring and Debug" CPUID
+ * detection/enumeration details:
+ */
+union cpuid_0x80000022_ebx {
+ struct {
+ /* Number of Core Performance Counters */
+ unsigned int num_core_pmc:4;
+ /* Number of available LBR Stack Entries */
+ unsigned int lbr_v2_stack_sz:6;
+ /* Number of Data Fabric Counters */
+ unsigned int num_df_pmc:6;
+ /* Number of Unified Memory Controller Counters */
+ unsigned int num_umc_pmc:6;
+ } split;
+ unsigned int full;
+};
+
struct x86_pmu_capability {
int version;
int num_counters_gp;
@@ -131,19 +300,36 @@ struct x86_pmu_capability {
int bit_width_fixed;
unsigned int events_mask;
int events_mask_len;
+ unsigned int pebs_ept :1;
};
/*
* Fixed-purpose performance events:
*/
+/* RDPMC offset for Fixed PMCs */
+#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30)
+#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29)
+
/*
- * All 3 fixed-mode PMCs are configured via this single MSR:
+ * All the fixed-mode PMCs are configured via this single MSR:
*/
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
- * The counts are available in three separate MSRs:
+ * There is no event-code assigned to the fixed-mode PMCs.
+ *
+ * For a fixed-mode PMC, which has an equivalent event on a general-purpose
+ * PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
+ * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
+ *
+ * For a fixed-mode PMC, which doesn't have an equivalent event, a
+ * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
+ * The pseudo event-code for a fixed-mode PMC must be 0x00.
+ * The pseudo umask-code is 0xX. The X equals the index of the fixed
+ * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
+ *
+ * The counts are available in separate MSRs:
*/
/* Instr_Retired.Any: */
@@ -154,27 +340,280 @@ struct x86_pmu_capability {
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
-/* CPU_CLK_Unhalted.Ref: */
+/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
+/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
+#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
+#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
+#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+
+/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
+/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
+/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */
+
+static inline bool use_fixed_pseudo_encoding(u64 code)
+{
+ return !(code & 0xff);
+}
+
/*
* We model BTS tracing as another fixed-mode PMC.
*
- * We choose a value in the middle of the fixed event range, since lower
+ * We choose the value 47 for the fixed index of BTS, since lower
* values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/
-#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15)
-#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
-#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
-#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
-#define GLOBAL_STATUS_ASIF BIT_ULL(60)
-#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
-#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
-#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
+/*
+ * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
+ * each TopDown metric event.
+ *
+ * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
+ */
+#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
+#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
+#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
+#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
+#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4)
+#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5)
+#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6)
+#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7)
+#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND
+#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \
+ INTEL_PMC_MSK_FIXED_SLOTS)
+
+/*
+ * There is no event-code assigned to the TopDown events.
+ *
+ * For the slots event, use the pseudo code of the fixed counter 3.
+ *
+ * For the metric events, the pseudo event-code is 0x00.
+ * The pseudo umask-code starts from the middle of the pseudo event
+ * space, 0x80.
+ */
+#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */
+/* Level 1 metrics */
+#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */
+#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
+#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
+#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
+/* Level 2 metrics */
+#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */
+#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */
+#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */
+#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */
+
+#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
+#define INTEL_TD_METRIC_NUM 8
+
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
+#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
+static inline bool is_metric_idx(int idx)
+{
+ return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
+}
+
+static inline bool is_topdown_idx(int idx)
+{
+ return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
+}
+
+#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \
+ (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
+
+#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF_BIT 62
+#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
+#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
+#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
+#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
+#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
+#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
+
+#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
+/*
+ * We model guest LBR event tracing as another fixed-mode PMC like BTS.
+ *
+ * We choose bit 58 because it's used to indicate LBR stack frozen state
+ * for architectural perfmon v4, also we unconditionally mask that bit in
+ * the handle_pmi_common(), so it'll never be set in the overflow handling.
+ *
+ * With this fake counter assigned, the guest LBR event user (such as KVM),
+ * can program the LBR registers on its own, and we don't actually do anything
+ * with then in the host context.
+ */
+#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT)
+
+/*
+ * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b,
+ * since it would claim bit 58 which is effectively Fixed26.
+ */
+#define INTEL_FIXED_VLBR_EVENT 0x1b00
+
+/*
+ * Adaptive PEBS v4
+ */
+
+struct pebs_basic {
+ u64 format_group:32,
+ retire_latency:16,
+ format_size:16;
+ u64 ip;
+ u64 applicable_counters;
+ u64 tsc;
+};
+
+struct pebs_meminfo {
+ u64 address;
+ u64 aux;
+ union {
+ /* pre Alder Lake */
+ u64 mem_latency;
+ /* Alder Lake and later */
+ struct {
+ u64 instr_latency:16;
+ u64 pad2:16;
+ u64 cache_latency:16;
+ u64 pad3:16;
+ };
+ };
+ u64 tsx_tuning;
+};
+
+struct pebs_gprs {
+ u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+ u64 r8, r9, r10, r11, r12, r13, r14, r15;
+};
+
+struct pebs_xmm {
+ u64 xmm[16*2]; /* two entries for each register */
+};
+
+struct pebs_cntr_header {
+ u32 cntr;
+ u32 fixed;
+ u32 metrics;
+ u32 reserved;
+};
+
+#define INTEL_CNTR_METRICS 0x3
+
+/*
+ * Arch PEBS
+ */
+union arch_pebs_index {
+ struct {
+ u64 rsvd:4,
+ wr:23,
+ rsvd2:4,
+ full:1,
+ en:1,
+ rsvd3:3,
+ thresh:23,
+ rsvd4:5;
+ };
+ u64 whole;
+};
+
+struct arch_pebs_header {
+ union {
+ u64 format;
+ struct {
+ u64 size:16, /* Record size */
+ rsvd:14,
+ mode:1, /* 64BIT_MODE */
+ cont:1,
+ rsvd2:3,
+ cntr:5,
+ lbr:2,
+ rsvd3:7,
+ xmm:1,
+ ymmh:1,
+ rsvd4:2,
+ opmask:1,
+ zmmh:1,
+ h16zmm:1,
+ rsvd5:5,
+ gpr:1,
+ aux:1,
+ basic:1;
+ };
+ };
+ u64 rsvd6;
+};
+
+struct arch_pebs_basic {
+ u64 ip;
+ u64 applicable_counters;
+ u64 tsc;
+ u64 retire :16, /* Retire Latency */
+ valid :1,
+ rsvd :47;
+ u64 rsvd2;
+ u64 rsvd3;
+};
+
+struct arch_pebs_aux {
+ u64 address;
+ u64 rsvd;
+ u64 rsvd2;
+ u64 rsvd3;
+ u64 rsvd4;
+ u64 aux;
+ u64 instr_latency :16,
+ pad2 :16,
+ cache_latency :16,
+ pad3 :16;
+ u64 tsx_tuning;
+};
+
+struct arch_pebs_gprs {
+ u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+ u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
+ u64 rsvd;
+};
+
+struct arch_pebs_xer_header {
+ u64 xstate;
+ u64 rsvd;
+};
+
+#define ARCH_PEBS_LBR_NAN 0x0
+#define ARCH_PEBS_LBR_NUM_8 0x1
+#define ARCH_PEBS_LBR_NUM_16 0x2
+#define ARCH_PEBS_LBR_NUM_VAR 0x3
+#define ARCH_PEBS_BASE_LBR_ENTRIES 8
+struct arch_pebs_lbr_header {
+ u64 rsvd;
+ u64 ctl;
+ u64 depth;
+ u64 ler_from;
+ u64 ler_to;
+ u64 ler_info;
+};
+
+struct arch_pebs_cntr_header {
+ u32 cntr;
+ u32 fixed;
+ u32 metrics;
+ u32 reserved;
+};
+
+/*
+ * AMD Extended Performance Monitoring and Debug cpuid feature detection
+ */
+#define EXT_PERFMON_DEBUG_FEATURES 0x80000022
/*
* IBS cpuid feature detection
@@ -197,6 +636,9 @@ struct x86_pmu_capability {
#define IBS_CAPS_OPBRNFUSE (1U<<8)
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)
+#define IBS_CAPS_ZEN4 (1U<<11)
+#define IBS_CAPS_OPLDLAT (1U<<12)
+#define IBS_CAPS_OPDTLBPGSIZE (1U<<19)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -209,27 +651,39 @@ struct x86_pmu_capability {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* ibs fetch bits/masks */
+/* IBS fetch bits/masks */
+#define IBS_FETCH_L3MISSONLY (1ULL<<59)
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* ibs op bits/masks */
-/* lower 4 bits of the current count are ignored: */
-#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
+/*
+ * IBS op bits/masks
+ * The lower 7 bits of the current count are random bits
+ * preloaded by hardware and ignored in software
+ */
+#define IBS_OP_LDLAT_EN (1ULL<<63)
+#define IBS_OP_LDLAT_THRSH (0xFULL<<59)
+#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
+#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
+#define IBS_OP_L3MISSONLY (1ULL<<16)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
#define IBS_RIP_INVALID (1ULL<<38)
#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
+extern int forward_event_to_ibs(struct perf_event *event);
#else
static inline u32 get_ibs_caps(void) { return 0; }
+static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; }
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -248,26 +702,28 @@ extern void perf_events_lapic_init(void);
#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+struct x86_perf_regs {
+ struct pt_regs regs;
+ u64 *xmm_regs;
+};
+
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
+#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#include <asm/stacktrace.h>
/*
- * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
+ * We abuse bit 3 from flags to pass exact information, see
+ * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
- (regs)->bp = caller_frame_pointer(); \
+ (regs)->sp = (unsigned long)__builtin_frame_address(0); \
(regs)->cs = __KERNEL_CS; \
regs->flags = 0; \
- asm volatile( \
- _ASM_MOV "%%"_ASM_SP ", %0\n" \
- : "=m" ((regs)->sp) \
- :: "memory" \
- ); \
}
struct perf_guest_switch_msr {
@@ -275,33 +731,78 @@ struct perf_guest_switch_msr {
u64 host, guest;
};
-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+struct x86_pmu_lbr {
+ unsigned int nr;
+ unsigned int from;
+ unsigned int to;
+ unsigned int info;
+ bool has_callstack;
+};
+
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+extern u64 perf_get_hw_event_config(int hw_event);
extern void perf_check_microcode(void);
+extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
- *nr = 0;
- return NULL;
+ memset(cap, 0, sizeof(*cap));
}
-static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+static inline u64 perf_get_hw_event_config(int hw_event)
{
- memset(cap, 0, sizeof(*cap));
+ return 0;
}
static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
+extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
+#else
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
+static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+{
+ memset(lbr, 0, sizeof(*lbr));
+}
+#endif
+
#ifdef CONFIG_CPU_SUP_INTEL
extern void intel_pt_handle_vmx(int on);
+#else
+static inline void intel_pt_handle_vmx(int on)
+{
+
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
+
+#if defined(CONFIG_PERF_EVENTS_AMD_BRS)
+
+#define PERF_NEEDS_LOPWR_CB 1
+
+/*
+ * architectural low power callback impacts
+ * drivers/acpi/processor_idle.c
+ * drivers/acpi/acpi_pad.c
+ */
+extern void perf_amd_brs_lopwr_cb(bool lopwr_in);
+
+DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+
+static __always_inline void perf_lopwr_cb(bool lopwr_in)
+{
+ static_call_mod(perf_lopwr_cb)(lopwr_in);
+}
+
+#endif /* PERF_NEEDS_LOPWR_CB */
+
#else
static inline void amd_pmu_enable_virt(void) { }
static inline void amd_pmu_disable_virt(void) { }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 94de1a05aeba..d65e338b6a5f 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config)
static inline int p4_ht_active(void)
{
#ifdef CONFIG_SMP
- return smp_num_siblings > 1;
+ return __max_threads_per_core > 1;
#endif
return 0;
}
@@ -189,7 +189,7 @@ static inline int p4_ht_active(void)
static inline int p4_ht_thread(int cpu)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
+ if (__max_threads_per_core == 2)
return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
#endif
return 0;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a281e61ec60c..c88691b15f3c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,12 @@
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
+#include <asm/cpufeature.h>
+
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#define __HAVE_ARCH_PGD_FREE
+#include <asm-generic/pgalloc.h>
+
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
#ifdef CONFIG_PARAVIRT_XXL
@@ -26,20 +32,16 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
#endif
/*
- * Flags to use when allocating a user page table page.
- */
-extern gfp_t __userpte_alloc_gfp;
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-/*
- * Instead of one PGD, we acquire two PGDs. Being order-1, it is
- * both 8k in size and 8k-aligned. That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
+ * In case of Page Table Isolation active, we acquire two PGDs instead of one.
+ * Being order-1, it is both 8k in size and 8k-aligned. That lets us just
+ * flip bit 12 in a pointer to swap between the two 4k halves.
*/
-#define PGD_ALLOCATION_ORDER 1
-#else
-#define PGD_ALLOCATION_ORDER 0
-#endif
+static inline unsigned int pgd_allocation_order(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ return 1;
+ return 0;
+}
/*
* Allocate and free page tables.
@@ -47,24 +49,8 @@ extern gfp_t __userpte_alloc_gfp;
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
extern pgtable_t pte_alloc_one(struct mm_struct *);
-/* Should really implement gc for free page table pages. This could be
- done with a reference count in struct page. */
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -96,33 +82,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
#if CONFIG_PGTABLE_LEVELS > 2
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- struct page *page;
- gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- page = alloc_pages(gfp, 0);
- if (!page)
- return NULL;
- if (!pgtable_pmd_page_ctor(page)) {
- __free_pages(page, 0);
- return NULL;
- }
- return (pmd_t *)page_address(page);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- pgtable_pmd_page_dtor(virt_to_page(pmd));
- free_page((unsigned long)pmd);
-}
-
extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
@@ -160,21 +120,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu
set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
}
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- return (pud_t *)get_zeroed_page(gfp);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- free_page((unsigned long)pud);
-}
-
extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
@@ -200,24 +145,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
}
-static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- return (p4d_t *)get_zeroed_page(gfp);
-}
-
-static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
-{
- if (!pgtable_l5_enabled())
- return;
-
- BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
- free_page((unsigned long)p4d);
-}
-
extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 60d0f9015317..e9482a11ac52 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -80,21 +80,37 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * <----------------- offset ------------------> 0 E <- type --> 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
#define SWP_TYPE_BITS 5
+#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1)
+#define _SWP_TYPE_SHIFT (_PAGE_BIT_PRESENT + 1)
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
-#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
- & ((1U << SWP_TYPE_BITS) - 1))
+#define __swp_type(x) (((x).val >> _SWP_TYPE_SHIFT) \
+ & _SWP_TYPE_MASK)
#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << (_PAGE_BIT_PRESENT + 1)) \
+ (((type) & _SWP_TYPE_MASK) << _SWP_TYPE_SHIFT) \
| ((offset) << SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE
+
/* No inverted PFNs on 2 level page tables */
static inline u64 protnone_mask(u64 val)
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 6deb6cd236e3..54690bd4ddbe 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef unsigned long pteval_t;
@@ -16,22 +16,22 @@ typedef union {
pteval_t pte;
pteval_t pte_low;
} pte_t;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
- * traditional i386 two-level paging structure:
+ * Traditional i386 two-level paging structure:
*/
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-
/*
- * the i386 is two-level, so we don't really have any
- * PMD directory physically.
+ * The i386 is two-level, so we don't really have any
+ * PMD directory physically:
*/
+#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 1024
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index f8b1ad2c3828..dabafba957ea 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,8 +2,6 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
-#include <asm/atomic64_32.h>
-
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -21,7 +19,15 @@
pr_err("%s:%d: bad pgd %p(%016Lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
-/* Rules for using set_pte: the pte being assigned *must* be
+#define pxx_xchg64(_pxx, _ptr, _val) ({ \
+ _pxx##val_t *_p = (_pxx##val_t *)_ptr; \
+ _pxx##val_t _o = *_p; \
+ do { } while (!try_cmpxchg64(_p, &_o, (_val))); \
+ native_make_##_pxx(_o); \
+})
+
+/*
+ * Rules for using set_pte: the pte being assigned *must* be
* either not present or in a state where the hardware will
* not attempt to update the pte. In places where this is
* not possible, use pte_get_and_clear to obtain the old pte
@@ -29,81 +35,27 @@
*/
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- ptep->pte_high = pte.pte_high;
+ WRITE_ONCE(ptep->pte_high, pte.pte_high);
smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-
-#define pmd_read_atomic pmd_read_atomic
-/*
- * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by gcc. Problem is, in certain places
- * where pte_offset_map_lock is called, concurrent page faults are
- * allowed, if the mmap_sem is hold for reading. An example is mincore
- * vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate rightfully does a set_64bit, but if we're reading the
- * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because gcc will not read the 64bit of the pmd atomically. To fix
- * this all places running pmd_offset_map_lock() while holding the
- * mmap_sem in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null nor not, and in turn to know if
- * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
- * operations.
- *
- * Without THP if the mmap_sem is hold for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic runs. So
- * we can always return atomic pmd values with this function.
- *
- * With THP if the mmap_sem is hold for reading, the pmd can become
- * trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic. We could read it really
- * atomically here with a atomic64_read for the THP enabled case (and
- * it would be a whole lot simpler), but to avoid using cmpxchg8b we
- * only return an atomic pmdval if the low part of the pmdval is later
- * found stable (i.e. pointing to a pte). And we're returning a none
- * pmdval if the low part of the pmd is none. In some cases the high
- * and low part of the pmdval returned may not be consistent if THP is
- * enabled (the low part may point to previously mapped hugepage,
- * while the high part may point to a more recently mapped hugepage),
- * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
- * of the pmd to be read atomically to decide if the pmd is unstable
- * or not, with the only exception of when the low part of the pmd is
- * zero in which case we return a none pmd.
- */
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
- pmdval_t ret;
- u32 *tmp = (u32 *)pmdp;
-
- ret = (pmdval_t) (*tmp);
- if (ret) {
- /*
- * If the low part is null, we must not read the high part
- * or we can end up with a partial pmd.
- */
- smp_rmb();
- ret |= ((pmdval_t)*(tmp + 1)) << 32;
- }
-
- return (pmd_t) { ret };
+ WRITE_ONCE(ptep->pte_low, pte.pte_low);
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
- set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
+ pxx_xchg64(pte, ptep, native_pte_val(pte));
}
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
+ pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
}
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
#endif
- set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
+ pxx_xchg64(pud, pudp, native_pud_val(pud));
}
/*
@@ -114,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- ptep->pte_low = 0;
+ WRITE_ONCE(ptep->pte_low, 0);
smp_wmb();
- ptep->pte_high = 0;
+ WRITE_ONCE(ptep->pte_high, 0);
}
-static inline void native_pmd_clear(pmd_t *pmd)
+static inline void native_pmd_clear(pmd_t *pmdp)
{
- u32 *tmp = (u32 *)pmd;
- *tmp = 0;
+ WRITE_ONCE(pmdp->pmd_low, 0);
smp_wmb();
- *(tmp + 1) = 0;
+ WRITE_ONCE(pmdp->pmd_high, 0);
}
static inline void native_pud_clear(pud_t *pudp)
@@ -147,41 +98,26 @@ static inline void pud_clear(pud_t *pudp)
*/
}
+
#ifdef CONFIG_SMP
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
- pte_t res;
-
- res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
-
- return res;
+ return pxx_xchg64(pte, ptep, 0ULL);
}
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
-
-union split_pmd {
- struct {
- u32 pmd_low;
- u32 pmd_high;
- };
- pmd_t pmd;
-};
-#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
{
- union split_pmd res, *orig = (union split_pmd *)pmdp;
-
- /* xchg acts as a barrier before setting of the high bits */
- res.pmd_low = xchg(&orig->pmd_low, 0);
- res.pmd_high = orig->pmd_high;
- orig->pmd_high = 0;
+ return pxx_xchg64(pmd, pmdp, 0ULL);
+}
- return res.pmd;
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+ return pxx_xchg64(pud, pudp, 0ULL);
}
#else
+#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif
#ifndef pmdp_establish
@@ -197,67 +133,47 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* anybody.
*/
if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
- union split_pmd old, new, *ptr;
-
- ptr = (union split_pmd *)pmdp;
-
- new.pmd = pmd;
-
/* xchg acts as a barrier before setting of the high bits */
- old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low);
- old.pmd_high = ptr->pmd_high;
- ptr->pmd_high = new.pmd_high;
- return old.pmd;
- }
-
- do {
- old = *pmdp;
- } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+ old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
+ old.pmd_high = READ_ONCE(pmdp->pmd_high);
+ WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);
- return old;
-}
-#endif
-
-#ifdef CONFIG_SMP
-union split_pud {
- struct {
- u32 pud_low;
- u32 pud_high;
- };
- pud_t pud;
-};
-
-static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
-{
- union split_pud res, *orig = (union split_pud *)pudp;
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
-#endif
-
- /* xchg acts as a barrier before setting of the high bits */
- res.pud_low = xchg(&orig->pud_low, 0);
- res.pud_high = orig->pud_high;
- orig->pud_high = 0;
+ return old;
+ }
- return res.pud;
+ return pxx_xchg64(pmd, pmdp, pmd.pmd);
}
-#else
-#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
+ * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+ * < type -> <---------------------- offset ----------------------
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * --------------------------------------------> 0 E 0 0 0 0 0 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
#define SWP_TYPE_BITS 5
+#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
/* We always extract/encode the offset by shifting it all the way up, and then down again */
#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
-#define __swp_type(x) (((x).val) & 0x1f)
-#define __swp_offset(x) ((x).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define __swp_type(x) (((x).val) & _SWP_TYPE_MASK)
+#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS)
+#define __swp_entry(type, offset) ((swp_entry_t){((type) & _SWP_TYPE_MASK) \
+ | (offset) << SWP_TYPE_BITS})
/*
* Normally, __swp_entry() converts from arch-independent swp_entry_t to
@@ -285,52 +201,8 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
__pteval_swp_offset(pte)))
-#define gup_get_pte gup_get_pte
-/*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atomically,
- * but that is not possible (without expensive cmpxchg8b) on PAE. What
- * we do have is the guarantee that a PTE will only either go from not
- * present to present, or present to not present or both -- it will not
- * switch to a completely different present page without a TLB flush in
- * between; something that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- *
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- *
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees 'l' iff pte_high
- * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have gotten rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. Because get_user_pages_fast() only operates on present ptes
- * we're safe.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- pte_t pte;
-
- do {
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- } while (unlikely(pte.pte_low != ptep->pte_low));
-
- return pte;
-}
+/* We borrow bit 7 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE
#include <asm/pgtable-invert.h>
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 33845d36897c..580b09bf6a45 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef u64 pteval_t;
@@ -18,14 +18,16 @@ typedef union {
};
pteval_t pte;
} pte_t;
-#endif /* !__ASSEMBLY__ */
-
-#ifdef CONFIG_PARAVIRT_XXL
-#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \
- (pv_info.shared_kernel_pmd)))
-#else
-#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-#endif
+
+typedef union {
+ struct {
+ unsigned long pmd_low, pmd_high;
+ };
+ pmdval_t pmd;
+} pmd_t;
+#endif /* !__ASSEMBLER__ */
+
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index a0c1525f1b6f..e12e52ae8083 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -2,7 +2,7 @@
#ifndef _ASM_PGTABLE_INVERT_H
#define _ASM_PGTABLE_INVERT_H 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* A clear pte value is special, and doesn't get inverted.
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
return val;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..e33df3da6980 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,28 +15,35 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
-/*
- * Macros to add or remove encryption attribute
- */
-#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot)))
-#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/spinlock.h>
#include <asm/x86_init.h>
+#include <asm/pkru.h>
+#include <asm/fpu/api.h>
+#include <asm/coco.h>
+#include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
-int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
-
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
-void ptdump_walk_pgd_level_checkwx(void);
+bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+
+struct seq_file;
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
+ bool user);
+bool ptdump_walk_pgd_level_checkwx(void);
+#define ptdump_check_wx ptdump_walk_pgd_level_checkwx
void ptdump_walk_user_pgd_level_checkwx(void);
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot)))
+#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot)))
+
#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx()
#else
-#define debug_checkwx() do { } while (0)
#define debug_checkwx_user() do { } while (0)
#endif
@@ -46,7 +53,7 @@ void ptdump_walk_user_pgd_level_checkwx(void);
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__visible;
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
@@ -59,7 +66,6 @@ extern pmdval_t early_pmd_flags;
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT_XXL */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -114,47 +120,81 @@ extern pmdval_t early_pmd_flags;
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v & ~clear);
+}
+
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v & ~clear);
+}
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
{
- return pte_flags(pte) & _PAGE_DIRTY;
+ return pte_flags(pte) & _PAGE_DIRTY_BITS;
}
+static inline bool pte_shstk(pte_t pte)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
+}
-static inline u32 read_pkru(void)
+static inline int pte_young(pte_t pte)
{
- if (boot_cpu_has(X86_FEATURE_OSPKE))
- return __read_pkru();
- return 0;
+ return pte_flags(pte) & _PAGE_ACCESSED;
}
-static inline void write_pkru(u32 pkru)
+static inline bool pte_decrypted(pte_t pte)
{
- if (boot_cpu_has(X86_FEATURE_OSPKE))
- __write_pkru(pkru);
+ return cc_mkdec(pte_val(pte)) == pte_val(pte);
}
-static inline int pte_young(pte_t pte)
+#define pmd_dirty pmd_dirty
+static inline bool pmd_dirty(pmd_t pmd)
{
- return pte_flags(pte) & _PAGE_ACCESSED;
+ return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
}
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_shstk(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_DIRTY;
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
{
- return pud_flags(pud) & _PAGE_DIRTY;
+ return pud_flags(pud) & _PAGE_DIRTY_BITS;
}
static inline int pud_young(pud_t pud)
@@ -162,9 +202,36 @@ static inline int pud_young(pud_t pud)
return pud_flags(pud) & _PAGE_ACCESSED;
}
+static inline bool pud_shstk(pud_t pud)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
+}
+
static inline int pte_write(pte_t pte)
{
- return pte_flags(pte) & _PAGE_RW;
+ /*
+ * Shadow stack pages are logically writable, but do not have
+ * _PAGE_RW. Check for them separately from _PAGE_RW itself.
+ */
+ return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+ /*
+ * Shadow stack pages are logically writable, but do not have
+ * _PAGE_RW. Check for them separately from _PAGE_RW itself.
+ */
+ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
+}
+
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_RW;
}
static inline int pte_huge(pte_t pte)
@@ -191,6 +258,8 @@ static inline int pte_special(pte_t pte)
static inline u64 protnone_mask(u64 val);
+#define PFN_PTE_SHIFT PAGE_SHIFT
+
static inline unsigned long pte_pfn(pte_t pte)
{
phys_addr_t pfn = pte_val(pte);
@@ -205,6 +274,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
phys_addr_t pfn = pud_val(pud);
@@ -222,15 +292,10 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
-static inline int p4d_large(p4d_t p4d)
-{
- /* No 512 GiB pages yet */
- return 0;
-}
-
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
-static inline int pmd_large(pmd_t pte)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pte)
{
return pmd_flags(pte) & _PAGE_PSE;
}
@@ -238,13 +303,13 @@ static inline int pmd_large(pmd_t pte)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
- return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+ return (pmd_val(pmd) & _PAGE_PSE) == _PAGE_PSE;
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_trans_huge(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+ return (pud_val(pud) & _PAGE_PSE) == _PAGE_PSE;
}
#endif
@@ -254,29 +319,29 @@ static inline int has_transparent_hugepage(void)
return boot_cpu_has(X86_FEATURE_PSE);
}
-#ifdef __HAVE_ARCH_PTE_DEVMAP
-static inline int pmd_devmap(pmd_t pmd)
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+static inline bool pmd_special(pmd_t pmd)
{
- return !!(pmd_val(pmd) & _PAGE_DEVMAP);
+ return pmd_flags(pmd) & _PAGE_SPECIAL;
}
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static inline int pud_devmap(pud_t pud)
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
- return !!(pud_val(pud) & _PAGE_DEVMAP);
+ return pmd_set_flags(pmd, _PAGE_SPECIAL);
}
-#else
-static inline int pud_devmap(pud_t pud)
+#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+static inline bool pud_special(pud_t pud)
{
- return 0;
+ return pud_flags(pud) & _PAGE_SPECIAL;
}
-#endif
-static inline int pgd_devmap(pgd_t pgd)
+static inline pud_t pud_mkspecial(pud_t pud)
{
- return 0;
+ return pud_set_flags(pud, _PAGE_SPECIAL);
}
-#endif
+#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
@@ -293,19 +358,90 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}
-static inline pte_t pte_mkclean(pte_t pte)
+/*
+ * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the
+ * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So
+ * when creating dirty, write-protected memory, a software bit is used:
+ * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the
+ * Dirty bit to SavedDirty, and vice-vesra.
+ *
+ * This shifting is only done if needed. In the case of shifting
+ * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of
+ * shifting SavedDirty->Dirty, the condition is Write=1.
+ */
+static inline pgprotval_t mksaveddirty_shift(pgprotval_t v)
+{
+ pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
+ v &= ~(cond << _PAGE_BIT_DIRTY);
+
+ return v;
+}
+
+static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v)
{
- return pte_clear_flags(pte, _PAGE_DIRTY);
+ pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1;
+
+ v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY;
+ v &= ~(cond << _PAGE_BIT_SAVED_DIRTY);
+
+ return v;
}
-static inline pte_t pte_mkold(pte_t pte)
+static inline pte_t pte_mksaveddirty(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_ACCESSED);
+ pteval_t v = native_pte_val(pte);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pte(v);
+}
+
+static inline pte_t pte_clear_saveddirty(pte_t pte)
+{
+ pteval_t v = native_pte_val(pte);
+
+ v = clear_saveddirty_shift(v);
+ return native_make_pte(v);
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- return pte_clear_flags(pte, _PAGE_RW);
+ pte = pte_clear_flags(pte, _PAGE_RW);
+
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PTE (Write=0,Dirty=1). Move the hardware
+ * dirty value to the software bit, if present.
+ */
+ return pte_mksaveddirty(pte);
+}
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pte_uffd_wp(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_UFFD_WP;
+}
+
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+ return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP));
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_ACCESSED);
}
static inline pte_t pte_mkexec(pte_t pte)
@@ -315,7 +451,16 @@ static inline pte_t pte_mkexec(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+ return pte_mksaveddirty(pte);
+}
+
+static inline pte_t pte_mkwrite_shstk(pte_t pte)
+{
+ pte = pte_clear_flags(pte, _PAGE_RW);
+
+ return pte_set_flags(pte, _PAGE_DIRTY);
}
static inline pte_t pte_mkyoung(pte_t pte)
@@ -323,11 +468,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte_set_flags(pte, _PAGE_ACCESSED);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
return pte_set_flags(pte, _PAGE_RW);
}
+struct vm_area_struct;
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
+#define pte_mkwrite pte_mkwrite
+
static inline pte_t pte_mkhuge(pte_t pte)
{
return pte_set_flags(pte, _PAGE_PSE);
@@ -353,48 +502,75 @@ static inline pte_t pte_mkspecial(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL);
}
-static inline pte_t pte_mkdevmap(pte_t pte)
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
- return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
+ pmdval_t v = native_pmd_val(pmd);
+
+ v = mksaveddirty_shift(v);
+ return native_make_pmd(v);
}
-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+/* See comments above mksaveddirty_shift() */
+static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
{
pmdval_t v = native_pmd_val(pmd);
- return native_make_pmd(v | set);
+ v = clear_saveddirty_shift(v);
+ return native_make_pmd(v);
}
-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- pmdval_t v = native_pmd_val(pmd);
+ pmd = pmd_clear_flags(pmd, _PAGE_RW);
- return native_make_pmd(v & ~clear);
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PMD (RW=0, Dirty=1). Move the hardware
+ * dirty value to the software bit.
+ */
+ return pmd_mksaveddirty(pmd);
}
-static inline pmd_t pmd_mkold(pmd_t pmd)
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pmd_uffd_wp(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_ACCESSED);
+ return pmd_flags(pmd) & _PAGE_UFFD_WP;
}
-static inline pmd_t pmd_mkclean(pmd_t pmd)
+static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_DIRTY);
+ return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP));
}
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
+static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_RW);
+ return pmd_clear_flags(pmd, _PAGE_ACCESSED);
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+ return pmd_mksaveddirty(pmd);
}
-static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_DEVMAP);
+ pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+ return pmd_set_flags(pmd, _PAGE_DIRTY);
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
@@ -407,23 +583,30 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_ACCESSED);
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_RW);
}
-static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+#define pmd_mkwrite pmd_mkwrite
+
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_mksaveddirty(pud_t pud)
{
pudval_t v = native_pud_val(pud);
- return native_make_pud(v | set);
+ v = mksaveddirty_shift(v);
+ return native_make_pud(v);
}
-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+/* See comments above mksaveddirty_shift() */
+static inline pud_t pud_clear_saveddirty(pud_t pud)
{
pudval_t v = native_pud_val(pud);
- return native_make_pud(v & ~clear);
+ v = clear_saveddirty_shift(v);
+ return native_make_pud(v);
}
static inline pud_t pud_mkold(pud_t pud)
@@ -433,22 +616,26 @@ static inline pud_t pud_mkold(pud_t pud)
static inline pud_t pud_mkclean(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_DIRTY);
+ return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
}
static inline pud_t pud_wrprotect(pud_t pud)
{
- return pud_clear_flags(pud, _PAGE_RW);
+ pud = pud_clear_flags(pud, _PAGE_RW);
+
+ /*
+ * Blindly clearing _PAGE_RW might accidentally create
+ * a shadow stack PUD (RW=0, Dirty=1). Move the hardware
+ * dirty value to the software bit.
+ */
+ return pud_mksaveddirty(pud);
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
-}
+ pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
-static inline pud_t pud_mkdevmap(pud_t pud)
-{
- return pud_set_flags(pud, _PAGE_DEVMAP);
+ return pud_mksaveddirty(pud);
}
static inline pud_t pud_mkhuge(pud_t pud)
@@ -463,7 +650,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
static inline pud_t pud_mkwrite(pud_t pud)
{
- return pud_set_flags(pud, _PAGE_RW);
+ pud = pud_set_flags(pud, _PAGE_RW);
+
+ return pud_clear_saveddirty(pud);
}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -548,6 +737,9 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ /* This bit combination is used to mark shadow stacks */
+ WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) ==
+ _PAGE_DIRTY);
pfn ^= protnone_mask(pgprot_val(pgprot));
pfn &= PTE_PFN_MASK;
return __pte(pfn | check_pgprot(pgprot));
@@ -569,16 +761,16 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
return __pud(pfn | check_pgprot(pgprot));
}
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
{
return pfn_pmd(pmd_pfn(pmd),
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
-static inline pud_t pud_mknotpresent(pud_t pud)
+static inline pud_t pud_mkinvalid(pud_t pud)
{
return pfn_pud(pud_pfn(pud),
- __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
@@ -586,6 +778,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pteval_t val = pte_val(pte), oldval = val;
+ pte_t pte_result;
/*
* Chop off the NX bit (if present), and add the NX portion of
@@ -594,25 +787,82 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
- return __pte(val);
+
+ pte_result = __pte(val);
+
+ /*
+ * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
+ * 1. Marking Write=0 PTEs Dirty=1
+ * 2. Marking Dirty=1 PTEs Write=0
+ *
+ * The first case cannot happen because the _PAGE_CHG_MASK will filter
+ * out any Dirty bit passed in newprot. Handle the second case by
+ * going through the mksaveddirty exercise. Only do this if the old
+ * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ */
+ if (oldval & _PAGE_RW)
+ pte_result = pte_mksaveddirty(pte_result);
+ else
+ pte_result = pte_clear_saveddirty(pte_result);
+
+ return pte_result;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmdval_t val = pmd_val(pmd), oldval = val;
+ pmd_t pmd_result;
- val &= _HPAGE_CHG_MASK;
+ val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
- return __pmd(val);
+
+ pmd_result = __pmd(val);
+
+ /*
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pmd_result = pmd_mksaveddirty(pmd_result);
+ else
+ pmd_result = pmd_clear_saveddirty(pmd_result);
+
+ return pmd_result;
+}
+
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
}
-/* mprotect needs to preserve PAT bits when updating vm_page_prot */
+/*
+ * mprotect needs to preserve PAT and encryption bits when updating
+ * vm_page_prot
+ */
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
- pgprotval_t addbits = pgprot_val(newprot);
+ pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK;
return __pgprot(preservebits | addbits);
}
@@ -623,11 +873,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
- return canon_pgprot(prot);
-}
-
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
enum page_cache_mode pcm,
enum page_cache_mode new_pcm)
@@ -663,7 +908,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
pmd_t *populate_extra_pmd(unsigned long vaddr);
pte_t *populate_extra_pte(unsigned long vaddr);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);
/*
@@ -677,14 +922,14 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
return pgd;
return __pti_set_user_pgtbl(pgdp, pgd);
}
-#else /* CONFIG_PAGE_TABLE_ISOLATION */
+#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_X86_32
@@ -693,7 +938,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
# include <asm/pgtable_64.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
@@ -710,17 +955,18 @@ static inline int pte_same(pte_t a, pte_t b)
return a.pte == b.pte;
}
-static inline int pte_present(pte_t a)
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
- return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+ if (__pte_needs_invert(pte_val(pte)))
+ return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT));
+ return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
}
+#define pte_advance_pfn pte_advance_pfn
-#ifdef __HAVE_ARCH_PTE_DEVMAP
-static inline int pte_devmap(pte_t a)
+static inline int pte_present(pte_t a)
{
- return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}
-#endif
#define pte_accessible pte_accessible
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
@@ -729,7 +975,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
return true;
if ((pte_flags(a) & _PAGE_PROTNONE) &&
- mm_tlb_flush_pending(mm))
+ atomic_read(&mm->tlb_flush_pending))
return true;
return false;
@@ -749,7 +995,7 @@ static inline int pmd_present(pmd_t pmd)
#ifdef CONFIG_NUMA_BALANCING
/*
* These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h
+ * comment in include/linux/pgtable.h
*/
static inline int pte_protnone(pte_t pte)
{
@@ -783,45 +1029,10 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
*/
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
-/*
- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
- *
- * this macro returns the index of the entry in the pmd page which would
- * control the given virtual address
- */
-static inline unsigned long pmd_index(unsigned long address)
-{
- return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
-}
-
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * (Currently stuck as a macro because of indirect forward reference
- * to linux/mm.h:page_to_nid())
- */
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-
-/*
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
- *
- * this function returns the index of the entry in the pte page which would
- * control the given virtual address
- */
-static inline unsigned long pte_index(unsigned long address)
-{
- return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
-}
-
-static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
-{
- return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
-}
-
static inline int pmd_bad(pmd_t pmd)
{
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
+ (_KERNPG_TABLE & ~_PAGE_ACCESSED);
}
static inline unsigned long pages_to_mb(unsigned long npg)
@@ -840,9 +1051,9 @@ static inline int pud_present(pud_t pud)
return pud_flags(pud) & _PAGE_PRESENT;
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
+ return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
@@ -851,34 +1062,18 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
*/
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
-/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
-{
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
-}
-
-static inline int pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pud_val(pud) & _PAGE_PSE;
}
static inline int pud_bad(pud_t pud)
{
return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
}
-#else
-static inline int pud_large(pud_t pud)
-{
- return 0;
-}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
-static inline unsigned long pud_index(unsigned long address)
-{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
#if CONFIG_PGTABLE_LEVELS > 3
static inline int p4d_none(p4d_t p4d)
{
@@ -890,9 +1085,9 @@ static inline int p4d_present(p4d_t p4d)
return p4d_flags(p4d) & _PAGE_PRESENT;
}
-static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
{
- return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+ return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
}
/*
@@ -901,17 +1096,11 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
*/
#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
-/* Find an entry in the third-level page table.. */
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
-{
- return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
-}
-
static inline int p4d_bad(p4d_t p4d)
{
unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (p4d_flags(p4d) & ~ignore_flags) != 0;
@@ -957,7 +1146,7 @@ static inline int pgd_bad(pgd_t pgd)
if (!pgtable_l5_enabled())
return 0;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
@@ -977,57 +1166,22 @@ static inline int pgd_none(pgd_t pgd)
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
-#endif /* __ASSEMBLY__ */
-
-/*
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
- *
- * this macro returns the index of the entry in the pgd page which would
- * control the given virtual address
- */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-
-/*
- * pgd_offset() returns a (pgd_t *)
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
- */
-#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
-/*
- * a shortcut to get a pgd_t in a given mm
- */
-#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
-/*
- * a shortcut which implies the use of the kernel's pgd, instead
- * of a process's
- */
-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
-
+#endif /* __ASSEMBLER__ */
#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
-extern void memblock_find_dma_reserve(void);
+void __init poking_init(void);
+unsigned long init_memory_mapping(unsigned long start,
+ unsigned long end, pgprot_t prot);
#ifdef CONFIG_X86_64
-/* Realmode trampoline initialization. */
extern pgd_t trampoline_pgd_entry;
-static inline void __meminit init_trampoline_default(void)
-{
- /* Default trampoline pgd value */
- trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
-}
-# ifdef CONFIG_RANDOMIZE_MEMORY
-void __meminit init_trampoline(void);
-# else
-# define init_trampoline init_trampoline_default
-# endif
-#else
-static inline void init_trampoline(void) { }
#endif
/* local pte updates need not use xchg for locking */
@@ -1056,21 +1210,17 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
return res;
}
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep , pte_t pte)
-{
- native_set_pte(ptep, pte);
-}
-
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- native_set_pmd(pmdp, pmd);
+ page_table_check_pmd_set(mm, pmdp, pmd);
+ set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
+ page_table_check_pud_set(mm, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1101,6 +1251,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
@@ -1116,6 +1267,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
@@ -1126,12 +1278,20 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
-}
+ /*
+ * Avoid accidentally creating shadow stack PTEs
+ * (Write=0,Dirty=1). Use cmpxchg() to prevent races with
+ * the hardware setting Dirty=1.
+ */
+ pte_t old_pte, new_pte;
-#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+ old_pte = READ_ONCE(*ptep);
+ do {
+ new_pte = pte_wrprotect(old_pte);
+ } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte));
+}
-#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
@@ -1152,37 +1312,43 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-#define pmd_write pmd_write
-static inline int pmd_write(pmd_t pmd)
-{
- return pmd_flags(pmd) & _PAGE_RW;
-}
-
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- return native_pmdp_get_and_clear(pmdp);
+ pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+ page_table_check_pmd_clear(mm, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- return native_pudp_get_and_clear(pudp);
+ pud_t pud = native_pudp_get_and_clear(pudp);
+
+ page_table_check_pud_clear(mm, pud);
+
+ return pud;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
-}
+ /*
+ * Avoid accidentally creating shadow stack PTEs
+ * (Write=0,Dirty=1). Use cmpxchg() to prevent races with
+ * the hardware setting Dirty=1.
+ */
+ pmd_t old_pmd, new_pmd;
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
- return pud_flags(pud) & _PAGE_RW;
+ old_pmd = READ_ONCE(*pmdp);
+ do {
+ new_pmd = pmd_wrprotect(old_pmd);
+ } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd));
}
#ifndef pmdp_establish
@@ -1190,6 +1356,7 @@ static inline int pud_write(pud_t pud)
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
@@ -1199,6 +1366,29 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
}
#endif
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
+#define __HAVE_ARCH_PMDP_INVALIDATE_AD
+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1213,11 +1403,9 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
-static inline int pgd_large(pgd_t pgd) { return 0; }
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
- * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages
* (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
* the user one is in the last 4k. To switch between them, you
* just need to flip the 12th bit in their addresses.
@@ -1262,12 +1450,12 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
- * dst - pointer to pgd range anwhere on a pgd page
+ * dst - pointer to pgd range anywhere on a pgd page
* src - ""
* count - the number of pgds to copy.
*
@@ -1277,7 +1465,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/* Clone the user space pgd as well */
@@ -1308,6 +1496,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
}
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+}
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
@@ -1316,6 +1509,20 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
}
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE);
+}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
@@ -1351,26 +1558,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#endif
#endif
-#define PKRU_AD_BIT 0x1
-#define PKRU_WD_BIT 0x2
-#define PKRU_BITS_PER_PKEY 2
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
+}
+
+static inline int pte_swp_uffd_wp(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
+}
-static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
- int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
- return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
+ return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
}
-static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
+static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
{
- int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
- /*
- * Access-disable disables writes too so we need to check
- * both bits here.
- */
- return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
+ return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
}
+static inline int pmd_swp_uffd_wp(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
+}
+
+static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
static inline u16 pte_flags_pkey(unsigned long pte_flags)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -1402,6 +1621,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write)
{
unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+ /*
+ * Write=0,Dirty=1 PTEs are shadow stack, which the kernel
+ * shouldn't generally allow access to, but since they
+ * are already Write=0, the below logic covers both cases.
+ */
if (write)
need_pte_bits |= _PAGE_RW;
@@ -1437,7 +1661,85 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}
-#include <asm-generic/pgtable.h>
-#endif /* __ASSEMBLY__ */
+#define arch_check_zapped_pte arch_check_zapped_pte
+void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
+
+#define arch_check_zapped_pmd arch_check_zapped_pmd
+void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
+#ifdef CONFIG_XEN_PV
+#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+#endif
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
+}
+#endif
+
+#ifdef CONFIG_X86_SGX
+int arch_memory_failure(unsigned long pfn, int flags);
+#define arch_memory_failure arch_memory_failure
+
+bool arch_is_platform_page(u64 paddr);
+#define arch_is_platform_page arch_is_platform_page
+#endif
+
+/*
+ * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
+ * TLB flush will be required as a result of the "set". For example, use
+ * in scenarios where it is known ahead of time that the routine is
+ * setting non-present entries, or re-setting an existing entry to the
+ * same value. Otherwise, use the typical "set" helpers and flush the
+ * TLB.
+ */
+#define set_pte_safe(ptep, pte) \
+({ \
+ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
+ set_pte(ptep, pte); \
+})
+
+#define set_pmd_safe(pmdp, pmd) \
+({ \
+ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
+ set_pmd(pmdp, pmd); \
+})
+
+#define set_pud_safe(pudp, pud) \
+({ \
+ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
+ set_pud(pudp, pud); \
+})
+
+#define set_p4d_safe(p4dp, p4d) \
+({ \
+ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
+ set_p4d(p4dp, p4d); \
+})
+
+#define set_pgd_safe(pgdp, pgd) \
+({ \
+ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
+ set_pgd(pgdp, pgd); \
+})
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 4fe9e7fc74d3..b612cc57a4d3 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -13,7 +13,7 @@
* This file contains the functions and defines necessary to modify and use
* the i386 page table tree.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
@@ -29,68 +29,32 @@ extern pgd_t swapper_pg_dir[1024];
extern pgd_t initial_page_table[1024];
extern pmd_t initial_pg_pmd[];
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
void paging_init(void);
void sync_initial_page_table(void);
-/*
- * Define this if things work differently on an i386 and an i486:
- * it will (on an i486) warn about kernel memory accesses that are
- * done without a 'access_ok( ..)'
- */
-#undef TEST_ACCESS_OK
-
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
-#if defined(CONFIG_HIGHPTE)
-#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \
- pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte))
-#else
-#define pte_offset_map(dir, address) \
- ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-#define pte_unmap(pte) do { } while (0)
-#endif
-
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
- __flush_tlb_one_kernel((vaddr)); \
+ flush_tlb_one_kernel((vaddr)); \
} while (0)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
- * kern_addr_valid() is (1) for FLATMEM and (0) for
- * SPARSEMEM and DISCONTIGMEM
- */
-#ifdef CONFIG_FLATMEM
-#define kern_addr_valid(addr) (1)
-#else
-#define kern_addr_valid(kaddr) (0)
-#endif
-
-/*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
+ * This is used to calculate the .brk reservation for initial pagetables.
+ * Enough space is reserved to allocate pagetables sufficient to cover all
+ * of LOWMEM_PAGES, which is an upper bound on the size of the direct map of
+ * lowmem.
*
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
+ * With PAE paging (PTRS_PER_PMD > 1), we allocate PTRS_PER_PGD == 4 pages for
+ * the PMD's in addition to the pages required for the last level pagetables.
*/
#if PTRS_PER_PMD > 1
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
@@ -106,6 +70,6 @@ do { \
* with only a host target support using a 32-bit type for internal
* representation.
*/
-#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
#endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..921148b42967
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLER__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE \
+ ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
+#define MODULES_LEN (MODULES_VADDR - MODULES_END)
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index b0bc0fff5f1f..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,54 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
- * to avoid include recursion hell
- */
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
-
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
- & PMD_MASK)
-
-#define LDT_BASE_ADDR \
- ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE \
- ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR VMALLOC_START
-#define MODULES_END VMALLOC_END
-#define MODULES_LEN (MODULES_VADDR - MODULES_END)
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 9c85b54bf03c..f06e5d6a2747 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -5,7 +5,7 @@
#include <linux/const.h>
#include <asm/pgtable_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This file contains the functions and defines necessary to modify and use
@@ -41,11 +41,9 @@ static inline void sync_initial_page_table(void) { }
pr_err("%s:%d: bad pud %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pud_val(e))
-#if CONFIG_PGTABLE_LEVELS >= 5
#define p4d_ERROR(e) \
pr_err("%s:%d: bad p4d %p(%016lx)\n", \
__FILE__, __LINE__, &(e), p4d_val(e))
-#endif
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
@@ -53,6 +51,12 @@ static inline void sync_initial_page_table(void) { }
struct mm_struct;
+#define mm_p4d_folded mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ return !pgtable_l5_enabled();
+}
+
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
@@ -137,7 +141,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
- if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ if (pgtable_l5_enabled() ||
+ !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) {
WRITE_ONCE(*p4dp, p4d);
return;
}
@@ -162,34 +167,25 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
-
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*/
-/*
- * Level 4 access.
- */
-#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
-
-/* PUD - Level3 access */
+/* PGD - Level 4 access */
-/* PMD - Level 2 access */
+/* PUD - Level 3 access */
-/* PTE - Level 1 access. */
+/* PMD - Level 2 access */
-/* x86-64 always has all page tables mapped. */
-#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_unmap(pte) ((void)(pte))/* NOP */
+/* PTE - Level 1 access */
/*
* Encode and de-code a swap entry
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@@ -197,9 +193,17 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
* erratum where they can be incorrectly set by hardware on
* non-present PTEs.
*
+ * SD Bits 1-4 are not used in non-present format and available for
+ * special use described below:
+ *
* SD (1) in swp entry is used to store soft dirty bit, which helps us
* remember soft dirty over page migration
*
+ * F (2) in swp entry is used to record when a pagetable is
+ * writeprotected by userfaultfd WP support.
+ *
+ * E (3) in swp entry is used to remember PG_anon_exclusive.
+ *
* Bit 7 in swp entry should be 0 because pmd_present checks not only P,
* but also L and G.
*
@@ -232,18 +236,14 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
-#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
-#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
+#define __swp_entry_to_pte(x) (__pte((x).val))
+#define __swp_entry_to_pmd(x) (__pmd((x).val))
-extern int kern_addr_valid(unsigned long addr);
extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define pgtable_cache_init() do { } while (0)
-#define check_pgt_cache() do { } while (0)
-
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
@@ -259,15 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
- int write)
+static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
- unsigned long len, end;
-
- len = (unsigned long)nr_pages << PAGE_SHIFT;
- end = start + len;
- if (end < start)
- return false;
if (end >> __VIRTUAL_MASK_SHIFT)
return false;
return true;
@@ -275,5 +268,26 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
#include <asm/pgtable-invert.h>
-#endif /* !__ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
+
+#define l4_index(x) (((x) >> 39) & 511)
+#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
+
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 88bca456da99..7eb61ef6a185 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -4,7 +4,7 @@
#include <asm/sparsemem.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/kaslr.h>
@@ -19,8 +19,8 @@ typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
+typedef struct { pmdval_t pmd; } pmd_t;
-#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
#ifdef USE_EARLY_PGTABLE_L5
@@ -36,18 +36,13 @@ static inline bool pgtable_l5_enabled(void)
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
-#else
-#define pgtable_l5_enabled() 0
-#endif /* CONFIG_X86_5LEVEL */
+#define ARCH_PAGE_TABLE_SYNC_MASK \
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
-#endif /* !__ASSEMBLY__ */
-
-#define SHARED_KERNEL_PMD 0
-
-#ifdef CONFIG_X86_5LEVEL
+#endif /* !__ASSEMBLER__ */
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
@@ -66,17 +61,6 @@ extern unsigned int ptrs_per_p4d;
#define MAX_POSSIBLE_PHYSMEM_BITS 52
-#else /* CONFIG_X86_5LEVEL */
-
-/*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
-#define MAX_PTRS_PER_P4D 1
-
-#endif /* CONFIG_X86_5LEVEL */
-
/*
* 3rd level page
*/
@@ -103,7 +87,7 @@ extern unsigned int ptrs_per_p4d;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/*
- * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ * See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map.
*
* Be very careful vs. KASLR when changing anything here. The KASLR address
* range must not overlap with anything except the KASAN shadow area, which
@@ -129,21 +113,68 @@ extern unsigned int ptrs_per_p4d;
#define __VMEMMAP_BASE_L4 0xffffea0000000000UL
#define __VMEMMAP_BASE_L5 0xffd4000000000000UL
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
+
+#ifdef CONFIG_RANDOMIZE_MEMORY
+# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end
+#endif
+
+/*
+ * End of the region for which vmalloc page tables are pre-allocated.
+ * For non-KMSAN builds, this is the same as VMALLOC_END.
+ * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than
+ * VMALLOC_START..VMALLOC_END (see below).
+ */
+#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+
+#ifndef CONFIG_KMSAN
+#define VMALLOC_END VMEMORY_END
#else
-# define VMALLOC_START __VMALLOC_BASE_L4
-# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
-# define VMEMMAP_START __VMEMMAP_BASE_L4
-#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+/*
+ * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4
+ * are used to keep the metadata for virtual pages. The memory formerly
+ * belonging to vmalloc area is now laid out as follows:
+ *
+ * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area
+ * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to
+ * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow
+ * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to
+ * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins
+ * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START
+ * - shadow for modules,
+ * KMSAN_MODULES_ORIGIN_START to
+ * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules.
+ */
+#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
-#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+/*
+ * vmalloc metadata addresses are calculated by adding shadow/origin offsets
+ * to vmalloc address.
+ */
+#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE
+#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1)
+
+#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET)
+#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET)
+
+/*
+ * The shadow/origin for modules are placed one by one in the last 1/4 of
+ * vmalloc space.
+ */
+#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1)
+#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#endif /* CONFIG_KMSAN */
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
-#define MODULES_END _AC(0xffffffffff000000, UL)
+#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
+# define MODULES_END _AC(0xffffffffff000000, UL)
+#else
+# define MODULES_END _AC(0xfffffffffe000000, UL)
+#endif
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
@@ -159,4 +190,9 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
+/*
+ * We borrow bit 3 to remember PG_anon_exclusive.
+ */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..4f056fb88174
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \
+ (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \
+ CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE
+#endif
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d6ff0bbdb394..2ec250ba467e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -7,8 +7,6 @@
#include <asm/page_types.h>
-#define FIRST_USER_ADDRESS 0UL
-
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
@@ -23,7 +21,8 @@
#define _PAGE_BIT_SOFTW2 10 /* " */
#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
-#define _PAGE_BIT_SOFTW4 58 /* available for programmer */
+#define _PAGE_BIT_SOFTW4 57 /* available for programmer */
+#define _PAGE_BIT_SOFTW5 58 /* available for programmer */
#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */
#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */
#define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */
@@ -32,8 +31,18 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
-#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
+#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
+
+#ifdef CONFIG_X86_64
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
+#else
+/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
+#endif
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
@@ -55,6 +64,7 @@
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K)
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0)
#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1)
@@ -100,21 +110,37 @@
#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
+#define _PAGE_SWP_UFFD_WP _PAGE_USER
+#else
+#define _PAGE_UFFD_WP (_AT(pteval_t, 0))
+#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0))
+#endif
+
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
-#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
-#define __HAVE_ARCH_PTE_DEVMAP
+#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4)
#else
#define _PAGE_NX (_AT(pteval_t, 0))
-#define _PAGE_DEVMAP (_AT(pteval_t, 0))
+#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
#endif
+/*
+ * The hardware requires shadow stack to be Write=0,Dirty=1. However,
+ * there are valid cases where the kernel might create read-only PTEs that
+ * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In
+ * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit,
+ * to avoid creating a wrong "shadow stack" PTEs. Such PTEs have
+ * (Write=0,SavedDirty=1,Dirty=0) set.
+ */
+#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY)
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY)
+
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
/*
* Set of bits not changed in pte_modify. The pte's
@@ -122,10 +148,12 @@
* instance, and is *not* included in this mask since
* pte_modify() does modify it.
*/
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+ _PAGE_SPECIAL | _PAGE_ACCESSED | \
+ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
+ _PAGE_CC | _PAGE_UFFD_WP)
+#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
+#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
/*
* The cache modes defined here are used to translate between pure SW usage
@@ -135,103 +163,102 @@
* to have the WB mode at index 0 (all bits clear). This is the default
* right now and likely would break too much if changed.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
enum page_cache_mode {
- _PAGE_CACHE_MODE_WB = 0,
- _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
- _PAGE_CACHE_MODE_UC = 3,
- _PAGE_CACHE_MODE_WT = 4,
- _PAGE_CACHE_MODE_WP = 5,
- _PAGE_CACHE_MODE_NUM = 8
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+
+ _PAGE_CACHE_MODE_NUM = 8
};
#endif
-#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
+#define _PAGE_CC (_AT(pteval_t, cc_get_mask()))
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+
+#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
+#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE)
+
#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
-
-#ifndef __ASSEMBLY__
-
-#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
-
-#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
-
-#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
-
-#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
-
-#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
-
-#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
-
-#endif /* __ASSEMBLY__ */
-
-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pg(x) __pgprot(x)
+
+#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
+#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+
+/*
+ * Page tables needs to have Write=1 in order for any lower PTEs to be
+ * writable. This includes shadow stack memory (Write=0, Dirty=1)
+ */
+#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
+#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
+#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
+#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
+
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G)
+#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G)
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G)
+#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO __PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
+
+
+#ifndef __ASSEMBLER__
+
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
+
+#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
+
+#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
+#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
+#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
+#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
+#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
+
+#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
+
+#endif /* __ASSEMBLER__ */
/*
* early identity mapping pte attrib macros.
@@ -250,7 +277,7 @@ enum page_cache_mode {
# include <asm/pgtable_64_types.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -267,6 +294,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
+static inline pgprot_t pgprot_nx(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) | _PAGE_NX);
+}
+#define pgprot_nx pgprot_nx
+
#ifdef CONFIG_X86_PAE
/*
@@ -358,11 +391,9 @@ static inline pudval_t native_pud_val(pud_t pud)
#endif
#if CONFIG_PGTABLE_LEVELS > 2
-typedef struct { pmdval_t pmd; } pmd_t;
-
static inline pmd_t native_make_pmd(pmdval_t val)
{
- return (pmd_t) { val };
+ return (pmd_t) { .pmd = val };
}
static inline pmdval_t native_pmd_val(pmd_t pmd)
@@ -450,12 +481,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
-extern uint8_t __pte2cachemode_tbl[8];
-
#define __pte2cm_idx(cb) \
((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \
(((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \
@@ -465,43 +490,26 @@ extern uint8_t __pte2cachemode_tbl[8];
(((i) & 2) << (_PAGE_BIT_PCD - 1)) | \
(((i) & 1) << _PAGE_BIT_PWT))
-static inline unsigned long cachemode2protval(enum page_cache_mode pcm)
+unsigned long cachemode2protval(enum page_cache_mode pcm);
+
+static inline pgprotval_t protval_4k_2_large(pgprotval_t val)
{
- if (likely(pcm == 0))
- return 0;
- return __cachemode2pte_tbl[pcm];
+ return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
}
-static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
+static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
{
- return __pgprot(cachemode2protval(pcm));
+ return __pgprot(protval_4k_2_large(pgprot_val(pgprot)));
}
-static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+static inline pgprotval_t protval_large_2_4k(pgprotval_t val)
{
- unsigned long masked;
-
- masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
- if (likely(masked == 0))
- return 0;
- return __pte2cachemode_tbl[__pte2cm_idx(masked)];
-}
-static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
-{
- pgprotval_t val = pgprot_val(pgprot);
- pgprot_t new;
-
- pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
- ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
- return new;
+ return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT_LARGE) >>
+ (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
}
static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
{
- pgprotval_t val = pgprot_val(pgprot);
- pgprot_t new;
-
- pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
- ((val & _PAGE_PAT_LARGE) >>
- (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
- return new;
+ return __pgprot(protval_large_2_4k(pgprot_val(pgprot)));
}
@@ -509,8 +517,6 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern pteval_t __default_kernel_pte_mask;
-extern void set_nx(void);
-extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
@@ -535,15 +541,13 @@ extern void native_pagetable_init(void);
#define native_pagetable_init paging_init
#endif
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
+ PG_LEVEL_256T,
PG_LEVEL_NUM
};
@@ -562,6 +566,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
@@ -570,6 +576,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
unsigned long page_flags);
extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
unsigned long numpages);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 19b137f1b3be..2e6c04d8a45b 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,16 +2,19 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
-#define ARCH_DEFAULT_PKEY 0
-
-#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
+/*
+ * If more than 16 keys are ever supported, a thorough audit
+ * will be necessary to ensure that the types that store key
+ * numbers and masks have sufficient capacity.
+ */
+#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
static inline bool arch_pkeys_enabled(void)
{
- return boot_cpu_has(X86_FEATURE_OSPKE);
+ return cpu_feature_enabled(X86_FEATURE_OSPKE);
}
/*
@@ -21,7 +24,7 @@ static inline bool arch_pkeys_enabled(void)
extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
- if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return ARCH_DEFAULT_PKEY;
return __execute_only_pkey(mm);
@@ -32,15 +35,12 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
int prot, int pkey)
{
- if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return 0;
return __arch_override_mprotect_pkey(vma, prot, pkey);
}
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
@@ -115,12 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
return 0;
}
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-extern void copy_init_pkru_to_fpregs(void);
-
static inline int vma_pkey(struct vm_area_struct *vma)
{
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h
new file mode 100644
index 000000000000..74f0a2d34ffd
--- /dev/null
+++ b/arch/x86/include/asm/pkru.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_PKRU_H
+#define _ASM_X86_PKRU_H
+
+#include <asm/cpufeature.h>
+
+#define PKRU_AD_BIT 0x1u
+#define PKRU_WD_BIT 0x2u
+#define PKRU_BITS_PER_PKEY 2
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+extern u32 init_pkru_value;
+#define pkru_get_init_value() READ_ONCE(init_pkru_value)
+#else
+#define init_pkru_value 0
+#define pkru_get_init_value() 0
+#endif
+
+static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
+{
+ int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
+ return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
+}
+
+static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
+{
+ int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
+ /*
+ * Access-disable disables writes too so we need to check
+ * both bits here.
+ */
+ return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
+}
+
+static inline u32 read_pkru(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return rdpkru();
+ return 0;
+}
+
+static inline void write_pkru(u32 pkru)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
+ /*
+ * WRPKRU is relatively expensive compared to RDPKRU.
+ * Avoid WRPKRU when it would not change the value.
+ */
+ if (pkru != rdpkru())
+ wrpkru(pkru);
+}
+
+static inline void pkru_write_default(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
+
+ wrpkru(pkru_get_init_value());
+}
+
+#endif
diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h
index 059823bb8af7..40f92270515b 100644
--- a/arch/x86/include/asm/platform_sst_audio.h
+++ b/arch/x86/include/asm/platform_sst_audio.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* platform_sst_audio.h: sst audio platform data header file
*
@@ -5,17 +6,10 @@
* Author: Jeeja KP <jeeja.kp@intel.com>
* Omair Mohammed Abdullah <omair.m.abdullah@intel.com>
* Vinod Koul ,vinod.koul@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _PLATFORM_SST_AUDIO_H_
#define _PLATFORM_SST_AUDIO_H_
-#include <linux/sfi.h>
-
#define MAX_NUM_STREAMS_MRFLD 25
#define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD
diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
new file mode 100644
index 000000000000..a5f761fbf45b
--- /dev/null
+++ b/arch/x86/include/asm/posted_intr.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_POSTED_INTR_H
+#define _X86_POSTED_INTR_H
+
+#include <asm/cmpxchg.h>
+#include <asm/rwonce.h>
+#include <asm/irq_vectors.h>
+
+#include <linux/bitmap.h>
+
+#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
+#define PID_TABLE_ENTRY_VALID 1
+
+#define NR_PIR_VECTORS 256
+#define NR_PIR_WORDS (NR_PIR_VECTORS / BITS_PER_LONG)
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+ unsigned long pir[NR_PIR_WORDS]; /* Posted interrupt requested */
+ union {
+ struct {
+ u16 notifications; /* Suppress and outstanding bits */
+ u8 nv;
+ u8 rsvd_2;
+ u32 ndst;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
+} __aligned(64);
+
+/*
+ * De-multiplexing posted interrupts is on the performance path, the code
+ * below is written to optimize the cache performance based on the following
+ * considerations:
+ * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently
+ * accessed by both CPU and IOMMU.
+ * 2.During software processing of posted interrupts, the CPU needs to do
+ * natural width read and xchg for checking and clearing posted interrupt
+ * request (PIR), a 256 bit field within the PID.
+ * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache
+ * line when posting interrupts and setting control bits.
+ * 4.The CPU can access the cache line a magnitude faster than the IOMMU.
+ * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID
+ * cache line. The cache line states after each operation are as follows,
+ * assuming a 64-bit kernel:
+ * CPU IOMMU PID Cache line state
+ * ---------------------------------------------------------------
+ *...read64 exclusive
+ *...lock xchg64 modified
+ *... post/atomic swap invalid
+ *...-------------------------------------------------------------
+ *
+ * To reduce L1 data cache miss, it is important to avoid contention with
+ * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used
+ * when processing posted interrupts in software, e.g. to dispatch interrupt
+ * handlers for posted MSIs, or to move interrupts from the PIR to the vIRR
+ * in KVM.
+ *
+ * In addition, the code is trying to keep the cache line state consistent
+ * as much as possible. e.g. when making a copy and clearing the PIR
+ * (assuming non-zero PIR bits are present in the entire PIR), it does:
+ * read, read, read, read, xchg, xchg, xchg, xchg
+ * instead of:
+ * read, xchg, read, xchg, read, xchg, read, xchg
+ */
+static __always_inline bool pi_harvest_pir(unsigned long *pir,
+ unsigned long *pir_vals)
+{
+ unsigned long pending = 0;
+ int i;
+
+ for (i = 0; i < NR_PIR_WORDS; i++) {
+ pir_vals[i] = READ_ONCE(pir[i]);
+ pending |= pir_vals[i];
+ }
+
+ if (!pending)
+ return false;
+
+ for (i = 0; i < NR_PIR_WORDS; i++) {
+ if (!pir_vals[i])
+ continue;
+
+ pir_vals[i] = arch_xchg(&pir[i], 0);
+ }
+
+ return true;
+}
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(vector, pi_desc->pir);
+}
+
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty(pi_desc->pir, NR_VECTORS);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_on(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+/* Non-atomic helpers */
+static inline void __pi_set_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications |= BIT(POSTED_INTR_SN);
+}
+
+static inline void __pi_clear_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
+}
+
+#ifdef CONFIG_X86_POSTED_MSI
+/*
+ * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's
+ * own interrupts. Here we do not distinguish them since those vector bits in
+ * PIR will always be zero.
+ */
+static inline bool pi_pending_this_cpu(unsigned int vector)
+{
+ struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc);
+
+ if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
+ return false;
+
+ return test_bit(vector, pid->pir);
+}
+
+extern void intel_posted_msi_init(void);
+#else
+static inline bool pi_pending_this_cpu(unsigned int vector) { return false; }
+
+static inline void intel_posted_msi_init(void) {};
+#endif /* X86_POSTED_MSI */
+
+#endif /* _X86_POSTED_INTR_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..578441db09f0 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -4,9 +4,10 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
-#include <linux/thread_info.h>
-DECLARE_PER_CPU(int, __preempt_count);
+#include <linux/static_call_types.h>
+
+DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count);
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@@ -30,11 +31,11 @@ static __always_inline void preempt_count_set(int pc)
{
int old, new;
+ old = raw_cpu_read_4(__preempt_count);
do {
- old = raw_cpu_read_4(__preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
- } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
+ } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new));
}
/*
@@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+ per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
@@ -91,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e,
+ __percpu_arg([var]));
}
/*
@@ -102,17 +104,48 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
-#ifdef CONFIG_PREEMPT
- extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
- asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
+#ifdef CONFIG_PREEMPTION
+
+extern asmlinkage void preempt_schedule(void);
+extern asmlinkage void preempt_schedule_thunk(void);
+
+#define preempt_schedule_dynamic_enabled preempt_schedule_thunk
+#define preempt_schedule_dynamic_disabled NULL
+
+extern asmlinkage void preempt_schedule_notrace(void);
+extern asmlinkage void preempt_schedule_notrace_thunk(void);
+
+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk
+#define preempt_schedule_notrace_dynamic_disabled NULL
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
+
+#define __preempt_schedule() \
+do { \
+ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \
+ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
+} while (0)
+
+DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
+
+#define __preempt_schedule_notrace() \
+do { \
+ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \
+ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \
+} while (0)
+
+#else /* PREEMPT_DYNAMIC */
+
+#define __preempt_schedule() \
+ asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT);
+
+#define __preempt_schedule_notrace() \
+ asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT);
- extern asmlinkage void preempt_schedule(void);
- extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
- asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+#endif /* PREEMPT_DYNAMIC */
- extern asmlinkage void preempt_schedule_notrace(void);
-#endif
+#endif /* PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h
index aaedd73ea2c6..efe3e46e454b 100644
--- a/arch/x86/include/asm/processor-cyrix.h
+++ b/arch/x86/include/asm/processor-cyrix.h
@@ -3,37 +3,16 @@
* NSC/Cyrix CPU indexed register access. Must be inlined instead of
* macros to ensure correct access ordering
* Access order is always 0x22 (=offset), 0x23 (=value)
- *
- * When using the old macros a line like
- * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
- * gets expanded to:
- * do {
- * outb((CX86_CCR2), 0x22);
- * outb((({
- * outb((CX86_CCR2), 0x22);
- * inb(0x23);
- * }) | 0x88), 0x23);
- * } while (0);
- *
- * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23).
*/
+#include <asm/pc-conf-reg.h>
+
static inline u8 getCx86(u8 reg)
{
- outb(reg, 0x22);
- return inb(0x23);
+ return pc_conf_get(reg);
}
static inline void setCx86(u8 reg, u8 data)
{
- outb(reg, 0x22);
- outb(data, 0x23);
+ pc_conf_set(reg, data);
}
-
-#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); })
-
-#define setCx86_old(reg, data) do { \
- outb((reg), 0x22); \
- outb((data), 0x23); \
-} while (0)
-
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 02c2cbda4a74..e5f204b9b33d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -28,6 +28,8 @@
* On systems with SME, one bit (in a variable position!) is stolen to indicate
* that the top-level paging structure is encrypted.
*
+ * On systemms with LAM, bits 61 and 62 are used to indicate LAM mode.
+ *
* All of the remaining bits indicate the physical address of the top-level
* paging structure.
*
@@ -35,7 +37,7 @@
*/
#ifdef CONFIG_X86_64
/* Mask off the address space ID and SME encryption bits. */
-#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)
@@ -49,7 +51,7 @@
#define CR3_NOFLUSH 0
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c864..a24c7805acdb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -15,15 +16,18 @@ struct vm86;
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeatures.h>
+#include <asm/cpuid/api.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
-#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/vmxfeatures.h>
+#include <asm/vdso/processor.h>
+#include <asm/shstk.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -56,86 +60,135 @@ struct vm86;
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
-enum tlb_infos {
- ENTRIES,
- NR_INFO
-};
-
-extern u16 __read_mostly tlb_lli_4k[NR_INFO];
-extern u16 __read_mostly tlb_lli_2m[NR_INFO];
-extern u16 __read_mostly tlb_lli_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4k[NR_INFO];
-extern u16 __read_mostly tlb_lld_2m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+extern u16 __read_mostly tlb_lli_4k;
+extern u16 __read_mostly tlb_lli_2m;
+extern u16 __read_mostly tlb_lli_4m;
+extern u16 __read_mostly tlb_lld_4k;
+extern u16 __read_mostly tlb_lld_2m;
+extern u16 __read_mostly tlb_lld_4m;
+extern u16 __read_mostly tlb_lld_1g;
/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head_32.S, so think twice
- * before touching them. [mj]
+ * CPU type and hardware bug flags. Kept separately for each CPU.
*/
+struct cpuinfo_topology {
+ // Real APIC ID read from the local APIC
+ u32 apicid;
+ // The initial APIC ID provided by CPUID
+ u32 initial_apicid;
+
+ // Physical package ID
+ u32 pkg_id;
+
+ // Physical die ID on AMD, Relative on Intel
+ u32 die_id;
+
+ // Compute unit ID - AMD specific
+ u32 cu_id;
+
+ // Core ID relative to the package
+ u32 core_id;
+
+ // Logical ID mappings
+ u32 logical_pkg_id;
+ u32 logical_die_id;
+ u32 logical_core_id;
+
+ // AMD Node ID and Nodes per Package info
+ u32 amd_node_id;
+
+ // Cache level topology IDs
+ u32 llc_id;
+ u32 l2c_id;
+
+ // Hardware defined CPU-type
+ union {
+ u32 cpu_type;
+ struct {
+ // CPUID.1A.EAX[23-0]
+ u32 intel_native_model_id :24;
+ // CPUID.1A.EAX[31-24]
+ u32 intel_type :8;
+ };
+ struct {
+ // CPUID 0x80000026.EBX
+ u32 amd_num_processors :16,
+ amd_power_eff_ranking :8,
+ amd_native_model_id :4,
+ amd_type :4;
+ };
+ };
+};
+
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
+ union {
+ /*
+ * The particular ordering (low-to-high) of (vendor,
+ * family, model) is done in case range of models, like
+ * it is usually done on AMD, need to be compared.
+ */
+ struct {
+ __u8 x86_model;
+ /* CPU family */
+ __u8 x86;
+ /* CPU vendor */
+ __u8 x86_vendor;
+ __u8 x86_reserved;
+ };
+ /* combined vendor, family, model */
+ __u32 x86_vfm;
+ };
__u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+ __u32 vmx_capability[NVMXINTS];
+#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
- __u8 cu_id;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
+ /*
+ * Align to size of unsigned long because the x86_capability array
+ * is passed to bitops which require the alignment. Use unnamed
+ * union to enforce the array is aligned to size of unsigned long.
+ */
+ union {
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ unsigned long x86_capability_alignment;
+ };
char x86_vendor_id[16];
char x86_model_id[64];
+ struct cpuinfo_topology topo;
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
- u16 apicid;
- u16 initial_apicid;
+ /* protected processor identification number */
+ u64 ppin;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
- /* Physical processor id: */
- u16 phys_proc_id;
- /* Logical processor id: */
- u16 logical_proc_id;
- /* Core id: */
- u16 cpu_core_id;
/* Index into per_cpu list: */
u16 cpu_index;
+ /* Is SMT active on this core? */
+ bool smt_active;
u32 microcode;
/* Address space bits used by the cache internally */
u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
-struct cpuid_regs {
- u32 eax, ebx, ecx, edx;
-};
-
-enum cpuid_regs_idx {
- CPUID_EAX = 0,
- CPUID_EBX,
- CPUID_ECX,
- CPUID_EDX,
-};
-
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
@@ -144,7 +197,9 @@ enum cpuid_regs_idx {
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_VORTEX 11
+#define X86_VENDOR_NUM 12
#define X86_VENDOR_UNKNOWN 0xff
@@ -154,17 +209,11 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
-#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#else
-#define cpu_info boot_cpu_data
-#define cpu_data(cpu) boot_cpu_data
-#endif
extern const struct seq_operations cpuinfo_op;
@@ -177,51 +226,13 @@ static inline unsigned long long l1tf_pfn_limit(void)
return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
-extern void identify_boot_cpu(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+extern void identify_secondary_cpu(unsigned int cpu);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
-#ifdef CONFIG_X86_32
-extern int have_cpuid_p(void);
-#else
-static inline int have_cpuid_p(void)
-{
- return 1;
-}
-#endif
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-#define native_cpuid_reg(reg) \
-static inline unsigned int native_cpuid_##reg(unsigned int op) \
-{ \
- unsigned int eax = op, ebx, ecx = 0, edx; \
- \
- native_cpuid(&eax, &ebx, &ecx, &edx); \
- \
- return reg; \
-}
-
-/*
- * Native CPUID functions returning a single datum.
- */
-native_cpuid_reg(eax)
-native_cpuid_reg(ebx)
-native_cpuid_reg(ecx)
-native_cpuid_reg(edx)
-
/*
* Friendlier CR3 helpers.
*/
@@ -297,11 +308,6 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
-
- /*
- * We store cpu_current_top_of_stack in sp1 so it's always accessible.
- * Linux does not use ring 1, so sp1 is not otherwise needed.
- */
u64 sp1;
/*
@@ -325,26 +331,56 @@ struct x86_hw_tss {
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
- unsigned long words[64];
+ char stack[PAGE_SIZE];
};
struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
/*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
*/
- struct x86_hw_tss x86_tss;
+ unsigned int prev_max;
/*
* The extra 1 is there because the CPU will access an
@@ -352,101 +388,63 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+ struct x86_io_bitmap io_bitmap;
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+/* Per CPU interrupt stacks */
+struct irq_stack {
+ char stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
-#ifdef CONFIG_X86_32
-DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
#else
-/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
#endif
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
- unsigned long ist[7];
-};
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override,
+ const_cpu_current_top_of_stack);
#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
- char irq_stack[IRQ_STACK_SIZE];
- /*
- * GCC hardcodes the stack canary as %gs:40. Since the
- * irq_stack is the object at %gs:0, we reserve the bottom
- * 48 bytes of the irq stack for the canary.
- */
- struct {
- char gs_base[40];
- unsigned long stack_canary;
- };
-};
-
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
-
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
}
-DECLARE_PER_CPU(char *, irq_stack_ptr);
-DECLARE_PER_CPU(unsigned int, irq_count);
-extern asmlinkage void ignore_sysret(void);
+extern asmlinkage void entry_SYSCALL32_ignore(void);
-#if IS_ENABLED(CONFIG_KVM)
/* Save actual FS/GS selectors and bases to current->thread */
-void save_fsgs_for_kvm(void);
-#endif
-#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-/*
- * Make sure stack canary segment base is cached-aligned:
- * "For Intel Atom processors, avoid non zero segment base address
- * that is not aligned to cache line boundary at all cost."
- * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
- */
-struct stack_canary {
- char __pad[20]; /* canary at %gs:20 */
- unsigned long canary;
-};
-DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
-#endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
- u32 stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+void current_save_fsgs(void);
#endif /* X86_64 */
-extern unsigned int fpu_kernel_xstate_size;
-extern unsigned int fpu_user_xstate_size;
-
struct perf_event;
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -478,7 +476,7 @@ struct thread_struct {
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
- unsigned long debugreg6;
+ unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7;
/* Fault info: */
@@ -490,58 +488,46 @@ struct thread_struct {
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
- mm_segment_t addr_limit;
+ /*
+ * IOPL. Privilege level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
- unsigned int sig_on_uaccess_err:1;
- unsigned int uaccess_err:1; /* uaccess failed */
+ unsigned int iopl_warn:1;
- /* Floating point and extended processor state */
- struct fpu fpu;
/*
- * WARNING: 'fpu' is dynamically-sized. It *MUST* be at
- * the end.
+ * Protection Keys Register for Userspace. Loaded immediately on
+ * context switch. Store it in thread_struct to avoid a lookup in
+ * the tasks's FPU xstate buffer. This value is only valid when a
+ * task is scheduled out. For 'current' the authoritative source of
+ * PKRU is the hardware itself.
*/
+ u32 pkru;
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ unsigned long features;
+ unsigned long features_locked;
+
+ struct thread_shstk shstk;
+#endif
};
-/* Whitelist the FPU state from the task_struct for hardened usercopy. */
-static inline void arch_thread_struct_whitelist(unsigned long *offset,
- unsigned long *size)
-{
- *offset = offsetof(struct thread_struct, fpu.state);
- *size = fpu_kernel_xstate_size;
-}
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
+#endif
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
+ fpu_thread_struct_whitelist(offset, size);
}
static inline void
@@ -550,24 +536,27 @@ native_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
-static inline void native_swapgs(void)
+static __always_inline void native_swapgs(void)
{
#ifdef CONFIG_X86_64
asm volatile("swapgs" ::: "memory");
#endif
}
-static inline unsigned long current_top_of_stack(void)
+static __always_inline unsigned long current_top_of_stack(void)
{
/*
* We can't read directly from tss.sp0: sp0 on x86_32 is special in
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_cpu_current_top_of_stack);
+
return this_cpu_read_stable(cpu_current_top_of_stack);
}
-static inline bool on_thread_stack(void)
+static __always_inline bool on_thread_stack(void)
{
return (unsigned long)(current_top_of_stack() -
current_stack_pointer) < THREAD_SIZE;
@@ -576,162 +565,17 @@ static inline bool on_thread_stack(void)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
-#define __cpuid native_cpuid
static inline void load_sp0(unsigned long sp0)
{
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT_XXL */
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-unsigned long get_wchan(struct task_struct *p);
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return eax;
-}
-
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ebx;
-}
-
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ecx;
-}
-
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return edx;
-}
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
-{
- asm volatile("rep; nop" ::: "memory");
-}
-
-static __always_inline void cpu_relax(void)
-{
- rep_nop();
-}
-
-/*
- * This function forces the icache and prefetched instruction stream to
- * catch up with reality in two very specific cases:
- *
- * a) Text was modified using one virtual address and is about to be executed
- * from the same physical page at a different virtual address.
- *
- * b) Text was modified on a different CPU, may subsequently be
- * executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in a IPI.
- *
- * If you're calling this for a different reason, you're probably doing
- * it wrong.
- */
-static inline void sync_core(void)
-{
- /*
- * There are quite a few ways to do this. IRET-to-self is nice
- * because it works on every CPU, at any CPL (so it's compatible
- * with paravirtualization), and it never exits to a hypervisor.
- * The only down sides are that it's a bit slow (it seems to be
- * a bit more than 2x slower than the fastest options) and that
- * it unmasks NMIs. The "push %cs" is needed because, in
- * paravirtual environments, __KERNEL_CS may not be a valid CS
- * value when we do IRET directly.
- *
- * In case NMI unmasking or performance ever becomes a problem,
- * the next best option appears to be MOV-to-CR2 and an
- * unconditional jump. That sequence also works on all CPUs,
- * but it will fault at CPL3 (i.e. Xen PV).
- *
- * CPUID is the conventional way, but it's nasty: it doesn't
- * exist on some 486-like CPUs, and it usually exits to a
- * hypervisor.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
- */
-#ifdef CONFIG_X86_32
- asm volatile (
- "pushfl\n\t"
- "pushl %%cs\n\t"
- "pushl $1f\n\t"
- "iret\n\t"
- "1:"
- : ASM_CALL_CONSTRAINT : : "memory");
-#else
- unsigned int tmp;
-
- asm volatile (
- UNWIND_HINT_SAVE
- "mov %%ss, %0\n\t"
- "pushq %q0\n\t"
- "pushq %%rsp\n\t"
- "addq $8, (%%rsp)\n\t"
- "pushfq\n\t"
- "mov %%cs, %0\n\t"
- "pushq %q0\n\t"
- "pushq $1f\n\t"
- "iretq\n\t"
- UNWIND_HINT_RESTORE
- "1:"
- : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
-#endif
-}
+unsigned long __get_wchan(struct task_struct *p);
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
+extern void select_idle_routine(void);
extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
@@ -740,40 +584,18 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
IDLE_POLL};
extern void enable_sep_cpu(void);
-extern int sysenter_setup(void);
-void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
-extern void switch_to_new_gdt(int);
+extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
-extern void load_percpu_segment(int);
extern void cpu_init(void);
-
-static inline unsigned long get_debugctlmsr(void)
-{
- unsigned long debugctlmsr = 0;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return 0;
-#endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-
- return debugctlmsr;
-}
-
-static inline void update_debugctlmsr(unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return;
-#endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-}
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
+extern void cr4_init(void);
extern void set_task_blockstep(struct task_struct *task, bool on);
@@ -785,13 +607,12 @@ extern char ignore_fpu_irq;
#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 %P1"
+# define BASE_PREFETCH "prefetcht0 %1"
#endif
/*
@@ -802,7 +623,7 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchnta %P1",
+ alternative_input(BASE_PREFETCH, "prefetchnta %1",
X86_FEATURE_XMM,
"m" (*(const char *)x));
}
@@ -812,18 +633,13 @@ static inline void prefetch(const void *x)
* Useful for spinlocks to avoid one state transition in the
* cache coherency protocol:
*/
-static inline void prefetchw(const void *x)
+static __always_inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ alternative_input(BASE_PREFETCH, "prefetchw %1",
X86_FEATURE_3DNOWPREFETCH,
"m" (*(const char *)x));
}
-static inline void spin_lock_prefetch(const void *x)
-{
- prefetchw(x);
-}
-
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
@@ -837,71 +653,18 @@ static inline void spin_lock_prefetch(const void *x)
})
#ifdef CONFIG_X86_32
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET PAGE_OFFSET
-#define TASK_SIZE PAGE_OFFSET
-#define TASK_SIZE_LOW TASK_SIZE
-#define TASK_SIZE_MAX TASK_SIZE
-#define DEFAULT_MAP_WINDOW TASK_SIZE
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
- .addr_limit = KERNEL_DS, \
}
-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-
#else
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
- 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP TASK_SIZE_LOW
-#define STACK_TOP_MAX TASK_SIZE_MAX
+extern unsigned long __top_init_kernel_stack[];
-#define INIT_THREAD { \
- .addr_limit = KERNEL_DS, \
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__top_init_kernel_stack, \
}
-extern unsigned long KSTK_ESP(struct task_struct *task);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -915,6 +678,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
/* Get/set a process' ability to use the timestamp counter instruction */
#define GET_TSC_CTL(adr) get_tsc_mode((adr))
@@ -925,50 +689,36 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
+static inline u32 per_cpu_llc_id(unsigned int cpu)
{
- return -EINVAL;
+ return per_cpu(cpu_info.topo.llc_id, cpu);
}
-static inline int mpx_disable_management(void)
+
+static inline u32 per_cpu_l2c_id(unsigned int cpu)
{
- return -EINVAL;
+ return per_cpu(cpu_info.topo.l2c_id, cpu);
}
-#endif /* CONFIG_X86_INTEL_MPX */
#ifdef CONFIG_CPU_SUP_AMD
-extern u16 amd_get_nb_id(int cpu);
-extern u32 amd_get_nodes_per_socket(void);
-#else
-static inline u16 amd_get_nb_id(int cpu) { return 0; }
-static inline u32 amd_get_nodes_per_socket(void) { return 0; }
-#endif
-
-static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+static __always_inline void amd_clear_divider(void)
{
- uint32_t base, eax, signature[3];
-
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
- cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
-
- if (!memcmp(sig, signature, 12) &&
- (leaves == 0 || ((eax - base) >= leaves)))
- return base;
- }
-
- return 0;
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
}
+extern void amd_check_microcode(void);
+#else
+static inline void amd_clear_divider(void) { }
+static inline void amd_check_microcode(void) { }
+#endif
+
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
void default_idle(void);
#ifdef CONFIG_XEN
@@ -977,12 +727,15 @@ bool xen_set_default_idle(void);
#define xen_set_default_idle 0
#endif
-void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
-void microcode_check(void);
+void __noreturn stop_this_cpu(void *dummy);
+void microcode_check(struct cpuinfo_x86 *prev_info);
+void store_cpu_caps(struct cpuinfo_x86 *info);
+
+DECLARE_PER_CPU(bool, cache_state_incoherent);
enum l1tf_mitigations {
L1TF_MITIGATION_OFF,
+ L1TF_MITIGATION_AUTO,
L1TF_MITIGATION_FLUSH_NOWARN,
L1TF_MITIGATION_FLUSH,
L1TF_MITIGATION_FLUSH_NOSMT,
@@ -992,4 +745,31 @@ enum l1tf_mitigations {
extern enum l1tf_mitigations l1tf_mitigation;
+enum mds_mitigations {
+ MDS_MITIGATION_OFF,
+ MDS_MITIGATION_AUTO,
+ MDS_MITIGATION_FULL,
+ MDS_MITIGATION_VMWERV,
+};
+
+extern bool gds_ucode_mitigated(void);
+
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 1d081ac1cd69..5d0dbab85264 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1,18 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Definitions for Device tree / OpenFirmware handling on X86
*
* based on arch/powerpc/include/asm/prom.h which is
* Copyright (C) 1996-2005 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_X86_PROM_H
#define _ASM_X86_PROM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/of.h>
#include <linux/types.h>
@@ -27,15 +23,15 @@ extern int of_ioapic;
extern u64 initial_dtb;
extern void add_dtb(u64 data);
void x86_of_pci_init(void);
-void x86_dtb_init(void);
+void x86_flattree_get_config(void);
#else
static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
-static inline void x86_dtb_init(void) { }
+static inline void x86_flattree_get_config(void) { }
#define of_ioapic 0
#endif
extern char cmd_line[COMMAND_LINE_SIZE];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6e81788a30c1..05224a695872 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -4,13 +4,17 @@
#include <asm/ldt.h>
+struct task_struct;
+
/* misc architecture specific prototypes */
void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
-long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
+void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
#endif
#ifdef CONFIG_X86_32
@@ -24,18 +28,18 @@ void __end_SYSENTER_singlestep_region(void);
void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
-void entry_INT80_compat(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-void xen_entry_INT80_compat(void);
-#endif
+void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
+#else /* !CONFIG_IA32_EMULATION */
+#define entry_SYSCALL_compat NULL
+#define entry_SYSENTER_compat NULL
#endif
void x86_configure_nx(void);
-void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long cpuid_enabled);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 5df09a0b80b8..88d0a1ab1f77 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -1,9 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
extern void pti_finalize(void);
@@ -11,5 +11,5 @@ extern void pti_finalize(void);
static inline void pti_check_boottime_disable(void) { }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..35d062a2e304 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,7 @@
#include <asm/page_types.h>
#include <uapi/asm/ptrace.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
struct pt_regs {
@@ -37,7 +37,10 @@ struct pt_regs {
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
- /* On interrupt, gs and __gsh store the vector number. */
+ /*
+ * On interrupt, gs and __gsh store the vector number. They never
+ * store gs any more.
+ */
unsigned short gs;
unsigned short __gsh;
/* On interrupt, this is the error code. */
@@ -53,18 +56,64 @@ struct pt_regs {
#else /* __i386__ */
+struct fred_cs {
+ /* CS selector */
+ u64 cs : 16,
+ /* Stack level at event time */
+ sl : 2,
+ /* IBT in WAIT_FOR_ENDBRANCH state */
+ wfe : 1,
+ : 45;
+};
+
+struct fred_ss {
+ /* SS selector */
+ u64 ss : 16,
+ /* STI state */
+ sti : 1,
+ /* Set if syscall, sysenter or INT n */
+ swevent : 1,
+ /* Event is NMI type */
+ nmi : 1,
+ : 13,
+ /* Event vector */
+ vector : 8,
+ : 8,
+ /* Event type */
+ type : 4,
+ : 4,
+ /* Event was incident to enclave execution */
+ enclave : 1,
+ /* CPU was in 64-bit mode */
+ l : 1,
+ /*
+ * Nested exception during FRED delivery, not set
+ * for #DF.
+ */
+ nested : 1,
+ : 1,
+ /*
+ * The length of the instruction causing the event.
+ * Only set for INTO, INT1, INT3, INT n, SYSCALL
+ * and SYSENTER. 0 otherwise.
+ */
+ insnlen : 4;
+};
+
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -74,18 +123,50 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ *
+ * A FRED stack frame starts here:
+ * 1) It _always_ includes an error code;
+ *
+ * 2) The return frame for ERET[US] starts here, but
+ * the content of orig_ax is ignored.
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* CS selector */
+ u16 cs;
+ /* The extended 64-bit data slot containing CS */
+ u64 csx;
+ /* The FRED CS extension */
+ struct fred_cs fred_cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* SS selector */
+ u16 ss;
+ /* The extended 64-bit data slot containing SS */
+ u64 ssx;
+ /* The FRED SS extension */
+ struct fred_ss fred_ss;
+ };
+
+ /*
+ * Top of stack on IDT systems, while FRED systems have extra fields
+ * defined above for storing exception related information, e.g. CR2 or
+ * DR6.
+ */
};
#endif /* !__i386__ */
@@ -94,24 +175,24 @@ struct pt_regs {
#include <asm/paravirt_types.h>
#endif
+#include <asm/proto.h>
+
struct cpuinfo_x86;
struct task_struct;
extern unsigned long profile_pc(struct pt_regs *regs);
-#define profile_pc profile_pc
extern unsigned long
convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code);
+extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);
-static inline unsigned long regs_return_value(struct pt_regs *regs)
+static __always_inline unsigned long regs_return_value(struct pt_regs *regs)
{
return regs->ax;
}
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->ax = rc;
}
@@ -125,7 +206,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
* On x86_64, vm86 mode is mercifully nonexistent, and we don't need
* the extra check.
*/
-static inline int user_mode(struct pt_regs *regs)
+static __always_inline int user_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL;
@@ -134,7 +215,7 @@ static inline int user_mode(struct pt_regs *regs)
#endif
}
-static inline int v8086_mode(struct pt_regs *regs)
+static __always_inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return (regs->flags & X86_VM_MASK);
@@ -161,25 +242,77 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+ return !user_mode(regs) || user_64bit_mode(regs);
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
+
+static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+ bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+ regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
+
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETQ_end);
+#ifdef CONFIG_IA32_EMULATION
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+ regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETL_compat_end);
#endif
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+ return ret;
+}
+#endif
+
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-#endif
-#define GET_IP(regs) ((regs)->ip)
-#define GET_FP(regs) ((regs)->bp)
-#define GET_USP(regs) ((regs)->sp)
+static __always_inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->ip;
+}
-#include <asm-generic/ptrace.h>
+static __always_inline
+void instruction_pointer_set(struct pt_regs *regs, unsigned long val)
+{
+ regs->ip = val;
+}
+
+static __always_inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return regs->bp;
+}
+
+static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
+static __always_inline
+void user_stack_pointer_set(struct pt_regs *regs, unsigned long val)
+{
+ regs->sp = val;
+}
+
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
+{
+ return !(regs->flags & X86_EFLAGS_IF);
+}
/* Query offset/name of register from its name/offset */
extern int regs_query_register_offset(const char *name);
@@ -201,14 +334,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
#ifdef CONFIG_X86_32
- /*
- * Traps from the kernel do not save sp and ss.
- * Use the helper function to retrieve sp.
- */
- if (offset == offsetof(struct pt_regs, sp) &&
- regs->cs == __KERNEL_CS)
- return kernel_stack_pointer(regs);
-
/* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +359,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
- return ((addr & ~(THREAD_SIZE - 1)) ==
- (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+ return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
}
/**
@@ -249,7 +373,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *addr = (unsigned long *)regs->sp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
@@ -259,7 +383,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs
}
/* To avoid include hell, we can't include uaccess.h */
-extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size);
/**
* regs_get_kernel_stack_nth() - get Nth entry of the stack
@@ -279,7 +403,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
addr = regs_get_kernel_stack_nth_addr(regs, n);
if (addr) {
- ret = probe_kernel_read(&val, addr, sizeof(val));
+ ret = copy_from_kernel_nofault(&val, addr, sizeof(val));
if (!ret)
return val;
}
@@ -303,8 +427,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
static const unsigned int argument_offs[] = {
#ifdef __i386__
offsetof(struct pt_regs, ax),
- offsetof(struct pt_regs, cx),
offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, cx),
#define NR_REG_ARGUMENTS 3
#else
offsetof(struct pt_regs, di),
@@ -333,27 +457,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
-/*
- * When hitting ptrace_stop(), we cannot return using SYSRET because
- * that does not restore the full CPU state, only a minimal set. The
- * ptracer can change arbitrary register values, which is usually okay
- * because the usual ptrace stops run off the signal delivery path which
- * forces IRET; however, ptrace_event() stops happen in arbitrary places
- * in the kernel and don't force IRET path.
- *
- * So force IRET path after a ptrace stop.
- */
-#define arch_ptrace_stop_needed(code, info) \
-({ \
- force_iret(); \
- false; \
-})
-
struct user_desc;
extern int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info);
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#endif /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t)
+#else
+# define do_set_thread_area_64(p, s, t) (0)
+#endif
+
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 92c34e517da1..2fee5e9f1ccc 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -2,20 +2,10 @@
#ifndef _ASM_X86_PURGATORY_H
#define _ASM_X86_PURGATORY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/purgatory.h>
extern void purgatory(void);
-/*
- * These forward declarations serve two purposes:
- *
- * 1) Make sparse happy when checking arch/purgatory
- * 2) Document that these are required to be global so the symbol
- * lookup in kexec works
- */
-extern unsigned long purgatory_backup_dest;
-extern unsigned long purgatory_backup_src;
-extern unsigned long purgatory_backup_sz;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index b6033680d458..6e4f8fae3ce9 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,11 +2,13 @@
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
+struct timespec64;
/* some helper functions for xen and kvm pv clock sources */
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -39,7 +41,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
*/
-static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
{
u64 product;
#ifdef __i386__
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..68da67df304d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+extern bool nopvspin;
#define queued_spin_unlock queued_spin_unlock
/**
@@ -52,6 +53,7 @@ static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
static inline void queued_spin_unlock(struct qspinlock *lock)
{
+ kcsan_release();
pv_queued_spin_unlock(lock);
}
@@ -63,13 +65,30 @@ static inline bool vcpu_is_preempted(long cpu)
#endif
#ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
-
-void native_pv_lock_init(void) __init;
+/*
+ * virt_spin_lock_key - disables by default the virt_spin_lock() hijack.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should keep this key
+ * disabled. Native does not touch the key.
+ *
+ * When in a guest then native_pv_lock_init() enables the key first and
+ * KVM/XEN might conditionally disable it later in the boot process again.
+ */
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ * true - lock has been negotiated, all done;
+ * false - queued_spin_lock_slowpath() will do its thing.
+ */
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
+ int val;
+
if (!static_branch_likely(&virt_spin_lock_key))
return false;
@@ -79,17 +98,17 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
* horrible lock 'holder' preemption issues.
*/
- do {
- while (atomic_read(&lock->val) != 0)
- cpu_relax();
- } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+ val = atomic_read(&lock->val);
+
+ if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+ cpu_relax();
+ goto __retry;
+ }
return true;
}
-#else
-static inline void native_pv_lock_init(void)
-{
-}
+
#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 159622ee0674..0a985784be9b 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -2,6 +2,10 @@
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H
+#include <asm/ibt.h>
+
+void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
/*
* For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
* registers. For i386, however, only 1 32-bit register needs to be saved
@@ -10,21 +14,20 @@
*/
#ifdef CONFIG_64BIT
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
-#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
-#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
/*
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
* which combines the registers saving trunk and the body of the following
- * C code:
+ * C code. Note that it puts the code in the .spinlock.text section which
+ * is equivalent to adding __lockfunc in the C code:
*
- * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* {
- * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = _Q_LOCKED_VAL;
*
- * if (likely(lockval == _Q_LOCKED_VAL))
+ * if (try_cmpxchg(&lock->locked, &lockval, 0))
* return;
* pv_queued_spin_unlock_slowpath(lock, lockval);
* }
@@ -34,36 +37,31 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
-asm (".pushsection .text;"
- ".globl " PV_UNLOCK ";"
- ".type " PV_UNLOCK ", @function;"
- ".align 4,0x90;"
- PV_UNLOCK ": "
- FRAME_BEGIN
- "push %rdx;"
- "mov $0x1,%eax;"
- "xor %edx,%edx;"
- LOCK_PREFIX "cmpxchg %dl,(%rdi);"
- "cmp $0x1,%al;"
- "jne .slowpath;"
- "pop %rdx;"
+#define PV_UNLOCK_ASM \
+ FRAME_BEGIN \
+ "push %rdx\n\t" \
+ "mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
+ "xor %edx,%edx\n\t" \
+ LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
+ "jne .slowpath\n\t" \
+ "pop %rdx\n\t" \
+ FRAME_END \
+ ASM_RET \
+ ".slowpath:\n\t" \
+ "push %rsi\n\t" \
+ "movzbl %al,%esi\n\t" \
+ "call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \
+ "pop %rsi\n\t" \
+ "pop %rdx\n\t" \
FRAME_END
- "ret;"
- ".slowpath: "
- "push %rsi;"
- "movzbl %al,%esi;"
- "call " PV_UNLOCK_SLOWPATH ";"
- "pop %rsi;"
- "pop %rdx;"
- FRAME_END
- "ret;"
- ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
- ".popsection");
+
+DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
+ PV_UNLOCK_ASM, .spinlock.text);
#else /* CONFIG_64BIT */
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
#endif /* CONFIG_64BIT */
#endif
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 63b3393bd98e..e406a1e92c63 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -9,20 +9,23 @@
#define TH_FLAGS_SME_ACTIVE_BIT 0
#define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/io.h>
-/* This must match data at realmode.S */
+/* This must match data at realmode/rm/header.S */
struct real_mode_header {
u32 text_start;
u32 ro_end;
/* SMP trampoline */
u32 trampoline_start;
- u32 trampoline_status;
u32 trampoline_header;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ u32 sev_es_trampoline_start;
+#endif
#ifdef CONFIG_X86_64
+ u32 trampoline_start64;
u32 trampoline_pgd;
#endif
/* ACPI S3 wakeup */
@@ -37,7 +40,7 @@ struct real_mode_header {
#endif
};
-/* This must match data at trampoline_32/64.S */
+/* This must match data at realmode/rm/trampoline_{32,64}.S */
struct trampoline_header {
#ifdef CONFIG_X86_32
u32 start;
@@ -49,6 +52,7 @@ struct trampoline_header {
u64 efer;
u32 cr4;
u32 flags;
+ u32 lock;
#endif
};
@@ -56,8 +60,12 @@ extern struct real_mode_header *real_mode_header;
extern unsigned char real_mode_blob_end[];
extern unsigned long initial_code;
-extern unsigned long initial_gs;
extern unsigned long initial_stack;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern unsigned long initial_vc_handler;
+#endif
+
+extern u32 *trampoline_lock;
extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];
@@ -67,9 +75,10 @@ extern unsigned char startup_32_smp[];
extern unsigned char boot_gdt[];
#else
extern unsigned char secondary_startup_64[];
+extern unsigned char secondary_startup_64_no_verify[];
#endif
-static inline size_t real_mode_size_needed(void)
+static __always_inline size_t real_mode_size_needed(void)
{
if (real_mode_header)
return 0; /* already allocated. */
@@ -77,9 +86,15 @@ static inline size_t real_mode_size_needed(void)
return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
}
-void set_real_mode_mem(phys_addr_t mem, size_t size);
+static inline void set_real_mode_mem(phys_addr_t mem)
+{
+ real_mode_header = (struct real_mode_header *) __va(mem);
+}
+
void reserve_real_mode(void);
+void load_trampoline_pgtable(void);
+void init_real_mode(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..ecd58ea9a837 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,8 +25,18 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+typedef void (cpu_emergency_virt_cb)(void);
+#if IS_ENABLED(CONFIG_KVM_X86)
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
+static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_X86 */
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
-void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
void run_crash_ipi_callback(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
deleted file mode 100644
index dbaed55c1c24..000000000000
--- a/arch/x86/include/asm/refcount.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef __ASM_X86_REFCOUNT_H
-#define __ASM_X86_REFCOUNT_H
-/*
- * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
- * PaX/grsecurity.
- */
-#include <linux/refcount.h>
-#include <asm/bug.h>
-
-/*
- * This is the first portion of the refcount error handling, which lives in
- * .text.unlikely, and is jumped to from the CPU flag check (in the
- * following macros). This saves the refcount value location into CX for
- * the exception handler to use (in mm/extable.c), and then triggers the
- * central refcount exception. The fixup address for the exception points
- * back to the regular execution flow in .text.
- */
-#define _REFCOUNT_EXCEPTION \
- ".pushsection .text..refcount\n" \
- "111:\tlea %[var], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD2 "\n" \
- ASM_UNREACHABLE \
- ".popsection\n" \
- "113:\n" \
- _ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO \
- "js 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO \
- "jz 111f\n\t" \
- REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR \
- "jmp 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-static __always_inline void refcount_add(unsigned int i, refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : "ir" (i)
- : "cc", "cx");
-}
-
-static __always_inline void refcount_inc(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "incl %0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline void refcount_dec(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "decl %0\n\t"
- REFCOUNT_CHECK_LE_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline __must_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
- return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "er", i, "cx");
-}
-
-static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
-{
- return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "cx");
-}
-
-static __always_inline __must_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
- int c, result;
-
- c = atomic_read(&(r->refs));
- do {
- if (unlikely(c == 0))
- return false;
-
- result = c + i;
-
- /* Did we try to increment from/to an undesirable state? */
- if (unlikely(c < 0 || c == INT_MAX || result < c)) {
- asm volatile(REFCOUNT_ERROR
- : : [var] "m" (r->refs.counter)
- : "cc", "cx");
- break;
- }
-
- } while (!atomic_try_cmpxchg(&(r->refs), &c, result));
-
- return c != 0;
-}
-
-static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
-{
- return refcount_add_not_zero(1, r);
-}
-
-#endif
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
deleted file mode 100644
index 6847d85400a8..000000000000
--- a/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-#ifdef CONFIG_X86_USE_3DNOW
-# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
-#else
-# define NEED_3DNOW 0
-#endif
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
new file mode 100644
index 000000000000..575f8408a9e7
--- /dev/null
+++ b/arch/x86/include/asm/resctrl.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_RESCTRL_H
+#define _ASM_X86_RESCTRL_H
+
+#ifdef CONFIG_X86_CPU_RESCTRL
+
+#include <linux/jump_label.h>
+#include <linux/percpu.h>
+#include <linux/resctrl_types.h>
+#include <linux/sched.h>
+
+#include <asm/msr.h>
+
+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed. The index is a software defined value.
+ */
+#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0)
+
+/**
+ * struct resctrl_pqr_state - State cache for the PQR MSR
+ * @cur_rmid: The cached Resource Monitoring ID
+ * @cur_closid: The cached Class Of Service ID
+ * @default_rmid: The user assigned Resource Monitoring ID
+ * @default_closid: The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct resctrl_pqr_state {
+ u32 cur_rmid;
+ u32 cur_closid;
+ u32 default_rmid;
+ u32 default_closid;
+};
+
+DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+
+static inline bool resctrl_arch_alloc_capable(void)
+{
+ return rdt_alloc_capable;
+}
+
+static inline void resctrl_arch_enable_alloc(void)
+{
+ static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_alloc(void)
+{
+ static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
+static inline bool resctrl_arch_mon_capable(void)
+{
+ return rdt_mon_capable;
+}
+
+static inline void resctrl_arch_enable_mon(void)
+{
+ static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_mon(void)
+{
+ static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
+/*
+ * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ * which supports resource control or monitoring and we enable by
+ * mounting the resctrl file system.
+ * - Caches the per cpu CLOSid/RMID values and does the MSR write only
+ * when a task with a different CLOSid/RMID is scheduled in.
+ * - We allocate RMIDs/CLOSids globally in order to keep this as
+ * simple as possible.
+ * Must be called with preemption disabled.
+ */
+static inline void __resctrl_sched_in(struct task_struct *tsk)
+{
+ struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 closid = READ_ONCE(state->default_closid);
+ u32 rmid = READ_ONCE(state->default_rmid);
+ u32 tmp;
+
+ /*
+ * If this task has a closid/rmid assigned, use it.
+ * Else use the closid/rmid assigned to this cpu.
+ */
+ if (static_branch_likely(&rdt_alloc_enable_key)) {
+ tmp = READ_ONCE(tsk->closid);
+ if (tmp)
+ closid = tmp;
+ }
+
+ if (static_branch_likely(&rdt_mon_enable_key)) {
+ tmp = READ_ONCE(tsk->rmid);
+ if (tmp)
+ rmid = tmp;
+ }
+
+ if (closid != state->cur_closid || rmid != state->cur_rmid) {
+ state->cur_closid = closid;
+ state->cur_rmid = rmid;
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
+ }
+}
+
+static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
+{
+ unsigned int scale = boot_cpu_data.x86_cache_occ_scale;
+
+ /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
+ val /= scale;
+ return val * scale;
+}
+
+static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid,
+ u32 rmid)
+{
+ WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid);
+ WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid);
+}
+
+static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
+ u32 closid, u32 rmid)
+{
+ WRITE_ONCE(tsk->closid, closid);
+ WRITE_ONCE(tsk->rmid, rmid);
+}
+
+static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
+{
+ return READ_ONCE(tsk->closid) == closid;
+}
+
+static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
+ u32 rmid)
+{
+ return READ_ONCE(tsk->rmid) == rmid;
+}
+
+static inline void resctrl_arch_sched_in(struct task_struct *tsk)
+{
+ if (static_branch_likely(&rdt_enable_key))
+ __resctrl_sched_in(tsk);
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = X86_RESCTRL_EMPTY_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
+{
+ return rmid;
+}
+
+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
+ enum resctrl_event_id evtid)
+{
+ might_sleep();
+ return NULL;
+}
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
+ enum resctrl_event_id evtid,
+ void *ctx) { }
+
+void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
+#else
+
+static inline void resctrl_arch_sched_in(struct task_struct *tsk) {}
+static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
+
+#endif /* CONFIG_X86_CPU_RESCTRL */
+
+#endif /* _ASM_X86_RESCTRL_H */
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
deleted file mode 100644
index 40ebddde6ac2..000000000000
--- a/arch/x86/include/asm/resctrl_sched.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_RESCTRL_SCHED_H
-#define _ASM_X86_RESCTRL_SCHED_H
-
-#ifdef CONFIG_X86_RESCTRL
-
-#include <linux/sched.h>
-#include <linux/jump_label.h>
-
-#define IA32_PQR_ASSOC 0x0c8f
-
-/**
- * struct resctrl_pqr_state - State cache for the PQR MSR
- * @cur_rmid: The cached Resource Monitoring ID
- * @cur_closid: The cached Class Of Service ID
- * @default_rmid: The user assigned Resource Monitoring ID
- * @default_closid: The user assigned cached Class Of Service ID
- *
- * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them. This also
- * stores the user configured per cpu CLOSID and RMID.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct resctrl_pqr_state {
- u32 cur_rmid;
- u32 cur_closid;
- u32 default_rmid;
- u32 default_closid;
-};
-
-DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
-
-DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
-DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
-DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
-
-/*
- * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
- *
- * Following considerations are made so that this has minimal impact
- * on scheduler hot path:
- * - This will stay as no-op unless we are running on an Intel SKU
- * which supports resource control or monitoring and we enable by
- * mounting the resctrl file system.
- * - Caches the per cpu CLOSid/RMID values and does the MSR write only
- * when a task with a different CLOSid/RMID is scheduled in.
- * - We allocate RMIDs/CLOSids globally in order to keep this as
- * simple as possible.
- * Must be called with preemption disabled.
- */
-static void __resctrl_sched_in(void)
-{
- struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
- u32 closid = state->default_closid;
- u32 rmid = state->default_rmid;
-
- /*
- * If this task has a closid/rmid assigned, use it.
- * Else use the closid/rmid assigned to this cpu.
- */
- if (static_branch_likely(&rdt_alloc_enable_key)) {
- if (current->closid)
- closid = current->closid;
- }
-
- if (static_branch_likely(&rdt_mon_enable_key)) {
- if (current->rmid)
- rmid = current->rmid;
- }
-
- if (closid != state->cur_closid || rmid != state->cur_rmid) {
- state->cur_closid = closid;
- state->cur_rmid = rmid;
- wrmsr(IA32_PQR_ASSOC, rmid, closid);
- }
-}
-
-static inline void resctrl_sched_in(void)
-{
- if (static_branch_likely(&rdt_enable_key))
- __resctrl_sched_in();
-}
-
-#else
-
-static inline void resctrl_sched_in(void) {}
-
-#endif /* CONFIG_X86_RESCTRL */
-
-#endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h
deleted file mode 100644
index 0a21986d2238..000000000000
--- a/arch/x86/include/asm/rio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Derived from include/asm-x86/mach-summit/mach_mpparse.h
- * and include/asm-x86/mach-default/bios_ebda.h
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- */
-
-#ifndef _ASM_X86_RIO_H
-#define _ASM_X86_RIO_H
-
-#define RIO_TABLE_VERSION 3
-
-struct rio_table_hdr {
- u8 version; /* Version number of this data structure */
- u8 num_scal_dev; /* # of Scalability devices */
- u8 num_rio_dev; /* # of RIO I/O devices */
-} __attribute__((packed));
-
-struct scal_detail {
- u8 node_id; /* Scalability Node ID */
- u32 CBAR; /* Address of 1MB register space */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF = None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port2node; /* Node ID port connected to: 0xFF = None */
- u8 port2port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- u8 node_id; /* RIO Node ID */
- u32 BBAR; /* Address of 1MB register space */
- u8 type; /* Type of device */
- u8 owner_id; /* Node ID of Hurricane that owns this */
- /* node */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF=None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 first_slot; /* Lowest slot number below this Calgary */
- u8 status; /* Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie: */
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- u8 WP_index; /* instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- u8 chassis_num; /* 1 based Chassis number */
-} __attribute__((packed));
-
-enum {
- HURR_SCALABILTY = 0, /* Hurricane Scalability info */
- HURR_RIOIB = 2, /* Hurricane RIOIB info */
- COMPAT_CALGARY = 4, /* Compatibility Calgary */
- ALT_CALGARY = 5, /* Second Planar Calgary */
-};
-
-#endif /* _ASM_X86_RIO_H */
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 8a9eba191516..54c8fc430684 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,53 +2,26 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
-/* This counts to 12. Any more, it will return 13th argument. */
-#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
-#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-
-#define __RMWcc_CONCAT(a, b) a ## b
-#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
+#include <linux/args.h>
#define __CLOBBERS_MEM(clb...) "memory", ## clb
-#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO)
-
-/* Use asm goto */
-
-#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
-({ \
- bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
- : : [var] "m" (_var), ## __VA_ARGS__ \
- : clobbers : cc_label); \
- if (0) { \
-cc_label: c = true; \
- } \
- c; \
-})
-
-#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
-
-/* Use flags output or a set instruction */
-
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c; \
- asm volatile (fullop CC_SET(cc) \
- : [var] "+m" (_var), CC_OUT(cc) (c) \
+ asm_inline volatile (fullop \
+ : [var] "+m" (_var), "=@cc" #cc (c) \
: __VA_ARGS__ : clobbers); \
c; \
})
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
-
#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
#define GEN_UNARY_RMWcc_3(op, var, cc) \
GEN_UNARY_RMWcc_4(op, var, cc, "%[var]")
-#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X)
+#define GEN_UNARY_RMWcc(X...) CONCATENATE(GEN_UNARY_RMWcc_, COUNT_ARGS(X))(X)
#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \
__GEN_RMWcc(op " %[val], " arg0, var, cc, \
@@ -57,7 +30,7 @@ cc_label: c = true; \
#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \
GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
-#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X)
+#define GEN_BINARY_RMWcc(X...) CONCATENATE(GEN_BINARY_RMWcc_, COUNT_ARGS(X))(X)
#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \
__GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \
diff --git a/arch/x86/include/asm/rqspinlock.h b/arch/x86/include/asm/rqspinlock.h
new file mode 100644
index 000000000000..24a885449ee6
--- /dev/null
+++ b/arch/x86/include/asm/rqspinlock.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_RQSPINLOCK_H
+#define _ASM_X86_RQSPINLOCK_H
+
+#include <asm/paravirt.h>
+
+#ifdef CONFIG_PARAVIRT
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled
+static __always_inline bool resilient_virt_spin_lock_enabled(void)
+{
+ return static_branch_likely(&virt_spin_lock_key);
+}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+typedef struct qspinlock rqspinlock_t;
+#else
+typedef struct rqspinlock rqspinlock_t;
+#endif
+extern int resilient_tas_spin_lock(rqspinlock_t *lock);
+
+#define resilient_virt_spin_lock resilient_virt_spin_lock
+static inline int resilient_virt_spin_lock(rqspinlock_t *lock)
+{
+ return resilient_tas_spin_lock(lock);
+}
+
+#endif /* CONFIG_PARAVIRT */
+
+#include <asm-generic/rqspinlock.h>
+
+#endif /* _ASM_X86_RQSPINLOCK_H */
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..e5a13dc8816e
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#ifdef MODULE
+ #error "Cannot use runtime-const infrastructure from modules"
+#endif
+
+#ifdef __ASSEMBLY__
+
+.macro RUNTIME_CONST_PTR sym reg
+ movq $0x0123456789abcdef, %\reg
+ 1:
+ .pushsection runtime_ptr_\sym, "a"
+ .long 1b - 8 - .
+ .popsection
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-x86/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _ASM_X86_RWSEM_H
-#define _ASM_X86_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-#include <asm/asm.h>
-
-/*
- * The bias values and the counter type limits the number of
- * potential readers/writers to 32767 for 32 bits and 2147483647
- * for 64 bits.
- */
-
-#ifdef CONFIG_X86_64
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-#define ____down_read(sem, slow_path) \
-({ \
- struct rw_semaphore* ret; \
- asm volatile("# beginning down_read\n\t" \
- LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
- /* adds 0x00000001 */ \
- " jns 1f\n" \
- " call " slow_path "\n" \
- "1:\n\t" \
- "# ending down_read\n\t" \
- : "+m" (sem->count), "=a" (ret), \
- ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- ____down_read(sem, "call_rwsem_down_read_failed");
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
- return -EINTR;
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_read_trylock(struct rw_semaphore *sem)
-{
- long result, tmp;
- asm volatile("# beginning __down_read_trylock\n\t"
- " mov %[count],%[result]\n\t"
- "1:\n\t"
- " mov %[result],%[tmp]\n\t"
- " add %[inc],%[tmp]\n\t"
- " jle 2f\n\t"
- LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : [count] "+m" (sem->count), [result] "=&a" (result),
- [tmp] "=&r" (tmp)
- : [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result >= 0;
-}
-
-/*
- * lock for writing
- */
-#define ____down_write(sem, slow_path) \
-({ \
- long tmp; \
- struct rw_semaphore* ret; \
- \
- asm volatile("# beginning down_write\n\t" \
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
- /* adds 0xffff0001, returns the old value */ \
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
- /* was the active mask 0 before? */\
- " jz 1f\n" \
- " call " slow_path "\n" \
- "1:\n" \
- "# ending down_write" \
- : "+m" (sem->count), [tmp] "=d" (tmp), \
- "=a" (ret), ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- ____down_write(sem, "call_rwsem_down_write_failed");
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_write_trylock(struct rw_semaphore *sem)
-{
- bool result;
- long tmp0, tmp1;
- asm volatile("# beginning __down_write_trylock\n\t"
- " mov %[count],%[tmp0]\n\t"
- "1:\n\t"
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jnz 2f\n\t"
- " mov %[tmp0],%[tmp1]\n\t"
- " add %[inc],%[tmp1]\n\t"
- LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- CC_SET(e)
- "# ending __down_write_trylock\n\t"
- : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
- [tmp1] "=&r" (tmp1), CC_OUT(e) (result)
- : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory");
- return result;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 1, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n"
- "# ending __up_read\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_write\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 0xffff0001, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n\t"
- "# ending __up_write\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
- /*
- * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
- * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
- */
- " jns 1f\n\t"
- " call call_rwsem_downgrade_wake\n"
- "1:\n\t"
- "# ending __downgrade_write\n"
- : "+m" (sem->count)
- : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
- : "memory", "cc");
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index 2bd1338de236..42bcd42d70d1 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -9,13 +9,33 @@
#endif
#ifdef CONFIG_COMPAT
-#include <asm/ia32_unistd.h>
+#include <asm/unistd_32_ia32.h>
#define __NR_seccomp_read_32 __NR_ia32_read
#define __NR_seccomp_write_32 __NR_ia32_write
#define __NR_seccomp_exit_32 __NR_ia32_exit
#define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn
#endif
+#ifdef CONFIG_X86_64
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_X86_64
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "x86_64"
+# ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_I386
+# define SECCOMP_ARCH_COMPAT_NR IA32_NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "ia32"
+# endif
+/*
+ * x32 will have __X32_SYSCALL_BIT set in syscall number. We don't support
+ * caching them and they are treated as out of range syscalls, which will
+ * always pass through the BPF filter.
+ */
+#else /* !CONFIG_X86_64 */
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_I386
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "ia32"
+#endif
+
#include <asm-generic/seccomp.h>
#endif /* _ASM_X86_SECCOMP_H */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 8ea1cfdbeabc..30e8ee7006f9 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -5,12 +5,16 @@
#include <asm-generic/sections.h>
#include <asm/extable.h>
+extern char __relocate_kernel_start[], __relocate_kernel_end[];
extern char __brk_base[], __brk_limit[];
-extern struct exception_table_entry __stop___ex_table[];
extern char __end_rodata_aligned[];
#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
#endif
+extern char __end_of_kernel_reserve[];
+
+extern unsigned long _brk_start, _brk_end;
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..f59ae7186940 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -4,6 +4,7 @@
#include <linux/const.h>
#include <asm/alternative.h>
+#include <asm/ibt.h>
/*
* Constructor for a conventional segment GDT (or LDT) entry.
@@ -31,6 +32,18 @@
*/
#define SEGMENT_RPL_MASK 0x3
+/*
+ * When running on Xen PV, the actual privilege level of the kernel is 1,
+ * not 0. Testing the Requested Privilege Level in a segment selector to
+ * determine whether the context is user mode or kernel mode with
+ * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
+ * matches the 0x3 mask.
+ *
+ * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
+ * kernels because privilege level 2 is never used.
+ */
+#define USER_SEGMENT_RPL_MASK 0x2
+
/* User mode is privilege level 3: */
#define USER_RPL 0x3
@@ -43,7 +56,7 @@
#define GDT_ENTRY_INVALID_SEG 0
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64)
/*
* The layout of the per-CPU GDT under Linux:
*
@@ -83,7 +96,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
+ * 28 - VDSO getcpu
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
@@ -106,7 +119,7 @@
#define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
-#define GDT_ENTRY_STACK_CANARY 28
+#define GDT_ENTRY_CPUNODE 28
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
@@ -123,6 +136,7 @@
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __USER32_CS __USER_CS
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
/* segment for calling fn: */
@@ -146,11 +160,7 @@
# define __KERNEL_PERCPU 0
#endif
-#ifdef CONFIG_STACKPROTECTOR
-# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
-#else
-# define __KERNEL_STACK_CANARY 0
-#endif
+#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3)
#else /* 64-bit: */
@@ -204,33 +214,26 @@
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
-#define __USER32_DS __USER_DS
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3)
#endif
-#ifndef CONFIG_PARAVIRT_XXL
-# define get_kernel_rpl() 0
-#endif
-
#define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack: */
-#define EXCEPTION_ERRCODE_MASK 0x00027d00
+#define EXCEPTION_ERRCODE_MASK 0x20027d00
#define GDT_SIZE (GDT_ENTRIES*8)
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
-#ifdef CONFIG_X86_64
-
/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */
#define VDSO_CPUNODE_BITS 12
#define VDSO_CPUNODE_MASK 0xfff
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Helper functions to store/load CPU and node numbers */
@@ -241,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
{
- unsigned int p;
+ unsigned long p;
/*
* Load CPU and node number from the GDT. LSL is faster than RDTSCP
@@ -251,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[seg],%[p]",
- ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+ alternative_io ("lsl %[seg],%k[p]",
+ "rdpid %[p]",
X86_FEATURE_RDPID,
- [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+ [p] "=r" (p), [seg] "r" (__CPUNODE_SEG));
if (cpu)
*cpu = (p & VDSO_CPUNODE_MASK);
@@ -262,8 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*node = (p >> VDSO_CPUNODE_BITS);
}
-#endif /* !__ASSEMBLY__ */
-#endif /* CONFIG_X86_64 */
+#endif /* !__ASSEMBLER__ */
#ifdef __KERNEL__
@@ -274,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
* vector has no error code (two bytes), a 'push $vector_number' (two
* bytes), and a jump to the common entry code (up to five bytes).
*/
-#define EARLY_IDT_HANDLER_SIZE 9
+#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE)
/*
* xen_early_idt_handler_array is for Xen pv guests: for each entry in
@@ -282,14 +284,14 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
* pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
* max 8 bytes.
*/
-#define XEN_EARLY_IDT_HANDLER_SIZE 8
+#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
#endif
@@ -306,14 +308,7 @@ do { \
\
asm volatile(" \n" \
"1: movl %k0,%%" #seg " \n" \
- \
- ".section .fixup,\"ax\" \n" \
- "2: xorl %k0,%k0 \n" \
- " jmp 1b \n" \
- ".previous \n" \
- \
- _ASM_EXTABLE(1b, 2b) \
- \
+ _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\
: "+r" (__val) : : "memory"); \
} while (0)
@@ -338,7 +333,7 @@ static inline void __loadsegment_fs(unsigned short value)
"1: movw %0, %%fs \n"
"2: \n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS)
: : "rm" (value) : "memory");
}
@@ -355,26 +350,7 @@ static inline void __loadsegment_fs(unsigned short value)
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-/*
- * x86-32 user GS accessors:
- */
-#ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_32_LAZY_GS
-# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk) ((tsk)->thread.gs)
-# define lazy_save_gs(v) savesegment(gs, (v))
-# define lazy_load_gs(v) loadsegment(gs, (v))
-# else /* X86_32_LAZY_GS */
-# define get_user_gs(regs) (u16)((regs)->gs)
-# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
-# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
-# define lazy_save_gs(v) do { } while (0)
-# define lazy_load_gs(v) do { } while (0)
-# endif /* X86_32_LAZY_GS */
-#endif /* X86_32 */
-
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SEGMENT_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 07a25753e85c..61f56cdaccb5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -4,12 +4,16 @@
#include <asm/page.h>
#include <asm-generic/set_memory.h>
+#include <asm/pgtable.h>
+
+#define set_memory_rox set_memory_rox
+int set_memory_rox(unsigned long addr, int numpages);
/*
* The set_memory_* API can be used to change various attributes of a virtual
* address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
- * Executability : eXeutable, NoteXecutable
+ * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack
+ * Executability : eXecutable, NoteXecutable
* Read/Write : ReadOnly, ReadWrite
* Presence : NotPresent
* Encryption : Encrypted, Decrypted
@@ -34,28 +38,28 @@
* The caller is required to take care of these.
*/
+int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot);
int _set_memory_uc(unsigned long addr, int numpages);
int _set_memory_wc(unsigned long addr, int numpages);
int _set_memory_wt(unsigned long addr, int numpages);
int _set_memory_wb(unsigned long addr, int numpages);
int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
+int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
+
+bool set_memory_enc_stop_conversion(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
-int set_memory_np_noalias(unsigned long addr, int numpages);
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
+int set_memory_np_noalias(unsigned long addr, int numpages);
+int set_memory_nonglobal(unsigned long addr, int numpages);
+int set_memory_global(unsigned long addr, int numpages);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
int set_pages_array_wb(struct page **pages, int addrinarray);
/*
@@ -80,55 +84,14 @@ int set_pages_array_wb(struct page **pages, int addrinarray);
int set_pages_uc(struct page *page, int numpages);
int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
int set_pages_ro(struct page *page, int numpages);
int set_pages_rw(struct page *page, int numpages);
-extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
-
-#ifdef CONFIG_X86_64
-static inline int set_mce_nospec(unsigned long pfn)
-{
- unsigned long decoy_addr;
- int rc;
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
+bool kernel_page_present(struct page *page);
- /*
- * Mark the linear address as UC to make sure we don't log more
- * errors because of speculative access to the page.
- * We would like to just call:
- * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
- * but doing that would radically increase the odds of a
- * speculative access to the poison page because we'd have
- * the virtual address of the kernel 1:1 mapping sitting
- * around in registers.
- * Instead we get tricky. We create a non-canonical address
- * that looks just like the one we want, but has bit 63 flipped.
- * This relies on set_memory_uc() properly sanitizing any __pa()
- * results with __PHYSICAL_MASK or PTE_PFN_MASK.
- */
- decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-
- rc = set_memory_uc(decoy_addr, 1);
- if (rc)
- pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
- return rc;
-}
-#define set_mce_nospec set_mce_nospec
-
-/* Restore full speculative operation to the pfn. */
-static inline int clear_mce_nospec(unsigned long pfn)
-{
- return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
-}
-#define clear_mce_nospec clear_mce_nospec
-#else
-/*
- * Few people would run a 32-bit kernel on a machine that supports
- * recoverable errors because they have too much memory to boot 32-bit.
- */
-#endif
+extern int kernel_set_to_readonly;
#endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ed8ec011a9fd..914eb32581c7 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,7 @@
#include <linux/linkage.h>
#include <asm/page_types.h>
+#include <asm/ibt.h>
#ifdef __i386__
@@ -26,12 +27,12 @@
#define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/cache.h>
+
#include <asm/bootparam.h>
#include <asm/x86_init.h>
-extern u64 relocated_ramdisk;
-
/* Interrupt control for vSMPowered x86_64 systems */
#ifdef CONFIG_X86_64
void vsmp_init(void);
@@ -39,16 +40,22 @@ void vsmp_init(void);
static inline void vsmp_init(void) { }
#endif
+struct pt_regs;
+
void setup_bios_corruption_check(void);
void early_platform_quirks(void);
extern unsigned long saved_video_mode;
+extern unsigned long acpi_realmode_flags;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern unsigned long __startup_secondary_64(void);
-extern int early_make_pgtable(unsigned long address);
+extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
+extern void startup_64_setup_gdt_idt(void);
+extern void startup_64_load_idt(void *vc_handler);
+extern void __pi_startup_64_load_idt(void *vc_handler);
+extern void early_setup_idt(void);
+extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
#ifdef CONFIG_X86_INTEL_MID
extern void x86_intel_mid_early_setup(void);
@@ -62,6 +69,8 @@ extern void x86_ce4100_early_setup(void);
static inline void x86_ce4100_early_setup(void) { }
#endif
+#include <linux/kexec_handover.h>
+
#ifndef _SETUP
#include <asm/espfix.h>
@@ -75,7 +84,17 @@ extern char _text[];
static inline bool kaslr_enabled(void)
{
- return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+ return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) &&
+ !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
+/*
+ * Apply no randomization if KASLR was disabled at boot or if KASAN
+ * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
+ */
+static inline bool kaslr_memory_enabled(void)
+{
+ return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
}
static inline unsigned long kaslr_offset(void)
@@ -94,50 +113,51 @@ extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within
- * the file, and somewhat descriptive. The size is in bytes. Must be
- * used at file scope.
+ * Reserve space in the .brk section, which is a block of memory from which the
+ * caller is allowed to allocate very early (before even memblock is available)
+ * by calling extend_brk(). All allocated memory will be eventually converted
+ * to memblock. Any leftover unallocated memory will be freed.
*
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to. We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The size is in bytes.
*/
-#define RESERVE_BRK(name,sz) \
- static void __section(.discard.text) __used notrace \
- __brk_reservation_fn_##name##__(void) { \
- asm volatile ( \
- ".pushsection .brk_reservation,\"aw\",@nobits;" \
- ".brk." #name ":" \
- " 1:.skip %c0;" \
- " .size .brk." #name ", . - 1b;" \
- " .popsection" \
- : : "i" (sz)); \
- }
-
-/* Helper for reserving space for arrays of things */
-#define RESERVE_BRK_ARRAY(type, name, entries) \
- type *name; \
- RESERVE_BRK(name, sizeof(type) * entries)
+#define RESERVE_BRK(name, size) \
+ __section(".bss..brk") __aligned(1) __used \
+ static char __brk_##name[size]
extern void probe_roms(void);
+
+void clear_bss(void);
+
#ifdef __i386__
-asmlinkage void __init i386_start_kernel(void);
+asmlinkage void __init __noreturn i386_start_kernel(void);
+void __init mk_early_pgtbl_32(void);
#else
-asmlinkage void __init x86_64_start_kernel(char *real_mode);
-asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
+asmlinkage void __init __noreturn x86_64_start_kernel(char *real_mode);
+asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
+
+#ifdef CONFIG_CMDLINE_BOOL
+extern bool builtin_cmdline_added __ro_after_init;
#else
-#define RESERVE_BRK(name,sz) \
- .pushsection .brk_reservation,"aw",@nobits; \
-.brk.name: \
-1: .skip sz; \
- .size .brk.name,.-1b; \
+#define builtin_cmdline_added 0
+#endif
+
+#else /* __ASSEMBLER__ */
+
+.macro __RESERVE_BRK name, size
+ .pushsection .bss..brk, "aw"
+SYM_DATA_START(__brk_\name)
+ .skip \size
+SYM_DATA_END(__brk_\name)
.popsection
-#endif /* __ASSEMBLY__ */
+.endm
+
+#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
+
+#endif /* __ASSEMBLER__ */
+
#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h
new file mode 100644
index 000000000000..7bb16f843c93
--- /dev/null
+++ b/arch/x86/include/asm/setup_data.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SETUP_DATA_H
+#define _ASM_X86_SETUP_DATA_H
+
+#include <uapi/asm/setup_data.h>
+
+#ifndef __ASSEMBLER__
+
+struct pci_setup_rom {
+ struct setup_data data;
+ uint16_t vendor;
+ uint16_t devid;
+ uint64_t pcilen;
+ unsigned long segment;
+ unsigned long bus;
+ unsigned long device;
+ unsigned long function;
+ uint8_t romdata[];
+};
+
+/* kexec external ABI */
+struct efi_setup_data {
+ u64 fw_vendor;
+ u64 __unused;
+ u64 tables;
+ u64 smbios;
+ u64 reserved[8];
+};
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
new file mode 100644
index 000000000000..01a6e4dbe423
--- /dev/null
+++ b/arch/x86/include/asm/sev-common.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header common between the guest and the hypervisor.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __ASM_X86_SEV_COMMON_H
+#define __ASM_X86_SEV_COMMON_H
+
+#define GHCB_MSR_INFO_POS 0
+#define GHCB_DATA_LOW 12
+#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1)
+
+#define GHCB_DATA(v) \
+ (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)
+
+/* SEV Information Request/Response */
+#define GHCB_MSR_SEV_INFO_RESP 0x001
+#define GHCB_MSR_SEV_INFO_REQ 0x002
+
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
+ /* GHCBData[63:48] */ \
+ ((((_max) & 0xffff) << 48) | \
+ /* GHCBData[47:32] */ \
+ (((_min) & 0xffff) << 32) | \
+ /* GHCBData[31:24] */ \
+ (((_cbit) & 0xff) << 24) | \
+ GHCB_MSR_SEV_INFO_RESP)
+
+#define GHCB_MSR_INFO(v) ((v) & 0xfffUL)
+#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff)
+#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff)
+
+/* CPUID Request/Response */
+#define GHCB_MSR_CPUID_REQ 0x004
+#define GHCB_MSR_CPUID_RESP 0x005
+#define GHCB_MSR_CPUID_FUNC_POS 32
+#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS 32
+#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff
+#define GHCB_MSR_CPUID_REG_POS 30
+#define GHCB_MSR_CPUID_REG_MASK 0x3
+#define GHCB_CPUID_REQ_EAX 0
+#define GHCB_CPUID_REQ_EBX 1
+#define GHCB_CPUID_REQ_ECX 2
+#define GHCB_CPUID_REQ_EDX 3
+#define GHCB_CPUID_REQ(fn, reg) \
+ /* GHCBData[11:0] */ \
+ (GHCB_MSR_CPUID_REQ | \
+ /* GHCBData[31:12] */ \
+ (((unsigned long)(reg) & 0x3) << 30) | \
+ /* GHCBData[63:32] */ \
+ (((unsigned long)fn) << 32))
+
+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0)
+
+/* Preferred GHCB GPA Request */
+#define GHCB_MSR_PREF_GPA_REQ 0x010
+#define GHCB_MSR_GPA_VALUE_POS 12
+#define GHCB_MSR_GPA_VALUE_MASK GENMASK_ULL(51, 0)
+
+#define GHCB_MSR_PREF_GPA_RESP 0x011
+#define GHCB_MSR_PREF_GPA_NONE 0xfffffffffffff
+
+/* GHCB GPA Register */
+#define GHCB_MSR_REG_GPA_REQ 0x012
+#define GHCB_MSR_REG_GPA_REQ_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_REG_GPA_REQ)
+
+#define GHCB_MSR_REG_GPA_RESP 0x013
+#define GHCB_MSR_REG_GPA_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+/*
+ * SNP Page State Change Operation
+ *
+ * GHCBData[55:52] - Page operation:
+ * 0x0001 Page assignment, Private
+ * 0x0002 Page assignment, Shared
+ */
+enum psc_op {
+ SNP_PAGE_STATE_PRIVATE = 1,
+ SNP_PAGE_STATE_SHARED,
+};
+
+#define GHCB_MSR_PSC_REQ 0x014
+#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \
+ /* GHCBData[55:52] */ \
+ (((u64)((op) & 0xf) << 52) | \
+ /* GHCBData[51:12] */ \
+ ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_PSC_REQ)
+
+#define GHCB_MSR_PSC_REQ_TO_GFN(msr) (((msr) & GENMASK_ULL(51, 12)) >> 12)
+#define GHCB_MSR_PSC_REQ_TO_OP(msr) (((msr) & GENMASK_ULL(55, 52)) >> 52)
+
+#define GHCB_MSR_PSC_RESP 0x015
+#define GHCB_MSR_PSC_RESP_VAL(val) \
+ /* GHCBData[63:32] */ \
+ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+
+/* Set highest bit as a generic error response */
+#define GHCB_MSR_PSC_RESP_ERROR (BIT_ULL(63) | GHCB_MSR_PSC_RESP)
+
+/* GHCB Run at VMPL Request/Response */
+#define GHCB_MSR_VMPL_REQ 0x016
+#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
+ /* GHCBData[39:32] */ \
+ ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \
+ /* GHCBDdata[11:0] */ \
+ GHCB_MSR_VMPL_REQ)
+
+#define GHCB_MSR_VMPL_RESP 0x017
+#define GHCB_MSR_VMPL_RESP_VAL(v) \
+ /* GHCBData[63:32] */ \
+ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
+
+/* GHCB Hypervisor Feature Request/Response */
+#define GHCB_MSR_HV_FT_REQ 0x080
+#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_POS 12
+#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0)
+#define GHCB_MSR_HV_FT_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+#define GHCB_HV_FT_SNP BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
+
+/*
+ * SNP Page State Change NAE event
+ * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
+ * is a local stack variable in set_pages_state(). Do not increase this value
+ * without evaluating the impact to stack usage.
+ *
+ * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value
+ * is needed, such as when processing GHCB requests on the hypervisor side.
+ */
+#define VMGEXIT_PSC_MAX_ENTRY 64
+#define VMGEXIT_PSC_MAX_COUNT 253
+
+#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32)
+#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1)
+#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2)
+
+#define VMGEXIT_PSC_OP_PRIVATE 1
+#define VMGEXIT_PSC_OP_SHARED 2
+
+struct psc_hdr {
+ u16 cur_entry;
+ u16 end_entry;
+ u32 reserved;
+} __packed;
+
+struct psc_entry {
+ u64 cur_page : 12,
+ gfn : 40,
+ operation : 4,
+ pagesize : 1,
+ reserved : 7;
+} __packed;
+
+struct snp_psc_desc {
+ struct psc_hdr hdr;
+ struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
+} __packed;
+
+#define GHCB_MSR_TERM_REQ 0x100
+#define GHCB_MSR_TERM_REASON_SET_POS 12
+#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
+#define GHCB_MSR_TERM_REASON_POS 16
+#define GHCB_MSR_TERM_REASON_MASK 0xff
+
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \
+ /* GHCBData[15:12] */ \
+ (((((u64)reason_set) & 0xf) << 12) | \
+ /* GHCBData[23:16] */ \
+ ((((u64)reason_val) & 0xff) << 16))
+
+/* Error codes from reason set 0 */
+#define SEV_TERM_SET_GEN 0
+#define GHCB_SEV_ES_GEN_REQ 0
+#define GHCB_SEV_ES_PROT_UNSUPPORTED 1
+#define GHCB_SNP_UNSUPPORTED 2
+
+/* Linux-specific reason codes (used with reason set 1) */
+#define SEV_TERM_SET_LINUX 1
+#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */
+#define GHCB_TERM_PSC 1 /* Page State Change failure */
+#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */
+#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
+#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */
+#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */
+#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */
+#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
+#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
+#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
+#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */
+
+#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
+
+/*
+ * GHCB-defined return codes that are communicated back to the guest via
+ * SW_EXITINFO1.
+ */
+#define GHCB_HV_RESP_NO_ACTION 0
+#define GHCB_HV_RESP_ISSUE_EXCEPTION 1
+#define GHCB_HV_RESP_MALFORMED_INPUT 2
+
+/*
+ * GHCB-defined sub-error codes for malformed input (see above) that are
+ * communicated back to the guest via SW_EXITINFO2[31:0].
+ */
+#define GHCB_ERR_NOT_REGISTERED 1
+#define GHCB_ERR_INVALID_USAGE 2
+#define GHCB_ERR_INVALID_SCRATCH_AREA 3
+#define GHCB_ERR_MISSING_INPUT 4
+#define GHCB_ERR_INVALID_INPUT 5
+#define GHCB_ERR_INVALID_EVENT 6
+
+struct sev_config {
+ __u64 debug : 1,
+
+ /*
+ * Indicates when the per-CPU GHCB has been created and registered
+ * and thus can be used by the BSP instead of the early boot GHCB.
+ *
+ * For APs, the per-CPU GHCB is created before they are started
+ * and registered upon startup, so this flag can be used globally
+ * for the BSP and APs.
+ */
+ ghcbs_initialized : 1,
+
+ /*
+ * Indicates when the per-CPU SVSM CA is to be used instead of the
+ * boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ use_cas : 1,
+
+ __reserved : 61;
+};
+
+extern struct sev_config sev_cfg;
+
+#endif
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
new file mode 100644
index 000000000000..c58c47c68ab6
--- /dev/null
+++ b/arch/x86/include/asm/sev-internal.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define DR7_RESET_VALUE 0x400
+
+extern u64 sev_hv_features;
+extern u64 sev_secrets_pa;
+
+/* #VC handler runtime per-CPU data */
+struct sev_es_runtime_data {
+ struct ghcb ghcb_page;
+
+ /*
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+ * It is needed when an NMI happens while the #VC handler uses the real
+ * GHCB, and the NMI handler itself is causing another #VC exception. In
+ * that case the GHCB content of the first handler needs to be backed up
+ * and restored.
+ */
+ struct ghcb backup_ghcb;
+
+ /*
+ * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+ * There is no need for it to be atomic, because nothing is written to
+ * the GHCB between the read and the write of ghcb_active. So it is safe
+ * to use it when a nested #VC exception happens before the write.
+ *
+ * This is necessary for example in the #VC->NMI->#VC case when the NMI
+ * happens while the first #VC handler uses the GHCB. When the NMI code
+ * raises a second #VC handler it might overwrite the contents of the
+ * GHCB written by the first handler. To avoid this the content of the
+ * GHCB is saved and restored when the GHCB is detected to be in use
+ * already.
+ */
+ bool ghcb_active;
+ bool backup_ghcb_active;
+
+ /*
+ * Cached DR7 value - write it on DR7 writes and return it on reads.
+ * That value will never make it to the real hardware DR7 as debugging
+ * is currently unsupported in SEV-ES guests.
+ */
+ unsigned long dr7;
+};
+
+struct ghcb_state {
+ struct ghcb *ghcb;
+};
+
+extern struct svsm_ca boot_svsm_ca_page;
+
+struct ghcb *__sev_get_ghcb(struct ghcb_state *state);
+void __sev_put_ghcb(struct ghcb_state *state);
+
+DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
+void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, const struct psc_desc *desc);
+
+DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
+DECLARE_PER_CPU(u64, svsm_caa_pa);
+
+extern u64 boot_svsm_caa_pa;
+
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt);
+void vc_forward_exception(struct es_em_ctxt *ctxt);
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ u32 low, high;
+
+ low = (u32)(val);
+ high = (u32)(val >> 32);
+
+ native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
+}
+
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write);
+
+u64 get_hv_features(void);
+
+const struct snp_cpuid_table *snp_cpuid_get_table(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
new file mode 100644
index 000000000000..0e6c0940100f
--- /dev/null
+++ b/arch/x86/include/asm/sev.h
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#ifndef __ASM_ENCRYPTED_STATE_H
+#define __ASM_ENCRYPTED_STATE_H
+
+#include <linux/types.h>
+#include <linux/sev-guest.h>
+
+#include <asm/insn.h>
+#include <asm/sev-common.h>
+#include <asm/coco.h>
+#include <asm/set_memory.h>
+#include <asm/svm.h>
+
+#define GHCB_PROTOCOL_MIN 1ULL
+#define GHCB_PROTOCOL_MAX 2ULL
+#define GHCB_DEFAULT_USAGE 0ULL
+
+#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
+
+struct boot_params;
+
+enum es_result {
+ ES_OK, /* All good */
+ ES_UNSUPPORTED, /* Requested operation not supported */
+ ES_VMM_ERROR, /* Unexpected state from the VMM */
+ ES_DECODE_FAILED, /* Instruction decoding failed */
+ ES_EXCEPTION, /* Instruction caused exception */
+ ES_RETRY, /* Retry instruction emulation */
+};
+
+struct es_fault_info {
+ unsigned long vector;
+ unsigned long error_code;
+ unsigned long cr2;
+};
+
+struct pt_regs;
+
+/* ES instruction emulation context */
+struct es_em_ctxt {
+ struct pt_regs *regs;
+ struct insn insn;
+ struct es_fault_info fi;
+};
+
+/*
+ * AMD SEV Confidential computing blob structure. The structure is
+ * defined in OVMF UEFI firmware header:
+ * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h
+ */
+#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41
+struct cc_blob_sev_info {
+ u32 magic;
+ u16 version;
+ u16 reserved;
+ u64 secrets_phys;
+ u32 secrets_len;
+ u32 rsvd1;
+ u64 cpuid_phys;
+ u32 cpuid_len;
+ u32 rsvd2;
+} __packed;
+
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
+
+static inline u64 lower_bits(u64 val, unsigned int bits)
+{
+ u64 mask = (1ULL << bits) - 1;
+
+ return (val & mask);
+}
+
+struct real_mode_header;
+enum stack_type;
+
+/* Early IDT entry points for #VC handler */
+extern void vc_no_ghcb(void);
+extern void vc_boot_ghcb(void);
+extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+ u32 eax_in;
+ u32 ecx_in;
+ u64 xcr0_in;
+ u64 xss_in;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+ u32 count;
+ u32 __reserved1;
+ u64 __reserved2;
+ struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
+/* PVALIDATE return codes */
+#define PVALIDATE_FAIL_SIZEMISMATCH 6
+
+/* Software defined (when rFlags.CF = 1) */
+#define PVALIDATE_FAIL_NOUPDATE 255
+
+/* RMUPDATE detected 4K page and 2MB page overlap. */
+#define RMPUPDATE_FAIL_OVERLAP 4
+
+/* PSMASH failed due to concurrent access by another CPU */
+#define PSMASH_FAIL_INUSE 3
+
+/* RMP page size */
+#define RMP_PG_SIZE_4K 0
+#define RMP_PG_SIZE_2M 1
+#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
+#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
+
+struct rmp_state {
+ u64 gpa;
+ u8 assigned;
+ u8 pagesize;
+ u8 immutable;
+ u8 rsvd;
+ u32 asid;
+} __packed;
+
+#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+
+/* SNP Guest message request */
+struct snp_req_data {
+ unsigned long req_gpa;
+ unsigned long resp_gpa;
+ unsigned long data_gpa;
+ unsigned int data_npages;
+};
+
+#define MAX_AUTHTAG_LEN 32
+#define AUTHTAG_LEN 16
+#define AAD_LEN 48
+#define MSG_HDR_VER 1
+
+#define SNP_REQ_MAX_RETRY_DURATION (60*HZ)
+#define SNP_REQ_RETRY_DELAY (2*HZ)
+
+/* See SNP spec SNP_GUEST_REQUEST section for the structure */
+enum msg_type {
+ SNP_MSG_TYPE_INVALID = 0,
+ SNP_MSG_CPUID_REQ,
+ SNP_MSG_CPUID_RSP,
+ SNP_MSG_KEY_REQ,
+ SNP_MSG_KEY_RSP,
+ SNP_MSG_REPORT_REQ,
+ SNP_MSG_REPORT_RSP,
+ SNP_MSG_EXPORT_REQ,
+ SNP_MSG_EXPORT_RSP,
+ SNP_MSG_IMPORT_REQ,
+ SNP_MSG_IMPORT_RSP,
+ SNP_MSG_ABSORB_REQ,
+ SNP_MSG_ABSORB_RSP,
+ SNP_MSG_VMRK_REQ,
+ SNP_MSG_VMRK_RSP,
+
+ SNP_MSG_TSC_INFO_REQ = 17,
+ SNP_MSG_TSC_INFO_RSP,
+
+ SNP_MSG_TYPE_MAX
+};
+
+enum aead_algo {
+ SNP_AEAD_INVALID,
+ SNP_AEAD_AES_256_GCM,
+};
+
+struct snp_guest_msg_hdr {
+ u8 authtag[MAX_AUTHTAG_LEN];
+ u64 msg_seqno;
+ u8 rsvd1[8];
+ u8 algo;
+ u8 hdr_version;
+ u16 hdr_sz;
+ u8 msg_type;
+ u8 msg_version;
+ u16 msg_sz;
+ u32 rsvd2;
+ u8 msg_vmpck;
+ u8 rsvd3[35];
+} __packed;
+
+struct snp_guest_msg {
+ struct snp_guest_msg_hdr hdr;
+ u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)];
+} __packed;
+
+#define SNP_TSC_INFO_REQ_SZ 128
+
+struct snp_tsc_info_req {
+ u8 rsvd[SNP_TSC_INFO_REQ_SZ];
+} __packed;
+
+struct snp_tsc_info_resp {
+ u32 status;
+ u32 rsvd1;
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u32 tsc_factor;
+ u8 rsvd2[100];
+} __packed;
+
+/*
+ * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)
+
+struct snp_guest_req {
+ void *req_buf;
+ size_t req_sz;
+
+ void *resp_buf;
+ size_t resp_sz;
+
+ u64 exit_code;
+ u64 exitinfo2;
+ unsigned int vmpck_id;
+ u8 msg_version;
+ u8 msg_type;
+
+ struct snp_req_data input;
+ void *certs_data;
+};
+
+/*
+ * The secrets page contains 96-bytes of reserved field that can be used by
+ * the guest OS. The guest OS uses the area to save the message sequence
+ * number for each VMPCK.
+ *
+ * See the GHCB spec section Secret page layout for the format for this area.
+ */
+struct secrets_os_area {
+ u32 msg_seqno_0;
+ u32 msg_seqno_1;
+ u32 msg_seqno_2;
+ u32 msg_seqno_3;
+ u64 ap_jump_table_pa;
+ u8 rsvd[40];
+ u8 guest_usage[32];
+} __packed;
+
+#define VMPCK_KEY_LEN 32
+
+/* See the SNP spec version 0.9 for secrets page format */
+struct snp_secrets_page {
+ u32 version;
+ u32 imien : 1,
+ rsvd1 : 31;
+ u32 fms;
+ u32 rsvd2;
+ u8 gosvw[16];
+ u8 vmpck0[VMPCK_KEY_LEN];
+ u8 vmpck1[VMPCK_KEY_LEN];
+ u8 vmpck2[VMPCK_KEY_LEN];
+ u8 vmpck3[VMPCK_KEY_LEN];
+ struct secrets_os_area os_area;
+
+ u8 vmsa_tweak_bitmap[64];
+
+ /* SVSM fields */
+ u64 svsm_base;
+ u64 svsm_size;
+ u64 svsm_caa;
+ u32 svsm_max_version;
+ u8 svsm_guest_vmpl;
+ u8 rsvd3[3];
+
+ /* The percentage decrease from nominal to mean TSC frequency. */
+ u32 tsc_factor;
+
+ /* Remainder of page */
+ u8 rsvd4[3740];
+} __packed;
+
+struct snp_msg_desc {
+ /* request and response are in unencrypted memory */
+ struct snp_guest_msg *request, *response;
+
+ /*
+ * Avoid information leakage by double-buffering shared messages
+ * in fields that are in regular encrypted memory.
+ */
+ struct snp_guest_msg secret_request, secret_response;
+
+ struct snp_secrets_page *secrets;
+
+ struct aesgcm_ctx *ctx;
+
+ u32 *os_area_msg_seqno;
+ u8 *vmpck;
+ int vmpck_id;
+};
+
+/*
+ * The SVSM Calling Area (CA) related structures.
+ */
+struct svsm_ca {
+ u8 call_pending;
+ u8 mem_available;
+ u8 rsvd1[6];
+
+ u8 svsm_buffer[PAGE_SIZE - 8];
+};
+
+#define SVSM_SUCCESS 0
+#define SVSM_ERR_INCOMPLETE 0x80000000
+#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001
+#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002
+#define SVSM_ERR_INVALID_ADDRESS 0x80000003
+#define SVSM_ERR_INVALID_FORMAT 0x80000004
+#define SVSM_ERR_INVALID_PARAMETER 0x80000005
+#define SVSM_ERR_INVALID_REQUEST 0x80000006
+#define SVSM_ERR_BUSY 0x80000007
+#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006
+
+/*
+ * The SVSM PVALIDATE related structures
+ */
+struct svsm_pvalidate_entry {
+ u64 page_size : 2,
+ action : 1,
+ ignore_cf : 1,
+ rsvd : 8,
+ pfn : 52;
+};
+
+struct svsm_pvalidate_call {
+ u16 num_entries;
+ u16 cur_index;
+
+ u8 rsvd1[4];
+
+ struct svsm_pvalidate_entry entry[];
+};
+
+#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \
+ offsetof(struct svsm_pvalidate_call, entry)) / \
+ sizeof(struct svsm_pvalidate_entry))
+
+/*
+ * The SVSM Attestation related structures
+ */
+struct svsm_loc_entry {
+ u64 pa;
+ u32 len;
+ u8 rsvd[4];
+};
+
+struct svsm_attest_call {
+ struct svsm_loc_entry report_buf;
+ struct svsm_loc_entry nonce;
+ struct svsm_loc_entry manifest_buf;
+ struct svsm_loc_entry certificates_buf;
+
+ /* For attesting a single service */
+ u8 service_guid[16];
+ u32 service_manifest_ver;
+ u8 rsvd[4];
+};
+
+/* PTE descriptor used for the prepare_pte_enc() operations. */
+struct pte_enc_desc {
+ pte_t *kpte;
+ int pte_level;
+ bool encrypt;
+ /* pfn of the kpte above */
+ unsigned long pfn;
+ /* physical address of @pfn */
+ unsigned long pa;
+ /* virtual address of @pfn */
+ void *va;
+ /* memory covered by the pte */
+ unsigned long size;
+ pgprot_t new_pgprot;
+};
+
+/*
+ * SVSM protocol structure
+ */
+struct svsm_call {
+ struct svsm_ca *caa;
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 rax_out;
+ u64 rcx_out;
+ u64 rdx_out;
+ u64 r8_out;
+ u64 r9_out;
+};
+
+#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
+#define SVSM_CORE_REMAP_CA 0
+#define SVSM_CORE_PVALIDATE 1
+#define SVSM_CORE_CREATE_VCPU 2
+#define SVSM_CORE_DELETE_VCPU 3
+
+#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x))
+#define SVSM_ATTEST_SERVICES 0
+#define SVSM_ATTEST_SINGLE_SERVICE 1
+
+#define SVSM_VTPM_CALL(x) ((2ULL << 32) | (x))
+#define SVSM_VTPM_QUERY 0
+#define SVSM_VTPM_CMD 1
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern u8 snp_vmpl;
+
+extern void __sev_es_ist_enter(struct pt_regs *regs);
+extern void __sev_es_ist_exit(void);
+static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
+{
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ __sev_es_ist_enter(regs);
+}
+static __always_inline void sev_es_ist_exit(void)
+{
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ __sev_es_ist_exit();
+}
+extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
+extern void __sev_es_nmi_complete(void);
+static __always_inline void sev_es_nmi_complete(void)
+{
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ __sev_es_nmi_complete();
+}
+extern int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);
+
+/*
+ * RMPADJUST modifies the RMP permissions of a page of a lesser-
+ * privileged (numerically higher) VMPL.
+ *
+ * If the guest is running at a higher-privilege than the privilege
+ * level the instruction is targeting, the instruction will succeed,
+ * otherwise, it will fail.
+ */
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+{
+ int rc;
+
+ /* "rmpadjust" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t"
+ : "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(attrs)
+ : "memory", "cc");
+
+ return rc;
+}
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+{
+ bool no_rmpupdate;
+ int rc;
+
+ /* "pvalidate" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
+ : "=@ccc"(no_rmpupdate), "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(validate)
+ : "memory", "cc");
+
+ if (no_rmpupdate)
+ return PVALIDATE_FAIL_NOUPDATE;
+
+ return rc;
+}
+
+void setup_ghcb(void);
+void snp_register_ghcb_early(unsigned long paddr);
+void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
+void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
+void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
+void snp_set_wakeup_secondary_cpu(void);
+bool snp_init(struct boot_params *bp);
+void snp_dmi_setup(void);
+int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
+void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
+void sev_show_status(void);
+int prepare_pte_enc(struct pte_enc_desc *d);
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
+void snp_kexec_finish(void);
+void snp_kexec_begin(void);
+
+int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id);
+struct snp_msg_desc *snp_msg_alloc(void);
+void snp_msg_free(struct snp_msg_desc *mdesc);
+int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req);
+
+int snp_svsm_vtpm_send_command(u8 *buffer);
+
+void __init snp_secure_tsc_prepare(void);
+void __init snp_secure_tsc_init(void);
+enum es_result savic_register_gpa(u64 gpa);
+enum es_result savic_unregister_gpa(u64 *gpa);
+u64 savic_ghcb_msr_read(u32 reg);
+void savic_ghcb_msr_write(u32 reg, u64 value);
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+ ghcb->save.sw_exit_code = 0;
+ __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+/* I/O parameters for CPUID-related helpers */
+struct cpuid_leaf {
+ u32 fn;
+ u32 subfn;
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+int svsm_perform_msr_protocol(struct svsm_call *call);
+int __pi_svsm_perform_msr_protocol(struct svsm_call *call);
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf);
+
+void svsm_issue_call(struct svsm_call *call, u8 *pending);
+int svsm_process_result_codes(struct svsm_call *call);
+
+void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2);
+
+bool sev_es_negotiate_protocol(void);
+bool sev_es_check_cpu_features(void);
+
+extern u16 ghcb_version;
+extern struct ghcb *boot_ghcb;
+extern bool sev_snp_needs_sfw;
+
+struct psc_desc {
+ enum psc_op op;
+ struct svsm_ca *ca;
+ u64 caa_pa;
+};
+
+static inline void sev_evict_cache(void *va, int npages)
+{
+ volatile u8 val __always_unused;
+ u8 *bytes = va;
+ int page_idx;
+
+ /*
+ * For SEV guests, a read from the first/last cache-lines of a 4K page
+ * using the guest key is sufficient to cause a flush of all cache-lines
+ * associated with that 4K page without incurring all the overhead of a
+ * full CLFLUSH sequence.
+ */
+ for (page_idx = 0; page_idx < npages; page_idx++) {
+ val = bytes[page_idx * PAGE_SIZE];
+ val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1];
+ }
+}
+
+#else /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define snp_vmpl 0
+static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+static inline void sev_es_ist_exit(void) { }
+static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
+static inline void sev_es_nmi_complete(void) { }
+static inline int sev_es_efi_map_ghcbs_cas(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+static inline void setup_ghcb(void) { }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline bool snp_init(struct boot_params *bp) { return false; }
+static inline void snp_dmi_setup(void) { }
+static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
+{
+ return -ENOTTY;
+}
+static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
+static inline void sev_show_status(void) { }
+static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
+static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
+static inline void snp_kexec_finish(void) { }
+static inline void snp_kexec_begin(void) { }
+static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; }
+static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
+static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
+static inline int snp_send_guest_request(struct snp_msg_desc *mdesc,
+ struct snp_guest_req *req) { return -ENODEV; }
+static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
+static inline void __init snp_secure_tsc_prepare(void) { }
+static inline void __init snp_secure_tsc_init(void) { }
+static inline void sev_evict_cache(void *va, int npages) {}
+static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; }
+static inline void savic_ghcb_msr_write(u32 reg, u64 value) { }
+static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; }
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void snp_dump_hva_rmpentry(unsigned long address);
+int psmash(u64 pfn);
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
+int rmp_make_shared(u64 pfn, enum pg_level level);
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp);
+void kdump_sev_callback(void);
+void snp_fixup_e820_tables(void);
+static inline void snp_leak_pages(u64 pfn, unsigned int pages)
+{
+ __snp_leak_pages(pfn, pages, true);
+}
+#else
+static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
+static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
+static inline void snp_dump_hva_rmpentry(unsigned long address) {}
+static inline int psmash(u64 pfn) { return -ENODEV; }
+static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid,
+ bool immutable)
+{
+ return -ENODEV;
+}
+static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {}
+static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
+static inline void snp_fixup_e820_tables(void) {}
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
new file mode 100644
index 000000000000..04958459a7ca
--- /dev/null
+++ b/arch/x86/include/asm/sgx.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ *
+ * Intel Software Guard Extensions (SGX) support.
+ */
+#ifndef _ASM_X86_SGX_H
+#define _ASM_X86_SGX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+/*
+ * This file contains both data structures defined by SGX architecture and Linux
+ * defined software data structures and functions. The two should not be mixed
+ * together for better readability. The architectural definitions come first.
+ */
+
+/* The SGX specific CPUID function. */
+#define SGX_CPUID 0x12
+/* EPC enumeration. */
+#define SGX_CPUID_EPC 2
+/* An invalid EPC section, i.e. the end marker. */
+#define SGX_CPUID_EPC_INVALID 0x0
+/* A valid EPC section. */
+#define SGX_CPUID_EPC_SECTION 0x1
+/* The bitmask for the EPC section type. */
+#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
+
+enum sgx_encls_function {
+ ECREATE = 0x00,
+ EADD = 0x01,
+ EINIT = 0x02,
+ EREMOVE = 0x03,
+ EDGBRD = 0x04,
+ EDGBWR = 0x05,
+ EEXTEND = 0x06,
+ ELDU = 0x08,
+ EBLOCK = 0x09,
+ EPA = 0x0A,
+ EWB = 0x0B,
+ ETRACK = 0x0C,
+ EAUG = 0x0D,
+ EMODPR = 0x0E,
+ EMODT = 0x0F,
+ EUPDATESVN = 0x18,
+};
+
+/**
+ * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
+ *
+ * ENCLS has its own (positive value) error codes and also generates
+ * ENCLS specific #GP and #PF faults. And the ENCLS values get munged
+ * with system error codes as everything percolates back up the stack.
+ * Unfortunately (for us), we need to precisely identify each unique
+ * error code, e.g. the action taken if EWB fails varies based on the
+ * type of fault and on the exact SGX error code, i.e. we can't simply
+ * convert all faults to -EFAULT.
+ *
+ * To make all three error types coexist, we set bit 30 to identify an
+ * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate
+ * between positive (faults and SGX error codes) and negative (system
+ * error codes) values.
+ */
+#define SGX_ENCLS_FAULT_FLAG 0x40000000
+
+/**
+ * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
+ * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function.
+ * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
+ * been completed yet.
+ * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC.
+ * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
+ * public key does not match IA32_SGXLEPUBKEYHASH.
+ * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
+ * is in the PENDING or MODIFIED state.
+ * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG.
+ * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the
+ * current SVN was not newer than CPUSVN. This is the most
+ * common error code returned by EUPDATESVN.
+ * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
+ */
+enum sgx_return_code {
+ SGX_EPC_PAGE_CONFLICT = 7,
+ SGX_NOT_TRACKED = 11,
+ SGX_CHILD_PRESENT = 13,
+ SGX_INVALID_EINITTOKEN = 16,
+ SGX_PAGE_NOT_MODIFIABLE = 20,
+ SGX_INSUFFICIENT_ENTROPY = 29,
+ SGX_NO_UPDATE = 31,
+ SGX_UNMASKED_EVENT = 128,
+};
+
+/* The modulus size for 3072-bit RSA keys. */
+#define SGX_MODULUS_SIZE 384
+
+/**
+ * enum sgx_miscselect - additional information to an SSA frame
+ * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame.
+ *
+ * Save State Area (SSA) is a stack inside the enclave used to store processor
+ * state when an exception or interrupt occurs. This enum defines additional
+ * information stored to an SSA frame.
+ */
+enum sgx_miscselect {
+ SGX_MISC_EXINFO = BIT(0),
+};
+
+#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1)
+
+#define SGX_SSA_GPRS_SIZE 184
+#define SGX_SSA_MISC_EXINFO_SIZE 16
+
+/**
+ * enum sgx_attribute - the attributes field in &struct sgx_secs
+ * @SGX_ATTR_INIT: Enclave can be entered (is initialized).
+ * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR).
+ * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave.
+ * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote
+ * attestation.
+ * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS).
+ * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to
+ * sign cryptographic tokens that can be passed to
+ * EINIT as an authorization to run an enclave.
+ * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an
+ * asynchronous exit has occurred.
+ */
+enum sgx_attribute {
+ SGX_ATTR_INIT = BIT(0),
+ SGX_ATTR_DEBUG = BIT(1),
+ SGX_ATTR_MODE64BIT = BIT(2),
+ /* BIT(3) is reserved */
+ SGX_ATTR_PROVISIONKEY = BIT(4),
+ SGX_ATTR_EINITTOKENKEY = BIT(5),
+ /* BIT(6) is for CET */
+ SGX_ATTR_KSS = BIT(7),
+ /* BIT(8) is reserved */
+ /* BIT(9) is reserved */
+ SGX_ATTR_ASYNC_EXIT_NOTIFY = BIT(10),
+};
+
+#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | \
+ BIT_ULL(6) | \
+ BIT_ULL(8) | \
+ BIT_ULL(9) | \
+ GENMASK_ULL(63, 11))
+
+#define SGX_ATTR_UNPRIV_MASK (SGX_ATTR_DEBUG | \
+ SGX_ATTR_MODE64BIT | \
+ SGX_ATTR_KSS | \
+ SGX_ATTR_ASYNC_EXIT_NOTIFY)
+
+#define SGX_ATTR_PRIV_MASK (SGX_ATTR_PROVISIONKEY | \
+ SGX_ATTR_EINITTOKENKEY)
+
+/**
+ * struct sgx_secs - SGX Enclave Control Structure (SECS)
+ * @size: size of the address space
+ * @base: base address of the address space
+ * @ssa_frame_size: size of an SSA frame
+ * @miscselect: additional information stored to an SSA frame
+ * @attributes: attributes for enclave
+ * @xfrm: XSave-Feature Request Mask (subset of XCR0)
+ * @mrenclave: SHA256-hash of the enclave contents
+ * @mrsigner: SHA256-hash of the public key used to sign the SIGSTRUCT
+ * @config_id: a user-defined value that is used in key derivation
+ * @isv_prod_id: a user-defined value that is used in key derivation
+ * @isv_svn: a user-defined value that is used in key derivation
+ * @config_svn: a user-defined value that is used in key derivation
+ *
+ * SGX Enclave Control Structure (SECS) is a special enclave page that is not
+ * visible in the address space. In fact, this structure defines the address
+ * range and other global attributes for the enclave and it is the first EPC
+ * page created for any enclave. It is moved from a temporary buffer to an EPC
+ * by the means of ENCLS[ECREATE] function.
+ */
+struct sgx_secs {
+ u64 size;
+ u64 base;
+ u32 ssa_frame_size;
+ u32 miscselect;
+ u8 reserved1[24];
+ u64 attributes;
+ u64 xfrm;
+ u32 mrenclave[8];
+ u8 reserved2[32];
+ u32 mrsigner[8];
+ u8 reserved3[32];
+ u32 config_id[16];
+ u16 isv_prod_id;
+ u16 isv_svn;
+ u16 config_svn;
+ u8 reserved4[3834];
+} __packed;
+
+/**
+ * enum sgx_tcs_flags - execution flags for TCS
+ * @SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints
+ * inside an enclave. It is cleared by EADD but can
+ * be set later with EDBGWR.
+ */
+enum sgx_tcs_flags {
+ SGX_TCS_DBGOPTIN = 0x01,
+};
+
+#define SGX_TCS_RESERVED_MASK GENMASK_ULL(63, 1)
+#define SGX_TCS_RESERVED_SIZE 4024
+
+/**
+ * struct sgx_tcs - Thread Control Structure (TCS)
+ * @state: used to mark an entered TCS
+ * @flags: execution flags (cleared by EADD)
+ * @ssa_offset: SSA stack offset relative to the enclave base
+ * @ssa_index: the current SSA frame index (cleard by EADD)
+ * @nr_ssa_frames: the number of frame in the SSA stack
+ * @entry_offset: entry point offset relative to the enclave base
+ * @exit_addr: address outside the enclave to exit on an exception or
+ * interrupt
+ * @fs_offset: offset relative to the enclave base to become FS
+ * segment inside the enclave
+ * @gs_offset: offset relative to the enclave base to become GS
+ * segment inside the enclave
+ * @fs_limit: size to become a new FS-limit (only 32-bit enclaves)
+ * @gs_limit: size to become a new GS-limit (only 32-bit enclaves)
+ *
+ * Thread Control Structure (TCS) is an enclave page visible in its address
+ * space that defines an entry point inside the enclave. A thread enters inside
+ * an enclave by supplying address of TCS to ENCLU(EENTER). A TCS can be entered
+ * by only one thread at a time.
+ */
+struct sgx_tcs {
+ u64 state;
+ u64 flags;
+ u64 ssa_offset;
+ u32 ssa_index;
+ u32 nr_ssa_frames;
+ u64 entry_offset;
+ u64 exit_addr;
+ u64 fs_offset;
+ u64 gs_offset;
+ u32 fs_limit;
+ u32 gs_limit;
+ u8 reserved[SGX_TCS_RESERVED_SIZE];
+} __packed;
+
+/**
+ * struct sgx_pageinfo - an enclave page descriptor
+ * @addr: address of the enclave page
+ * @contents: pointer to the page contents
+ * @metadata: pointer either to a SECINFO or PCMD instance
+ * @secs: address of the SECS page
+ */
+struct sgx_pageinfo {
+ u64 addr;
+ u64 contents;
+ u64 metadata;
+ u64 secs;
+} __packed __aligned(32);
+
+
+/**
+ * enum sgx_page_type - bits in the SECINFO flags defining the page type
+ * @SGX_PAGE_TYPE_SECS: a SECS page
+ * @SGX_PAGE_TYPE_TCS: a TCS page
+ * @SGX_PAGE_TYPE_REG: a regular page
+ * @SGX_PAGE_TYPE_VA: a VA page
+ * @SGX_PAGE_TYPE_TRIM: a page in trimmed state
+ *
+ * Make sure when making changes to this enum that its values can still fit
+ * in the bitfield within &struct sgx_encl_page
+ */
+enum sgx_page_type {
+ SGX_PAGE_TYPE_SECS,
+ SGX_PAGE_TYPE_TCS,
+ SGX_PAGE_TYPE_REG,
+ SGX_PAGE_TYPE_VA,
+ SGX_PAGE_TYPE_TRIM,
+};
+
+#define SGX_NR_PAGE_TYPES 5
+#define SGX_PAGE_TYPE_MASK GENMASK(7, 0)
+
+/**
+ * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo
+ * @SGX_SECINFO_R: allow read
+ * @SGX_SECINFO_W: allow write
+ * @SGX_SECINFO_X: allow execution
+ * @SGX_SECINFO_SECS: a SECS page
+ * @SGX_SECINFO_TCS: a TCS page
+ * @SGX_SECINFO_REG: a regular page
+ * @SGX_SECINFO_VA: a VA page
+ * @SGX_SECINFO_TRIM: a page in trimmed state
+ */
+enum sgx_secinfo_flags {
+ SGX_SECINFO_R = BIT(0),
+ SGX_SECINFO_W = BIT(1),
+ SGX_SECINFO_X = BIT(2),
+ SGX_SECINFO_SECS = (SGX_PAGE_TYPE_SECS << 8),
+ SGX_SECINFO_TCS = (SGX_PAGE_TYPE_TCS << 8),
+ SGX_SECINFO_REG = (SGX_PAGE_TYPE_REG << 8),
+ SGX_SECINFO_VA = (SGX_PAGE_TYPE_VA << 8),
+ SGX_SECINFO_TRIM = (SGX_PAGE_TYPE_TRIM << 8),
+};
+
+#define SGX_SECINFO_PERMISSION_MASK GENMASK_ULL(2, 0)
+#define SGX_SECINFO_PAGE_TYPE_MASK (SGX_PAGE_TYPE_MASK << 8)
+#define SGX_SECINFO_RESERVED_MASK ~(SGX_SECINFO_PERMISSION_MASK | \
+ SGX_SECINFO_PAGE_TYPE_MASK)
+
+/**
+ * struct sgx_secinfo - describes attributes of an EPC page
+ * @flags: permissions and type
+ *
+ * Used together with ENCLS leaves that add or modify an EPC page to an
+ * enclave to define page permissions and type.
+ */
+struct sgx_secinfo {
+ u64 flags;
+ u8 reserved[56];
+} __packed __aligned(64);
+
+#define SGX_PCMD_RESERVED_SIZE 40
+
+/**
+ * struct sgx_pcmd - Paging Crypto Metadata (PCMD)
+ * @enclave_id: enclave identifier
+ * @mac: MAC over PCMD, page contents and isvsvn
+ *
+ * PCMD is stored for every swapped page to the regular memory. When ELDU loads
+ * the page back it recalculates the MAC by using a isvsvn number stored in a
+ * VA page. Together these two structures bring integrity and rollback
+ * protection.
+ */
+struct sgx_pcmd {
+ struct sgx_secinfo secinfo;
+ u64 enclave_id;
+ u8 reserved[SGX_PCMD_RESERVED_SIZE];
+ u8 mac[16];
+} __packed __aligned(128);
+
+#define SGX_SIGSTRUCT_RESERVED1_SIZE 84
+#define SGX_SIGSTRUCT_RESERVED2_SIZE 20
+#define SGX_SIGSTRUCT_RESERVED3_SIZE 32
+#define SGX_SIGSTRUCT_RESERVED4_SIZE 12
+
+/**
+ * struct sgx_sigstruct_header - defines author of the enclave
+ * @header1: constant byte string
+ * @vendor: must be either 0x0000 or 0x8086
+ * @date: YYYYMMDD in BCD
+ * @header2: constant byte string
+ * @swdefined: software defined value
+ */
+struct sgx_sigstruct_header {
+ u64 header1[2];
+ u32 vendor;
+ u32 date;
+ u64 header2[2];
+ u32 swdefined;
+ u8 reserved1[84];
+} __packed;
+
+/**
+ * struct sgx_sigstruct_body - defines contents of the enclave
+ * @miscselect: additional information stored to an SSA frame
+ * @misc_mask: required miscselect in SECS
+ * @attributes: attributes for enclave
+ * @xfrm: XSave-Feature Request Mask (subset of XCR0)
+ * @attributes_mask: required attributes in SECS
+ * @xfrm_mask: required XFRM in SECS
+ * @mrenclave: SHA256-hash of the enclave contents
+ * @isvprodid: a user-defined value that is used in key derivation
+ * @isvsvn: a user-defined value that is used in key derivation
+ */
+struct sgx_sigstruct_body {
+ u32 miscselect;
+ u32 misc_mask;
+ u8 reserved2[20];
+ u64 attributes;
+ u64 xfrm;
+ u64 attributes_mask;
+ u64 xfrm_mask;
+ u8 mrenclave[32];
+ u8 reserved3[32];
+ u16 isvprodid;
+ u16 isvsvn;
+} __packed;
+
+/**
+ * struct sgx_sigstruct - an enclave signature
+ * @header: defines author of the enclave
+ * @modulus: the modulus of the public key
+ * @exponent: the exponent of the public key
+ * @signature: the signature calculated over the fields except modulus,
+ * @body: defines contents of the enclave
+ * @q1: a value used in RSA signature verification
+ * @q2: a value used in RSA signature verification
+ *
+ * Header and body are the parts that are actual signed. The remaining fields
+ * define the signature of the enclave.
+ */
+struct sgx_sigstruct {
+ struct sgx_sigstruct_header header;
+ u8 modulus[SGX_MODULUS_SIZE];
+ u32 exponent;
+ u8 signature[SGX_MODULUS_SIZE];
+ struct sgx_sigstruct_body body;
+ u8 reserved4[12];
+ u8 q1[SGX_MODULUS_SIZE];
+ u8 q2[SGX_MODULUS_SIZE];
+} __packed;
+
+#define SGX_LAUNCH_TOKEN_SIZE 304
+
+/*
+ * Do not put any hardware-defined SGX structure representations below this
+ * comment!
+ */
+
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+ int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+ void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+ unsigned int attribute_fd);
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644
index 000000000000..8009d781c2f9
--- /dev/null
+++ b/arch/x86/include/asm/shared/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type) \
+static __always_inline void __out##bwl(type value, u16 port) \
+{ \
+ asm volatile("out" #bwl " %" #bw "0, %w1" \
+ : : "a"(value), "Nd"(port)); \
+} \
+ \
+static __always_inline type __in##bwl(u16 port) \
+{ \
+ type value; \
+ asm volatile("in" #bwl " %w1, %" #bw "0" \
+ : "=a"(value) : "Nd"(port)); \
+ return value; \
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644
index 000000000000..a20b1c08c99f
--- /dev/null
+++ b/arch/x86/include/asm/shared/msr.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+ union {
+ struct {
+ u32 l;
+ u32 h;
+ };
+ u64 q;
+ };
+};
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the
+ * boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void raw_rdmsr(unsigned int reg, struct msr *m)
+{
+ asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void raw_wrmsr(unsigned int reg, const struct msr *m)
+{
+ asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
+#endif /* _ASM_X86_SHARED_MSR_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
new file mode 100644
index 000000000000..8bc074c8d7c6
--- /dev/null
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_TDX_H
+#define _ASM_X86_SHARED_TDX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define TDX_HYPERCALL_STANDARD 0
+
+#define TDX_CPUID_LEAF_ID 0x21
+#define TDX_IDENT "IntelTDX "
+
+/* TDX module Call Leaf IDs */
+#define TDG_VP_VMCALL 0
+#define TDG_VP_INFO 1
+#define TDG_MR_RTMR_EXTEND 2
+#define TDG_VP_VEINFO_GET 3
+#define TDG_MR_REPORT 4
+#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
+#define TDG_VM_WR 8
+
+/* TDX attributes */
+#define TDX_ATTR_DEBUG_BIT 0
+#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT)
+#define TDX_ATTR_HGS_PLUS_PROF_BIT 4
+#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT)
+#define TDX_ATTR_PERF_PROF_BIT 5
+#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT)
+#define TDX_ATTR_PMT_PROF_BIT 6
+#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT)
+#define TDX_ATTR_ICSSD_BIT 16
+#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT)
+#define TDX_ATTR_LASS_BIT 27
+#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT)
+#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28
+#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT)
+#define TDX_ATTR_MIGRTABLE_BIT 29
+#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT)
+#define TDX_ATTR_PKS_BIT 30
+#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT)
+#define TDX_ATTR_KL_BIT 31
+#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT)
+#define TDX_ATTR_TPA_BIT 62
+#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT)
+#define TDX_ATTR_PERFMON_BIT 63
+#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT)
+
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
+#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
+
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE_BIT 0
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(TD_CTLS_PENDING_VE_DISABLE_BIT)
+#define TD_CTLS_ENUM_TOPOLOGY_BIT 1
+#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(TD_CTLS_ENUM_TOPOLOGY_BIT)
+#define TD_CTLS_VIRT_CPUID2_BIT 2
+#define TD_CTLS_VIRT_CPUID2 BIT_ULL(TD_CTLS_VIRT_CPUID2_BIT)
+#define TD_CTLS_REDUCE_VE_BIT 3
+#define TD_CTLS_REDUCE_VE BIT_ULL(TD_CTLS_REDUCE_VE_BIT)
+#define TD_CTLS_LOCK_BIT 63
+#define TD_CTLS_LOCK BIT_ULL(TD_CTLS_LOCK_BIT)
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000
+#define TDVMCALL_MAP_GPA 0x10001
+#define TDVMCALL_GET_QUOTE 0x10002
+#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
+
+/*
+ * TDG.VP.VMCALL Status Codes (returned in R10)
+ */
+#define TDVMCALL_STATUS_SUCCESS 0x0000000000000000ULL
+#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL
+#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL
+#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL
+#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL
+
+/*
+ * Bitmasks of exposed registers (with VMM).
+ */
+#define TDX_RDX BIT(2)
+#define TDX_RBX BIT(3)
+#define TDX_RSI BIT(6)
+#define TDX_RDI BIT(7)
+#define TDX_R8 BIT(8)
+#define TDX_R9 BIT(9)
+#define TDX_R10 BIT(10)
+#define TDX_R11 BIT(11)
+#define TDX_R12 BIT(12)
+#define TDX_R13 BIT(13)
+#define TDX_R14 BIT(14)
+#define TDX_R15 BIT(15)
+
+/*
+ * These registers are clobbered to hold arguments for each
+ * TDVMCALL. They are safe to expose to the VMM.
+ * Each bit in this mask represents a register ID. Bit field
+ * details can be found in TDX GHCI specification, section
+ * titled "TDCALL [TDG.VP.VMCALL] leaf".
+ */
+#define TDVMCALL_EXPOSE_REGS_MASK \
+ (TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \
+ TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15)
+
+/* TDX supported page sizes from the TDX module ABI. */
+#define TDX_PS_4K 0
+#define TDX_PS_2M 1
+#define TDX_PS_1G 2
+#define TDX_PS_NR (TDX_PS_1G + 1)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/compiler_attributes.h>
+
+/*
+ * Used in __tdcall*() to gather the input/output registers' values of the
+ * TDCALL instruction when requesting services from the TDX module. This is a
+ * software only structure and not part of the TDX module/VMM ABI
+ */
+struct tdx_module_args {
+ /* callee-clobbered */
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ /* extra callee-clobbered */
+ u64 r10;
+ u64 r11;
+ /* callee-saved + rdi/rsi */
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u64 rbx;
+ u64 rdi;
+ u64 rsi;
+};
+
+/* Used to communicate with the TDX module */
+u64 __tdcall(u64 fn, struct tdx_module_args *args);
+u64 __tdcall_ret(u64 fn, struct tdx_module_args *args);
+u64 __tdcall_saved_ret(u64 fn, struct tdx_module_args *args);
+
+/* Used to request services from the VMM */
+u64 __tdx_hypercall(struct tdx_module_args *args);
+
+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+ struct tdx_module_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = fn,
+ .r12 = r12,
+ .r13 = r13,
+ .r14 = r14,
+ .r15 = r15,
+ };
+
+ return __tdx_hypercall(&args);
+}
+
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __noreturn __tdx_hypercall_failed(void);
+
+bool tdx_accept_memory(phys_addr_t start, phys_addr_t end);
+
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static __always_inline u64 hcall_func(u64 exit_reason)
+{
+ return exit_reason;
+}
+
+#endif /* !__ASSEMBLER__ */
+#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
new file mode 100644
index 000000000000..fc7dcec58fd4
--- /dev/null
+++ b/arch/x86/include/asm/shstk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHSTK_H
+#define _ASM_X86_SHSTK_H
+
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+
+struct task_struct;
+struct ksignal;
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+struct thread_shstk {
+ u64 base;
+ u64 size;
+};
+
+long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
+void reset_thread_features(void);
+unsigned long shstk_alloc_thread_stack(struct task_struct *p, u64 clone_flags,
+ unsigned long stack_size);
+void shstk_free(struct task_struct *p);
+int setup_signal_shadow_stack(struct ksignal *ksig);
+int restore_signal_shadow_stack(void);
+int shstk_update_last_frame(unsigned long val);
+bool shstk_is_enabled(void);
+int shstk_pop(u64 *val);
+int shstk_push(u64 val);
+#else
+static inline long shstk_prctl(struct task_struct *task, int option,
+ unsigned long arg2) { return -EINVAL; }
+static inline void reset_thread_features(void) {}
+static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
+ u64 clone_flags,
+ unsigned long stack_size) { return 0; }
+static inline void shstk_free(struct task_struct *p) {}
+static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
+static inline int restore_signal_shadow_stack(void) { return 0; }
+static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline bool shstk_is_enabled(void) { return false; }
+static inline int shstk_pop(u64 *val) { return -ENOTSUPP; }
+static inline int shstk_push(u64 val) { return -ENOTSUPP; }
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_X86_SHSTK_H */
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index f176114c04d4..84eab2724875 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -33,11 +33,7 @@ struct sigframe_ia32 {
* legacy application accessing/modifying it.
*/
struct _fpstate_32 fpstate_unused;
-#ifdef CONFIG_IA32_EMULATION
- unsigned int extramask[_COMPAT_NSIG_WORDS-1];
-#else /* !CONFIG_IA32_EMULATION */
- unsigned long extramask[_NSIG_WORDS-1];
-#endif /* CONFIG_IA32_EMULATION */
+ unsigned int extramask[1];
char retcode[8];
/* fp state follows here */
};
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 2fcbd6f33ef7..8727c7e21dd1 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -14,12 +14,36 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
- struct pt_regs *regs, unsigned long mask);
+void __user *
+get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
+ void __user **fpstate);
-#ifdef CONFIG_X86_X32_ABI
-asmlinkage long sys32_x32_rt_sigreturn(void);
+int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs);
+int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+
+/*
+ * To prevent immediate repeat of single step trap on return from SIGTRAP
+ * handler if the trap flag (TF) is set without an external debugger attached,
+ * clear the software event flag in the augmented SS, ensuring no single-step
+ * trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original
+ * state is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
#endif
+}
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 33d3c88a7225..5c03aaa89014 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_SIGNAL_H
#define _ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
/* Most things should be clean enough to redefine this at will, if care
@@ -28,14 +28,9 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
-#ifndef CONFIG_COMPAT
-typedef sigset_t compat_sigset_t;
-#endif
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <uapi/asm/signal.h>
-#ifndef __ASSEMBLY__
-extern void do_signal(struct pt_regs *regs);
+#ifndef __ASSEMBLER__
#define __ARCH_HAS_SA_RESTORER
@@ -88,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
bool ret;
- asm("btl %2,%1" CC_SET(c)
- : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
+ asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
@@ -106,5 +100,5 @@ struct pt_regs;
#endif /* !__i386__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index a341c878e977..b8027b63cd7a 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,6 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
#include <asm/fpu/api.h>
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
@@ -10,3 +14,5 @@ static __must_check inline bool may_use_simd(void)
{
return irq_fpu_usable();
}
+
+#endif /* _ASM_SIMD_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4b..977bef14a0ab 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -1,79 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Supervisor Mode Access Prevention support
*
* Copyright (C) 2012 Intel Corporation
* Author: H. Peter Anvin <hpa@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H
-#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
-/* "Raw" instruction opcodes */
-#define __ASM_CLAC .byte 0x0f,0x01,0xca
-#define __ASM_STAC .byte 0x0f,0x01,0xcb
-
-#ifdef __ASSEMBLY__
-
-#include <asm/alternative-asm.h>
-
-#ifdef CONFIG_X86_SMAP
+#ifdef __ASSEMBLER__
#define ASM_CLAC \
- ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", "clac", X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", "stac", X86_FEATURE_SMAP
-#else /* CONFIG_X86_SMAP */
+#else /* __ASSEMBLER__ */
-#define ASM_CLAC
-#define ASM_STAC
+/*
+ * The CLAC/STAC instructions toggle the enforcement of
+ * X86_FEATURE_SMAP along with X86_FEATURE_LASS.
+ *
+ * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page
+ * tables. The kernel is not allowed to touch pages with that bit set
+ * unless the AC bit is set.
+ *
+ * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings,
+ * regardless of location.
+ *
+ * Note: a barrier is implicit in alternative().
+ */
-#endif /* CONFIG_X86_SMAP */
+static __always_inline void clac(void)
+{
+ alternative("", "clac", X86_FEATURE_SMAP);
+}
-#else /* __ASSEMBLY__ */
+static __always_inline void stac(void)
+{
+ alternative("", "stac", X86_FEATURE_SMAP);
+}
-#include <asm/alternative.h>
+/*
+ * LASS enforcement is based on bit 63 of the virtual address. The
+ * kernel is not allowed to touch memory in the lower half of the
+ * virtual address space.
+ *
+ * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data
+ * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP.
+ *
+ * Even with the AC bit set, LASS will continue to block instruction
+ * fetches from the user half of the address space. To allow those,
+ * clear CR4.LASS to disable the LASS mechanism entirely.
+ *
+ * Note: a barrier is implicit in alternative().
+ */
-#ifdef CONFIG_X86_SMAP
+static __always_inline void lass_clac(void)
+{
+ alternative("", "clac", X86_FEATURE_LASS);
+}
-static __always_inline void clac(void)
+static __always_inline void lass_stac(void)
{
- /* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", "stac", X86_FEATURE_LASS);
}
-static __always_inline void stac(void)
+static __always_inline unsigned long smap_save(void)
{
- /* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ unsigned long flags;
+
+ asm volatile ("# smap_save\n\t"
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t"
+ "", "pushf; pop %0; clac",
+ X86_FEATURE_SMAP)
+ : "=rm" (flags) : : "memory", "cc");
+
+ return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+ asm volatile ("# smap_restore\n\t"
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t"
+ "", "push %0; popf",
+ X86_FEATURE_SMAP)
+ : : "g" (flags) : "memory", "cc");
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", "clac", X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
-
-#else /* CONFIG_X86_SMAP */
-
-static inline void clac(void) { }
-static inline void stac(void) { }
-
-#define ASM_CLAC
-#define ASM_STAC
+ ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
-#endif /* CONFIG_X86_SMAP */
+#define ASM_CLAC_UNSAFE \
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "clac", X86_FEATURE_SMAP)
+#define ASM_STAC_UNSAFE \
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "stac", X86_FEATURE_SMAP)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2e95b6c1bca3..84951572ab81 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -1,44 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SMP_H
#define _ASM_X86_SMP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
-#include <asm/percpu.h>
+#include <linux/thread_info.h>
-/*
- * We need the APIC definitions automatically as part of 'smp.h'
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-# include <asm/mpspec.h>
-# include <asm/apic.h>
-# ifdef CONFIG_X86_IO_APIC
-# include <asm/io_apic.h>
-# endif
-#endif
-#include <asm/thread_info.h>
#include <asm/cpumask.h>
-extern int smp_num_siblings;
-extern unsigned int num_processors;
+DECLARE_PER_CPU_CACHE_HOT(int, cpu_number);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
-DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
-static inline struct cpumask *cpu_llc_shared_mask(int cpu)
-{
- return per_cpu(cpu_llc_shared_map, cpu);
-}
-
-DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
-DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
-#endif
struct task_struct;
@@ -51,10 +30,13 @@ struct smp_ops {
void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);
- int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
+ void (*cleanup_dead_cpu)(unsigned cpu);
+ void (*poll_sync_state)(void);
+ int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
+ void (*stop_this_cpu)(void);
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
@@ -76,11 +58,6 @@ static inline void stop_other_cpus(void)
smp_ops.stop_other_cpus(1);
}
-static inline void smp_prepare_boot_cpu(void)
-{
- smp_ops.smp_prepare_boot_cpu();
-}
-
static inline void smp_prepare_cpus(unsigned int max_cpus)
{
smp_ops.smp_prepare_cpus(max_cpus);
@@ -91,11 +68,6 @@ static inline void smp_cpus_done(unsigned int max_cpus)
smp_ops.smp_cpus_done(max_cpus);
}
-static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
- return smp_ops.cpu_up(cpu, tidle);
-}
-
static inline int __cpu_disable(void)
{
return smp_ops.cpu_disable();
@@ -103,15 +75,17 @@ static inline int __cpu_disable(void)
static inline void __cpu_die(unsigned int cpu)
{
- smp_ops.cpu_die(cpu);
+ if (smp_ops.cpu_die)
+ smp_ops.cpu_die(cpu);
}
-static inline void play_dead(void)
+static inline void __noreturn play_dead(void)
{
smp_ops.play_dead();
+ BUG();
}
-static inline void smp_send_reschedule(int cpu)
+static inline void arch_smp_send_reschedule(int cpu)
{
smp_ops.smp_send_reschedule(cpu);
}
@@ -128,26 +102,27 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
-void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
-void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
-int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
-void hlt_play_dead(void);
-void native_play_dead(void);
+void __noreturn hlt_play_dead(void);
+void __noreturn native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
-int wbinvd_on_all_cpus(void);
+void wbinvd_on_all_cpus(void);
+void wbinvd_on_cpus_mask(struct cpumask *cpus);
+void wbnoinvd_on_all_cpus(void);
+void wbnoinvd_on_cpus_mask(struct cpumask *cpus);
+
+void smp_kick_mwait_play_dead(void);
+void __noreturn mwait_play_dead(unsigned int eax_hint);
+void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
-
-void smp_store_boot_cpu_info(void);
-void smp_store_cpu_info(int id);
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
@@ -159,34 +134,50 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
/*
* This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
+ * from the initial startup.
*/
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
-#ifdef CONFIG_X86_32
-extern int safe_smp_processor_id(void);
-#else
-# define safe_smp_processor_id() smp_processor_id()
-#endif
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
+static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
+{
+ return per_cpu(cpu_l2c_shared_map, cpu);
+}
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
-static inline int wbinvd_on_all_cpus(void)
+static inline void wbinvd_on_all_cpus(void)
{
wbinvd();
- return 0;
}
-#endif /* CONFIG_SMP */
-extern unsigned disabled_cpus;
+static inline void wbinvd_on_cpus_mask(struct cpumask *cpus)
+{
+ wbinvd();
+}
+
+static inline void wbnoinvd_on_all_cpus(void)
+{
+ wbnoinvd();
+}
+
+static inline void wbnoinvd_on_cpus_mask(struct cpumask *cpus)
+{
+ wbnoinvd();
+}
-#ifdef CONFIG_X86_LOCAL_APIC
-extern int hard_smp_processor_id(void);
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return (struct cpumask *)cpumask_of(0);
+}
-#else /* CONFIG_X86_LOCAL_APIC */
-#define hard_smp_processor_id() 0
-#endif /* CONFIG_X86_LOCAL_APIC */
+static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); }
+#endif /* CONFIG_SMP */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
@@ -194,5 +185,15 @@ extern void nmi_selftest(void);
#define nmi_selftest() do { } while (0)
#endif
-#endif /* __ASSEMBLY__ */
+extern unsigned int smpboot_control;
+extern unsigned long apic_mmio_base;
+
+#endif /* !__ASSEMBLER__ */
+
+/* Control bits for startup_64 */
+#define STARTUP_READ_APICID 0x80000000
+
+/* Top 8 bits are reserved for control */
+#define STARTUP_PARALLEL_MASK 0xFF000000
+
#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h
new file mode 100644
index 000000000000..889d53d6a0e1
--- /dev/null
+++ b/arch/x86/include/asm/softirq_stack.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SOFTIRQ_STACK_H
+#define _ASM_X86_SOFTIRQ_STACK_H
+
+#ifdef CONFIG_X86_64
+# include <asm/irq_stack.h>
+#else
+# include <asm-generic/softirq_stack.h>
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 199218719a86..3918c7a434f5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SPARSEMEM_H
#define _ASM_X86_SPARSEMEM_H
+#include <linux/types.h>
+
#ifdef CONFIG_SPARSEMEM
/*
* generic non-linear memory support:
@@ -10,26 +12,23 @@
* field of the struct page
*
* SECTION_SIZE_BITS 2^n: size of each section
- * MAX_PHYSADDR_BITS 2^n: max size of physical address space
- * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space
+ * MAX_PHYSMEM_BITS 2^n: max size of physical address space
*
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
# define SECTION_SIZE_BITS 29
-# define MAX_PHYSADDR_BITS 36
# define MAX_PHYSMEM_BITS 36
# else
# define SECTION_SIZE_BITS 26
-# define MAX_PHYSADDR_BITS 32
# define MAX_PHYSMEM_BITS 32
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44)
# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
+
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 5393babc0598..00b7e0398210 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -4,6 +4,7 @@
#include <linux/thread_info.h>
#include <asm/nospec-branch.h>
+#include <asm/msr.h>
/*
* On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
@@ -13,7 +14,7 @@
* Takes the guest view of SPEC_CTRL MSR as a parameter and also
* the guest's version of VIRT_SPEC_CTRL, if emulated.
*/
-extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest);
/**
* x86_spec_ctrl_set_guest - Set speculation control registers for the guest
@@ -24,9 +25,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, true);
}
/**
@@ -38,9 +39,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, false);
}
/* AMD specific Speculative Store Bypass MSR data */
@@ -76,6 +77,16 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
}
+/*
+ * This can be used in noinstr functions & should only be called in bare
+ * metal context.
+ */
+static __always_inline void __update_spec_ctrl(u64 val)
+{
+ __this_cpu_write(x86_spec_ctrl_current, val);
+ native_wrmsrq(MSR_IA32_SPEC_CTRL, val);
+}
+
#ifdef CONFIG_SMP
extern void speculative_store_bypass_ht_init(void);
#else
@@ -85,4 +96,6 @@ static inline void speculative_store_bypass_ht_init(void) { }
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
+extern bool itlb_multihit_kvm_mitigation;
+
#endif
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..46aa2c9c1bda 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -2,54 +2,45 @@
#ifndef _ASM_X86_SPECIAL_INSNS_H
#define _ASM_X86_SPECIAL_INSNS_H
-
#ifdef __KERNEL__
-
#include <asm/nops.h>
+#include <asm/processor-flags.h>
-/*
- * Volatile isn't enough to prevent the compiler from reordering the
- * read/write functions for the control registers and messing everything up.
- * A memory clobber would solve the problem, but would prevent reordering of
- * all loads stores around it, which can hurt performance. Solution is to
- * use a variable and mimic reads and writes to it to enforce serialization
- */
-extern unsigned long __force_order;
+#include <linux/errno.h>
+#include <linux/irqflags.h>
+#include <linux/jump_label.h>
+
+void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr0,%0" : "=r" (val));
return val;
}
-static inline void native_write_cr0(unsigned long val)
-{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
-}
-
-static inline unsigned long native_read_cr2(void)
+static __always_inline unsigned long native_read_cr2(void)
{
unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr2,%0" : "=r" (val));
return val;
}
-static inline void native_write_cr2(unsigned long val)
+static __always_inline void native_write_cr2(unsigned long val)
{
- asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
-static inline unsigned long __native_read_cr3(void)
+static __always_inline unsigned long __native_read_cr3(void)
{
unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr3,%0" : "=r" (val));
return val;
}
-static inline void native_write_cr3(unsigned long val)
+static __always_inline void native_write_cr3(unsigned long val)
{
- asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
static inline unsigned long native_read_cr4(void)
@@ -64,35 +55,18 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
- : "=r" (val), "=m" (__force_order) : "0" (0));
+ : "=r" (val) : "0" (0));
#else
/* CR4 always exists on x86_64. */
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr4,%0" : "=r" (val));
#endif
return val;
}
-static inline void native_write_cr4(unsigned long val)
-{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
-}
-
-#ifdef CONFIG_X86_64
-static inline unsigned long native_read_cr8(void)
-{
- unsigned long cr8;
- asm volatile("movq %%cr8,%0" : "=r" (cr8));
- return cr8;
-}
-
-static inline void native_write_cr8(unsigned long val)
-{
- asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-#endif
+void native_write_cr4(unsigned long val);
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
{
u32 ecx = 0;
u32 edx, pkru;
@@ -101,13 +75,11 @@ static inline u32 __read_pkru(void)
* "rdpkru" instruction. Places PKRU contents in to EAX,
* clears EDX and requires that ecx=0.
*/
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (pkru), "=d" (edx)
- : "c" (ecx));
+ asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx));
return pkru;
}
-static inline void __write_pkru(u32 pkru)
+static inline void wrpkru(u32 pkru)
{
u32 ecx = 0, edx = 0;
@@ -115,26 +87,51 @@ static inline void __write_pkru(u32 pkru)
* "wrpkru" instruction. Loads contents in EAX to PKRU,
* requires that ecx = edx = 0.
*/
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (pkru), "c"(ecx), "d"(edx));
+ asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx));
}
+
#else
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
{
return 0;
}
-static inline void __write_pkru(u32 pkru)
+static inline void wrpkru(u32 pkru)
{
}
#endif
-static inline void native_wbinvd(void)
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, and then invalidate all caches. Depending
+ * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect
+ * lower level caches associated with another logical processor that shares any
+ * level of this processor's cache hierarchy.
+ */
+static __always_inline void wbinvd(void)
{
- asm volatile("wbinvd": : :"memory");
+ asm volatile("wbinvd" : : : "memory");
}
-extern asmlinkage void native_load_gs_index(unsigned);
+/* Instruction encoding provided for binutils backwards compatibility. */
+#define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09)
+
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, but do NOT explicitly invalidate caches,
+ * i.e. leave all/most cache lines in the hierarchy in non-modified state.
+ */
+static __always_inline void wbnoinvd(void)
+{
+ /*
+ * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even
+ * though WBNOINVD is backwards compatible (it's simply WBINVD with an
+ * ignored REP prefix), to guarantee that WBNOINVD isn't used if it
+ * needs to be avoided for any reason. For all supported usage in the
+ * kernel, WBINVD is functionally a superset of WBNOINVD.
+ */
+ alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD);
+}
static inline unsigned long __read_cr4(void)
{
@@ -155,12 +152,12 @@ static inline void write_cr0(unsigned long x)
native_write_cr0(x);
}
-static inline unsigned long read_cr2(void)
+static __always_inline unsigned long read_cr2(void)
{
return native_read_cr2();
}
-static inline void write_cr2(unsigned long x)
+static __always_inline void write_cr2(unsigned long x)
{
native_write_cr2(x);
}
@@ -183,62 +180,128 @@ static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
+#endif /* CONFIG_PARAVIRT_XXL */
-static inline void wbinvd(void)
+static __always_inline void clflush(volatile void *__p)
{
- native_wbinvd();
+ asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
}
-#ifdef CONFIG_X86_64
-
-static inline unsigned long read_cr8(void)
+static inline void clflushopt(volatile void *__p)
{
- return native_read_cr8();
+ alternative_io("ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
+ "+m" (*(volatile char __force *)__p));
}
-static inline void write_cr8(unsigned long x)
+static inline void clwb(volatile void *__p)
{
- native_write_cr8(x);
+ volatile struct { char x[64]; } *p = __p;
+
+ asm_inline volatile(ALTERNATIVE_2(
+ "ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
+ "clwb %0", X86_FEATURE_CLWB)
+ : "+m" (*p));
}
-static inline void load_gs_index(unsigned selector)
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- native_load_gs_index(selector);
+ asm goto("1: wrussq %[val], %[addr]\n"
+ _ASM_EXTABLE(1b, %l[fail])
+ :: [addr] "m" (*addr), [val] "r" (val)
+ :: fail);
+ return 0;
+fail:
+ return -EFAULT;
}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
-#endif
+#define nop() asm volatile ("nop")
-#endif /* CONFIG_PARAVIRT_XXL */
+static __always_inline void serialize(void)
+{
+ /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
-static inline void clflush(volatile void *__p)
+/* The dst parameter must be 64-bytes aligned */
+static inline void movdir64b(void *dst, const void *src)
{
- asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+
+ /*
+ * MOVDIR64B %(rdx), rax.
+ *
+ * Both __src and __dst must be memory constraints in order to tell the
+ * compiler that no other memory accesses should be reordered around
+ * this one.
+ *
+ * Also, both must be supplied as lvalues because this tells
+ * the compiler what the object is (its size) the instruction accesses.
+ * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+ */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
}
-static inline void clflushopt(volatile void *__p)
+static inline void movdir64b_io(void __iomem *dst, const void *src)
{
- alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0",
- ".byte 0x66; clflush %P0",
- X86_FEATURE_CLFLUSHOPT,
- "+m" (*(volatile char __force *)__p));
+ movdir64b((void __force *)dst, src);
}
-static inline void clwb(volatile void *__p)
+/**
+ * enqcmds - Enqueue a command in supervisor (CPL0) mode
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: 512 bits memory operand
+ *
+ * The ENQCMDS instruction allows software to write a 512-bit command to
+ * a 512-bit-aligned special MMIO region that supports the instruction.
+ * A return status is loaded into the ZF flag in the RFLAGS register.
+ * ZF = 0 equates to success, and ZF = 1 indicates retry or error.
+ *
+ * This function issues the ENQCMDS instruction to submit data from
+ * kernel space to MMIO space, in a unit of 512 bits. Order of data access
+ * is not guaranteed, nor is a memory barrier performed afterwards. It
+ * returns 0 on success and -EAGAIN on failure.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the
+ * ENQCMDS instruction is supported on the platform and the device accepts
+ * ENQCMDS.
+ */
+static inline int enqcmds(void __iomem *dst, const void *src)
{
- volatile struct { char x[64]; } *p = __p;
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } __iomem *__dst = dst;
+ bool zf;
- asm volatile(ALTERNATIVE_2(
- ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
- ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
- X86_FEATURE_CLFLUSHOPT,
- ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
- X86_FEATURE_CLWB)
- : [p] "+m" (*p)
- : [pax] "a" (p));
-}
+ /*
+ * ENQCMDS %(rdx), rax
+ *
+ * See movdir64b()'s comment on operand specification.
+ */
+ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
+ : "=@ccz" (zf), "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
-#define nop() asm volatile ("nop")
+ /* Submission failure is indicated via EFLAGS.ZF=1 */
+ if (zf)
+ return -EAGAIN;
+ return 0;
+}
+
+static __always_inline void tile_release(void)
+{
+ /*
+ * Instruction opcode for TILERELEASE; supported in binutils
+ * version >= 2.36.
+ */
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0");
+}
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index bf3e34b25afc..323db6c5852a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -3,29 +3,7 @@
#define _ASM_X86_SPINLOCK_TYPES_H
#include <linux/types.h>
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define __TICKET_LOCK_INC 2
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)1)
-#else
-#define __TICKET_LOCK_INC 1
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)0)
-#endif
-
-#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
-typedef u8 __ticket_t;
-typedef u16 __ticketpair_t;
-#else
-typedef u16 __ticket_t;
-typedef u32 __ticketpair_t;
-#endif
-
-#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC)
-
-#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-
#include <asm-generic/qspinlock_types.h>
-
#include <asm-generic/qrwlock_types.h>
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
deleted file mode 100644
index e0975e9c4f47..000000000000
--- a/arch/x86/include/asm/sta2x11.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
- */
-#ifndef __ASM_STA2X11_H
-#define __ASM_STA2X11_H
-
-#include <linux/pci.h>
-
-/* This needs to be called from the MFD to configure its sub-devices */
-struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
-
-#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 8ec97a62c245..cd761b14eb02 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,33 +2,10 @@
/*
* GCC stack protector support.
*
- * Stack protector works by putting predefined pattern at the start of
+ * Stack protector works by putting a predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
- * returning from the function. The pattern is called stack canary
- * and unfortunately gcc requires it to be at a fixed offset from %gs.
- * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
- * and x86_32 use segment registers differently and thus handles this
- * requirement differently.
- *
- * On x86_64, %gs is shared by percpu area and stack canary. All
- * percpu symbols are zero based and %gs points to the base of percpu
- * area. The first occupant of the percpu area is always
- * irq_stack_union which contains stack_canary at offset 40. Userland
- * %gs is always saved and restored on kernel entry and exit using
- * swapgs, so stack protector doesn't add any complexity there.
- *
- * On x86_32, it's slightly more complicated. As in x86_64, %gs is
- * used for userland TLS. Unfortunately, some processors are much
- * slower at loading segment registers with different value when
- * entering and leaving the kernel, so the kernel uses %fs for percpu
- * area and manages %gs lazily so that %gs is switched only when
- * necessary, usually during task switch.
- *
- * As gcc requires the stack canary at %gs:20, %gs can't be managed
- * lazily if stack protector is enabled, so the kernel saves and
- * restores userland %gs on kernel entry and exit. This behavior is
- * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
- * system.h to hide the details.
+ * returning from the function. The pattern is called the stack canary
+ * and is a unique value for each task.
*/
#ifndef _ASM_STACKPROTECTOR_H
@@ -41,85 +18,40 @@
#include <asm/percpu.h>
#include <asm/desc.h>
-#include <linux/random.h>
#include <linux/sched.h>
-/*
- * 24 byte read-only segment initializer for stack canary. Linker
- * can't handle the address bit shifting. Address will be set in
- * head_32 for boot CPU and setup_per_cpu_areas() for others.
- */
-#define GDT_STACK_CANARY_INIT \
- [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
/*
* Initialize the stackprotector canary value.
*
- * NOTE: this must only be called from functions that never return,
+ * NOTE: this must only be called from functions that never return
* and it must always be inlined.
+ *
+ * In addition, it should be called from a compilation unit for which
+ * stack protector is disabled. Alternatively, the caller should not end
+ * with a function call which gets tail-call optimized as that would
+ * lead to checking a modified canary value.
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
- u64 tsc;
-
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
-#endif
- /*
- * We both use the random pool and the current TSC as a source
- * of randomness. The TSC only matters for very early init,
- * there it already has some randomness on most systems. Later
- * on during the bootup the random pool has true entropy too.
- */
- get_random_bytes(&canary, sizeof(canary));
- tsc = rdtsc();
- canary += tsc + (tsc << 32UL);
- canary &= CANARY_MASK;
+ unsigned long canary = get_random_canary();
current->stack_canary = canary;
-#ifdef CONFIG_X86_64
- this_cpu_write(irq_stack_union.stack_canary, canary);
-#else
- this_cpu_write(stack_canary.canary, canary);
-#endif
+ this_cpu_write(__stack_chk_guard, canary);
}
-static inline void setup_stack_canary_segment(int cpu)
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
-#ifdef CONFIG_X86_32
- unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
- struct desc_struct desc;
-
- desc = gdt_table[GDT_ENTRY_STACK_CANARY];
- set_desc_base(&desc, canary);
- write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
-#endif
-}
-
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
- asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
-#endif
+ per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
}
#else /* STACKPROTECTOR */
-#define GDT_STACK_CANARY_INIT
-
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
-static inline void setup_stack_canary_segment(int cpu)
+static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{ }
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
- asm volatile ("mov %0, %%gs" : : "r" (0));
-#endif
-}
-
#endif /* STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f335aad404a4..3881b5333eb8 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -9,6 +9,8 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
+
+#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>
enum stack_type {
@@ -33,6 +35,18 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info);
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
+bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info);
+
+static __always_inline
+bool get_stack_guard_info(unsigned long *stack, struct stack_info *info)
+{
+ /* make sure it's not in the stack proper */
+ if (get_stack_info_noinstr(stack, current, info))
+ return false;
+ /* but if it is in the page below it, we hit a guard */
+ return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info);
+}
const char *stack_type_name(enum stack_type type);
@@ -76,7 +90,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
if (regs)
- return (unsigned long *)kernel_stack_pointer(regs);
+ return (unsigned long *)regs->sp;
if (task == current)
return __builtin_frame_address(0);
@@ -84,9 +98,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
return (unsigned long *)task->thread.sp;
}
-void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, char *log_lvl);
-
/* The form of the top of the frame on the stack */
struct stack_frame {
struct stack_frame *next_frame;
@@ -98,19 +109,6 @@ struct stack_frame_ia32 {
u32 return_address;
};
-static inline unsigned long caller_frame_pointer(void)
-{
- struct stack_frame *frame;
-
- frame = __builtin_frame_address(0);
-
-#ifdef CONFIG_FRAME_POINTER
- frame = frame->next_frame;
-#endif
-
- return (unsigned long)frame;
-}
-
void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
new file mode 100644
index 000000000000..4cd725a8fe91
--- /dev/null
+++ b/arch/x86/include/asm/static_call.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#include <asm/text-patching.h>
+
+/*
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function. This trampoline is only used during boot, before the call
+ * sites get patched by static_call_update(). The name of this trampoline has
+ * a magical aspect: objtool uses it to find static call sites so it can create
+ * the .static_call_sites section.
+ *
+ * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
+ * does a direct jump to the function. The direct jump gets patched by
+ * static_call_update().
+ *
+ * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
+ * it does tail-call optimization on the call; since you cannot compute the
+ * relative displacement across sections.
+ */
+
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD provides both a speculation stop and serves as a unique
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ ANNOTATE_NOENDBR " \n" \
+ insns " \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+#ifdef CONFIG_MITIGATION_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
+
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
+
+#define ARCH_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")
+
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+
+extern void __static_call_update_early(void *tramp, void *func);
+
+#define static_call_update_early(name, _func) \
+({ \
+ typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \
+ if (static_call_initialized) { \
+ __static_call_update(&STATIC_CALL_KEY(name), \
+ STATIC_CALL_TRAMP_ADDR(name), __F);\
+ } else { \
+ WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \
+ __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\
+ __F); \
+ } \
+})
+
+#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index c3c2c1914d65..9cb5aae7fba9 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,6 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_STRING_H
+#define _ASM_X86_STRING_H
+
#ifdef CONFIG_X86_32
# include <asm/string_32.h>
#else
# include <asm/string_64.h>
#endif
+
+static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
+{
+ void *ret = to;
+
+ asm volatile("rep movsb"
+ : "+D" (to), "+S" (from), "+c" (len)
+ : : "memory");
+ return ret;
+}
+
+static __always_inline void *__inline_memset(void *s, int v, size_t n)
+{
+ void *ret = s;
+
+ asm volatile("rep stosb"
+ : "+D" (s), "+c" (n)
+ : "a" ((uint8_t)v)
+ : "memory");
+ return ret;
+}
+
+#endif /* _ASM_X86_STRING_H */
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 55d392c6bd29..e9cce169bb4c 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -33,11 +33,11 @@ extern size_t strlen(const char *s);
static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
int d0, d1, d2;
- asm volatile("rep ; movsl\n\t"
+ asm volatile("rep movsl\n\t"
"movl %4,%%ecx\n\t"
"andl $3,%%ecx\n\t"
"jz 1f\n\t"
- "rep ; movsb\n\t"
+ "rep movsb\n\t"
"1:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
@@ -89,7 +89,7 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
if (n >= 5 * 4) {
/* large block: use rep prefix */
int ecx;
- asm volatile("rep ; movsl"
+ asm volatile("rep movsl"
: "=&c" (ecx), "=&D" (edi), "=&S" (esi)
: "0" (n / 4), "1" (edi), "2" (esi)
: "memory"
@@ -146,49 +146,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
extern void *memcpy(void *, const void *, size_t);
#ifndef CONFIG_FORTIFY_SOURCE
-#ifdef CONFIG_X86_USE_3DNOW
-#include <asm/mmx.h>
-
-/*
- * This CPU favours 3DNow strongly (eg AMD Athlon)
- */
-
-static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
-{
- if (len < 512)
- return __constant_memcpy(to, from, len);
- return _mmx_memcpy(to, from, len);
-}
-
-static inline void *__memcpy3d(void *to, const void *from, size_t len)
-{
- if (len < 512)
- return __memcpy(to, from, len);
- return _mmx_memcpy(to, from, len);
-}
-
-#define memcpy(t, f, n) \
- (__builtin_constant_p((n)) \
- ? __constant_memcpy3d((t), (f), (n)) \
- : __memcpy3d((t), (f), (n)))
-
-#else
-
-/*
- * No 3D Now!
- */
-
-#if (__GNUC__ >= 4)
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-#else
-#define memcpy(t, f, n) \
- (__builtin_constant_p((n)) \
- ? __constant_memcpy((t), (f), (n)) \
- : __memcpy((t), (f), (n)))
-#endif
-#endif
#endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMMOVE
@@ -205,8 +165,7 @@ extern void *memchr(const void *cs, int c, size_t count);
static inline void *__memset_generic(void *s, char c, size_t count)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosb"
+ asm volatile("rep stosb"
: "=&c" (d0), "=&D" (d1)
: "a" (c), "1" (s), "0" (count)
: "memory");
@@ -216,29 +175,6 @@ static inline void *__memset_generic(void *s, char c, size_t count)
/* we might want to write optimized versions of these later */
#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
-/*
- * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static __always_inline
-void *__constant_c_memset(void *s, unsigned long c, size_t count)
-{
- int d0, d1;
- asm volatile("rep ; stosl\n\t"
- "testb $2,%b3\n\t"
- "je 1f\n\t"
- "stosw\n"
- "1:\ttestb $1,%b3\n\t"
- "je 2f\n\t"
- "stosb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1)
- : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
- : "memory");
- return s;
-}
-
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
extern size_t strnlen(const char *s, size_t count);
@@ -247,72 +183,6 @@ extern size_t strnlen(const char *s, size_t count);
#define __HAVE_ARCH_STRSTR
extern char *strstr(const char *cs, const char *ct);
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as we by now know that both pattern and count is constant..
- */
-static __always_inline
-void *__constant_c_and_count_memset(void *s, unsigned long pattern,
- size_t count)
-{
- switch (count) {
- case 0:
- return s;
- case 1:
- *(unsigned char *)s = pattern & 0xff;
- return s;
- case 2:
- *(unsigned short *)s = pattern & 0xffff;
- return s;
- case 3:
- *(unsigned short *)s = pattern & 0xffff;
- *((unsigned char *)s + 2) = pattern & 0xff;
- return s;
- case 4:
- *(unsigned long *)s = pattern;
- return s;
- }
-
-#define COMMON(x) \
- asm volatile("rep ; stosl" \
- x \
- : "=&c" (d0), "=&D" (d1) \
- : "a" (eax), "0" (count/4), "1" ((long)s) \
- : "memory")
-
- {
- int d0, d1;
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
- /* Workaround for broken gcc 4.0 */
- register unsigned long eax asm("%eax") = pattern;
-#else
- unsigned long eax = pattern;
-#endif
-
- switch (count % 4) {
- case 0:
- COMMON("");
- return s;
- case 1:
- COMMON("\n\tstosb");
- return s;
- case 2:
- COMMON("\n\tstosw");
- return s;
- default:
- COMMON("\n\tstosw\n\tstosb");
- return s;
- }
- }
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count) \
- (__builtin_constant_p(count) \
- ? __constant_c_and_count_memset((s), (c), (count)) \
- : __constant_c_memset((s), (c), (count)))
-
#define __memset(s, c, count) \
(__builtin_constant_p(count) \
? __constant_count_memset((s), (c), (count)) \
@@ -321,23 +191,14 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, size_t);
#ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ >= 4)
#define memset(s, c, count) __builtin_memset(s, c, count)
-#else
-#define memset(s, c, count) \
- (__builtin_constant_p(c) \
- ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
- (count)) \
- : __memset((s), (c), (count)))
-#endif
#endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosw"
+ asm volatile("rep stosw"
: "=&c" (d0), "=&D" (d1)
: "a" (v), "1" (s), "0" (n)
: "memory");
@@ -348,8 +209,7 @@ static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
int d0, d1;
- asm volatile("rep\n\t"
- "stosl"
+ asm volatile("rep stosl"
: "=&c" (d0), "=&D" (d1)
: "a" (v), "1" (s), "0" (n)
: "memory");
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 4e4194e21a09..4635616863f5 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -10,68 +10,68 @@
/* Even with __builtin_ the compiler may decide to use the out of line
function. */
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#include <linux/kmsan_string.h>
+#endif
+
#define __HAVE_ARCH_MEMCPY 1
extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
-#ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
-#define memcpy(dst, src, len) \
-({ \
- size_t __len = (len); \
- void *__ret; \
- if (__builtin_constant_p(len) && __len >= 64) \
- __ret = __memcpy((dst), (src), __len); \
- else \
- __ret = __builtin_memcpy((dst), (src), __len); \
- __ret; \
-})
-#endif
-#endif /* !CONFIG_FORTIFY_SOURCE */
-
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
void *__memset(void *s, int c, size_t n);
+KCFI_REFERENCE(__memset);
+/*
+ * KMSAN needs to instrument as much code as possible. Use C versions of
+ * memsetXX() from lib/string.c under KMSAN.
+ */
+#if !defined(CONFIG_KMSAN)
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosw"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const auto s0 = s;
+ asm volatile (
+ "rep stosw"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosl"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const auto s0 = s;
+ asm volatile (
+ "rep stosl"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosq"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const auto s0 = s;
+ asm volatile (
+ "rep stosq"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
+#endif
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t count);
void *__memmove(void *dest, const void *src, size_t count);
+KCFI_REFERENCE(__memmove);
int memcmp(const void *cs, const void *ct, size_t count);
size_t strlen(const char *s);
@@ -79,56 +79,6 @@ char *strcpy(char *dest, const char *src);
char *strcat(char *dest, const char *src);
int strcmp(const char *cs, const char *ct);
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
-/*
- * For files that not instrumented (e.g. mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#undef memcpy
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#define memset(s, c, n) __memset(s, c, n)
-
-#ifndef __NO_FORTIFY
-#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
-#endif
-
-#endif
-
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index fdbd9d7b7bca..e8e5aab06255 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -9,19 +9,12 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/msr.h>
/* image of the saved processor state */
struct saved_context {
- /*
- * On x86_32, all segment registers, with the possible exception of
- * gs, are saved at kernel entry in pt_regs.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
- u16 gs;
-#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
@@ -30,6 +23,12 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ /*
+ * On x86_32, all segment registers except gs are saved at kernel
+ * entry in pt_regs.
+ */
+ u16 gs;
+ bool misc_enable_saved;
} __attribute__((packed));
/* routines for saving/restoring kernel state */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index a7af9f53c0cb..b512f9665f78 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -9,14 +9,19 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/msr.h>
/*
* Image of the saved processor state, used by the low level ACPI suspend to
* RAM code and by the low level hibernation code.
*
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members. For
+ * optimisation purposes but also because tools like kmemleak only search for
+ * pointers that are aligned.
*/
struct saved_context {
struct pt_regs regs;
@@ -34,9 +39,8 @@ struct saved_context {
*/
unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
- unsigned long cr0, cr2, cr3, cr4, cr8;
+ unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
@@ -48,6 +52,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
#define loaddebug(thread,register) \
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index dec9c1e84c78..56aa99503dc4 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -3,10 +3,56 @@
#define __SVM_H
#include <uapi/asm/svm.h>
-
+#include <uapi/asm/kvm.h>
+
+#include <hyperv/hvhdk.h>
+
+/*
+ * 32-bit intercept words in the VMCB Control Area, starting
+ * at Byte offset 000h.
+ */
+
+enum intercept_words {
+ INTERCEPT_CR = 0,
+ INTERCEPT_DR,
+ INTERCEPT_EXCEPTION,
+ INTERCEPT_WORD3,
+ INTERCEPT_WORD4,
+ INTERCEPT_WORD5,
+ MAX_INTERCEPT,
+};
enum {
- INTERCEPT_INTR,
+ /* Byte offset 000h (word 0) */
+ INTERCEPT_CR0_READ = 0,
+ INTERCEPT_CR3_READ = 3,
+ INTERCEPT_CR4_READ = 4,
+ INTERCEPT_CR8_READ = 8,
+ INTERCEPT_CR0_WRITE = 16,
+ INTERCEPT_CR3_WRITE = 16 + 3,
+ INTERCEPT_CR4_WRITE = 16 + 4,
+ INTERCEPT_CR8_WRITE = 16 + 8,
+ /* Byte offset 004h (word 1) */
+ INTERCEPT_DR0_READ = 32,
+ INTERCEPT_DR1_READ,
+ INTERCEPT_DR2_READ,
+ INTERCEPT_DR3_READ,
+ INTERCEPT_DR4_READ,
+ INTERCEPT_DR5_READ,
+ INTERCEPT_DR6_READ,
+ INTERCEPT_DR7_READ,
+ INTERCEPT_DR0_WRITE = 48,
+ INTERCEPT_DR1_WRITE,
+ INTERCEPT_DR2_WRITE,
+ INTERCEPT_DR3_WRITE,
+ INTERCEPT_DR4_WRITE,
+ INTERCEPT_DR5_WRITE,
+ INTERCEPT_DR6_WRITE,
+ INTERCEPT_DR7_WRITE,
+ /* Byte offset 008h (word 2) */
+ INTERCEPT_EXCEPTION_OFFSET = 64,
+ /* Byte offset 00Ch (word 3) */
+ INTERCEPT_INTR = 96,
INTERCEPT_NMI,
INTERCEPT_SMI,
INTERCEPT_INIT,
@@ -38,7 +84,8 @@ enum {
INTERCEPT_TASK_SWITCH,
INTERCEPT_FERR_FREEZE,
INTERCEPT_SHUTDOWN,
- INTERCEPT_VMRUN,
+ /* Byte offset 010h (word 4) */
+ INTERCEPT_VMRUN = 128,
INTERCEPT_VMMCALL,
INTERCEPT_VMLOAD,
INTERCEPT_VMSAVE,
@@ -52,15 +99,31 @@ enum {
INTERCEPT_MWAIT,
INTERCEPT_MWAIT_COND,
INTERCEPT_XSETBV,
+ INTERCEPT_RDPRU,
+ TRAP_EFER_WRITE,
+ TRAP_CR0_WRITE,
+ TRAP_CR1_WRITE,
+ TRAP_CR2_WRITE,
+ TRAP_CR3_WRITE,
+ TRAP_CR4_WRITE,
+ TRAP_CR5_WRITE,
+ TRAP_CR6_WRITE,
+ TRAP_CR7_WRITE,
+ TRAP_CR8_WRITE,
+ /* Byte offset 014h (word 5) */
+ INTERCEPT_INVLPGB = 160,
+ INTERCEPT_INVLPGB_ILLEGAL,
+ INTERCEPT_INVPCID,
+ INTERCEPT_MCOMMIT,
+ INTERCEPT_TLBSYNC,
+ INTERCEPT_BUSLOCK,
+ INTERCEPT_IDLE_HLT = 166,
};
struct __attribute__ ((__packed__)) vmcb_control_area {
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
- u8 reserved_1[40];
+ u32 intercepts[MAX_INTERCEPT];
+ u32 reserved_1[15 - MAX_INTERCEPT];
u16 pause_filter_thresh;
u16 pause_filter_count;
u64 iopm_base_pa;
@@ -81,7 +144,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 exit_int_info_err;
u64 nested_ctl;
u64 avic_vapic_bar;
- u8 reserved_4[8];
+ u64 ghcb_gpa;
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
@@ -95,7 +158,22 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u8 reserved_6[8]; /* Offset 0xe8 */
u64 avic_logical_id; /* Offset 0xf0 */
u64 avic_physical_id; /* Offset 0xf8 */
- u8 reserved_7[768];
+ u8 reserved_7[8];
+ u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[16];
+ u16 bus_lock_counter; /* Offset 0x120 */
+ u8 reserved_9[22];
+ u64 allowed_sev_features; /* Offset 0x138 */
+ u64 guest_sev_features; /* Offset 0x140 */
+ u8 reserved_10[664];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ union {
+ struct hv_vmcb_enlightenments hv_enlightenments;
+ u8 reserved_sw[32];
+ };
};
@@ -112,25 +190,40 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_GIF_SHIFT 9
#define V_GIF_MASK (1 << V_GIF_SHIFT)
+#define V_NMI_PENDING_SHIFT 11
+#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
+
+#define V_NMI_BLOCKING_SHIFT 12
+#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
+
#define V_INTR_PRIO_SHIFT 16
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)
+
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
#define V_GIF_ENABLE_SHIFT 25
#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+#define V_NMI_ENABLE_SHIFT 26
+#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
+
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+#define X2APIC_MODE_SHIFT 30
+#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT)
+
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1)
#define SVM_IOIO_STR_SHIFT 2
#define SVM_IOIO_REP_SHIFT 3
@@ -143,21 +236,85 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-#define SVM_VM_CR_VALID_MASK 0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
-
#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+
+
+#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL
+#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL
+#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL
+#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL
+
+
+/* AVIC */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+/*
+ * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
+ * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
+ * GA log interrupts to wake the vCPU (because it's blocking or about to block).
+ */
+#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL)
+
+#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
+};
-struct __attribute__ ((__packed__)) vmcb_seg {
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0)
+
+/*
+ * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
+ * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually.
+ */
+#define AVIC_MAX_PHYSICAL_ID 0XFEULL
+
+/*
+ * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
+ * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095).
+ */
+#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL
+
+static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID);
+
+#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
+#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
+#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+#define SVM_SEV_FEAT_SECURE_TSC BIT(9)
+
+#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63)
+
+struct vmcb_seg {
u16 selector;
u16 attrib;
u32 limit;
u64 base;
-};
+} __packed;
-struct __attribute__ ((__packed__)) vmcb_save_area {
+/* Save area definition for legacy and SEV-MEM guests */
+struct vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
struct vmcb_seg ss;
@@ -168,11 +325,13 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
struct vmcb_seg ldtr;
struct vmcb_seg idtr;
struct vmcb_seg tr;
- u8 reserved_1[43];
+ /* Reserved fields are named following their struct offset */
+ u8 reserved_0xa0[42];
+ u8 vmpl;
u8 cpl;
- u8 reserved_2[4];
+ u8 reserved_0xcc[4];
u64 efer;
- u8 reserved_3[112];
+ u8 reserved_0xd8[112];
u64 cr4;
u64 cr3;
u64 cr0;
@@ -180,9 +339,11 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
u64 dr6;
u64 rflags;
u64 rip;
- u8 reserved_4[88];
+ u8 reserved_0x180[88];
u64 rsp;
- u8 reserved_5[24];
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
u64 rax;
u64 star;
u64 lstar;
@@ -193,24 +354,251 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
u64 sysenter_esp;
u64 sysenter_eip;
u64 cr2;
- u8 reserved_6[32];
+ u8 reserved_0x248[32];
u64 g_pat;
u64 dbgctl;
u64 br_from;
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
-};
+ u8 reserved_0x298[72];
+ u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
+} __packed;
-struct __attribute__ ((__packed__)) vmcb {
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct sev_es_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u64 pl0_ssp;
+ u64 pl1_ssp;
+ u64 pl2_ssp;
+ u64 pl3_ssp;
+ u64 u_cet;
+ u8 reserved_0xc8[2];
+ u8 vmpl;
+ u8 cpl;
+ u8 reserved_0xcc[4];
+ u64 efer;
+ u8 reserved_0xd8[104];
+ u64 xss;
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 dr0;
+ u64 dr1;
+ u64 dr2;
+ u64 dr3;
+ u64 dr0_addr_mask;
+ u64 dr1_addr_mask;
+ u64 dr2_addr_mask;
+ u64 dr3_addr_mask;
+ u8 reserved_0x1c0[24];
+ u64 rsp;
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_0x248[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+ u8 reserved_0x298[80];
+ u32 pkru;
+ u32 tsc_aux;
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u8 reserved_0x300[8];
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_0x320; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_0x380[16];
+ u64 guest_exit_info_1;
+ u64 guest_exit_info_2;
+ u64 guest_exit_int_info;
+ u64 guest_nrip;
+ u64 sev_features;
+ u64 vintr_ctrl;
+ u64 guest_exit_code;
+ u64 virtual_tom;
+ u64 tlb_id;
+ u64 pcpu_id;
+ u64 event_inj;
+ u64 xcr0;
+ u8 reserved_0x3f0[16];
+
+ /* Floating point area */
+ u64 x87_dp;
+ u32 mxcsr;
+ u16 x87_ftw;
+ u16 x87_fsw;
+ u16 x87_fcw;
+ u16 x87_fop;
+ u16 x87_ds;
+ u16 x87_cs;
+ u64 x87_rip;
+ u8 fpreg_x87[80];
+ u8 fpreg_xmm[256];
+ u8 fpreg_ymm[256];
+} __packed;
+
+struct ghcb_save_area {
+ u8 reserved_0x0[203];
+ u8 cpl;
+ u8 reserved_0xcc[116];
+ u64 xss;
+ u8 reserved_0x148[24];
+ u64 dr7;
+ u8 reserved_0x168[16];
+ u64 rip;
+ u8 reserved_0x180[88];
+ u64 rsp;
+ u8 reserved_0x1e0[24];
+ u64 rax;
+ u8 reserved_0x200[264];
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u8 reserved_0x320[8];
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_0x380[16];
+ u64 sw_exit_code;
+ u64 sw_exit_info_1;
+ u64 sw_exit_info_2;
+ u64 sw_scratch;
+ u8 reserved_0x3b0[56];
+ u64 xcr0;
+ u8 valid_bitmap[16];
+ u64 x87_state_gpa;
+} __packed;
+
+#define GHCB_SHARED_BUF_SIZE 2032
+
+struct ghcb {
+ struct ghcb_save_area save;
+ u8 reserved_save[2048 - sizeof(struct ghcb_save_area)];
+
+ u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
+
+ u8 reserved_0xff0[10];
+ u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */
+ u32 ghcb_usage;
+} __packed;
+
+struct vmcb {
struct vmcb_control_area control;
- struct vmcb_save_area save;
-};
+ union {
+ struct vmcb_save_area save;
+
+ /*
+ * For SEV-ES VMs, the save area in the VMCB is used only to
+ * save/load host state. Guest state resides in a separate
+ * page, the aptly named VM Save Area (VMSA), that is encrypted
+ * with the guest's private key.
+ */
+ struct sev_es_save_area host_sev_es_save;
+ };
+} __packed;
+
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
+#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
+#define EXPECTED_GHCB_SIZE PAGE_SIZE
+
+#define BUILD_BUG_RESERVED_OFFSET(x, y) \
+ ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y)
+
+static inline void __unused_size_checks(void)
+{
+ BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
+
+ /* Check offsets of reserved fields */
+
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0);
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc);
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8);
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180);
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248);
+ BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298);
+
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0);
+
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x200);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380);
+ BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0);
+
+ BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
+}
#define SVM_CPUID_FUNC 0x8000000a
-#define SVM_VM_CR_SVM_DISABLE 4
-
#define SVM_SELECTOR_S_SHIFT 4
#define SVM_SELECTOR_DPL_SHIFT 5
#define SVM_SELECTOR_P_SHIFT 7
@@ -232,32 +620,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
#define SVM_SELECTOR_CODE_MASK (1 << 3)
-#define INTERCEPT_CR0_READ 0
-#define INTERCEPT_CR3_READ 3
-#define INTERCEPT_CR4_READ 4
-#define INTERCEPT_CR8_READ 8
-#define INTERCEPT_CR0_WRITE (16 + 0)
-#define INTERCEPT_CR3_WRITE (16 + 3)
-#define INTERCEPT_CR4_WRITE (16 + 4)
-#define INTERCEPT_CR8_WRITE (16 + 8)
-
-#define INTERCEPT_DR0_READ 0
-#define INTERCEPT_DR1_READ 1
-#define INTERCEPT_DR2_READ 2
-#define INTERCEPT_DR3_READ 3
-#define INTERCEPT_DR4_READ 4
-#define INTERCEPT_DR5_READ 5
-#define INTERCEPT_DR6_READ 6
-#define INTERCEPT_DR7_READ 7
-#define INTERCEPT_DR0_WRITE (16 + 0)
-#define INTERCEPT_DR1_WRITE (16 + 1)
-#define INTERCEPT_DR2_WRITE (16 + 2)
-#define INTERCEPT_DR3_WRITE (16 + 3)
-#define INTERCEPT_DR4_WRITE (16 + 4)
-#define INTERCEPT_DR5_WRITE (16 + 5)
-#define INTERCEPT_DR6_WRITE (16 + 6)
-#define INTERCEPT_DR7_WRITE (16 + 7)
-
#define SVM_EVTINJ_VEC_MASK 0xff
#define SVM_EVTINJ_TYPE_SHIFT 8
@@ -290,4 +652,58 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+/* GHCB Accessor functions */
+
+#define GHCB_BITMAP_IDX(field) \
+ (offsetof(struct ghcb_save_area, field) / sizeof(u64))
+
+#define DEFINE_GHCB_ACCESSORS(field) \
+ static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
+ { \
+ return test_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ } \
+ \
+ static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \
+ { \
+ return ghcb->save.field; \
+ } \
+ \
+ static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
+ { \
+ return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \
+ } \
+ \
+ static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
+ { \
+ __set_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ ghcb->save.field = value; \
+ }
+
+DEFINE_GHCB_ACCESSORS(cpl)
+DEFINE_GHCB_ACCESSORS(rip)
+DEFINE_GHCB_ACCESSORS(rsp)
+DEFINE_GHCB_ACCESSORS(rax)
+DEFINE_GHCB_ACCESSORS(rcx)
+DEFINE_GHCB_ACCESSORS(rdx)
+DEFINE_GHCB_ACCESSORS(rbx)
+DEFINE_GHCB_ACCESSORS(rbp)
+DEFINE_GHCB_ACCESSORS(rsi)
+DEFINE_GHCB_ACCESSORS(rdi)
+DEFINE_GHCB_ACCESSORS(r8)
+DEFINE_GHCB_ACCESSORS(r9)
+DEFINE_GHCB_ACCESSORS(r10)
+DEFINE_GHCB_ACCESSORS(r11)
+DEFINE_GHCB_ACCESSORS(r12)
+DEFINE_GHCB_ACCESSORS(r13)
+DEFINE_GHCB_ACCESSORS(r14)
+DEFINE_GHCB_ACCESSORS(r15)
+DEFINE_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_GHCB_ACCESSORS(sw_scratch)
+DEFINE_GHCB_ACCESSORS(xcr0)
+DEFINE_GHCB_ACCESSORS(xss)
+
#endif
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
deleted file mode 100644
index ff6c92eff035..000000000000
--- a/arch/x86/include/asm/swiotlb.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_SWIOTLB_H
-#define _ASM_X86_SWIOTLB_H
-
-#include <linux/swiotlb.h>
-
-#ifdef CONFIG_SWIOTLB
-extern int swiotlb;
-extern int __init pci_swiotlb_detect_override(void);
-extern int __init pci_swiotlb_detect_4gb(void);
-extern void __init pci_swiotlb_init(void);
-extern void __init pci_swiotlb_late_init(void);
-#else
-#define swiotlb 0
-static inline int pci_swiotlb_detect_override(void)
-{
- return 0;
-}
-static inline int pci_swiotlb_detect_4gb(void)
-{
- return 0;
-}
-static inline void pci_swiotlb_init(void)
-{
-}
-static inline void pci_swiotlb_late_init(void)
-{
-}
-#endif
-#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 7cf1a270d891..499b1c15cc8b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,28 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-/* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *next)
-{
-#ifdef CONFIG_VMAP_STACK
- /*
- * If we switch to a stack that has a top-level paging entry
- * that is not present in the current mm, the resulting #PF will
- * will be promoted to a double-fault and we'll panic. Probe
- * the new stack now so that vmalloc_fault can fix up the page
- * tables if needed. This can only happen if we use a stack
- * in vmap space.
- *
- * We assume that the stack is aligned so that it never spans
- * more than one top-level paging entry.
- *
- * To minimize cache pollution, just follow the stack pointer.
- */
- READ_ONCE(*(unsigned char *)next->thread.sp);
-#endif
-}
-
-asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_fork_asm(void);
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+ int (*fn)(void *), void *fn_arg);
/*
* This is the structure pointed to by thread.sp for an inactive task. The
@@ -46,6 +27,7 @@ struct inactive_task_frame {
unsigned long r13;
unsigned long r12;
#else
+ unsigned long flags;
unsigned long si;
unsigned long di;
#endif
@@ -66,12 +48,12 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(next); \
- \
((last) = __switch_to_asm((prev), (next))); \
} while (0)
#ifdef CONFIG_X86_32
+#include <asm/msr.h>
+
static inline void refresh_sysenter_cs(struct thread_struct *thread)
{
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
@@ -79,7 +61,7 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
return;
this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ wrmsrq(MSR_IA32_SYSENTER_CS, thread->sysenter_cs);
}
#endif
@@ -88,21 +70,23 @@ static inline void update_task_stack(struct task_struct *task)
{
/* sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
- if (static_cpu_has(X86_FEATURE_XENPV))
- load_sp0(task->thread.sp0);
- else
- this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
+ this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- /*
- * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That
- * doesn't work on x86-32 because sp1 and
- * cpu_current_top_of_stack have different values (because of
- * the non-zero stack-padding on 32bit).
- */
- if (static_cpu_has(X86_FEATURE_XENPV))
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
+ /* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
#endif
+}
+static inline void kthread_frame_init(struct inactive_task_frame *frame,
+ int (*fun)(void *), void *arg)
+{
+ frame->bx = (unsigned long)fun;
+#ifdef CONFIG_X86_32
+ frame->di = (unsigned long)arg;
+#else
+ frame->r12 = (unsigned long)arg;
+#endif
}
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 2fe745356fb1..cd21a0405ac5 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
+#include <asm/rmwcc.h>
+
#define ADDR (*(volatile long *)addr)
/**
@@ -29,7 +31,7 @@
*/
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; bts %1,%0"
+ asm volatile("lock " __ASM_SIZE(bts) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btr %1,%0"
+ asm volatile("lock " __ASM_SIZE(btr) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btc %1,%0"
+ asm volatile("lock " __ASM_SIZE(btc) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; bts %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btr %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btc %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
#define sync_test_bit(nr, addr) test_bit(nr, addr)
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index c67caafd3381..96bda43538ee 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -5,6 +5,88 @@
#include <linux/preempt.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
+
+#ifdef CONFIG_X86_32
+static __always_inline void iret_to_self(void)
+{
+ asm volatile (
+ "pushfl\n\t"
+ "pushl %%cs\n\t"
+ "pushl $1f\n\t"
+ "iret\n\t"
+ "1:"
+ : ASM_CALL_CONSTRAINT : : "memory");
+}
+#else
+static __always_inline void iret_to_self(void)
+{
+ unsigned int tmp;
+
+ asm volatile (
+ "mov %%ss, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq %%rsp\n\t"
+ "addq $8, (%%rsp)\n\t"
+ "pushfq\n\t"
+ "mov %%cs, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq $1f\n\t"
+ "iretq\n\t"
+ "1:"
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ * a) Text was modified using one virtual address and is about to be executed
+ * from the same physical page at a different virtual address.
+ *
+ * b) Text was modified on a different CPU, may subsequently be
+ * executed on this CPU, and you want to make sure the new version
+ * gets executed. This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
+ */
+static __always_inline void sync_core(void)
+{
+ /*
+ * The SERIALIZE instruction is the most straightforward way to
+ * do this, but it is not universally available.
+ */
+ if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+ serialize();
+ return;
+ }
+
+ /*
+ * For all other processors, there are quite a few ways to do this.
+ * IRET-to-self is nice because it works on every CPU, at any CPL
+ * (so it's compatible with paravirtualization), and it never exits
+ * to a hypervisor. The only downsides are that it's a bit slow
+ * (it seems to be a bit more than 2x slower than the fastest
+ * options) and that it unmasks NMIs. The "push %cs" is needed,
+ * because in paravirtual environments __KERNEL_CS may not be a
+ * valid CS value when we do IRET directly.
+ *
+ * In case NMI unmasking or performance ever becomes a problem,
+ * the next best option appears to be MOV-to-CR2 and an
+ * unconditional jump. That sequence also works on all CPUs,
+ * but it will fault at CPL3 (i.e. Xen PV).
+ *
+ * CPUID is the conventional way, but it's nasty: it doesn't
+ * exist on some 486-like CPUs, and it usually exits to a
+ * hypervisor.
+ */
+ iret_to_self();
+}
/*
* Ensure that a core serializing instruction is issued before returning
@@ -16,12 +98,13 @@ static inline void sync_core_before_usermode(void)
/* With PTI, we unconditionally serialize before running user code. */
if (static_cpu_has(X86_FEATURE_PTI))
return;
+
/*
- * Return from interrupt and NMI is done through iret, which is core
- * serializing.
+ * Even if we're in an interrupt, we might reschedule before returning,
+ * in which case we could switch to a different thread in the same mm
+ * and return using SYSRET or SYSEXIT. Instead of trying to keep
+ * track of our need to sync the core, just sync right away.
*/
- if (in_irq() || in_nmi())
- return;
sync_core();
}
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d653139857af..c10dbb74cd00 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Access to user system call parameters and results
*
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- *
* See asm-generic/syscall.h for descriptions of what we must do here.
*/
@@ -16,28 +13,20 @@
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
-#include <asm/asm-offsets.h> /* For NR_syscalls */
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
-#ifdef CONFIG_X86_64
-typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
-#else
-typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
- unsigned long, unsigned long,
- unsigned long, unsigned long);
-#endif /* CONFIG_X86_64 */
+/* This is used purely for kernel/trace/trace_syscalls.c */
+typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#define __NR_syscall_compat_max __NR_syscall_max
-#define IA32_NR_syscalls NR_syscalls
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
-extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -49,6 +38,13 @@ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
return regs->orig_ax;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->orig_ax = nr;
+}
+
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
@@ -91,23 +87,29 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ args[0] = regs->bx;
+ args[1] = regs->cx;
+ args[2] = regs->dx;
+ args[3] = regs->si;
+ args[4] = regs->di;
+ args[5] = regs->bp;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+ regs->bx = args[0];
+ regs->cx = args[1];
+ regs->dx = args[2];
+ regs->si = args[3];
+ regs->di = args[4];
+ regs->bp = args[5];
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_I386;
}
@@ -116,131 +118,67 @@ static inline int syscall_get_arch(void)
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->r10;
- case 4:
- if (!n--) break;
- *args++ = regs->r8;
- case 5:
- if (!n--) break;
- *args++ = regs->r9;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ *args++ = regs->di;
+ *args++ = regs->si;
+ *args++ = regs->dx;
+ *args++ = regs->r10;
+ *args++ = regs->r8;
+ *args = regs->r9;
+ }
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- regs->bx = *args++;
- case 1:
- if (!n--) break;
- regs->cx = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->si = *args++;
- case 4:
- if (!n--) break;
- regs->di = *args++;
- case 5:
- if (!n--) break;
- regs->bp = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ regs->bx = *args++;
+ regs->cx = *args++;
+ regs->dx = *args++;
+ regs->si = *args++;
+ regs->di = *args++;
+ regs->bp = *args;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- regs->di = *args++;
- case 1:
- if (!n--) break;
- regs->si = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->r10 = *args++;
- case 4:
- if (!n--) break;
- regs->r8 = *args++;
- case 5:
- if (!n--) break;
- regs->r9 = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ regs->di = *args++;
+ regs->si = *args++;
+ regs->dx = *args++;
+ regs->r10 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
+ }
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
- return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ return (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+ task->thread_info.status & TS_COMPAT)
+ ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
+
+bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
+
#endif /* CONFIG_X86_32 */
+void do_int80_syscall_32(struct pt_regs *regs);
+bool do_fast_syscall_32(struct pt_regs *regs);
+bool do_SYSENTER_32(struct pt_regs *regs);
+
#endif /* _ASM_X86_SYSCALL_H */
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e046a405743d..7e88705e907f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,86 +6,183 @@
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
+#include <asm/ptrace.h>
+
+extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
+extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+
+/*
+ * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
+ * struct pt_regs *regs as the only argument of the syscall stub(s) named as:
+ * __x64_sys_*() - 64-bit native syscall
+ * __ia32_sys_*() - 32-bit native syscall or common compat syscall
+ * __ia32_compat_sys_*() - 32-bit compat syscall
+ * __x64_compat_sys_*() - 64-bit X32 compat syscall
+ *
+ * The registers are decoded according to the ABI:
+ * 64-bit: RDI, RSI, RDX, R10, R8, R9
+ * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP
+ *
+ * The stub then passes the decoded arguments to the __se_sys_*() wrapper to
+ * perform sign-extension (omitted for zero-argument syscalls). Finally the
+ * arguments are passed to the __do_sys_*() function which is the actual
+ * syscall. These wrappers are marked as inline so the compiler can optimize
+ * the functions where appropriate.
+ *
+ * Example assembly (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>: <-- syscall with 4 parameters
+ * callq <__fentry__>
+ *
+ * mov 0x70(%rdi),%rdi <-- decode regs->di
+ * mov 0x68(%rdi),%rsi <-- decode regs->si
+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
+ *
+ * xor %r9d,%r9d <-- clear %r9
+ * xor %r8d,%r8d <-- clear %r8
+ *
+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
+ * which takes 6 arguments
+ *
+ * cltq <-- extend return value to 64-bit
+ * retq <-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ */
+
/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
#define SC_X86_64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
,,regs->di,,regs->si,,regs->dx \
,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is Adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
/* Mapping of registers to parameters for syscalls on i386 */
#define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
-#ifdef CONFIG_IA32_EMULATION
-/*
- * For IA32 emulation, we need to handle "compat" syscalls *and* create
- * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
- * ia32 regs in the proper order for shared or "common" syscalls. As some
- * syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
- */
-#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+#define __SYS_STUB0(abi, name) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
+ __alias(__do_##name);
+
+#define __SYS_STUBx(abi, name, ...) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
{ \
- return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ return __se_##name(__VA_ARGS__); \
+ }
-#define __IA32_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \
- asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \
+#define __COND_SYSCALL(abi, name) \
+ __weak long __##abi##_##name(const struct pt_regs *__unused); \
+ __weak long __##abi##_##name(const struct pt_regs *__unused) \
{ \
- return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+ return sys_ni_syscall(); \
}
+#ifdef CONFIG_X86_64
+#define __X64_SYS_STUB0(name) \
+ __SYS_STUB0(x64, sys_##name)
+
+#define __X64_SYS_STUBx(x, name, ...) \
+ __SYS_STUBx(x64, sys##name, \
+ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __X64_COND_SYSCALL(name) \
+ __COND_SYSCALL(x64, sys_##name)
+
+#else /* CONFIG_X86_64 */
+#define __X64_SYS_STUB0(name)
+#define __X64_SYS_STUBx(x, name, ...)
+#define __X64_COND_SYSCALL(name)
+#endif /* CONFIG_X86_64 */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+#define __IA32_SYS_STUB0(name) \
+ __SYS_STUB0(ia32, sys_##name)
+
+#define __IA32_SYS_STUBx(x, name, ...) \
+ __SYS_STUBx(ia32, sys##name, \
+ SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __IA32_COND_SYSCALL(name) \
+ __COND_SYSCALL(ia32, sys_##name)
+
+#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+#define __IA32_SYS_STUB0(name)
+#define __IA32_SYS_STUBx(x, name, ...)
+#define __IA32_COND_SYSCALL(name)
+#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+#ifdef CONFIG_IA32_EMULATION
/*
- * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
- * named __ia32_sys_*()
+ * For IA32 emulation, we need to handle "compat" syscalls *and* create
+ * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
+ * ia32 regs in the proper order for shared or "common" syscalls. As some
+ * syscalls may not be implemented, we need to expand COND_SYSCALL in
+ * kernel/sys_ni.c to cover this case as well.
*/
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
- asmlinkage long __x64_sys_##sname(void)
+#define __IA32_COMPAT_SYS_STUB0(name) \
+ __SYS_STUB0(ia32, compat_sys_##name)
-#define COND_SYSCALL(name) \
- cond_syscall(__x64_sys_##name); \
- cond_syscall(__ia32_sys_##name)
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
+ __SYS_STUBx(ia32, compat_sys##name, \
+ SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__))
-#define SYS_NI(name) \
- SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers)
+#define __IA32_COMPAT_COND_SYSCALL(name) \
+ __COND_SYSCALL(ia32, compat_sys_##name)
#else /* CONFIG_IA32_EMULATION */
+#define __IA32_COMPAT_SYS_STUB0(name)
#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
-#define __IA32_SYS_STUBx(x, fullname, name, ...)
+#define __IA32_COMPAT_COND_SYSCALL(name)
#endif /* CONFIG_IA32_EMULATION */
-#ifdef CONFIG_X86_X32
+#ifdef CONFIG_X86_X32_ABI
/*
* For the x32 ABI, we need to create a stub for compat_sys_*() which is aware
* of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
* with x86_64 obviously do not need such care.
*/
+#define __X32_COMPAT_SYS_STUB0(name) \
+ __SYS_STUB0(x64, compat_sys_##name)
+
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \
- asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
- { \
- return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ __SYS_STUBx(x64, compat_sys##name, \
+ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
+
+#define __X32_COMPAT_COND_SYSCALL(name) \
+ __COND_SYSCALL(x64, compat_sys_##name)
-#else /* CONFIG_X86_X32 */
+#else /* CONFIG_X86_X32_ABI */
+#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
-#endif /* CONFIG_X86_X32 */
+#define __X32_COMPAT_COND_SYSCALL(name)
+#endif /* CONFIG_X86_X32_ABI */
#ifdef CONFIG_COMPAT
@@ -94,6 +191,14 @@
* mapping of registers to parameters, we need to generate stubs for each
* of them.
*/
+#define COMPAT_SYSCALL_DEFINE0(name) \
+ static long \
+ __do_compat_sys_##name(const struct pt_regs *__unused); \
+ __IA32_COMPAT_SYS_STUB0(name) \
+ __X32_COMPAT_SYS_STUB0(name) \
+ static long \
+ __do_compat_sys_##name(const struct pt_regs *__unused)
+
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
@@ -107,62 +212,18 @@
/*
* As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
*/
#define COND_SYSCALL_COMPAT(name) \
- cond_syscall(__ia32_compat_sys_##name); \
- cond_syscall(__x32_compat_sys_##name)
-
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \
- SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers)
+ __IA32_COMPAT_COND_SYSCALL(name) \
+ __X32_COMPAT_COND_SYSCALL(name)
#endif /* CONFIG_COMPAT */
-
-/*
- * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
- * struct pt_regs *regs as the only argument of the syscall stub named
- * __x64_sys_*(). It decodes just the registers it needs and passes them on to
- * the __se_sys_*() wrapper performing sign extension and then to the
- * __do_sys_*() function doing the actual job. These wrappers and functions
- * are inlined (at least in very most cases), meaning that the assembly looks
- * as follows (slightly re-ordered for better readability):
- *
- * <__x64_sys_recv>: <-- syscall with 4 parameters
- * callq <__fentry__>
- *
- * mov 0x70(%rdi),%rdi <-- decode regs->di
- * mov 0x68(%rdi),%rsi <-- decode regs->si
- * mov 0x60(%rdi),%rdx <-- decode regs->dx
- * mov 0x38(%rdi),%rcx <-- decode regs->r10
- *
- * xor %r9d,%r9d <-- clear %r9
- * xor %r8d,%r8d <-- clear %r8
- *
- * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
- * which takes 6 arguments
- *
- * cltq <-- extend return value to 64-bit
- * retq <-- return
- *
- * This approach avoids leaking random user-provided register content down
- * the call chain.
- *
- * If IA32_EMULATION is enabled, this macro generates an additional wrapper
- * named __ia32_sys_*() which decodes the struct pt_regs *regs according
- * to the i386 calling convention (bx, cx, dx, si, di, bp).
- */
#define __SYSCALL_DEFINEx(x, name, ...) \
- asmlinkage long __x64_sys##name(const struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
- asmlinkage long __x64_sys##name(const struct pt_regs *regs) \
- { \
- return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ __X64_SYS_STUBx(x, name, __VA_ARGS__) \
__IA32_SYS_STUBx(x, name, __VA_ARGS__) \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
@@ -177,33 +238,27 @@
* As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
* obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
* hurt, we only need to re-define it here to keep the naming congruent to
- * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
- * macros to work correctly.
+ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro
+ * to work correctly.
*/
-#ifndef SYSCALL_DEFINE0
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- asmlinkage long __x64_sys_##sname(void)
-#endif
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ static long __do_sys_##sname(const struct pt_regs *__unused); \
+ __X64_SYS_STUB0(sname) \
+ __IA32_SYS_STUB0(sname) \
+ static long __do_sys_##sname(const struct pt_regs *__unused)
-#ifndef COND_SYSCALL
-#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
-#endif
-
-#ifndef SYS_NI
-#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);
-#endif
+#define COND_SYSCALL(name) \
+ __X64_COND_SYSCALL(name) \
+ __IA32_COND_SYSCALL(name)
/*
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-struct pt_regs;
-asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
-asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
-asmlinkage long __x64_sys_time(const struct pt_regs *regs);
+long __x64_sys_getcpu(const struct pt_regs *regs);
+long __x64_sys_gettimeofday(const struct pt_regs *regs);
+long __x64_sys_time(const struct pt_regs *regs);
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 9fa979dd0d9d..6714a358235d 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,51 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* syscalls.h - Linux syscall interfaces (arch-specific)
*
* Copyright (c) 2008 Jaswinder Singh Rajput
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
*/
#ifndef _ASM_X86_SYSCALLS_H
#define _ASM_X86_SYSCALLS_H
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <linux/signal.h>
-#include <linux/types.h>
-
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
long ksys_ioperm(unsigned long from, unsigned long num, int turn_on);
-#ifdef CONFIG_X86_32
-/*
- * These definitions are only valid on pure 32-bit systems; x86-64 uses a
- * different syscall calling convention
- */
-asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
-asmlinkage long sys_iopl(unsigned int);
-
-/* kernel/ldt.c */
-asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
-
-/* kernel/signal.c */
-asmlinkage long sys_rt_sigreturn(void);
-
-/* kernel/tls.c */
-asmlinkage long sys_set_thread_area(struct user_desc __user *);
-asmlinkage long sys_get_thread_area(struct user_desc __user *);
-
-/* X86_32 only */
-
-/* kernel/signal.c */
-asmlinkage long sys_sigreturn(void);
-
-/* kernel/vm86_32.c */
-struct vm86_struct;
-asmlinkage long sys_vm86old(struct vm86_struct __user *);
-asmlinkage long sys_vm86(unsigned long, unsigned long);
-
-#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h
deleted file mode 100644
index 2aeb3e25579c..000000000000
--- a/arch/x86/include/asm/sysfb.h
+++ /dev/null
@@ -1,98 +0,0 @@
-#ifndef _ARCH_X86_KERNEL_SYSFB_H
-#define _ARCH_X86_KERNEL_SYSFB_H
-
-/*
- * Generic System Framebuffers on x86
- * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/platform_data/simplefb.h>
-#include <linux/screen_info.h>
-
-enum {
- M_I17, /* 17-Inch iMac */
- M_I20, /* 20-Inch iMac */
- M_I20_SR, /* 20-Inch iMac (Santa Rosa) */
- M_I24, /* 24-Inch iMac */
- M_I24_8_1, /* 24-Inch iMac, 8,1th gen */
- M_I24_10_1, /* 24-Inch iMac, 10,1th gen */
- M_I27_11_1, /* 27-Inch iMac, 11,1th gen */
- M_MINI, /* Mac Mini */
- M_MINI_3_1, /* Mac Mini, 3,1th gen */
- M_MINI_4_1, /* Mac Mini, 4,1th gen */
- M_MB, /* MacBook */
- M_MB_2, /* MacBook, 2nd rev. */
- M_MB_3, /* MacBook, 3rd rev. */
- M_MB_5_1, /* MacBook, 5th rev. */
- M_MB_6_1, /* MacBook, 6th rev. */
- M_MB_7_1, /* MacBook, 7th rev. */
- M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */
- M_MBA, /* MacBook Air */
- M_MBA_3, /* Macbook Air, 3rd rev */
- M_MBP, /* MacBook Pro */
- M_MBP_2, /* MacBook Pro 2nd gen */
- M_MBP_2_2, /* MacBook Pro 2,2nd gen */
- M_MBP_SR, /* MacBook Pro (Santa Rosa) */
- M_MBP_4, /* MacBook Pro, 4th gen */
- M_MBP_5_1, /* MacBook Pro, 5,1th gen */
- M_MBP_5_2, /* MacBook Pro, 5,2th gen */
- M_MBP_5_3, /* MacBook Pro, 5,3rd gen */
- M_MBP_6_1, /* MacBook Pro, 6,1th gen */
- M_MBP_6_2, /* MacBook Pro, 6,2th gen */
- M_MBP_7_1, /* MacBook Pro, 7,1th gen */
- M_MBP_8_2, /* MacBook Pro, 8,2nd gen */
- M_UNKNOWN /* placeholder */
-};
-
-struct efifb_dmi_info {
- char *optname;
- unsigned long base;
- int stride;
- int width;
- int height;
- int flags;
-};
-
-#ifdef CONFIG_EFI
-
-extern struct efifb_dmi_info efifb_dmi_list[];
-void sysfb_apply_efi_quirks(void);
-
-#else /* CONFIG_EFI */
-
-static inline void sysfb_apply_efi_quirks(void)
-{
-}
-
-#endif /* CONFIG_EFI */
-
-#ifdef CONFIG_X86_SYSFB
-
-bool parse_mode(const struct screen_info *si,
- struct simplefb_platform_data *mode);
-int create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode);
-
-#else /* CONFIG_X86_SYSFB */
-
-static inline bool parse_mode(const struct screen_info *si,
- struct simplefb_platform_data *mode)
-{
- return false;
-}
-
-static inline int create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode)
-{
- return -EINVAL;
-}
-
-#endif /* CONFIG_X86_SYSFB */
-
-#endif /* _ARCH_X86_KERNEL_SYSFB_H */
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
deleted file mode 100644
index 7a6677c1a715..000000000000
--- a/arch/x86/include/asm/tce.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * This file is derived from asm-powerpc/tce.h.
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- * Author: Jon Mason <jdmason@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE 8 /* in bytes */
-
-#define TCE_READ_SHIFT 0
-#define TCE_WRITE_SHIFT 1
-#define TCE_HUBID_SHIFT 2 /* unused */
-#define TCE_RSVD_SHIFT 8 /* unused */
-#define TCE_RPN_SHIFT 12
-#define TCE_UNUSED_SHIFT 48 /* unused */
-
-#define TCE_RPN_MASK 0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644
index 000000000000..6b338d7f01b7
--- /dev/null
+++ b/arch/x86/include/asm/tdx.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <linux/mmzone.h>
+
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/trapnr.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate Reserved status code class that never used by
+ * TDX module.
+ */
+#define TDX_ERROR _BITUL(63)
+#define TDX_NON_RECOVERABLE _BITUL(62)
+#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP)
+#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD)
+
+/*
+ * TDX module SEAMCALL leaf function error codes
+ */
+#define TDX_SUCCESS 0ULL
+#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
+
+#ifndef __ASSEMBLER__
+
+#include <uapi/asm/mce.h>
+#include <asm/tdx_global_metadata.h>
+#include <linux/pgtable.h>
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software only structure
+ * and not part of the TDX module/VMM ABI.
+ */
+struct ve_info {
+ u64 exit_reason;
+ u64 exit_qual;
+ /* Guest Linear (virtual) Address */
+ u64 gla;
+ /* Guest Physical Address */
+ u64 gpa;
+ u32 instr_len;
+ u32 instr_info;
+};
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+void __init tdx_early_init(void);
+
+void tdx_get_ve_info(struct ve_info *ve);
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+void tdx_halt(void);
+
+bool tdx_early_handle_ve(struct pt_regs *regs);
+
+int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);
+
+int tdx_mcall_extend_rtmr(u8 index, u8 *data);
+
+u64 tdx_hcall_get_quote(u8 *buf, size_t size);
+
+void __init tdx_dump_attributes(u64 td_attr);
+void __init tdx_dump_td_ctls(u64 td_ctls);
+
+#else
+
+static inline void tdx_early_init(void) { };
+static inline void tdx_halt(void) { };
+
+static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST)
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+ unsigned long p3, unsigned long p4);
+#else
+static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
+
+#ifdef CONFIG_INTEL_TDX_HOST
+u64 __seamcall(u64 fn, struct tdx_module_args *args);
+u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
+u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
+void tdx_init(void);
+
+#include <linux/preempt.h>
+#include <asm/archrandom.h>
+#include <asm/processor.h>
+
+typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
+
+static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * SEAMCALLs are made to the TDX module and can generate dirty
+ * cachelines of TDX private memory. Mark cache state incoherent
+ * so that the cache can be flushed during kexec.
+ *
+ * This needs to be done before actually making the SEAMCALL,
+ * because kexec-ing CPU could send NMI to stop remote CPUs,
+ * in which case even disabling IRQ won't help here.
+ */
+ this_cpu_write(cache_state_incoherent, true);
+
+ return func(fn, args);
+}
+
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ int retry = RDRAND_RETRY_LOOPS;
+ u64 ret;
+
+ do {
+ preempt_disable();
+ ret = __seamcall_dirty_cache(func, fn, args);
+ preempt_enable();
+ } while (ret == TDX_RND_NO_ENTROPY && --retry);
+
+ return ret;
+}
+
+#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
+#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
+#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
+int tdx_cpu_enable(void);
+int tdx_enable(void);
+const char *tdx_dump_mce_info(struct mce *m);
+const struct tdx_sys_info *tdx_get_sysinfo(void);
+
+int tdx_guest_keyid_alloc(void);
+u32 tdx_get_nr_guest_keyids(void);
+void tdx_guest_keyid_free(unsigned int keyid);
+
+void tdx_quirk_reset_page(struct page *page);
+
+struct tdx_td {
+ /* TD root structure: */
+ struct page *tdr_page;
+
+ int tdcs_nr_pages;
+ /* TD control structure: */
+ struct page **tdcs_pages;
+
+ /* Size of `tdcx_pages` in struct tdx_vp */
+ int tdcx_nr_pages;
+};
+
+struct tdx_vp {
+ /* TDVP root page */
+ struct page *tdvpr_page;
+ /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */
+ phys_addr_t tdvpr_pa;
+
+ /* TD vCPU control structure: */
+ struct page **tdcx_pages;
+};
+
+static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
+{
+ u64 ret;
+
+ ret = page_to_phys(page);
+ /* KeyID bits are just above the physical address bits: */
+ ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
+
+ return ret;
+}
+
+static inline int pg_level_to_tdx_sept_level(enum pg_level level)
+{
+ WARN_ON_ONCE(level == PG_LEVEL_NONE);
+ return level - 1;
+}
+
+u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
+u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mng_key_config(struct tdx_td *td);
+u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
+u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
+u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data);
+u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mr_finalize(struct tdx_td *td);
+u64 tdh_vp_flush(struct tdx_vp *vp);
+u64 tdh_mng_vpflushdone(struct tdx_td *td);
+u64 tdh_mng_key_freeid(struct tdx_td *td);
+u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err);
+u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
+u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data);
+u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
+u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
+u64 tdh_mem_track(struct tdx_td *tdr);
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_phymem_cache_wb(bool resume);
+u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
+#else
+static inline void tdx_init(void) { }
+static inline int tdx_cpu_enable(void) { return -ENODEV; }
+static inline int tdx_enable(void) { return -ENODEV; }
+static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
+static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
+static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
+#endif /* CONFIG_INTEL_TDX_HOST */
+
+#ifdef CONFIG_KEXEC_CORE
+void tdx_cpu_flush_cache_for_kexec(void);
+#else
+static inline void tdx_cpu_flush_cache_for_kexec(void) { }
+#endif
+
+#endif /* !__ASSEMBLER__ */
+#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h
new file mode 100644
index 000000000000..060a2ad744bf
--- /dev/null
+++ b/arch/x86/include/asm/tdx_global_metadata.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Automatically generated TDX global metadata structures. */
+#ifndef _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H
+#define _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H
+
+#include <linux/types.h>
+
+struct tdx_sys_info_features {
+ u64 tdx_features0;
+};
+
+struct tdx_sys_info_tdmr {
+ u16 max_tdmrs;
+ u16 max_reserved_per_tdmr;
+ u16 pamt_4k_entry_size;
+ u16 pamt_2m_entry_size;
+ u16 pamt_1g_entry_size;
+};
+
+struct tdx_sys_info_td_ctrl {
+ u16 tdr_base_size;
+ u16 tdcs_base_size;
+ u16 tdvps_base_size;
+};
+
+struct tdx_sys_info_td_conf {
+ u64 attributes_fixed0;
+ u64 attributes_fixed1;
+ u64 xfam_fixed0;
+ u64 xfam_fixed1;
+ u16 num_cpuid_config;
+ u16 max_vcpus_per_td;
+ u64 cpuid_config_leaves[128];
+ u64 cpuid_config_values[128][2];
+};
+
+struct tdx_sys_info {
+ struct tdx_sys_info_features features;
+ struct tdx_sys_info_tdmr tdmr;
+ struct tdx_sys_info_td_ctrl td_ctrl;
+ struct tdx_sys_info_td_conf td_conf;
+};
+
+#endif
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..f2d142a0a862 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -6,19 +6,16 @@
#include <linux/stddef.h>
#include <asm/ptrace.h>
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+/*
+ * Currently, the max observed size in the kernel code is
+ * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
+ * Raise it if needed.
+ */
+#define TEXT_POKE_MAX_OPCODE_SIZE 5
+
+extern void text_poke_early(void *addr, const void *opcode, size_t len);
+
+extern void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
@@ -31,12 +28,191 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
* no thread can be preempted in the instructions being modified (no iret to an
* invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing
+ * an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void smp_text_poke_sync_each_cpu(void);
+extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+#define text_poke_copy text_poke_copy
+extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
+extern void *text_poke_set(void *addr, int c, size_t len);
+extern int smp_text_poke_int3_handler(struct pt_regs *regs);
+extern void smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate);
+
+extern void smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void smp_text_poke_batch_finish(void);
+
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define RET_INSN_SIZE 1
+#define RET_INSN_OPCODE 0xC3
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
+
+#define DISP32_SIZE 4
+
+static __always_inline int text_opcode_size(u8 opcode)
+{
+ int size = 0;
+
+#define __CASE(insn) \
+ case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
+
+ switch(opcode) {
+ __CASE(INT3);
+ __CASE(RET);
+ __CASE(CALL);
+ __CASE(JMP32);
+ __CASE(JMP8);
+ }
+
+#undef __CASE
+
+ return size;
+}
+
+union text_poke_insn {
+ u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
+ struct {
+ u8 opcode;
+ s32 disp;
+ } __attribute__((packed));
+};
+
+static __always_inline
+void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size)
+{
+ union text_poke_insn *insn = buf;
+
+ BUG_ON(size < text_opcode_size(opcode));
+
+ /*
+ * Hide the addresses to avoid the compiler folding in constants when
+ * referencing code, these can mess up annotations like
+ * ANNOTATE_NOENDBR.
+ */
+ OPTIMIZER_HIDE_VAR(insn);
+ OPTIMIZER_HIDE_VAR(addr);
+ OPTIMIZER_HIDE_VAR(dest);
+
+ insn->opcode = opcode;
+
+ if (size > 1) {
+ insn->disp = (long)dest - (long)(addr + size);
+ if (size == 2) {
+ /*
+ * Ensure that for JMP8 the displacement
+ * actually fits the signed byte.
+ */
+ BUG_ON((insn->disp >> 31) != (insn->disp >> 7));
+ }
+ }
+}
+
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+ static union text_poke_insn insn; /* per instance */
+ __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode));
+ return &insn.text;
+}
+
extern int after_bootmem;
+extern __ro_after_init struct mm_struct *text_poke_mm;
+extern __ro_after_init unsigned long text_poke_mm_addr;
+
+#ifndef CONFIG_UML_X86
+static __always_inline
+void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+ regs->ip = ip;
+}
+
+static __always_inline
+void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+ /*
+ * The INT3 handler in entry_64.S adds a gap between the
+ * stack where the break point happened, and the saving of
+ * pt_regs. We can extend the original stack because of
+ * this gap. See the idtentry macro's X86_TRAP_BP logic.
+ *
+ * Similarly, entry_32.S will have a gap on the stack for
+ * (any) hardware exception and pt_regs; see the
+ * FIXUP_FRAME macro.
+ */
+ regs->sp -= sizeof(unsigned long);
+ *(unsigned long *)regs->sp = val;
+}
+
+static __always_inline
+unsigned long int3_emulate_pop(struct pt_regs *regs)
+{
+ unsigned long val = *(unsigned long *)regs->sp;
+ regs->sp += sizeof(unsigned long);
+ return val;
+}
+
+static __always_inline
+void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+ int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+ int3_emulate_jmp(regs, func);
+}
+
+static __always_inline
+void int3_emulate_ret(struct pt_regs *regs)
+{
+ unsigned long ip = int3_emulate_pop(regs);
+ int3_emulate_jmp(regs, ip);
+}
+
+static __always_inline
+bool __emulate_cc(unsigned long flags, u8 cc)
+{
+ static const unsigned long cc_mask[6] = {
+ [0] = X86_EFLAGS_OF,
+ [1] = X86_EFLAGS_CF,
+ [2] = X86_EFLAGS_ZF,
+ [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+ [4] = X86_EFLAGS_SF,
+ [5] = X86_EFLAGS_PF,
+ };
+
+ bool invert = cc & 1;
+ bool match;
+
+ if (cc < 0xc) {
+ match = flags & cc_mask[cc >> 1];
+ } else {
+ match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ if (cc >= 0xe)
+ match = match || (flags & X86_EFLAGS_ZF);
+ }
+
+ return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+ if (__emulate_cc(regs->flags, cc))
+ ip += disp;
+
+ int3_emulate_jmp(regs, ip);
+}
+
+#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
new file mode 100644
index 000000000000..91a7b6687c3b
--- /dev/null
+++ b/arch/x86/include/asm/thermal.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_THERMAL_H
+#define _ASM_X86_THERMAL_H
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
+void intel_init_thermal(struct cpuinfo_x86 *c);
+bool x86_thermal_enabled(void);
+void intel_thermal_interrupt(void);
+#else
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+#endif
+
+#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..e71e0e8362ed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,9 @@
* In vm86 mode, the hardware frame is much longer still, so add 16
* bytes to make room for the real-mode segments.
*
- * x86_64 has a fixed-length stack frame.
+ * x86-64 has a fixed-length stack frame, but it depends on whether
+ * or not FRED is enabled. Future versions of FRED might make this
+ * dynamic, but for now it is always 2 words longer.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
@@ -39,8 +41,12 @@
# else
# define TOP_OF_KERNEL_STACK_PADDING 8
# endif
-#else
-# define TOP_OF_KERNEL_STACK_PADDING 0
+#else /* x86-64 */
+# ifdef CONFIG_X86_FRED
+# define TOP_OF_KERNEL_STACK_PADDING (2 * 8)
+# else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+# endif
#endif
/*
@@ -48,14 +54,18 @@
* - this struct should fit entirely inside of one cache line
* - this struct shares the supervisor stack pages
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
#include <asm/cpufeature.h>
#include <linux/atomic.h>
struct thread_info {
unsigned long flags; /* low level flags */
+ unsigned long syscall_work; /* SYSCALL_WORK_ flags */
u32 status; /* thread synchronous flags */
+#ifdef CONFIG_SMP
+ u32 cpu; /* current CPU */
+#endif
};
#define INIT_THREAD_INFO(tsk) \
@@ -63,86 +73,53 @@ struct thread_info {
.flags = 0, \
}
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#include <asm/asm-offsets.h>
#endif
/*
- * thread information flags
- * - these are process state flags that various assembly files
- * may need to access
+ * Tell the generic TIF infrastructure which bits x86 supports
*/
-#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Speculative store bypass disable */
-#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
-#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
-#define TIF_SECCOMP 8 /* secure computing */
-#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
-#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
-#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_PATCH_PENDING 13 /* pending live patching update */
-#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_IA32 17 /* IA32 compatibility process */
-#define TIF_NOHZ 19 /* in adaptive nohz mode */
-#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_POLLING_NRFLAG
+#define HAVE_TIF_SINGLESTEP
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific TIF space starts at 16 */
+#define TIF_SSBD 16 /* Speculative store bypass disable */
+#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */
+#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */
+#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */
+#define TIF_NOTSC 21 /* TSC is not accessible in userland */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
+#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/
+#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
-#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
-#define TIF_X32 30 /* 32-bit native x86-64 binary */
-#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
-
-#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SSBD (1 << TIF_SSBD)
-#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
-#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP (1 << TIF_SECCOMP)
-#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
-#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
-#define _TIF_NOCPUID (1 << TIF_NOCPUID)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_IA32 (1 << TIF_IA32)
-#define _TIF_NOHZ (1 << TIF_NOHZ)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
-#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
-#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
-#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
-#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
-#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_ADDR32 (1 << TIF_ADDR32)
-#define _TIF_X32 (1 << TIF_X32)
-#define _TIF_FSCHECK (1 << TIF_FSCHECK)
-
-/*
- * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
- * enter_from_user_mode()
- */
-#define _TIF_WORK_SYSCALL_ENTRY \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
+
+#define _TIF_SSBD BIT(TIF_SSBD)
+#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
+#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH)
+#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD)
+#define _TIF_NOCPUID BIT(TIF_NOCPUID)
+#define _TIF_NOTSC BIT(TIF_NOTSC)
+#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE)
+#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
+#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
+#define _TIF_ADDR32 BIT(TIF_ADDR32)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_BASE \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
@@ -154,8 +131,14 @@ struct thread_info {
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
-#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#ifdef CONFIG_X86_IOPL_IOPERM
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \
+ _TIF_IO_BITMAP)
+#else
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY)
+#endif
+
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
@@ -164,7 +147,7 @@ struct thread_info {
*
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Walks up the stack frames to make sure that the specified object is
@@ -174,7 +157,12 @@ struct thread_info {
* GOOD_FRAME if within a frame
* BAD_STACK if placed across a frame boundary (or outside stack)
* NOT_STACK unable to determine (no frame pointers, etc)
+ *
+ * This function reads pointers from the stack and dereferences them. The
+ * pointers may not have their KMSAN shadow set up properly, which may result
+ * in false positive reports. Disable instrumentation to avoid those.
*/
+__no_kmsan_checks
static inline int arch_within_stack_frames(const void * const stack,
const void * const stackend,
const void *obj, unsigned long len)
@@ -211,18 +199,25 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
}
-#else /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
-#endif
-
-#endif
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+#ifndef __ASSEMBLER__
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+
+#define arch_set_restart_data(restart) \
+ do { restart->arch_data = current_thread_info()->status; } while (0)
+
#endif
-#ifndef __ASSEMBLY__
#ifdef CONFIG_X86_32
#define in_ia32_syscall() true
@@ -231,20 +226,8 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
-/*
- * Force syscall return via IRET by making it look as if there was
- * some work pending. IRET is our most capable (but slowest) syscall
- * return path, which is able to restore modified SS, CS and certain
- * EFLAGS values that other (fast) syscall return instructions
- * are not able to restore properly.
- */
-#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
-
-extern void arch_task_cache_init(void);
-extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-extern void arch_release_task_struct(struct task_struct *tsk);
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..f360104ed172 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -6,7 +6,8 @@
#include <asm/mc146818rtc.h>
extern void hpet_time_init(void);
-extern void time_init(void);
+extern bool pit_timer_init(void);
+extern bool tsc_clocksource_watchdog_disabled(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 7365dd4acffb..23baf8c9b34c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -6,8 +6,6 @@
#include <linux/interrupt.h>
#include <linux/math64.h>
-#define TICK_SIZE (tick_nsec / 1000)
-
unsigned long long native_sched_clock(void);
extern void recalibrate_cpu_khz(void);
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index a4a8b1b16c0c..956e4145311b 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -5,6 +5,15 @@
#include <asm/processor.h>
#include <asm/tsc.h>
+static inline unsigned long random_get_entropy(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
+ return random_get_entropy_fallback();
+ return rdtsc();
+}
+#define random_get_entropy random_get_entropy
+
/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE PIT_TICK_RATE
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 404b8b1d44f5..866ea78ba156 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,13 +2,13 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
+#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -23,18 +23,144 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables);
}
+static inline void invlpg(unsigned long addr)
+{
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+}
+
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
/*
- * While x86 architecture in general requires an IPI to perform TLB
- * shootdown, enablement code for several hypervisors overrides
- * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
- * a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human readable number of pages to
+ * invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any*
+ * address in the specified range matches.
+ *
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
*/
-static inline void __tlb_remove_table(void *table)
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
{
- free_page_and_swap_cache(table);
+ u64 rax = addr | flags | INVLPGB_FLAG_ASID;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u32 edx = (pcid << 16) | asid;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
}
+static inline void __tlbsync(void)
+{
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+ * to have completed. Print a warning if the task has been migrated,
+ * and might not be waiting on all the INVLPGBs issued during this TLB
+ * invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 0.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+ * current CPU have been executed system-wide. Therefore, make
+ * sure nothing gets migrated in-between but disable preemption
+ * as it is cheaper.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
index 1ad56eb3e8a8..80aaf64ff25f 100644
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ /*
+ * Set if pages were unmapped from any MM, even one that does not
+ * have active CPUs in its cpumask.
+ */
+ bool unmapped_pages;
};
#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f4204bf377fc..00daedfefc1b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -2,9 +2,11 @@
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H
-#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -12,141 +14,56 @@
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
-/*
- * The x86 feature is called PCID (Process Context IDentifier). It is similar
- * to what is traditionally called ASID on the RISC processors.
- *
- * We don't use the traditional ASID implementation, where each process/mm gets
- * its own ASID and flush/restart when we run out of ASID space.
- *
- * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
- * that came by on this CPU, allowing cheaper switch_mm between processes on
- * this CPU.
- *
- * We end up with different spaces for different things. To avoid confusion we
- * use different names for each of them:
- *
- * ASID - [0, TLB_NR_DYN_ASIDS-1]
- * the canonical identifier for an mm
- *
- * kPCID - [1, TLB_NR_DYN_ASIDS]
- * the value we write into the PCID part of CR3; corresponds to the
- * ASID+1, because PCID 0 is special.
- *
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
- * for KPTI each mm has two address spaces and thus needs two
- * PCID values, but we can still do with a single ASID denomination
- * for each mm. Corresponds to kPCID + 2048.
- *
- */
+DECLARE_PER_CPU(u64, tlbstate_untag_mask);
-/* There are 12 bits of space for ASIDS in CR3 */
-#define CR3_HW_ASID_BITS 12
+void __flush_tlb_all(void);
-/*
- * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
- * user/kernel switches
- */
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define PTI_CONSUMED_PCID_BITS 1
-#else
-# define PTI_CONSUMED_PCID_BITS 0
-#endif
+#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
-#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+void cr4_update_irqsoff(unsigned long set, unsigned long clear);
+unsigned long cr4_read_shadow(void);
-/*
- * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
- * for them being zero-based. Another -1 is because PCID 0 is reserved for
- * use by non-PCID-aware users.
- */
-#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
-
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in two cache
- * lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
-/*
- * Given @asid, compute kPCID
- */
-static inline u16 kern_pcid(u16 asid)
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
- VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- /*
- * Make sure that the dynamic ASID space does not confict with the
- * bit we are using to switch between user and kernel ASIDs.
- */
- BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
-
- /*
- * The ASID being passed in here should have respected the
- * MAX_ASID_AVAILABLE and thus never have the switch bit set.
- */
- VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
-#endif
- /*
- * The dynamically-assigned ASIDs that get passed in are small
- * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
- * so do not bother to clear it.
- *
- * If PCID is on, ASID-aware code paths put the ASID+1 into the
- * PCID bits. This serves two purposes. It prevents a nasty
- * situation in which PCID-unaware code saves CR3, loads some other
- * value (with PCID == 0), and then restores CR3, thus corrupting
- * the TLB for ASID 0 if the saved ASID was nonzero. It also means
- * that any bugs involving loading a PCID-enabled CR3 with
- * CR4.PCIDE off will trigger deterministically.
- */
- return asid + 1;
+ cr4_update_irqsoff(mask, 0);
}
-/*
- * Given @asid, compute uPCID
- */
-static inline u16 user_pcid(u16 asid)
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
- u16 ret = kern_pcid(asid);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
-#endif
- return ret;
+ cr4_update_irqsoff(0, mask);
}
-struct pgd_t;
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
{
- if (static_cpu_has(X86_FEATURE_PCID)) {
- return __sme_pa(pgd) | kern_pcid(asid);
- } else {
- VM_WARN_ON_ONCE(asid != 0);
- return __sme_pa(pgd);
- }
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_set_bits_irqsoff(mask);
+ local_irq_restore(flags);
}
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
{
- VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
- /*
- * Use boot_cpu_has() instead of this_cpu_has() as this function
- * might be called during early boot. This should work even after
- * boot because all CPU's the have same capabilities:
- */
- VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
- return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_clear_bits_irqsoff(mask);
+ local_irq_restore(flags);
}
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define __flush_tlb() __native_flush_tlb()
-#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
-#endif
+#ifndef MODULE
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
struct tlb_context {
u64 ctx_id;
@@ -167,35 +84,18 @@ struct tlb_state {
*/
struct mm_struct *loaded_mm;
-#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
- unsigned long last_user_mm_ibpb;
+ unsigned long last_user_mm_spec;
};
u16 loaded_mm_asid;
u16 next_asid;
/*
- * We can be in one of several states:
- *
- * - Actively using an mm. Our CPU's bit will be set in
- * mm_cpumask(loaded_mm) and is_lazy == false;
- *
- * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
- * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
- *
- * - Lazily using a real mm. loaded_mm != &init_mm, our bit
- * is set in mm_cpumask(loaded_mm), but is_lazy == true.
- * We're heuristically guessing that the CR3 load we
- * skipped more than makes up for the overhead added by
- * lazy mode.
- */
- bool is_lazy;
-
- /*
* If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
* This tells us to go invalidate all the non-loaded ctxs[]
@@ -206,6 +106,16 @@ struct tlb_state {
*/
bool invalidate_other;
+#ifdef CONFIG_ADDRESS_MASKING
+ /*
+ * Active LAM mode.
+ *
+ * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
+ * disabled.
+ */
+ u8 lam;
+#endif
+
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
@@ -240,39 +150,30 @@ struct tlb_state {
*/
struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
-/*
- * Blindly accessing user memory from NMI context can be dangerous
- * if we're in the middle of switching the current user task or
- * switching the loaded mm. It can also be dangerous if we
- * interrupted some kernel code that was temporarily using a
- * different mm.
- */
-static inline bool nmi_uaccess_okay(void)
-{
- struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
- struct mm_struct *current_mm = current->mm;
-
- VM_WARN_ON_ONCE(!loaded_mm);
+DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
+struct tlb_state_shared {
/*
- * The condition we want to check is
- * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
- * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
- * is supposed to be reasonably fast.
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
*
- * Instead, we check the almost equivalent but somewhat conservative
- * condition below, and we rely on the fact that switch_mm_irqs_off()
- * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
*/
- if (loaded_mm != current_mm)
- return false;
+ bool is_lazy;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
- VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
-
- return true;
-}
+bool nmi_uaccess_okay(void);
+#define nmi_uaccess_okay nmi_uaccess_okay
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
@@ -280,234 +181,15 @@ static inline void cr4_init_shadow(void)
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
-static inline void __cr4_set(unsigned long cr4)
-{
- lockdep_assert_irqs_disabled();
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
-}
-
-/* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
-{
- unsigned long cr4, flags;
-
- local_irq_save(flags);
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 | mask) != cr4)
- __cr4_set(cr4 | mask);
- local_irq_restore(flags);
-}
-
-/* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
-{
- unsigned long cr4, flags;
-
- local_irq_save(flags);
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 & ~mask) != cr4)
- __cr4_set(cr4 & ~mask);
- local_irq_restore(flags);
-}
-
-static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
-{
- unsigned long cr4;
-
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- __cr4_set(cr4 ^ mask);
-}
-
-/* Read the CR4 shadow. */
-static inline unsigned long cr4_read_shadow(void)
-{
- return this_cpu_read(cpu_tlbstate.cr4);
-}
-
-/*
- * Mark all other ASIDs as invalid, preserves the current.
- */
-static inline void invalidate_other_asid(void)
-{
- this_cpu_write(cpu_tlbstate.invalidate_other, true);
-}
-
-/*
- * Save some of cr4 feature set we're using (e.g. Pentium 4MB
- * enable and PPro Global page enable), so that any CPU's that boot
- * up after us can get the correct flags. This should only be used
- * during boot on the boot cpu.
- */
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
-static inline void cr4_set_bits_and_update_boot(unsigned long mask)
-{
- mmu_cr4_features |= mask;
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- cr4_set_bits(mask);
-}
+/* How many pages can be invalidated with one INVLPGB. */
+extern u16 invlpgb_count_max;
extern void initialize_tlbstate_and_flush(void);
/*
- * Given an ASID, flush the corresponding user ASID. We can delay this
- * until the next time we switch to it.
- *
- * See SWITCH_TO_USER_CR3.
- */
-static inline void invalidate_user_asid(u16 asid)
-{
- /* There is no user ASID if address space separation is off */
- if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
- return;
-
- /*
- * We only have a single ASID if PCID is off and the CR3
- * write will have flushed it.
- */
- if (!cpu_feature_enabled(X86_FEATURE_PCID))
- return;
-
- if (!static_cpu_has(X86_FEATURE_PTI))
- return;
-
- __set_bit(kern_pcid(asid),
- (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
-}
-
-/*
- * flush the entire current user mapping
- */
-static inline void __native_flush_tlb(void)
-{
- /*
- * Preemption or interrupts must be disabled to protect the access
- * to the per CPU variable and to prevent being preempted between
- * read_cr3() and write_cr3().
- */
- WARN_ON_ONCE(preemptible());
-
- invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
-
- /* If current->mm == NULL then the read_cr3() "borrows" an mm */
- native_write_cr3(__native_read_cr3());
-}
-
-/*
- * flush everything
- */
-static inline void __native_flush_tlb_global(void)
-{
- unsigned long cr4, flags;
-
- if (static_cpu_has(X86_FEATURE_INVPCID)) {
- /*
- * Using INVPCID is considerably faster than a pair of writes
- * to CR4 sandwiched inside an IRQ flag save/restore.
- *
- * Note, this works with CR4.PCIDE=0 or 1.
- */
- invpcid_flush_all();
- return;
- }
-
- /*
- * Read-modify-write to CR4 - protect it from preemption and
- * from interrupts. (Use the raw variant because this code can
- * be called from deep inside debugging code.)
- */
- raw_local_irq_save(flags);
-
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* toggle PGE */
- native_write_cr4(cr4 ^ X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
-
- raw_local_irq_restore(flags);
-}
-
-/*
- * flush one page in the user mapping
- */
-static inline void __native_flush_tlb_one_user(unsigned long addr)
-{
- u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-
- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
-
- if (!static_cpu_has(X86_FEATURE_PTI))
- return;
-
- /*
- * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
- * Just use invalidate_user_asid() in case we are called early.
- */
- if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
- invalidate_user_asid(loaded_mm_asid);
- else
- invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
-}
-
-/*
- * flush everything
- */
-static inline void __flush_tlb_all(void)
-{
- /*
- * This is to catch users with enabled preemption and the PGE feature
- * and don't trigger the warning in __native_flush_tlb().
- */
- VM_WARN_ON_ONCE(preemptible());
-
- if (boot_cpu_has(X86_FEATURE_PGE)) {
- __flush_tlb_global();
- } else {
- /*
- * !PGE -> !PCID (setup_pcid()), thus every flush is total.
- */
- __flush_tlb();
- }
-}
-
-/*
- * flush one page in the kernel mapping
- */
-static inline void __flush_tlb_one_kernel(unsigned long addr)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-
- /*
- * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
- * paravirt equivalent. Even with PCID, this is sufficient: we only
- * use PCID if we also use global PTEs for the kernel mapping, and
- * INVLPG flushes global translations across all address spaces.
- *
- * If PTI is on, then the kernel is mapped with non-global PTEs, and
- * __flush_tlb_one_user() will flush the given address for the current
- * kernel address space and for its usermode counterpart, but it does
- * not flush it for other address spaces.
- */
- __flush_tlb_one_user(addr);
-
- if (!static_cpu_has(X86_FEATURE_PTI))
- return;
-
- /*
- * See above. We need to propagate the flush to all other address
- * spaces. In principle, we only need to propagate it to kernelmode
- * address spaces, but the extra bookkeeping we would need is not
- * worth it.
- */
- invalidate_other_asid();
-}
-
-#define TLB_FLUSH_ALL -1UL
-
-/*
* TLB flushing:
*
* - flush_tlb_all() flushes all processes TLBs
@@ -515,7 +197,7 @@ static inline void __flush_tlb_one_kernel(unsigned long addr)
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
+ * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
*
* ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up.
@@ -541,11 +223,86 @@ struct flush_tlb_info {
unsigned long start;
unsigned long end;
u64 new_tlb_gen;
- unsigned int stride_shift;
- bool freed_tables;
+ unsigned int initiating_cpu;
+ u8 stride_shift;
+ u8 freed_tables;
+ u8 trim_cpumask;
};
-#define local_flush_tlb() __flush_tlb()
+void flush_tlb_local(void);
+void flush_tlb_one_user(unsigned long addr);
+void flush_tlb_one_kernel(unsigned long addr);
+void flush_tlb_multi(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info);
+
+static inline bool is_dyn_asid(u16 asid)
+{
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+
+static inline void mm_init_global_asid(struct mm_struct *mm)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+}
+
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
+{
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
+static inline bool mm_in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_global_asid(struct mm_struct *mm) { }
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
+#endif /* CONFIG_BROADCAST_TLB_FLUSH */
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif
#define flush_tlb_mm(mm) \
flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)
@@ -554,7 +311,7 @@ struct flush_tlb_info {
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
- : PAGE_SHIFT, false)
+ : PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -567,8 +324,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
-void native_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info);
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ bool should_defer = false;
+
+ /* If remote CPUs need to be flushed then defer batch the flush */
+ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+ should_defer = true;
+ put_cpu();
+
+ return should_defer;
+}
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
@@ -581,21 +347,145 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm, unsigned long start, unsigned long end)
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ batch->unmapped_pages = true;
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, info) \
- native_flush_tlb_others(mask, info)
+static inline bool pte_flags_need_flush(unsigned long oldflags,
+ unsigned long newflags,
+ bool ignore_access)
+{
+ /*
+ * Flags that require a flush when cleared but not when they are set.
+ * Only include flags that would not trigger spurious page-faults.
+ * Non-present entries are not cached. Hardware would set the
+ * dirty/access bit if needed without a fault.
+ */
+ const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
+ _PAGE_ACCESSED;
+ const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
+ _PAGE_SOFTW3 | _PAGE_SOFTW4 |
+ _PAGE_SAVED_DIRTY;
+ const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
+ _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
+ _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
+ _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
+ unsigned long diff = oldflags ^ newflags;
+
+ BUILD_BUG_ON(flush_on_clear & software_flags);
+ BUILD_BUG_ON(flush_on_clear & flush_on_change);
+ BUILD_BUG_ON(flush_on_change & software_flags);
+
+ /* Ignore software flags */
+ diff &= ~software_flags;
+
+ if (ignore_access)
+ diff &= ~_PAGE_ACCESSED;
+
+ /*
+ * Did any of the 'flush_on_clear' flags was clleared set from between
+ * 'oldflags' and 'newflags'?
+ */
+ if (diff & oldflags & flush_on_clear)
+ return true;
+
+ /* Flush on modified flags. */
+ if (diff & flush_on_change)
+ return true;
+
+ /* Ensure there are no flags that were left behind */
+ if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+ (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
+ VM_WARN_ON_ONCE(1);
+ return true;
+ }
-#define paravirt_tlb_remove_table(tlb, page) \
- tlb_remove_page(tlb, (void *)(page))
+ return false;
+}
+
+/*
+ * pte_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace PTEs.
+ */
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pte_flags(oldpte) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pte_pfn(oldpte) != pte_pfn(newpte))
+ return true;
+
+ /*
+ * check PTE flags; ignore access-bit; see comment in
+ * ptep_clear_flush_young().
+ */
+ return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
+ true);
+}
+#define pte_needs_flush pte_needs_flush
+
+/*
+ * huge_pmd_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace huge PMDs.
+ */
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
+ return true;
+
+ /*
+ * check PMD flags; do not ignore access-bit; see
+ * pmdp_clear_flush_young().
+ */
+ return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
+ false);
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
+#ifdef CONFIG_ADDRESS_MASKING
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ u64 lam = this_cpu_read(cpu_tlbstate.lam);
+
+ return lam << X86_CR3_LAM_U57_BIT;
+}
+
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
+{
+ this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
+ this_cpu_write(tlbstate_untag_mask, untag_mask);
+}
+
+#else
+
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ return 0;
+}
+
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
+{
+}
#endif
+#endif /* !MODULE */
+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..1fadf0cf520c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -31,9 +31,9 @@
* CONFIG_NUMA.
*/
#include <linux/numa.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_NUMA
-#include <linux/cpumask.h>
#include <asm/mpspec.h>
#include <asm/percpu.h>
@@ -102,18 +102,105 @@ static inline void setup_node_to_cpumask_map(void) { }
#include <asm-generic/topology.h>
+/* Topology information */
+enum x86_topology_domains {
+ TOPO_SMT_DOMAIN,
+ TOPO_CORE_DOMAIN,
+ TOPO_MODULE_DOMAIN,
+ TOPO_TILE_DOMAIN,
+ TOPO_DIE_DOMAIN,
+ TOPO_DIEGRP_DOMAIN,
+ TOPO_PKG_DOMAIN,
+ TOPO_MAX_DOMAIN,
+};
+
+enum x86_topology_cpu_type {
+ TOPO_CPU_TYPE_PERFORMANCE,
+ TOPO_CPU_TYPE_EFFICIENCY,
+ TOPO_CPU_TYPE_UNKNOWN,
+};
+
+struct x86_topology_system {
+ unsigned int dom_shifts[TOPO_MAX_DOMAIN];
+ unsigned int dom_size[TOPO_MAX_DOMAIN];
+};
+
+extern struct x86_topology_system x86_topo_system;
+
+static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom)
+{
+ return x86_topo_system.dom_size[dom];
+}
+
+static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom)
+{
+ return dom == TOPO_SMT_DOMAIN ? 0 : x86_topo_system.dom_shifts[dom - 1];
+}
+
extern const struct cpumask *cpu_coregroup_mask(int cpu);
+extern const struct cpumask *cpu_clustergroup_mask(int cpu);
+
+#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
+#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
+#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
+#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
+#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
+#define topology_ppin(cpu) (cpu_data(cpu).ppin)
-#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
-#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
-#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id)
+
+extern unsigned int __max_dies_per_package;
+extern unsigned int __max_logical_packages;
+extern unsigned int __max_threads_per_core;
+extern unsigned int __num_threads_per_package;
+extern unsigned int __num_cores_per_package;
+
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
+
+static inline unsigned int topology_max_packages(void)
+{
+ return __max_logical_packages;
+}
+
+static inline unsigned int topology_max_dies_per_package(void)
+{
+ return __max_dies_per_package;
+}
+
+static inline unsigned int topology_num_cores_per_package(void)
+{
+ return __num_cores_per_package;
+}
+
+static inline unsigned int topology_num_threads_per_package(void)
+{
+ return __num_threads_per_package;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level);
+#else
+static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_SMP
+#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
+#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
+#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
-extern unsigned int __max_logical_packages;
-#define topology_max_packages() (__max_logical_packages)
+
+static inline int topology_phys_to_logical_pkg(unsigned int pkg)
+{
+ return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN],
+ TOPO_PKG_DOMAIN);
+}
extern int __max_smt_threads;
@@ -122,19 +209,43 @@ static inline int topology_max_smt_threads(void)
return __max_smt_threads;
}
-int topology_update_package_map(unsigned int apicid, unsigned int cpu);
-int topology_phys_to_logical_pkg(unsigned int pkg);
-bool topology_is_primary_thread(unsigned int cpu);
-bool topology_smt_supported(void);
-#else
-#define topology_max_packages() (1)
-static inline int
-topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+#include <linux/cpu_smt.h>
+
+extern unsigned int __amd_nodes_per_pkg;
+
+static inline unsigned int topology_amd_nodes_per_pkg(void)
+{
+ return __amd_nodes_per_pkg;
+}
+
+#else /* CONFIG_SMP */
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
-static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
-static inline bool topology_smt_supported(void) { return false; }
-#endif
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
+
+/**
+ * topology_is_primary_thread - Check whether CPU is the primary SMT thread
+ * @cpu: CPU to check
+ */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
+int topology_get_primary_thread(unsigned int cpu);
+
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+ int pcpu = topology_get_primary_thread(cpu);
+
+ return pcpu >= 0 ? cpu_online(pcpu) : false;
+}
+#define topology_is_core_online topology_is_core_online
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
@@ -150,7 +261,7 @@ extern bool x86_topology_update;
#include <asm/percpu.h>
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
-extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+extern bool __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
@@ -163,7 +274,7 @@ void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_MC_PRIO */
-#define sysctl_sched_itmt_enabled 0
+#define sysctl_sched_itmt_enabled false
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
@@ -176,4 +287,44 @@ static inline void sched_clear_itmt_support(void)
}
#endif /* CONFIG_SCHED_MC_PRIO */
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
+#include <asm/cpufeature.h>
+
+DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key)
+
+DECLARE_PER_CPU(unsigned long, arch_freq_scale);
+
+static inline long arch_scale_freq_capacity(int cpu)
+{
+ return per_cpu(arch_freq_scale, cpu);
+}
+#define arch_scale_freq_capacity arch_scale_freq_capacity
+
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+ unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
+extern void arch_set_max_freq_ratio(bool turbo_disabled);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
+#else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+ unsigned long max_cap,
+ unsigned long cap_freq,
+ unsigned long base_freq) { }
+
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
+#endif
+
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
+extern int arch_sched_node_distance(int from, int to);
+
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
deleted file mode 100644
index 57c8da027d99..000000000000
--- a/arch/x86/include/asm/trace/common.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_TRACE_COMMON_H
-#define _ASM_TRACE_COMMON_H
-
-#ifdef CONFIG_TRACING
-DECLARE_STATIC_KEY_FALSE(trace_pagefault_key);
-#define trace_pagefault_enabled() \
- static_branch_unlikely(&trace_pagefault_key)
-DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key);
-#define trace_resched_ipi_enabled() \
- static_branch_unlikely(&trace_resched_ipi_key)
-#else
-static inline bool trace_pagefault_enabled(void) { return false; }
-static inline bool trace_resched_ipi_enabled(void) { return false; }
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
deleted file mode 100644
index e0e6d7f21399..000000000000
--- a/arch/x86/include/asm/trace/exceptions.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM exceptions
-
-#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_PAGE_FAULT_H
-
-#include <linux/tracepoint.h>
-#include <asm/trace/common.h>
-
-extern int trace_pagefault_reg(void);
-extern void trace_pagefault_unreg(void);
-
-DECLARE_EVENT_CLASS(x86_exceptions,
-
- TP_PROTO(unsigned long address, struct pt_regs *regs,
- unsigned long error_code),
-
- TP_ARGS(address, regs, error_code),
-
- TP_STRUCT__entry(
- __field( unsigned long, address )
- __field( unsigned long, ip )
- __field( unsigned long, error_code )
- ),
-
- TP_fast_assign(
- __entry->address = address;
- __entry->ip = regs->ip;
- __entry->error_code = error_code;
- ),
-
- TP_printk("address=%pf ip=%pf error_code=0x%lx",
- (void *)__entry->address, (void *)__entry->ip,
- __entry->error_code) );
-
-#define DEFINE_PAGE_FAULT_EVENT(name) \
-DEFINE_EVENT_FN(x86_exceptions, name, \
- TP_PROTO(unsigned long address, struct pt_regs *regs, \
- unsigned long error_code), \
- TP_ARGS(address, regs, error_code), \
- trace_pagefault_reg, trace_pagefault_unreg);
-
-DEFINE_PAGE_FAULT_EVENT(page_fault_user);
-DEFINE_PAGE_FAULT_EVENT(page_fault_kernel);
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE exceptions
-#endif /* _TRACE_PAGE_FAULT_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 069c04be1507..721b408d9a67 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, initialized)
+ __field(bool, load_fpu)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->initialized = fpu->initialized;
+ __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD);
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
- __entry->xfeatures = fpu->state.xsave.header.xfeatures;
- __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
+ __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures;
+ __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->initialized,
+ __entry->load_fpu,
__entry->xfeatures,
__entry->xcomp_bv
)
@@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_before_restore,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
-DEFINE_EVENT(x86_fpu, x86_fpu_after_restore,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
@@ -64,26 +54,11 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
-DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index ace464f09681..a8e5a7a2b460 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -8,7 +8,7 @@
#if IS_ENABLED(CONFIG_HYPERV)
-TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
TP_PROTO(const struct cpumask *cpus,
const struct flush_tlb_info *info),
TP_ARGS(cpus, info),
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector)
);
+TRACE_EVENT(hyperv_send_ipi_one,
+ TP_PROTO(int cpu,
+ int vector),
+ TP_ARGS(cpu, vector),
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->cpu = cpu;
+ __entry->vector = vector;
+ ),
+ TP_printk("cpu %d vector %x",
+ __entry->cpu, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 33b9d0f0aafe..7408bebdfde0 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,13 +6,9 @@
#define _TRACE_IRQ_VECTORS_H
#include <linux/tracepoint.h>
-#include <asm/trace/common.h>
#ifdef CONFIG_X86_LOCAL_APIC
-extern int trace_resched_ipi_reg(void);
-extern void trace_resched_ipi_unreg(void);
-
DECLARE_EVENT_CLASS(x86_irq_vector,
TP_PROTO(int vector),
@@ -37,18 +33,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
TP_PROTO(int vector), \
TP_ARGS(vector), NULL, NULL);
-#define DEFINE_RESCHED_IPI_EVENT(name) \
-DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
- TP_PROTO(int vector), \
- TP_ARGS(vector), \
- trace_resched_ipi_reg, \
- trace_resched_ipi_unreg); \
-DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
- TP_PROTO(int vector), \
- TP_ARGS(vector), \
- trace_resched_ipi_reg, \
- trace_resched_ipi_unreg);
-
/*
* local_timer - called when entering/exiting a local timer interrupt
* vector handler
@@ -99,7 +83,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
/*
* reschedule - called when entering/exiting a reschedule vector handler
*/
-DEFINE_RESCHED_IPI_EVENT(reschedule);
+DEFINE_IRQ_VECTOR_EVENT(reschedule);
/*
* call_function - called when entering/exiting a call function interrupt
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
deleted file mode 100644
index 54133017267c..000000000000
--- a/arch/x86/include/asm/trace/mpx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mpx
-
-#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_MPX_H
-
-#include <linux/tracepoint.h>
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-TRACE_EVENT(mpx_bounds_register_exception,
-
- TP_PROTO(void __user *addr_referenced,
- const struct mpx_bndreg *bndreg),
- TP_ARGS(addr_referenced, bndreg),
-
- TP_STRUCT__entry(
- __field(void __user *, addr_referenced)
- __field(u64, lower_bound)
- __field(u64, upper_bound)
- ),
-
- TP_fast_assign(
- __entry->addr_referenced = addr_referenced;
- __entry->lower_bound = bndreg->lower_bound;
- __entry->upper_bound = bndreg->upper_bound;
- ),
- /*
- * Note that we are printing out the '~' of the upper
- * bounds register here. It is actually stored in its
- * one's complement form so that its 'init' state
- * corresponds to all 0's. But, that looks like
- * gibberish when printed out, so print out the 1's
- * complement instead of the actual value here. Note
- * though that you still need to specify filters for the
- * actual value, not the displayed one.
- */
- TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
- __entry->addr_referenced,
- __entry->lower_bound,
- ~__entry->upper_bound
- )
-);
-
-TRACE_EVENT(bounds_exception_mpx,
-
- TP_PROTO(const struct mpx_bndcsr *bndcsr),
- TP_ARGS(bndcsr),
-
- TP_STRUCT__entry(
- __field(u64, bndcfgu)
- __field(u64, bndstatus)
- ),
-
- TP_fast_assign(
- /* need to get rid of the 'const' on bndcsr */
- __entry->bndcfgu = (u64)bndcsr->bndcfgu;
- __entry->bndstatus = (u64)bndcsr->bndstatus;
- ),
-
- TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
- __entry->bndcfgu,
- __entry->bndstatus)
-);
-
-DECLARE_EVENT_CLASS(mpx_range_trace,
-
- TP_PROTO(unsigned long start,
- unsigned long end),
- TP_ARGS(start, end),
-
- TP_STRUCT__entry(
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
-
- TP_fast_assign(
- __entry->start = start;
- __entry->end = end;
- ),
-
- TP_printk("[0x%p:0x%p]",
- (void *)__entry->start,
- (void *)__entry->end
- )
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-TRACE_EVENT(mpx_new_bounds_table,
-
- TP_PROTO(unsigned long table_vaddr),
- TP_ARGS(table_vaddr),
-
- TP_STRUCT__entry(
- __field(unsigned long, table_vaddr)
- ),
-
- TP_fast_assign(
- __entry->table_vaddr = table_vaddr;
- ),
-
- TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
-);
-
-#else
-
-/*
- * This gets used outside of MPX-specific code, so we need a stub.
- */
-static inline
-void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
-{
-}
-
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH asm/trace/
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mpx
-#endif /* _TRACE_MPX_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
new file mode 100644
index 000000000000..a23a7b707b64
--- /dev/null
+++ b/arch/x86/include/asm/trap_pf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_TRAP_PF_H
+#define _ASM_X86_TRAP_PF_H
+
+#include <linux/bits.h>
+
+/*
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
+ * bit 5 == 1: protection keys block access
+ * bit 6 == 1: shadow stack access fault
+ * bit 15 == 1: SGX MMU page-fault
+ * bit 31 == 1: fault was due to RMP violation
+ */
+enum x86_pf_error_code {
+ X86_PF_PROT = BIT(0),
+ X86_PF_WRITE = BIT(1),
+ X86_PF_USER = BIT(2),
+ X86_PF_RSVD = BIT(3),
+ X86_PF_INSTR = BIT(4),
+ X86_PF_PK = BIT(5),
+ X86_PF_SHSTK = BIT(6),
+ X86_PF_SGX = BIT(15),
+ X86_PF_RMP = BIT(31),
+};
+
+#endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
new file mode 100644
index 000000000000..8d1154cdf787
--- /dev/null
+++ b/arch/x86/include/asm/trapnr.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_TRAPNR_H
+#define _ASM_X86_TRAPNR_H
+
+/*
+ * Event type codes used by FRED, Intel VT-x and AMD SVM
+ */
+#define EVENT_TYPE_EXTINT 0 // External interrupt
+#define EVENT_TYPE_RESERVED 1
+#define EVENT_TYPE_NMI 2 // NMI
+#define EVENT_TYPE_HWEXC 3 // Hardware originated traps, exceptions
+#define EVENT_TYPE_SWINT 4 // INT n
+#define EVENT_TYPE_PRIV_SWEXC 5 // INT1
+#define EVENT_TYPE_SWEXC 6 // INTO, INT3
+#define EVENT_TYPE_OTHER 7 // FRED SYSCALL/SYSENTER, VT-x MTF
+
+/* Interrupts/Exceptions */
+
+#define X86_TRAP_DE 0 /* Divide-by-zero */
+#define X86_TRAP_DB 1 /* Debug */
+#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */
+#define X86_TRAP_BP 3 /* Breakpoint */
+#define X86_TRAP_OF 4 /* Overflow */
+#define X86_TRAP_BR 5 /* Bound Range Exceeded */
+#define X86_TRAP_UD 6 /* Invalid Opcode */
+#define X86_TRAP_NM 7 /* Device Not Available */
+#define X86_TRAP_DF 8 /* Double Fault */
+#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */
+#define X86_TRAP_TS 10 /* Invalid TSS */
+#define X86_TRAP_NP 11 /* Segment Not Present */
+#define X86_TRAP_SS 12 /* Stack Segment Fault */
+#define X86_TRAP_GP 13 /* General Protection Fault */
+#define X86_TRAP_PF 14 /* Page Fault */
+#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */
+#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */
+#define X86_TRAP_AC 17 /* Alignment Check */
+#define X86_TRAP_MC 18 /* Machine Check */
+#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */
+#define X86_TRAP_VE 20 /* Virtualization Exception */
+#define X86_TRAP_CP 21 /* Control Protection Exception */
+#define X86_TRAP_VC 29 /* VMM Communication Exception */
+#define X86_TRAP_IRET 32 /* IRET Exception */
+
+#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7d6f3f3fad78..869b88061801 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -6,93 +6,24 @@
#include <linux/kprobes.h>
#include <asm/debugreg.h>
+#include <asm/idtentry.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */
+#include <asm/trap_pf.h>
-#define dotraplinkage __visible
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
#ifdef CONFIG_X86_64
-asmlinkage void double_fault(void);
-#endif
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void alignment_check(void);
-#ifdef CONFIG_X86_MCE
-asmlinkage void machine_check(void);
-#endif /* CONFIG_X86_MCE */
-asmlinkage void simd_coprocessor_error(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_divide_error(void);
-asmlinkage void xen_xennmi(void);
-asmlinkage void xen_xendebug(void);
-asmlinkage void xen_xenint3(void);
-asmlinkage void xen_overflow(void);
-asmlinkage void xen_bounds(void);
-asmlinkage void xen_invalid_op(void);
-asmlinkage void xen_device_not_available(void);
-asmlinkage void xen_double_fault(void);
-asmlinkage void xen_coprocessor_segment_overrun(void);
-asmlinkage void xen_invalid_TSS(void);
-asmlinkage void xen_segment_not_present(void);
-asmlinkage void xen_stack_segment(void);
-asmlinkage void xen_general_protection(void);
-asmlinkage void xen_page_fault(void);
-asmlinkage void xen_spurious_interrupt_bug(void);
-asmlinkage void xen_coprocessor_error(void);
-asmlinkage void xen_alignment_check(void);
-#ifdef CONFIG_X86_MCE
-asmlinkage void xen_machine_check(void);
-#endif /* CONFIG_X86_MCE */
-asmlinkage void xen_simd_coprocessor_error(void);
-#endif
-
-dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code);
-dotraplinkage void do_debug(struct pt_regs *regs, long error_code);
-dotraplinkage void do_nmi(struct pt_regs *regs, long error_code);
-dotraplinkage void do_int3(struct pt_regs *regs, long error_code);
-dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
-dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
-dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
-dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
-dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
-dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
-dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
-#ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
-void __init trap_init(void);
-#endif
-dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
-dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
-dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
-dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
-#ifdef CONFIG_X86_MCE
-dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
-dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code);
-#ifdef CONFIG_X86_32
-dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code);
+
+extern int ibt_selftest(void);
+extern int ibt_selftest_noendbr(void);
+
+#ifdef CONFIG_X86_F00F_BUG
+/* For handling the FOOF bug */
+void handle_invalid_op(struct pt_regs *regs);
#endif
-dotraplinkage void do_mce(struct pt_regs *regs, long error_code);
static inline int get_si_code(unsigned long condition)
{
@@ -104,72 +35,26 @@ static inline int get_si_code(unsigned long condition)
return TRAP_BRKPT;
}
-extern int panic_on_unrecovered_nmi;
-
void math_emulate(struct math_emu_info *);
-#ifndef CONFIG_X86_32
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs);
-asmlinkage void smp_threshold_interrupt(struct pt_regs *regs);
-asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs);
-#endif
-void smp_apic_timer_interrupt(struct pt_regs *regs);
-void smp_spurious_interrupt(struct pt_regs *regs);
-void smp_error_interrupt(struct pt_regs *regs);
-asmlinkage void smp_irq_move_cleanup_interrupt(void);
-
-extern void ist_enter(struct pt_regs *regs);
-extern void ist_exit(struct pt_regs *regs);
-extern void ist_begin_non_atomic(struct pt_regs *regs);
-extern void ist_end_non_atomic(void);
+bool fault_in_kernel_space(unsigned long address);
#ifdef CONFIG_VMAP_STACK
-void __noreturn handle_stack_overflow(const char *message,
- struct pt_regs *regs,
- unsigned long fault_address);
+void __noreturn handle_stack_overflow(struct pt_regs *regs,
+ unsigned long fault_address,
+ struct stack_info *info);
#endif
-/* Interrupts/Exceptions */
-enum {
- X86_TRAP_DE = 0, /* 0, Divide-by-zero */
- X86_TRAP_DB, /* 1, Debug */
- X86_TRAP_NMI, /* 2, Non-maskable Interrupt */
- X86_TRAP_BP, /* 3, Breakpoint */
- X86_TRAP_OF, /* 4, Overflow */
- X86_TRAP_BR, /* 5, Bound Range Exceeded */
- X86_TRAP_UD, /* 6, Invalid Opcode */
- X86_TRAP_NM, /* 7, Device Not Available */
- X86_TRAP_DF, /* 8, Double Fault */
- X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */
- X86_TRAP_TS, /* 10, Invalid TSS */
- X86_TRAP_NP, /* 11, Segment Not Present */
- X86_TRAP_SS, /* 12, Stack Segment Fault */
- X86_TRAP_GP, /* 13, General Protection Fault */
- X86_TRAP_PF, /* 14, Page Fault */
- X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */
- X86_TRAP_MF, /* 16, x87 Floating-Point Exception */
- X86_TRAP_AC, /* 17, Alignment Check */
- X86_TRAP_MC, /* 18, Machine Check */
- X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */
- X86_TRAP_IRET = 32, /* 32, IRET Exception */
-};
+static inline void cond_local_irq_enable(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+}
+
+static inline void cond_local_irq_disable(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
+}
-/*
- * Page fault error code bits:
- *
- * bit 0 == 0: no page found 1: protection fault
- * bit 1 == 0: read access 1: write access
- * bit 2 == 0: kernel-mode access 1: user-mode access
- * bit 3 == 1: use of reserved bit detected
- * bit 4 == 1: fault was an instruction fetch
- * bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
- X86_PF_PROT = 1 << 0,
- X86_PF_WRITE = 1 << 1,
- X86_PF_USER = 1 << 2,
- X86_PF_RSVD = 1 << 3,
- X86_PF_INSTR = 1 << 4,
- X86_PF_PK = 1 << 5,
-};
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 8a0c25c6bf09..4f7f09f50552 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -5,10 +5,64 @@
#ifndef _ASM_X86_TSC_H
#define _ASM_X86_TSC_H
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
#include <asm/processor.h>
+#include <asm/msr.h>
-#define NS_SCALE 10 /* 2^10, carefully chosen */
-#define US_SCALE 32 /* 2^32, arbitralrily chosen */
+/**
+ * rdtsc() - returns the current TSC without ordering constraints
+ *
+ * rdtsc() returns the result of RDTSC as a 64-bit integer. The
+ * only ordering constraint it supplies is the ordering implied by
+ * "asm volatile": it will put the RDTSC in the place you expect. The
+ * CPU can and will speculatively execute that RDTSC, though, so the
+ * results can be non-monotonic if compared on different CPUs.
+ */
+static __always_inline u64 rdtsc(void)
+{
+ EAX_EDX_DECLARE_ARGS(val, low, high);
+
+ asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+/**
+ * rdtsc_ordered() - read the current TSC in program order
+ *
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
+ * It is ordered like a load to a global in-memory counter. It should
+ * be impossible to observe non-monotonic rdtsc_unordered() behavior
+ * across multiple CPUs as long as the TSC is synced.
+ */
+static __always_inline u64 rdtsc_ordered(void)
+{
+ EAX_EDX_DECLARE_ARGS(val, low, high);
+
+ /*
+ * The RDTSC instruction is not ordered relative to memory
+ * access. The Intel SDM and the AMD APM are both vague on this
+ * point, but empirically an RDTSC instruction can be
+ * speculatively executed before prior loads. An RDTSC
+ * immediately after an appropriate barrier appears to be
+ * ordered as a normal load, that is, it provides the same
+ * ordering guarantees as reading from a global memory location
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ *
+ * Thus, use the preferred barrier on the respective CPU, aiming for
+ * RDTSCP as the default.
+ */
+ asm volatile(ALTERNATIVE_2("rdtsc",
+ "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
+ "rdtscp", X86_FEATURE_RDTSCP)
+ : EAX_EDX_RET(val, low, high)
+ /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
+ :: "ecx");
+
+ return EAX_EDX_VAL(val, low, high);
+}
/*
* Standard way to access the cycle counter.
@@ -22,20 +76,15 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
-#ifndef CONFIG_X86_TSC
- if (!boot_cpu_has(X86_FEATURE_TSC))
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
return 0;
-#endif
-
return rdtsc();
}
-
-extern struct system_counterval_t convert_art_to_tsc(u64 art);
-extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
+#define get_cycles get_cycles
extern void tsc_early_init(void);
extern void tsc_init(void);
-extern unsigned long calibrate_delay_is_known(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
@@ -58,12 +107,10 @@ extern bool tsc_async_resets;
#ifdef CONFIG_X86_TSC
extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
extern void tsc_verify_tsc_adjust(bool resume);
-extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
#else
static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
static inline void tsc_verify_tsc_adjust(bool resume) { }
-static inline void check_tsc_sync_source(int cpu) { }
static inline void check_tsc_sync_target(void) { }
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 780f2b42c8ef..367297b188c3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -5,118 +5,33 @@
* User space memory access functions
*/
#include <linux/compiler.h>
+#include <linux/instrumented.h>
#include <linux/kasan-checks.h>
+#include <linux/mm_types.h>
#include <linux/string.h>
+#include <linux/mmap_lock.h>
#include <asm/asm.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/extable.h>
+#include <asm/tlbflush.h>
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(-1UL)
-#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
-
-#define get_ds() (KERNEL_DS)
-#define get_fs() (current->thread.addr_limit)
-static inline void set_fs(mm_segment_t fs)
-{
- current->thread.addr_limit = fs;
- /* On user-mode return, check fs is correct */
- set_thread_flag(TIF_FSCHECK);
-}
-
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
-#define user_addr_max() (current->thread.addr_limit.seg)
-#define __addr_ok(addr) \
- ((unsigned long __force)(addr) < user_addr_max())
-
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- */
-static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
-{
- /*
- * If we have used "sizeof()" for the size,
- * we know it won't overflow the limit (but
- * it might overflow the 'addr', so it's
- * important to subtract the size from the
- * limit, not add it to the address).
- */
- if (__builtin_constant_p(size))
- return unlikely(addr > limit - size);
-
- /* Arbitrary sizes? Be careful about overflow */
- addr += size;
- if (unlikely(addr < size))
- return true;
- return unlikely(addr > limit);
-}
-
-#define __range_not_ok(addr, size, limit) \
-({ \
- __chk_user_ptr(addr); \
- __chk_range_not_ok((unsigned long __force)(addr), size, limit); \
-})
-
-#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+#ifdef CONFIG_X86_32
+# include <asm/uaccess_32.h>
#else
-# define WARN_ON_IN_IRQ()
+# include <asm/uaccess_64.h>
#endif
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(addr, size) \
-({ \
- WARN_ON_IN_IRQ(); \
- likely(!__range_not_ok(addr, size, user_addr_max())); \
-})
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
+#include <asm-generic/access_ok.h>
extern int __get_user_1(void);
extern int __get_user_2(void);
extern int __get_user_4(void);
extern int __get_user_8(void);
+extern int __get_user_nocheck_1(void);
+extern int __get_user_nocheck_2(void);
+extern int __get_user_nocheck_4(void);
+extern int __get_user_nocheck_8(void);
extern int __get_user_bad(void);
#define __uaccess_begin() stac()
@@ -128,31 +43,24 @@ extern int __get_user_bad(void);
})
/*
- * This is a type: either unsigned long, if the argument fits into
- * that type, or otherwise unsigned long long.
+ * This is the smallest unsigned integer type that can fit a value
+ * (up to 'long long')
*/
-#define __inttype(x) \
-__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
+#define __inttype(x) __typeof__( \
+ __typefits(x,char, \
+ __typefits(x,short, \
+ __typefits(x,int, \
+ __typefits(x,long,0ULL)))))
+
+#define __typefits(x,type,not) \
+ __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not)
-/**
- * get_user: - Get a simple variable from user space.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
/*
+ * This is used for both get_user() and __get_user() to expand to
+ * the proper special function call that has odd calling conventions
+ * due to returning both a value and an error, and that depends on
+ * the size of the pointer passed in.
+ *
* Careful: we have to cast the result to the type of the pointer
* for sign reasons.
*
@@ -165,29 +73,67 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
* Clang/LLVM cares about the size of the register, but still wants
* the base register for something that ends up being a pair.
*/
-#define get_user(x, ptr) \
+#define do_get_user_call(fn,x,ptr) \
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- might_fault(); \
- asm volatile("call __get_user_%P4" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
- : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \
+ instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
})
-#define __put_user_x(size, x, ptr, __ret_pu) \
- asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
- : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+/**
+ * get_user - Get a simple variable from user space.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); })
+/**
+ * __get_user - Get a simple variable from user space, with less checking.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr)
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -195,39 +141,55 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
: : "A" (x), "r" (addr) \
: : label)
-#define __put_user_asm_ex_u64(x, addr) \
- asm volatile("\n" \
- "1: movl %%eax,0(%1)\n" \
- "2: movl %%edx,4(%1)\n" \
- "3:" \
- _ASM_EXTABLE_EX(1b, 2b) \
- _ASM_EXTABLE_EX(2b, 3b) \
- : : "A" (x), "r" (addr))
-
-#define __put_user_x8(x, ptr, __ret_pu) \
- asm volatile("call __put_user_8" : "=a" (__ret_pu) \
- : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
#define __put_user_goto_u64(x, ptr, label) \
- __put_user_goto(x, ptr, "q", "", "er", label)
-#define __put_user_asm_ex_u64(x, addr) \
- __put_user_asm_ex(x, addr, "q", "", "er")
-#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
+ __put_user_goto(x, ptr, "q", "er", label)
#endif
extern void __put_user_bad(void);
/*
* Strange magic calling convention: pointer in %ecx,
- * value in %eax(:%edx), return value in %eax. clobbers %rbx
+ * value in %eax(:%edx), return value in %ecx. clobbers %rbx
*/
extern void __put_user_1(void);
extern void __put_user_2(void);
extern void __put_user_4(void);
extern void __put_user_8(void);
+extern void __put_user_nocheck_1(void);
+extern void __put_user_nocheck_2(void);
+extern void __put_user_nocheck_4(void);
+extern void __put_user_nocheck_8(void);
+
+/*
+ * ptr must be evaluated and assigned to the temporary __ptr_pu before
+ * the assignment of x to __val_pu, to avoid any function calls
+ * involved in the ptr expression (possibly implicitly generated due
+ * to KASAN) from clobbering %ax.
+ */
+#define do_put_user_call(fn,x,ptr) \
+({ \
+ int __ret_pu; \
+ void __user *__ptr_pu; \
+ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \
+ __typeof__(*(ptr)) __x = (x); /* eval x once */ \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
+ __ptr_pu = __ptr; \
+ __val_pu = __x; \
+ asm volatile("call __" #fn "_%c[size]" \
+ : "=c" (__ret_pu), \
+ ASM_CALL_CONSTRAINT \
+ : "0" (__ptr_pu), \
+ "r" (__val_pu), \
+ [size] "i" (sizeof(*(ptr))) \
+ :"ebx"); \
+ instrument_put_user(__x, __ptr, sizeof(*(ptr))); \
+ __builtin_expect(__ret_pu, 0); \
+})
/**
- * put_user: - Write a simple value into user space.
+ * put_user - Write a simple value into user space.
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
@@ -241,214 +203,268 @@ extern void __put_user_8(void);
* @ptr must have pointer-to-simple-variable type, and @x must be assignable
* to the result of dereferencing @ptr.
*
- * Returns zero on success, or -EFAULT on error.
+ * Return: zero on success, or -EFAULT on error.
*/
-#define put_user(x, ptr) \
-({ \
- int __ret_pu; \
- __typeof__(*(ptr)) __pu_val; \
- __chk_user_ptr(ptr); \
- might_fault(); \
- __pu_val = x; \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __put_user_x(1, __pu_val, ptr, __ret_pu); \
- break; \
- case 2: \
- __put_user_x(2, __pu_val, ptr, __ret_pu); \
- break; \
- case 4: \
- __put_user_x(4, __pu_val, ptr, __ret_pu); \
- break; \
- case 8: \
- __put_user_x8(__pu_val, ptr, __ret_pu); \
- break; \
- default: \
- __put_user_x(X, __pu_val, ptr, __ret_pu); \
- break; \
- } \
- __builtin_expect(__ret_pu, 0); \
-})
+#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); })
+
+/**
+ * __put_user - Write a simple value into user space, with less checking.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ */
+#define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr)
#define __put_user_size(x, ptr, size, label) \
do { \
- __chk_user_ptr(ptr); \
+ __typeof__(*(ptr)) __x = (x); /* eval x once */ \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
switch (size) { \
case 1: \
- __put_user_goto(x, ptr, "b", "b", "iq", label); \
+ __put_user_goto(__x, __ptr, "b", "iq", label); \
break; \
case 2: \
- __put_user_goto(x, ptr, "w", "w", "ir", label); \
+ __put_user_goto(__x, __ptr, "w", "ir", label); \
break; \
case 4: \
- __put_user_goto(x, ptr, "l", "k", "ir", label); \
+ __put_user_goto(__x, __ptr, "l", "ir", label); \
break; \
case 8: \
- __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \
+ __put_user_goto_u64(__x, __ptr, label); \
break; \
default: \
__put_user_bad(); \
} \
+ instrument_put_user(__x, __ptr, size); \
} while (0)
-/*
- * This doesn't do __uaccess_begin/end - the exception handling
- * around it must do that.
- */
-#define __put_user_size_ex(x, ptr, size) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#ifdef CONFIG_X86_32
+#define __get_user_asm_u64(x, ptr, label) do { \
+ unsigned int __gu_low, __gu_high; \
+ const unsigned int __user *__gu_ptr; \
+ __gu_ptr = (const void __user *)(ptr); \
+ __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \
+ __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \
+ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \
+} while (0)
+#else
+#define __get_user_asm_u64(x, ptr, label) \
+ __get_user_asm(x, ptr, "q", "=r", label)
+#endif
+
+#define __get_user_size(x, ptr, size, label) \
do { \
__chk_user_ptr(ptr); \
switch (size) { \
- case 1: \
- __put_user_asm_ex(x, ptr, "b", "b", "iq"); \
+ case 1: { \
+ unsigned char x_u8__; \
+ __get_user_asm(x_u8__, ptr, "b", "=q", label); \
+ (x) = x_u8__; \
break; \
+ } \
case 2: \
- __put_user_asm_ex(x, ptr, "w", "w", "ir"); \
+ __get_user_asm(x, ptr, "w", "=r", label); \
break; \
case 4: \
- __put_user_asm_ex(x, ptr, "l", "k", "ir"); \
+ __get_user_asm(x, ptr, "l", "=r", label); \
break; \
case 8: \
- __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \
+ __get_user_asm_u64(x, ptr, label); \
break; \
default: \
- __put_user_bad(); \
+ (x) = __get_user_bad(); \
} \
+ instrument_get_user(x); \
} while (0)
+#define __get_user_asm(x, addr, itype, ltype, label) \
+ asm_goto_output("\n" \
+ "1: mov"itype" %[umem],%[output]\n" \
+ _ASM_EXTABLE_UA(1b, %l2) \
+ : [output] ltype(x) \
+ : [umem] "m" (__m(addr)) \
+ : : label)
+
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
#ifdef CONFIG_X86_32
-#define __get_user_asm_u64(x, ptr, retval, errret) \
+#define __get_user_asm_u64(x, ptr, retval) \
({ \
__typeof__(ptr) __ptr = (ptr); \
- asm volatile("\n" \
- "1: movl %2,%%eax\n" \
- "2: movl %3,%%edx\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: mov %4,%0\n" \
- " xorl %%eax,%%eax\n" \
- " xorl %%edx,%%edx\n" \
- " jmp 3b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 4b) \
- _ASM_EXTABLE_UA(2b, 4b) \
- : "=r" (retval), "=&A"(x) \
- : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
- "i" (errret), "0" (retval)); \
+ asm volatile("\n" \
+ "1: movl %[lowbits],%%eax\n" \
+ "2: movl %[highbits],%%edx\n" \
+ "3:\n" \
+ _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX_DX, \
+ %[errout]) \
+ _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX_DX, \
+ %[errout]) \
+ : [errout] "=r" (retval), \
+ [output] "=&A"(x) \
+ : [lowbits] "m" (__m(__ptr)), \
+ [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \
+ "0" (retval)); \
})
-#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
#else
-#define __get_user_asm_u64(x, ptr, retval, errret) \
- __get_user_asm(x, ptr, retval, "q", "", "=r", errret)
-#define __get_user_asm_ex_u64(x, ptr) \
- __get_user_asm_ex(x, ptr, "q", "", "=r")
+#define __get_user_asm_u64(x, ptr, retval) \
+ __get_user_asm(x, ptr, retval, "q")
#endif
-#define __get_user_size(x, ptr, size, retval, errret) \
+#define __get_user_size(x, ptr, size, retval) \
do { \
+ unsigned char x_u8__; \
+ \
retval = 0; \
__chk_user_ptr(ptr); \
switch (size) { \
case 1: \
- __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \
+ __get_user_asm(x_u8__, ptr, retval, "b"); \
+ (x) = x_u8__; \
break; \
case 2: \
- __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \
+ __get_user_asm(x, ptr, retval, "w"); \
break; \
case 4: \
- __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \
+ __get_user_asm(x, ptr, retval, "l"); \
break; \
case 8: \
- __get_user_asm_u64(x, ptr, retval, errret); \
+ __get_user_asm_u64(x, ptr, retval); \
break; \
default: \
(x) = __get_user_bad(); \
} \
} while (0)
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
+#define __get_user_asm(x, addr, err, itype) \
asm volatile("\n" \
- "1: mov"itype" %2,%"rtype"1\n" \
+ "1: mov"itype" %[umem],%[output]\n" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " xor"itype" %"rtype"1,%"rtype"1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "=r" (err), ltype(x) \
- : "m" (__m(addr)), "i" (errret), "0" (err))
-
-#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX, \
+ %[errout]) \
+ : [errout] "=r" (err), \
+ [output] "=a" (x) \
+ : [umem] "m" (__m(addr)), \
+ "0" (err))
+
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
+#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm_goto_output("\n" \
+ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
+ _ASM_EXTABLE_UA(1b, %l[label]) \
+ : "=@ccz" (success), \
+ [ptr] "+m" (*_ptr), \
+ [old] "+a" (__old) \
+ : [new] ltype (__new) \
+ : "memory" \
+ : label); \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+
+#ifdef CONFIG_X86_32
+#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm_goto_output("\n" \
+ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ _ASM_EXTABLE_UA(1b, %l[label]) \
+ : "=@ccz" (success), \
+ "+A" (__old), \
+ [ptr] "+m" (*_ptr) \
+ : "b" ((u32)__new), \
+ "c" ((u32)((u64)__new >> 32)) \
+ : "memory" \
+ : label); \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+#endif // CONFIG_X86_32
+#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
+#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
+ int __err = 0; \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
asm volatile("\n" \
- "1: mov"itype" %2,%"rtype"1\n" \
+ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "=r" (err), ltype(x) \
- : "m" (__m(addr)), "i" (errret), "0" (err))
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
+ %[errout]) \
+ : "=@ccz" (success), \
+ [errout] "+r" (__err), \
+ [ptr] "+m" (*_ptr), \
+ [old] "+a" (__old) \
+ : [new] ltype (__new) \
+ : "memory"); \
+ if (unlikely(__err)) \
+ goto label; \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+#ifdef CONFIG_X86_32
/*
- * This doesn't do __uaccess_begin/end - the exception handling
- * around it must do that.
+ * Unlike the normal CMPXCHG, use output GPR for both success/fail and error.
+ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are
+ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses
+ * both ESI and EDI for the memory operand, compilation will fail if the error
+ * is an input+output as there will be no register available for input.
*/
-#define __get_user_size_ex(x, ptr, size) \
-do { \
- __chk_user_ptr(ptr); \
- switch (size) { \
- case 1: \
- __get_user_asm_ex(x, ptr, "b", "b", "=q"); \
- break; \
- case 2: \
- __get_user_asm_ex(x, ptr, "w", "w", "=r"); \
- break; \
- case 4: \
- __get_user_asm_ex(x, ptr, "l", "k", "=r"); \
- break; \
- case 8: \
- __get_user_asm_ex_u64(x, ptr); \
- break; \
- default: \
- (x) = __get_user_bad(); \
- } \
-} while (0)
-
-#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
- asm volatile("1: mov"itype" %1,%"rtype"0\n" \
+#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+ int __result; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm volatile("\n" \
+ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ "mov $0, %[result]\n\t" \
+ "setz %b[result]\n" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:xor"itype" %"rtype"0,%"rtype"0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_EX(1b, 3b) \
- : ltype(x) : "m" (__m(addr)))
-
-#define __put_user_nocheck(x, ptr, size) \
-({ \
- __label__ __pu_label; \
- int __pu_err = -EFAULT; \
- __uaccess_begin(); \
- __put_user_size((x), (ptr), (size), __pu_label); \
- __pu_err = 0; \
-__pu_label: \
- __uaccess_end(); \
- __builtin_expect(__pu_err, 0); \
-})
-
-#define __get_user_nocheck(x, ptr, size) \
-({ \
- int __gu_err; \
- __inttype(*(ptr)) __gu_val; \
- __uaccess_begin_nospec(); \
- __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
- __uaccess_end(); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
+ %[result]) \
+ : [result] "=q" (__result), \
+ "+A" (__old), \
+ [ptr] "+m" (*_ptr) \
+ : "b" ((u32)__new), \
+ "c" ((u32)((u64)__new >> 32)) \
+ : "memory", "cc"); \
+ if (unlikely(__result < 0)) \
+ goto label; \
+ if (unlikely(!__result)) \
+ *_old = __old; \
+ likely(__result); })
+#endif // CONFIG_X86_32
+#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
/* FIXME: this hack is definitely wrong -AK */
struct __large_struct { unsigned long buf[100]; };
@@ -459,118 +475,13 @@ struct __large_struct { unsigned long buf[100]; };
* we do not write to any memory gcc knows about, so there are no
* aliasing issues.
*/
-#define __put_user_goto(x, addr, itype, rtype, ltype, label) \
- asm_volatile_goto("\n" \
- "1: mov"itype" %"rtype"0,%1\n" \
- _ASM_EXTABLE_UA(1b, %l2) \
+#define __put_user_goto(x, addr, itype, ltype, label) \
+ asm goto("\n" \
+ "1: mov"itype" %0,%1\n" \
+ _ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
: : label)
-#define __put_user_failed(x, addr, itype, rtype, ltype, errret) \
- ({ __label__ __puflab; \
- int __pufret = errret; \
- __put_user_goto(x,addr,itype,rtype,ltype,__puflab); \
- __pufret = 0; \
- __puflab: __pufret; })
-
-#define __put_user_asm(x, addr, retval, itype, rtype, ltype, errret) do { \
- retval = __put_user_failed(x, addr, itype, rtype, ltype, errret); \
-} while (0)
-
-#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
- asm volatile("1: mov"itype" %"rtype"0,%1\n" \
- "2:\n" \
- _ASM_EXTABLE_EX(1b, 2b) \
- : : ltype(x), "m" (__m(addr)))
-
-/*
- * uaccess_try and catch
- */
-#define uaccess_try do { \
- current->thread.uaccess_err = 0; \
- __uaccess_begin(); \
- barrier();
-
-#define uaccess_try_nospec do { \
- current->thread.uaccess_err = 0; \
- __uaccess_begin_nospec(); \
-
-#define uaccess_catch(err) \
- __uaccess_end(); \
- (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
-} while (0)
-
-/**
- * __get_user: - Get a simple variable from user space, with less checking.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-
-#define __get_user(x, ptr) \
- __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
-
-/**
- * __put_user: - Write a simple value into user space, with less checking.
- * @x: Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple value from kernel space to user
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-
-#define __put_user(x, ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-/*
- * {get|put}_user_try and catch
- *
- * get_user_try {
- * get_user_ex(...);
- * } get_user_catch(err)
- */
-#define get_user_try uaccess_try_nospec
-#define get_user_catch(err) uaccess_catch(err)
-
-#define get_user_ex(x, ptr) do { \
- unsigned long __gue_val; \
- __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \
- (x) = (__force __typeof__(*(ptr)))__gue_val; \
-} while (0)
-
-#define put_user_try uaccess_try
-#define put_user_catch(err) uaccess_catch(err)
-
-#define put_user_ex(x, ptr) \
- __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
extern unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
extern __must_check long
@@ -578,102 +489,14 @@ strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
-unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
-
-extern void __cmpxchg_wrong_size(void)
- __compiletime_error("Bad argument size for cmpxchg");
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
-#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
-({ \
- int __ret = 0; \
- __typeof__(ptr) __uval = (uval); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __uaccess_begin_nospec(); \
- switch (size) { \
- case 1: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "q" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 2: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 4: \
- { \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- case 8: \
- { \
- if (!IS_ENABLED(CONFIG_X86_64)) \
- __cmpxchg_wrong_size(); \
- \
- asm volatile("\n" \
- "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
- "2:\n" \
- "\t.section .fixup, \"ax\"\n" \
- "3:\tmov %3, %0\n" \
- "\tjmp 2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
- : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
- : "i" (-EFAULT), "r" (__new), "1" (__old) \
- : "memory" \
- ); \
- break; \
- } \
- default: \
- __cmpxchg_wrong_size(); \
- } \
- __uaccess_end(); \
- *__uval = __old; \
- __ret; \
-})
-
-#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \
-({ \
- access_ok((ptr), sizeof(*(ptr))) ? \
- __user_atomic_cmpxchg_inatomic((uval), (ptr), \
- (old), (new), sizeof(*(ptr))) : \
- -EFAULT; \
-})
+unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned len);
+#endif
/*
* movsl can be slow when source and dest are not both 8-byte aligned
@@ -686,28 +509,13 @@ extern struct movsl_mask {
#define ARCH_HAS_NOCACHE_UACCESS 1
-#ifdef CONFIG_X86_32
-# include <asm/uaccess_32.h>
-#else
-# include <asm/uaccess_64.h>
-#endif
-
-/*
- * We rely on the nested NMI work to allow atomic faults from the NMI path; the
- * nested NMI paths are careful to preserve CR2.
- *
- * Caller must use pagefault_enable/disable, or run in interrupt context,
- * and also do a uaccess_ok() check
- */
-#define __copy_from_user_nmi __copy_from_user_inatomic
-
/*
* The "unsafe" user accesses aren't really "unsafe", but the naming
* is a big fat warning: you have to not only do the access_ok()
* checking before using them, but you have to surround them with the
* user_access_begin/end() pair.
*/
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr,len)))
return 0;
@@ -717,17 +525,117 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_begin(a,b) user_access_begin(a,b)
#define user_access_end() __uaccess_end()
-#define unsafe_put_user(x, ptr, label) \
+#define user_access_save() smap_save()
+#define user_access_restore(x) smap_restore(x)
+
+#define arch_unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
-#define unsafe_get_user(x, ptr, err_label) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define arch_unsafe_get_user(x, ptr, err_label) \
+do { \
+ __inttype(*(ptr)) __gu_val; \
+ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+} while (0)
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define arch_unsafe_get_user(x, ptr, err_label) \
do { \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
- __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
+ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
if (unlikely(__gu_err)) goto err_label; \
} while (0)
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+extern void __try_cmpxchg_user_wrong_size(void);
+
+#ifndef CONFIG_X86_32
+#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \
+ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label)
+#endif
+
+/*
+ * Force the pointer to u<size> to match the size expected by the asm helper.
+ * clang/LLVM compiles all cases and only discards the unused paths after
+ * processing errors, which breaks i386 if the pointer is an 8-byte value.
+ */
+#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
+ bool __ret; \
+ __chk_user_ptr(_ptr); \
+ switch (sizeof(*(_ptr))) { \
+ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \
+ (__force u8 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \
+ (__force u16 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \
+ (__force u32 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\
+ (_nval), _label); \
+ break; \
+ default: __try_cmpxchg_user_wrong_size(); \
+ } \
+ __ret; })
+
+/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */
+#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
+ int __ret = -EFAULT; \
+ __uaccess_begin_nospec(); \
+ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \
+_label: \
+ __uaccess_end(); \
+ __ret; \
+ })
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define arch_get_kernel_nofault(dst, src, type, err_label) \
+ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \
+ sizeof(type), err_label)
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define arch_get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ int __kr_err; \
+ \
+ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \
+ sizeof(type), __kr_err); \
+ if (unlikely(__kr_err)) \
+ goto err_label; \
+} while (0)
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define arch_put_kernel_nofault(dst, src, type, err_label) \
+ __put_user_size(*((type *)(src)), (__force type __user *)(dst), \
+ sizeof(type), err_label)
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index ba2dc1930630..40379a1adbb8 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u8 *)to, from, ret,
- "b", "b", "=q", 1);
- __uaccess_end();
- return ret;
- case 2:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u16 *)to, from, ret,
- "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 4:
- ret = 0;
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u32 *)to, from, ret,
- "l", "k", "=r", 4);
- __uaccess_end();
- return ret;
- }
- }
return __copy_user_ll(to, (__force const void *)from, n);
}
@@ -60,4 +33,7 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
return __copy_from_user_ll_nocache_nozero(to, from, n);
}
+unsigned long __must_check clear_user(void __user *mem, unsigned long len);
+unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
+
#endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a9d637bc301d..915124011c27 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -11,193 +11,155 @@
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
+#include <asm/percpu.h>
+#ifdef MODULE
+ #define runtime_const_ptr(sym) (sym)
+#else
+ #include <asm/runtime-const.h>
+#endif
+extern unsigned long USER_PTR_MAX;
+
+#ifdef CONFIG_ADDRESS_MASKING
/*
- * Copy To/From Userspace
+ * Mask out tag bits from the address.
*/
+static inline unsigned long __untagged_addr(unsigned long addr)
+{
+ asm_inline (ALTERNATIVE("", "and " __percpu_arg([mask]) ", %[addr]",
+ X86_FEATURE_LAM)
+ : [addr] "+r" (addr)
+ : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
-/* Handles exceptions in both to and from, but doesn't do access_ok */
-__must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+ return addr;
+}
-static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+#define untagged_addr(addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr(__addr); \
+})
+
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+ unsigned long addr)
{
- unsigned ret;
+ mmap_assert_locked(mm);
+ return addr & (mm)->context.untag_mask;
+}
- /*
- * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
- * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
- * Otherwise, use copy_user_generic_unrolled.
- */
- alternative_call_2(copy_user_generic_unrolled,
- copy_user_generic_string,
- X86_FEATURE_REP_GOOD,
- copy_user_enhanced_fast_string,
- X86_FEATURE_ERMS,
- ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
- "=d" (len)),
- "1" (to), "2" (from), "3" (len)
- : "memory", "rcx", "r8", "r9", "r10", "r11");
+#define untagged_addr_remote(mm, addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+})
+
+#endif
+
+#define valid_user_address(x) \
+ likely((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+
+/*
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
+ */
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+ void __user *ret;
+ asm("cmp %1,%0\n\t"
+ "cmova %1,%0"
+ :"=r" (ret)
+ :"r" (runtime_const_ptr(USER_PTR_MAX)),
+ "0" (ptr));
return ret;
}
+#define masked_user_access_begin(x) ({ \
+ auto __masked_ptr = (x); \
+ __masked_ptr = mask_user_address(__masked_ptr); \
+ __uaccess_begin(); __masked_ptr; })
-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+/*
+ * User pointers can have tag bits on x86-64. This scheme tolerates
+ * arbitrary values in those bits rather then masking them off.
+ *
+ * Enforce two rules:
+ * 1. 'ptr' must be in the user part of the address space
+ * 2. 'ptr+size' must not overflow into kernel addresses
+ *
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
+ *
+ * In fact, we could probably remove the size check entirely, since
+ * any kernel accesses will be in increasing address order starting
+ * at 'ptr'.
+ *
+ * That's a separate optimization, for now just handle the small
+ * constant case.
+ */
+static inline bool __access_ok(const void __user *ptr, unsigned long size)
{
- unsigned long ret;
+ if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) {
+ return valid_user_address(ptr);
+ } else {
+ unsigned long sum = size + (__force unsigned long)ptr;
+
+ return valid_user_address(sum) && sum >= (__force unsigned long)ptr;
+ }
+}
+#define __access_ok __access_ok
- __uaccess_begin();
+/*
+ * Copy To/From Userspace
+ */
+
+/* Handles exceptions in both to and from, but doesn't do access_ok */
+__must_check unsigned long
+rep_movs_alternative(void *to, const void *from, unsigned len);
+
+static __always_inline __must_check unsigned long
+copy_user_generic(void *to, const void *from, unsigned long len)
+{
+ stac();
/*
- * Note, __memcpy_mcsafe() is explicitly used since it can
- * handle exceptions / faults. memcpy_mcsafe() may fall back to
- * memcpy() which lacks this handling.
+ * If CPU has FSRM feature, use 'rep movs'.
+ * Otherwise, use rep_movs_alternative.
*/
- ret = __memcpy_mcsafe(to, from, len);
- __uaccess_end();
- return ret;
+ asm volatile(
+ "1:\n\t"
+ ALTERNATIVE("rep movsb",
+ "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
+ "2:\n"
+ _ASM_EXTABLE_UA(1b, 2b)
+ :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+ : : "memory", "rax");
+ clac();
+ return len;
}
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
- int ret = 0;
-
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst, (__force void *)src, size);
- switch (size) {
- case 1:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
- ret, "b", "b", "=q", 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
- ret, "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
- ret, "l", "k", "=r", 4);
- __uaccess_end();
- return ret;
- case 8:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 8);
- __uaccess_end();
- return ret;
- case 10:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 10);
- if (likely(!ret))
- __get_user_asm_nozero(*(u16 *)(8 + (char *)dst),
- (u16 __user *)(8 + (char __user *)src),
- ret, "w", "w", "=r", 2);
- __uaccess_end();
- return ret;
- case 16:
- __uaccess_begin_nospec();
- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 16);
- if (likely(!ret))
- __get_user_asm_nozero(*(u64 *)(8 + (char *)dst),
- (u64 __user *)(8 + (char __user *)src),
- ret, "q", "", "=r", 8);
- __uaccess_end();
- return ret;
- default:
- return copy_user_generic(dst, (__force void *)src, size);
- }
+ return copy_user_generic(dst, (__force void *)src, size);
}
static __always_inline __must_check unsigned long
raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
{
- int ret = 0;
-
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst, src, size);
- switch (size) {
- case 1:
- __uaccess_begin();
- __put_user_asm(*(u8 *)src, (u8 __user *)dst,
- ret, "b", "b", "iq", 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin();
- __put_user_asm(*(u16 *)src, (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin();
- __put_user_asm(*(u32 *)src, (u32 __user *)dst,
- ret, "l", "k", "ir", 4);
- __uaccess_end();
- return ret;
- case 8:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 8);
- __uaccess_end();
- return ret;
- case 10:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 10);
- if (likely(!ret)) {
- asm("":::"memory");
- __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
- }
- __uaccess_end();
- return ret;
- case 16:
- __uaccess_begin();
- __put_user_asm(*(u64 *)src, (u64 __user *)dst,
- ret, "q", "", "er", 16);
- if (likely(!ret)) {
- asm("":::"memory");
- __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
- ret, "q", "", "er", 8);
- }
- __uaccess_end();
- return ret;
- default:
- return copy_user_generic((__force void *)dst, src, size);
- }
-}
-
-static __always_inline __must_check
-unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size)
-{
- return copy_user_generic((__force void *)dst,
- (__force void *)src, size);
+ return copy_user_generic((__force void *)dst, src, size);
}
-extern long __copy_user_nocache(void *dst, const void __user *src,
- unsigned size, int zerorest);
-
+extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size);
extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
-extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
- size_t len);
static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size)
{
+ long ret;
kasan_check_write(dst, size);
- return __copy_user_nocache(dst, src, size, 0);
+ stac();
+ ret = __copy_user_nocache(dst, src, size);
+ clac();
+ return ret;
}
static inline int
@@ -207,10 +169,40 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
return __copy_user_flushcache(dst, src, size);
}
-unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len);
+/*
+ * Zero Userspace.
+ */
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
+__must_check unsigned long
+rep_stos_alternative(void __user *addr, unsigned long len);
+
+static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+ might_fault();
+ stac();
+ /*
+ * No memory constraint because it doesn't change any memory gcc
+ * knows about.
+ */
+ asm volatile(
+ "1:\n\t"
+ ALTERNATIVE("rep stosb",
+ "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
+ "2:\n"
+ _ASM_EXTABLE_UA(1b, 2b)
+ : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
+ : "a" (0));
+
+ clac();
+
+ return size;
+}
+
+static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+ if (__access_ok(to, n))
+ return __clear_user(to, n);
+ return n;
+}
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
index db43f2a0d92c..aeed98c3c9e1 100644
--- a/arch/x86/include/asm/umip.h
+++ b/arch/x86/include/asm/umip.h
@@ -4,9 +4,9 @@
#include <linux/types.h>
#include <asm/ptrace.h>
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
bool fixup_umip_exception(struct pt_regs *regs);
#else
static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; }
-#endif /* CONFIG_X86_INTEL_UMIP */
+#endif /* CONFIG_X86_UMIP */
#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h
new file mode 100644
index 000000000000..f5937e9866ac
--- /dev/null
+++ b/arch/x86/include/asm/unaccepted_memory.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_UNACCEPTED_MEMORY_H
+#define _ASM_X86_UNACCEPTED_MEMORY_H
+
+#include <linux/efi.h>
+#include <asm/tdx.h>
+#include <asm/sev.h>
+
+static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ /* Platform-specific memory-acceptance call goes here */
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ if (!tdx_accept_memory(start, end))
+ panic("TDX: Failed to accept memory\n");
+ } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+ snp_accept_memory(start, end);
+ } else {
+ panic("Cannot accept memory: unknown platform\n");
+ }
+}
+
+static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void)
+{
+ if (efi.unaccepted == EFI_INVALID_TABLE_ADDR)
+ return NULL;
+ return __va(efi.unaccepted);
+}
+#endif
diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h
deleted file mode 100644
index 9c754a7447aa..000000000000
--- a/arch/x86/include/asm/unaligned.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_UNALIGNED_H
-#define _ASM_X86_UNALIGNED_H
-
-/*
- * The x86 can do unaligned accesses itself.
- */
-
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
-
-#endif /* _ASM_X86_UNALIGNED_H */
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index dc4ed8bc2382..6c9e5bdd3916 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h>
-# ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
-# else
-# define __SYSCALL_MASK (~0)
-# endif
-
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>
@@ -19,19 +13,27 @@
# define __ARCH_WANT_SYS_OLD_MMAP
# define __ARCH_WANT_SYS_OLD_SELECT
+# define IA32_NR_syscalls (__NR_syscalls)
+
# else
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
-# define __ARCH_WANT_COMPAT_SYS_TIME
-# define __ARCH_WANT_SYS_UTIME32
+# include <asm/unistd_32_ia32.h>
+# define __ARCH_WANT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_COMPAT_STAT
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+# define X32_NR_syscalls (__NR_x32_syscalls)
+# define IA32_NR_syscalls (__NR_ia32_syscalls)
# endif
+# define NR_syscalls (__NR_syscalls)
+
# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
# define __ARCH_WANT_OLD_STAT
@@ -48,8 +50,8 @@
# define __ARCH_WANT_SYS_SIGPENDING
# define __ARCH_WANT_SYS_SIGPROCMASK
# define __ARCH_WANT_SYS_SOCKETCALL
-# define __ARCH_WANT_SYS_TIME
-# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_SYS_TIME32
+# define __ARCH_WANT_SYS_UTIME32
# define __ARCH_WANT_SYS_WAITPID
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 1f86e1b0a5cd..7cede4dc21f0 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
+#include <linux/rethook.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -15,14 +16,23 @@ struct unwind_state {
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
+#if defined(CONFIG_RETHOOK)
+ struct llist_node *kr_cur;
+#endif
bool error;
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
- struct pt_regs *regs;
+ struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
+ /*
+ * If non-NULL: The current frame is incomplete and doesn't contain a
+ * valid BP. When looking for the next frame, use this instead of the
+ * non-existent saved BP.
+ */
+ unsigned long *next_bp;
struct pt_regs *regs;
#else
unsigned long *sp;
@@ -93,6 +103,30 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
void *orc, size_t orc_size) {}
#endif
+static inline
+unsigned long unwind_recover_rethook(struct unwind_state *state,
+ unsigned long addr, unsigned long *addr_p)
+{
+#ifdef CONFIG_RETHOOK
+ if (is_rethook_trampoline(addr))
+ return rethook_find_ret_addr(state->task, (unsigned long)addr_p,
+ &state->kr_cur);
+#endif
+ return addr;
+}
+
+/* Recover the return address modified by rethook and ftrace_graph. */
+static inline
+unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+ unsigned long addr, unsigned long *addr_p)
+{
+ unsigned long ret;
+
+ ret = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ addr, addr_p);
+ return unwind_recover_rethook(state, ret, addr_p);
+}
+
/*
* This disables KASAN checking when reading a value from another task's stack,
* since the other task could be running on another CPU and could have poisoned
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 0bcdb1279361..8f4579c5a6f8 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -1,51 +1,26 @@
#ifndef _ASM_X86_UNWIND_HINTS_H
#define _ASM_X86_UNWIND_HINTS_H
+#include <linux/objtool.h>
+
#include "orc_types.h"
-#ifdef __ASSEMBLY__
-
-/*
- * In asm, there are two kinds of code: normal C-type callable functions and
- * the rest. The normal callable functions can be called by other code, and
- * don't do anything unusual with the stack. Such normal callable functions
- * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
- * category. In this case, no special debugging annotations are needed because
- * objtool can automatically generate the ORC data for the ORC unwinder to read
- * at runtime.
- *
- * Anything which doesn't fall into the above category, such as syscall and
- * interrupt handlers, tends to not be called directly by other functions, and
- * often does unusual non-C-function-type things with the stack pointer. Such
- * code needs to be annotated such that objtool can understand it. The
- * following CFI hint macros are for this type of code.
- *
- * These macros provide hints to objtool about the state of the stack at each
- * instruction. Objtool starts from the hints and follows the code flow,
- * making automatic CFI adjustments when it sees pushes and pops, filling out
- * the debuginfo as necessary. It will also warn if it sees any
- * inconsistencies.
- */
-.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
-#ifdef CONFIG_STACK_VALIDATION
-.Lunwind_hint_ip_\@:
- .pushsection .discard.unwind_hints
- /* struct unwind_hint */
- .long .Lunwind_hint_ip_\@ - .
- .short \sp_offset
- .byte \sp_reg
- .byte \type
- .byte \end
- .balign 4
- .popsection
-#endif
+#ifdef __ASSEMBLER__
+
+.macro UNWIND_HINT_END_OF_STACK
+ UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
+.endm
+
+.macro UNWIND_HINT_UNDEFINED
+ UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
.endm
-.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
+.macro UNWIND_HINT_ENTRY
+ VALIDATE_UNRET_BEGIN
+ UNWIND_HINT_END_OF_STACK
.endm
-.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1
.if \base == %rsp
.if \indirect
.set sp_reg, ORC_REG_SP_INDIRECT
@@ -66,44 +41,53 @@
.set sp_offset, \offset
- .if \iret
- .set type, ORC_TYPE_REGS_IRET
+ .if \partial
+ .set type, UNWIND_HINT_TYPE_REGS_PARTIAL
.elseif \extra == 0
- .set type, ORC_TYPE_REGS_IRET
+ .set type, UNWIND_HINT_TYPE_REGS_PARTIAL
.set sp_offset, \offset + (16*8)
.else
- .set type, ORC_TYPE_REGS
+ .set type, UNWIND_HINT_TYPE_REGS
.endif
- UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1
+ UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal
.endm
-.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
- UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.macro UNWIND_HINT_IRET_ENTRY base=%rsp offset=0 signal=1
+ VALIDATE_UNRET_BEGIN
+ UNWIND_HINT_IRET_REGS base=\base offset=\offset signal=\signal
.endm
-.macro UNWIND_HINT_FUNC sp_offset=8
- UNWIND_HINT sp_offset=\sp_offset
+.macro UNWIND_HINT_FUNC
+ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
-#else /* !__ASSEMBLY__ */
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
+#else
+
+#define UNWIND_HINT_UNDEFINED \
+ UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0)
-#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
- "987: \n\t" \
- ".pushsection .discard.unwind_hints\n\t" \
- /* struct unwind_hint */ \
- ".long 987b - .\n\t" \
- ".short " __stringify(sp_offset) "\n\t" \
- ".byte " __stringify(sp_reg) "\n\t" \
- ".byte " __stringify(type) "\n\t" \
- ".byte " __stringify(end) "\n\t" \
- ".balign 4 \n\t" \
- ".popsection\n\t"
+#define UNWIND_HINT_FUNC \
+ UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0)
-#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
+#define UNWIND_HINT_SAVE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
-#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
+#define UNWIND_HINT_RESTORE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h
new file mode 100644
index 000000000000..12064284bc4e
--- /dev/null
+++ b/arch/x86/include/asm/unwind_user.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UNWIND_USER_H
+#define _ASM_X86_UNWIND_USER_H
+
+#ifdef CONFIG_HAVE_UNWIND_USER_FP
+
+#include <asm/ptrace.h>
+#include <asm/uprobes.h>
+
+#define ARCH_INIT_USER_FP_FRAME(ws) \
+ .cfa_off = 2*(ws), \
+ .ra_off = -1*(ws), \
+ .fp_off = -2*(ws), \
+ .use_fp = true,
+
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \
+ .cfa_off = 1*(ws), \
+ .ra_off = -1*(ws), \
+ .fp_off = 0, \
+ .use_fp = false,
+
+static inline int unwind_user_word_size(struct pt_regs *regs)
+{
+ /* We can't unwind VM86 stacks */
+ if (regs->flags & X86_VM_MASK)
+ return 0;
+#ifdef CONFIG_X86_64
+ if (!user_64bit_mode(regs))
+ return sizeof(int);
+#endif
+ return sizeof(long);
+}
+
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+ return is_uprobe_at_func_entry(regs);
+}
+
+#endif /* CONFIG_HAVE_UNWIND_USER_FP */
+
+#endif /* _ASM_X86_UNWIND_USER_H */
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index d8bfa98fca98..362210c79998 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_UPROBES_H
#define _ASM_UPROBES_H
/*
* User-space Probes (UProbes) for x86
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2008-2011
* Authors:
* Srikar Dronamraju
@@ -33,6 +20,11 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
+enum {
+ ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0,
+ ARCH_UPROBE_FLAG_OPTIMIZE_FAIL = 1,
+};
+
struct uprobe_xol_ops;
struct arch_uprobe {
@@ -58,6 +50,8 @@ struct arch_uprobe {
u8 ilen;
} push;
};
+
+ unsigned long flags;
};
struct arch_uprobe_task {
@@ -68,4 +62,13 @@ struct arch_uprobe_task {
unsigned int saved_tf;
};
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_UPROBES */
+
#endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h
index d72c3d66e94f..8963915e533f 100644
--- a/arch/x86/include/asm/user_32.h
+++ b/arch/x86/include/asm/user_32.h
@@ -124,9 +124,5 @@ struct user{
char u_comm[32]; /* User command that was responsible */
int u_debugreg[8];
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _ASM_X86_USER_32_H */
diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h
index db909923611c..1dd10f07ccd6 100644
--- a/arch/x86/include/asm/user_64.h
+++ b/arch/x86/include/asm/user_64.h
@@ -130,9 +130,5 @@ struct user {
unsigned long error_code; /* CPU error code or 0 */
unsigned long fault_address; /* CR3 or 0 */
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _ASM_X86_USER_64_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc6186..6989b824fd32 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -1,27 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_UV_BIOS_H
#define _ASM_X86_UV_BIOS_H
/*
* UV BIOS layer definitions.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson <rja@sgi.com>
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
+#include <linux/efi.h>
#include <linux/rtc.h>
/*
@@ -40,6 +29,20 @@ enum uv_bios_cmd {
UV_BIOS_SET_LEGACY_VGA_TARGET
};
+#define UV_BIOS_EXTRA 0x10000
+#define UV_BIOS_GET_PCI_TOPOLOGY 0x10001
+#define UV_BIOS_GET_GEOINFO 0x10003
+
+#define UV_BIOS_EXTRA_OP_MEM_COPYIN 0x1000
+#define UV_BIOS_EXTRA_OP_MEM_COPYOUT 0x2000
+#define UV_BIOS_EXTRA_OP_MASK 0x0fff
+#define UV_BIOS_EXTRA_GET_HEAPSIZE 1
+#define UV_BIOS_EXTRA_INSTALL_HEAP 2
+#define UV_BIOS_EXTRA_MASTER_NASID 3
+#define UV_BIOS_EXTRA_OBJECT_COUNT (10|UV_BIOS_EXTRA_OP_MEM_COPYOUT)
+#define UV_BIOS_EXTRA_ENUM_OBJECTS (12|UV_BIOS_EXTRA_OP_MEM_COPYOUT)
+#define UV_BIOS_EXTRA_ENUM_PORTS (13|UV_BIOS_EXTRA_OP_MEM_COPYOUT)
+
/*
* Status values returned from a BIOS call.
*/
@@ -48,7 +51,8 @@ enum {
BIOS_STATUS_SUCCESS = 0,
BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
BIOS_STATUS_EINVAL = -EINVAL,
- BIOS_STATUS_UNAVAIL = -EBUSY
+ BIOS_STATUS_UNAVAIL = -EBUSY,
+ BIOS_STATUS_ABORT = -EINTR,
};
/* Address map parameters */
@@ -83,18 +87,27 @@ struct uv_gam_range_entry {
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};
+#define UV_AT_SIZE 8 /* 7 character arch type + NULL char */
+struct uv_arch_type_entry {
+ char archtype[UV_AT_SIZE];
+};
+
#define UV_SYSTAB_SIG "UVST"
-#define UV_SYSTAB_VERSION_1 1 /* UV1/2/3 BIOS version */
+#define UV_SYSTAB_VERSION_1 1 /* UV2/3 BIOS version */
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
+#define UV_SYSTAB_VERSION_UV5 0x500 /* UV5 GAM base version */
+#define UV_SYSTAB_VERSION_UV5_LATEST UV_SYSTAB_VERSION_UV5
+
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
#define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */
-#define UV_SYSTAB_TYPE_MAX 3
+#define UV_SYSTAB_TYPE_ARCH_TYPE 3 /* UV arch type */
+#define UV_SYSTAB_TYPE_MAX 4
/*
* The UV system table describes specific firmware
@@ -103,7 +116,8 @@ struct uv_gam_range_entry {
struct uv_systab {
char signature[4]; /* must be UV_SYSTAB_SIG */
u32 revision; /* distinguish different firmware revs */
- u64 function; /* BIOS runtime callback function ptr */
+ u64 (__efiapi *function)(enum uv_bios_cmd, ...);
+ /* BIOS runtime callback function ptr */
u32 size; /* systab size (starting with _VERSION_UV4) */
struct {
u32 type:8; /* type of entry */
@@ -111,6 +125,32 @@ struct uv_systab {
} entry[1]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;
+
+#define UV_BIOS_MAXSTRING 128
+struct uv_bios_hub_info {
+ unsigned int id;
+ union {
+ struct {
+ unsigned long long this_part:1;
+ unsigned long long is_shared:1;
+ unsigned long long is_disabled:1;
+ } fields;
+ struct {
+ unsigned long long flags;
+ unsigned long long reserved;
+ } b;
+ } f;
+ char name[UV_BIOS_MAXSTRING];
+ char location[UV_BIOS_MAXSTRING];
+ unsigned int ports;
+};
+
+struct uv_bios_port_info {
+ unsigned int port;
+ unsigned int conn_id;
+ unsigned int conn_port;
+};
+
/* (... end of definitions from UV BIOS ...) */
enum {
@@ -135,13 +175,6 @@ enum uv_memprotect {
UV_MEMPROT_ALLOW_RW
};
-/*
- * bios calls have 6 parameters
- */
-extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
@@ -151,11 +184,17 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
-#ifdef CONFIG_EFI
-extern void uv_bios_init(void);
-#else
-void uv_bios_init(void) { }
-#endif
+extern s64 uv_bios_get_master_nasid(u64 sz, u64 *nasid);
+extern s64 uv_bios_get_heapsize(u64 nasid, u64 sz, u64 *heap_sz);
+extern s64 uv_bios_install_heap(u64 nasid, u64 sz, u64 *heap);
+extern s64 uv_bios_obj_count(u64 nasid, u64 sz, u64 *objcnt);
+extern s64 uv_bios_enum_objs(u64 nasid, u64 sz, u64 *objbuf);
+extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 sz, u64 *portbuf);
+extern s64 uv_bios_get_geoinfo(u64 nasid, u64 sz, u64 *geo);
+extern s64 uv_bios_get_pci_topology(u64 sz, u64 *buf);
+
+extern int uv_bios_init(void);
+extern unsigned long get_uv_systab_phys(bool msg);
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
@@ -163,8 +202,14 @@ extern long sn_partition_id;
extern long sn_coherency_id;
extern long sn_region_size;
extern long system_serial_number;
-#define uv_partition_coherence_id() (sn_coherency_id)
+extern ssize_t uv_get_archtype(char *buf, int len);
+extern int uv_get_hubless_system(void);
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
+/*
+ * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
+ */
+extern struct semaphore __efi_uv_runtime_lock;
+
#endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e60c45fd3679..648eb23fe7f0 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -2,41 +2,42 @@
#ifndef _ASM_X86_UV_UV_H
#define _ASM_X86_UV_UV_H
-#include <asm/tlbflush.h>
-
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-
-struct cpumask;
-struct mm_struct;
+enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC};
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
+#define UV_PROC_NODE "sgi_uv"
+
+static inline int uv(int uvtype)
+{
+ /* uv(0) is "any" */
+ if (uvtype >= 0 && uvtype <= 30)
+ return 1 << uvtype;
+ return 1;
+}
+
+extern unsigned long uv_systab_phys;
+
extern enum uv_system_type get_uv_system_type(void);
static inline bool is_early_uv_system(void)
{
- return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+ return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
}
extern int is_uv_system(void);
-extern int is_uv_hubless(void);
+extern int is_uv_hubbed(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
-extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info);
-#else /* X86_UV */
+#else /* !X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
-static inline int is_uv_hubless(void) { return 0; }
+static inline int is_uv_hubbed(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
-static inline const struct cpumask *
-uv_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info)
-{ return cpumask; }
#endif /* X86_UV */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
deleted file mode 100644
index 7803114aa140..000000000000
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ /dev/null
@@ -1,861 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * SGI UV Broadcast Assist Unit definitions
- *
- * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
- */
-
-#ifndef _ASM_X86_UV_UV_BAU_H
-#define _ASM_X86_UV_UV_BAU_H
-
-#include <linux/bitmap.h>
-#define BITSPERBYTE 8
-
-/*
- * Broadcast Assist Unit messaging structures
- *
- * Selective Broadcast activations are induced by software action
- * specifying a particular 8-descriptor "set" via a 6-bit index written
- * to an MMR.
- * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
- * each 6-bit index value. These descriptor sets are mapped in sequence
- * starting with set 0 located at the address specified in the
- * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
- * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
- *
- * We will use one set for sending BAU messages from each of the
- * cpu's on the uvhub.
- *
- * TLB shootdown will use the first of the 8 descriptors of each set.
- * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
- */
-
-#define MAX_CPUS_PER_UVHUB 128
-#define MAX_CPUS_PER_SOCKET 64
-#define ADP_SZ 64 /* hardware-provided max. */
-#define UV_CPUS_PER_AS 32 /* hardware-provided max. */
-#define ITEMS_PER_DESC 8
-/* the 'throttle' to prevent the hardware stay-busy bug */
-#define MAX_BAU_CONCURRENT 3
-#define UV_ACT_STATUS_MASK 0x3
-#define UV_ACT_STATUS_SIZE 2
-#define UV_DISTRIBUTION_SIZE 256
-#define UV_SW_ACK_NPENDING 8
-#define UV1_NET_ENDPOINT_INTD 0x38
-#define UV2_NET_ENDPOINT_INTD 0x28
-#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \
- UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
-#define UV_PAYLOADQ_GNODE_SHIFT 49
-#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
-#define UV_BAU_BASENAME "sgi_uv/bau_tunables"
-#define UV_BAU_TUNABLES_DIR "sgi_uv"
-#define UV_BAU_TUNABLES_FILE "bau_tunables"
-#define WHITESPACE " \t\n"
-#define cpubit_isset(cpu, bau_local_cpumask) \
- test_bit((cpu), (bau_local_cpumask).bits)
-
-/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */
-/*
- * UV2: Bit 19 selects between
- * (0): 10 microsecond timebase and
- * (1): 80 microseconds
- * we're using 560us, similar to UV1: 65 units of 10us
- */
-#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
-#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
-
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \
- UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \
- UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD)
-/* assuming UV3 is the same */
-
-#define BAU_MISC_CONTROL_MULT_MASK 3
-
-#define UVH_AGING_PRESCALE_SEL 0x000000b000UL
-/* [30:28] URGENCY_7 an index into a table of times */
-#define BAU_URGENCY_7_SHIFT 28
-#define BAU_URGENCY_7_MASK 7
-
-#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL
-/* [45:40] BAU - BAU transaction timeout select - a multiplier */
-#define BAU_TRANS_SHIFT 40
-#define BAU_TRANS_MASK 0x3f
-
-/*
- * shorten some awkward names
- */
-#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT
-#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
-#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
-#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT
-#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-#define write_gmmr uv_write_global_mmr64
-#define write_lmmr uv_write_local_mmr
-#define read_lmmr uv_read_local_mmr
-#define read_gmmr uv_read_global_mmr64
-
-/*
- * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
- */
-#define DS_IDLE 0
-#define DS_ACTIVE 1
-#define DS_DESTINATION_TIMEOUT 2
-#define DS_SOURCE_TIMEOUT 3
-/*
- * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2
- * values 1 and 3 will not occur
- * Decoded meaning ERROR BUSY AUX ERR
- * ------------------------------- ---- ----- -------
- * IDLE 0 0 0
- * BUSY (active) 0 1 0
- * SW Ack Timeout (destination) 1 0 0
- * SW Ack INTD rejected (strong NACK) 1 0 1
- * Source Side Time Out Detected 1 1 0
- * Destination Side PUT Failed 1 1 1
- */
-#define UV2H_DESC_IDLE 0
-#define UV2H_DESC_BUSY 2
-#define UV2H_DESC_DEST_TIMEOUT 4
-#define UV2H_DESC_DEST_STRONG_NACK 5
-#define UV2H_DESC_SOURCE_TIMEOUT 6
-#define UV2H_DESC_DEST_PUT_ERR 7
-
-/*
- * delay for 'plugged' timeout retries, in microseconds
- */
-#define PLUGGED_DELAY 10
-
-/*
- * threshholds at which to use IPI to free resources
- */
-/* after this # consecutive 'plugged' timeouts, use IPI to release resources */
-#define PLUGSB4RESET 100
-/* after this many consecutive timeouts, use IPI to release resources */
-#define TIMEOUTSB4RESET 1
-/* at this number uses of IPI to release resources, giveup the request */
-#define IPI_RESET_LIMIT 1
-/* after this # consecutive successes, bump up the throttle if it was lowered */
-#define COMPLETE_THRESHOLD 5
-/* after this # of giveups (fall back to kernel IPI's) disable the use of
- the BAU for a period of time */
-#define GIVEUP_LIMIT 100
-
-#define UV_LB_SUBNODEID 0x10
-
-/* these two are the same for UV1 and UV2: */
-#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK
-/* 4 bits of software ack period */
-#define UV2_ACK_MASK 0x7UL
-#define UV2_ACK_UNITS_SHFT 3
-#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-
-/*
- * number of entries in the destination side payload queue
- */
-#define DEST_Q_SIZE 20
-/*
- * number of destination side software ack resources
- */
-#define DEST_NUM_RESOURCES 8
-/*
- * completion statuses for sending a TLB flush message
- */
-#define FLUSH_RETRY_PLUGGED 1
-#define FLUSH_RETRY_TIMEOUT 2
-#define FLUSH_GIVEUP 3
-#define FLUSH_COMPLETE 4
-
-/*
- * tuning the action when the numalink network is extremely delayed
- */
-#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in
- microseconds */
-#define CONGESTED_REPS 10 /* long delays averaged over
- this many broadcasts */
-#define DISABLED_PERIOD 10 /* time for the bau to be
- disabled, in seconds */
-/* see msg_type: */
-#define MSG_NOOP 0
-#define MSG_REGULAR 1
-#define MSG_RETRY 2
-
-#define BAU_DESC_QUALIFIER 0x534749
-
-enum uv_bau_version {
- UV_BAU_V1 = 1,
- UV_BAU_V2,
- UV_BAU_V3,
- UV_BAU_V4,
-};
-
-/*
- * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
- * If the 'multilevel' flag in the header portion of the descriptor
- * has been set to 0, then endpoint multi-unicast mode is selected.
- * The distribution specification (32 bytes) is interpreted as a 256-bit
- * distribution vector. Adjacent bits correspond to consecutive even numbered
- * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nasid' field of the header corresponds to the
- * destination nodeID associated with that specified bit.
- */
-struct pnmask {
- unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
-};
-
-/*
- * mask of cpu's on a uvhub
- * (during initialization we need to check that unsigned long has
- * enough bits for max. cpu's per uvhub)
- */
-struct bau_local_cpumask {
- unsigned long bits;
-};
-
-/*
- * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
- * only 12 bytes (96 bits) of the payload area are usable.
- * An additional 3 bytes (bits 27:4) of the header address are carried
- * to the next bytes of the destination payload queue.
- * And an additional 2 bytes of the header Suppl_A field are also
- * carried to the destination payload queue.
- * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
- * of the destination payload queue, which is written by the hardware
- * with the s/w ack resource bit vector.
- * [ effective message contents (16 bytes (128 bits) maximum), not counting
- * the s/w ack bit vector ]
- */
-
-/**
- * struct uv1_2_3_bau_msg_payload - defines payload for INTD transactions
- * @address: Signifies a page or all TLB's of the cpu
- * @sending_cpu: CPU from which the message originates
- * @acknowledge_count: CPUs on the destination Hub that received the interrupt
- */
-struct uv1_2_3_bau_msg_payload {
- u64 address;
- u16 sending_cpu;
- u16 acknowledge_count;
-};
-
-/**
- * struct uv4_bau_msg_payload - defines payload for INTD transactions
- * @address: Signifies a page or all TLB's of the cpu
- * @sending_cpu: CPU from which the message originates
- * @acknowledge_count: CPUs on the destination Hub that received the interrupt
- * @qualifier: Set by source to verify origin of INTD broadcast
- */
-struct uv4_bau_msg_payload {
- u64 address;
- u16 sending_cpu;
- u16 acknowledge_count;
- u32 reserved:8;
- u32 qualifier:24;
-};
-
-/*
- * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
- * see table 4.2.3.0.1 in broacast_assist spec.
- */
-struct uv1_bau_msg_header {
- unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
- /* bits 5:0 */
- unsigned int base_dest_nasid:15; /* nasid of the first bit */
- /* bits 20:6 */ /* in uvhub map */
- unsigned int command:8; /* message type */
- /* bits 28:21 */
- /* 0x38: SN3net EndPoint Message */
- unsigned int rsvd_1:3; /* must be zero */
- /* bits 31:29 */
- /* int will align on 32 bits */
- unsigned int rsvd_2:9; /* must be zero */
- /* bits 40:32 */
- /* Suppl_A is 56-41 */
- unsigned int sequence:16; /* message sequence number */
- /* bits 56:41 */ /* becomes bytes 16-17 of msg */
- /* Address field (96:57) is
- never used as an address
- (these are address bits
- 42:3) */
-
- unsigned int rsvd_3:1; /* must be zero */
- /* bit 57 */
- /* address bits 27:4 are payload */
- /* these next 24 (58-81) bits become bytes 12-14 of msg */
- /* bits 65:58 land in byte 12 */
- unsigned int replied_to:1; /* sent as 0 by the source to
- byte 12 */
- /* bit 58 */
- unsigned int msg_type:3; /* software type of the
- message */
- /* bits 61:59 */
- unsigned int canceled:1; /* message canceled, resource
- is to be freed*/
- /* bit 62 */
- unsigned int payload_1a:1; /* not currently used */
- /* bit 63 */
- unsigned int payload_1b:2; /* not currently used */
- /* bits 65:64 */
-
- /* bits 73:66 land in byte 13 */
- unsigned int payload_1ca:6; /* not currently used */
- /* bits 71:66 */
- unsigned int payload_1c:2; /* not currently used */
- /* bits 73:72 */
-
- /* bits 81:74 land in byte 14 */
- unsigned int payload_1d:6; /* not currently used */
- /* bits 79:74 */
- unsigned int payload_1e:2; /* not currently used */
- /* bits 81:80 */
-
- unsigned int rsvd_4:7; /* must be zero */
- /* bits 88:82 */
- unsigned int swack_flag:1; /* software acknowledge flag */
- /* bit 89 */
- /* INTD trasactions at
- destination are to wait for
- software acknowledge */
- unsigned int rsvd_5:6; /* must be zero */
- /* bits 95:90 */
- unsigned int rsvd_6:5; /* must be zero */
- /* bits 100:96 */
- unsigned int int_both:1; /* if 1, interrupt both sockets
- on the uvhub */
- /* bit 101*/
- unsigned int fairness:3; /* usually zero */
- /* bits 104:102 */
- unsigned int multilevel:1; /* multi-level multicast
- format */
- /* bit 105 */
- /* 0 for TLB: endpoint multi-unicast messages */
- unsigned int chaining:1; /* next descriptor is part of
- this activation*/
- /* bit 106 */
- unsigned int rsvd_7:21; /* must be zero */
- /* bits 127:107 */
-};
-
-/*
- * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
- * see figure 9-2 of harp_sys.pdf
- * assuming UV3 is the same
- */
-struct uv2_3_bau_msg_header {
- unsigned int base_dest_nasid:15; /* nasid of the first bit */
- /* bits 14:0 */ /* in uvhub map */
- unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
- /* bits 19:15 */
- unsigned int rsvd_1:1; /* must be zero */
- /* bit 20 */
- /* Address bits 59:21 */
- /* bits 25:2 of address (44:21) are payload */
- /* these next 24 bits become bytes 12-14 of msg */
- /* bits 28:21 land in byte 12 */
- unsigned int replied_to:1; /* sent as 0 by the source to
- byte 12 */
- /* bit 21 */
- unsigned int msg_type:3; /* software type of the
- message */
- /* bits 24:22 */
- unsigned int canceled:1; /* message canceled, resource
- is to be freed*/
- /* bit 25 */
- unsigned int payload_1:3; /* not currently used */
- /* bits 28:26 */
-
- /* bits 36:29 land in byte 13 */
- unsigned int payload_2a:3; /* not currently used */
- unsigned int payload_2b:5; /* not currently used */
- /* bits 36:29 */
-
- /* bits 44:37 land in byte 14 */
- unsigned int payload_3:8; /* not currently used */
- /* bits 44:37 */
-
- unsigned int rsvd_2:7; /* reserved */
- /* bits 51:45 */
- unsigned int swack_flag:1; /* software acknowledge flag */
- /* bit 52 */
- unsigned int rsvd_3a:3; /* must be zero */
- unsigned int rsvd_3b:8; /* must be zero */
- unsigned int rsvd_3c:8; /* must be zero */
- unsigned int rsvd_3d:3; /* must be zero */
- /* bits 74:53 */
- unsigned int fairness:3; /* usually zero */
- /* bits 77:75 */
-
- unsigned int sequence:16; /* message sequence number */
- /* bits 93:78 Suppl_A */
- unsigned int chaining:1; /* next descriptor is part of
- this activation*/
- /* bit 94 */
- unsigned int multilevel:1; /* multi-level multicast
- format */
- /* bit 95 */
- unsigned int rsvd_4:24; /* ordered / source node /
- source subnode / aging
- must be zero */
- /* bits 119:96 */
- unsigned int command:8; /* message type */
- /* bits 127:120 */
-};
-
-/*
- * The activation descriptor:
- * The format of the message to send, plus all accompanying control
- * Should be 64 bytes
- */
-struct bau_desc {
- struct pnmask distribution;
- /*
- * message template, consisting of header and payload:
- */
- union bau_msg_header {
- struct uv1_bau_msg_header uv1_hdr;
- struct uv2_3_bau_msg_header uv2_3_hdr;
- } header;
-
- union bau_payload_header {
- struct uv1_2_3_bau_msg_payload uv1_2_3;
- struct uv4_bau_msg_payload uv4;
- } payload;
-};
-/* UV1:
- * -payload-- ---------header------
- * bytes 0-11 bits 41-56 bits 58-81
- * A B (2) C (3)
- *
- * A/B/C are moved to:
- * A C B
- * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
- * ------------payload queue-----------
- */
-/* UV2:
- * -payload-- ---------header------
- * bytes 0-11 bits 70-78 bits 21-44
- * A B (2) C (3)
- *
- * A/B/C are moved to:
- * A C B
- * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
- * ------------payload queue-----------
- */
-
-/*
- * The payload queue on the destination side is an array of these.
- * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
- * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
- * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
- * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
- * swack_vec and payload_2)
- * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
- * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
- * operation."
- */
-struct bau_pq_entry {
- unsigned long address; /* signifies a page or all TLB's
- of the cpu */
- /* 64 bits, bytes 0-7 */
- unsigned short sending_cpu; /* cpu that sent the message */
- /* 16 bits, bytes 8-9 */
- unsigned short acknowledge_count; /* filled in by destination */
- /* 16 bits, bytes 10-11 */
- /* these next 3 bytes come from bits 58-81 of the message header */
- unsigned short replied_to:1; /* sent as 0 by the source */
- unsigned short msg_type:3; /* software message type */
- unsigned short canceled:1; /* sent as 0 by the source */
- unsigned short unused1:3; /* not currently using */
- /* byte 12 */
- unsigned char unused2a; /* not currently using */
- /* byte 13 */
- unsigned char unused2; /* not currently using */
- /* byte 14 */
- unsigned char swack_vec; /* filled in by the hardware */
- /* byte 15 (bits 127:120) */
- unsigned short sequence; /* message sequence number */
- /* bytes 16-17 */
- unsigned char unused4[2]; /* not currently using bytes 18-19 */
- /* bytes 18-19 */
- int number_of_cpus; /* filled in at destination */
- /* 32 bits, bytes 20-23 (aligned) */
- unsigned char unused5[8]; /* not using */
- /* bytes 24-31 */
-};
-
-struct msg_desc {
- struct bau_pq_entry *msg;
- int msg_slot;
- struct bau_pq_entry *queue_first;
- struct bau_pq_entry *queue_last;
-};
-
-struct reset_args {
- int sender;
-};
-
-/*
- * This structure is allocated per_cpu for UV TLB shootdown statistics.
- */
-struct ptc_stats {
- /* sender statistics */
- unsigned long s_giveup; /* number of fall backs to
- IPI-style flushes */
- unsigned long s_requestor; /* number of shootdown
- requests */
- unsigned long s_stimeout; /* source side timeouts */
- unsigned long s_dtimeout; /* destination side timeouts */
- unsigned long s_strongnacks; /* number of strong nack's */
- unsigned long s_time; /* time spent in sending side */
- unsigned long s_retriesok; /* successful retries */
- unsigned long s_ntargcpu; /* total number of cpu's
- targeted */
- unsigned long s_ntargself; /* times the sending cpu was
- targeted */
- unsigned long s_ntarglocals; /* targets of cpus on the local
- blade */
- unsigned long s_ntargremotes; /* targets of cpus on remote
- blades */
- unsigned long s_ntarglocaluvhub; /* targets of the local hub */
- unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
- unsigned long s_ntarguvhub; /* total number of uvhubs
- targeted */
- unsigned long s_ntarguvhub16; /* number of times target
- hubs >= 16*/
- unsigned long s_ntarguvhub8; /* number of times target
- hubs >= 8 */
- unsigned long s_ntarguvhub4; /* number of times target
- hubs >= 4 */
- unsigned long s_ntarguvhub2; /* number of times target
- hubs >= 2 */
- unsigned long s_ntarguvhub1; /* number of times target
- hubs == 1 */
- unsigned long s_resets_plug; /* ipi-style resets from plug
- state */
- unsigned long s_resets_timeout; /* ipi-style resets from
- timeouts */
- unsigned long s_busy; /* status stayed busy past
- s/w timer */
- unsigned long s_throttles; /* waits in throttle */
- unsigned long s_retry_messages; /* retry broadcasts */
- unsigned long s_bau_reenabled; /* for bau enable/disable */
- unsigned long s_bau_disabled; /* for bau enable/disable */
- unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
- unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
- unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
- unsigned long s_overipilimit; /* over the ipi reset limit */
- unsigned long s_giveuplimit; /* disables, over giveup limit*/
- unsigned long s_enters; /* entries to the driver */
- unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
- unsigned long s_plugged; /* plugged by h/w bug*/
- unsigned long s_congested; /* giveup on long wait */
- /* destination statistics */
- unsigned long d_alltlb; /* times all tlb's on this
- cpu were flushed */
- unsigned long d_onetlb; /* times just one tlb on this
- cpu was flushed */
- unsigned long d_multmsg; /* interrupts with multiple
- messages */
- unsigned long d_nomsg; /* interrupts with no message */
- unsigned long d_time; /* time spent on destination
- side */
- unsigned long d_requestee; /* number of messages
- processed */
- unsigned long d_retries; /* number of retry messages
- processed */
- unsigned long d_canceled; /* number of messages canceled
- by retries */
- unsigned long d_nocanceled; /* retries that found nothing
- to cancel */
- unsigned long d_resets; /* number of ipi-style requests
- processed */
- unsigned long d_rcanceled; /* number of messages canceled
- by resets */
-};
-
-struct tunables {
- int *tunp;
- int deflt;
-};
-
-struct hub_and_pnode {
- short uvhub;
- short pnode;
-};
-
-struct socket_desc {
- short num_cpus;
- short cpu_number[MAX_CPUS_PER_SOCKET];
-};
-
-struct uvhub_desc {
- unsigned short socket_mask;
- short num_cpus;
- short uvhub;
- short pnode;
- struct socket_desc socket[2];
-};
-
-/**
- * struct bau_control
- * @status_mmr: location of status mmr, determined by uvhub_cpu
- * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu
- *
- * Per-cpu control struct containing CPU topology information and BAU tuneables.
- */
-struct bau_control {
- struct bau_desc *descriptor_base;
- struct bau_pq_entry *queue_first;
- struct bau_pq_entry *queue_last;
- struct bau_pq_entry *bau_msg_head;
- struct bau_control *uvhub_master;
- struct bau_control *socket_master;
- struct ptc_stats *statp;
- cpumask_t *cpumask;
- unsigned long timeout_interval;
- unsigned long set_bau_on_time;
- atomic_t active_descriptor_count;
- int plugged_tries;
- int timeout_tries;
- int ipi_attempts;
- int conseccompletes;
- u64 status_mmr;
- int status_index;
- bool nobau;
- short baudisabled;
- short cpu;
- short osnode;
- short uvhub_cpu;
- short uvhub;
- short uvhub_version;
- short cpus_in_socket;
- short cpus_in_uvhub;
- short partition_base_pnode;
- short busy; /* all were busy (war) */
- unsigned short message_number;
- unsigned short uvhub_quiesce;
- short socket_acknowledge_count[DEST_Q_SIZE];
- cycles_t send_message;
- cycles_t period_end;
- cycles_t period_time;
- spinlock_t uvhub_lock;
- spinlock_t queue_lock;
- spinlock_t disable_lock;
- /* tunables */
- int max_concurr;
- int max_concurr_const;
- int plugged_delay;
- int plugsb4reset;
- int timeoutsb4reset;
- int ipi_reset_limit;
- int complete_threshold;
- int cong_response_us;
- int cong_reps;
- cycles_t disabled_period;
- int period_giveups;
- int giveup_limit;
- long period_requests;
- struct hub_and_pnode *thp;
-};
-
-/* Abstracted BAU functions */
-struct bau_operations {
- unsigned long (*read_l_sw_ack)(void);
- unsigned long (*read_g_sw_ack)(int pnode);
- unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
- void (*write_l_sw_ack)(unsigned long mmr);
- void (*write_g_sw_ack)(int pnode, unsigned long mmr);
- void (*write_payload_first)(int pnode, unsigned long mmr);
- void (*write_payload_last)(int pnode, unsigned long mmr);
- int (*wait_completion)(struct bau_desc*,
- struct bau_control*, long try);
-};
-
-static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
-}
-
-static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image);
-}
-
-static inline void write_mmr_activation(unsigned long index)
-{
- write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-}
-
-static inline void write_gmmr_activation(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image);
-}
-
-static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
-}
-
-static inline unsigned long read_mmr_misc_control(int pnode)
-{
- return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL);
-}
-
-static inline void write_mmr_sw_ack(unsigned long mr)
-{
- uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline unsigned long read_mmr_sw_ack(void)
-{
- return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline unsigned long read_gmmr_sw_ack(int pnode)
-{
- return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline void write_mmr_proc_sw_ack(unsigned long mr)
-{
- uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline unsigned long read_mmr_proc_sw_ack(void)
-{
- return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline unsigned long read_gmmr_proc_sw_ack(int pnode)
-{
- return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline void write_mmr_data_config(int pnode, unsigned long mr)
-{
- uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
-}
-
-static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp)
-{
- return constant_test_bit(uvhub, &dstp->bits[0]);
-}
-static inline void bau_uvhub_set(int pnode, struct pnmask *dstp)
-{
- __set_bit(pnode, &dstp->bits[0]);
-}
-static inline void bau_uvhubs_clear(struct pnmask *dstp,
- int nbits)
-{
- bitmap_zero(&dstp->bits[0], nbits);
-}
-static inline int bau_uvhub_weight(struct pnmask *dstp)
-{
- return bitmap_weight((unsigned long *)&dstp->bits[0],
- UV_DISTRIBUTION_SIZE);
-}
-
-static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
-{
- bitmap_zero(&dstp->bits, nbits);
-}
-
-extern void uv_bau_message_intr1(void);
-#ifdef CONFIG_TRACING
-#define trace_uv_bau_message_intr1 uv_bau_message_intr1
-#endif
-extern void uv_bau_timeout_intr1(void);
-
-struct atomic_short {
- short counter;
-};
-
-/*
- * atomic_read_short - read a short atomic variable
- * @v: pointer of type atomic_short
- *
- * Atomically reads the value of @v.
- */
-static inline int atomic_read_short(const struct atomic_short *v)
-{
- return v->counter;
-}
-
-/*
- * atom_asr - add and return a short int
- * @i: short value to add
- * @v: pointer of type atomic_short
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline int atom_asr(short i, struct atomic_short *v)
-{
- short __i = i;
- asm volatile(LOCK_PREFIX "xaddw %0, %1"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
-}
-
-/*
- * conditionally add 1 to *v, unless *v is >= u
- * return 0 if we cannot add 1 to *v because it is >= u
- * return 1 if we can add 1 to *v because it is < u
- * the add is atomic
- *
- * This is close to atomic_add_unless(), but this allows the 'u' value
- * to be lowered below the current 'v'. atomic_add_unless can only stop
- * on equal.
- */
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
-{
- spin_lock(lock);
- if (atomic_read(v) >= u) {
- spin_unlock(lock);
- return 0;
- }
- atomic_inc(v);
- spin_unlock(lock);
- return 1;
-}
-
-#endif /* _ASM_X86_UV_UV_BAU_H */
diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h
new file mode 100644
index 000000000000..027a9258dbca
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv_geo.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2020 Hewlett Packard Enterprise Development LP. All rights reserved.
+ */
+
+#ifndef _ASM_UV_GEO_H
+#define _ASM_UV_GEO_H
+
+/* Type declarations */
+
+/* Size of a geoid_s structure (must be before decl. of geoid_u) */
+#define GEOID_SIZE 8
+
+/* Fields common to all substructures */
+struct geo_common_s {
+ unsigned char type; /* What type of h/w is named by this geoid_s */
+ unsigned char blade;
+ unsigned char slot; /* slot is IRU */
+ unsigned char upos;
+ unsigned char rack;
+};
+
+/* Additional fields for particular types of hardware */
+struct geo_node_s {
+ struct geo_common_s common; /* No additional fields needed */
+};
+
+struct geo_rtr_s {
+ struct geo_common_s common; /* No additional fields needed */
+};
+
+struct geo_iocntl_s {
+ struct geo_common_s common; /* No additional fields needed */
+};
+
+struct geo_pcicard_s {
+ struct geo_iocntl_s common;
+ char bus; /* Bus/widget number */
+ char slot; /* PCI slot number */
+};
+
+/* Subcomponents of a node */
+struct geo_cpu_s {
+ struct geo_node_s node;
+ unsigned char socket:4, /* Which CPU on the node */
+ thread:4;
+ unsigned char core;
+};
+
+struct geo_mem_s {
+ struct geo_node_s node;
+ char membus; /* The memory bus on the node */
+ char memslot; /* The memory slot on the bus */
+};
+
+union geoid_u {
+ struct geo_common_s common;
+ struct geo_node_s node;
+ struct geo_iocntl_s iocntl;
+ struct geo_pcicard_s pcicard;
+ struct geo_rtr_s rtr;
+ struct geo_cpu_s cpu;
+ struct geo_mem_s mem;
+ char padsize[GEOID_SIZE];
+};
+
+/* Defined constants */
+
+#define GEO_MAX_LEN 48
+
+#define GEO_TYPE_INVALID 0
+#define GEO_TYPE_MODULE 1
+#define GEO_TYPE_NODE 2
+#define GEO_TYPE_RTR 3
+#define GEO_TYPE_IOCNTL 4
+#define GEO_TYPE_IOCARD 5
+#define GEO_TYPE_CPU 6
+#define GEO_TYPE_MEM 7
+#define GEO_TYPE_MAX (GEO_TYPE_MEM+1)
+
+static inline int geo_rack(union geoid_u g)
+{
+ return (g.common.type == GEO_TYPE_INVALID) ?
+ -1 : g.common.rack;
+}
+
+static inline int geo_slot(union geoid_u g)
+{
+ return (g.common.type == GEO_TYPE_INVALID) ?
+ -1 : g.common.upos;
+}
+
+static inline int geo_blade(union geoid_u g)
+{
+ return (g.common.type == GEO_TYPE_INVALID) ?
+ -1 : g.common.blade * 2 + g.common.slot;
+}
+
+#endif /* _ASM_UV_GEO_H */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 44cf6d6deb7a..ea877fd83114 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,6 +5,7 @@
*
* SGI UV architectural definitions
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
*/
@@ -19,6 +20,7 @@
#include <linux/topology.h>
#include <asm/types.h>
#include <asm/percpu.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/bios.h>
#include <asm/irq_vectors.h>
@@ -128,17 +130,6 @@
*/
#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
-/* System Controller Interface Reg info */
-struct uv_scir_s {
- struct timer_list timer;
- unsigned long offset;
- unsigned long last;
- unsigned long idle_on;
- unsigned long idle_off;
- unsigned char state;
- unsigned char enabled;
-};
-
/* GAM (globally addressed memory) range table */
struct uv_gam_range_s {
u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */
@@ -154,6 +145,8 @@ struct uv_gam_range_s {
* available in the L3 cache on the cpu socket for the node.
*/
struct uv_hub_info_s {
+ unsigned int hub_type;
+ unsigned char hub_revision;
unsigned long global_mmr_base;
unsigned long global_mmr_shift;
unsigned long gpa_mask;
@@ -166,9 +159,9 @@ struct uv_hub_info_s {
unsigned char m_val;
unsigned char n_val;
unsigned char gr_table_len;
- unsigned char hub_revision;
unsigned char apic_pnode_shift;
unsigned char gpa_shift;
+ unsigned char nasid_shift;
unsigned char m_shift;
unsigned char n_lshift;
unsigned int gnode_extra;
@@ -184,22 +177,20 @@ struct uv_hub_info_s {
unsigned short nr_possible_cpus;
unsigned short nr_online_cpus;
short memory_nid;
+ unsigned short *node_to_socket;
};
/* CPU specific info with a pointer to the hub common info struct */
struct uv_cpu_info_s {
void *p_uv_hub_info;
unsigned char blade_cpu_id;
- struct uv_scir_s scir;
+ void *reserved;
};
DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
#define uv_cpu_info this_cpu_ptr(&__uv_cpu_info)
#define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu))
-#define uv_scir_info (&uv_cpu_info->scir)
-#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir)
-
/* Node specific hub common info struct */
extern void **__uv_hub_info_list;
static inline struct uv_hub_info_s *uv_hub_info_list(int node)
@@ -218,109 +209,51 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
}
-#define UV_HUB_INFO_VERSION 0x7150
-extern int uv_hub_info_version(void);
-static inline int uv_hub_info_check(int version)
+static inline int uv_hub_type(void)
{
- if (uv_hub_info_version() == version)
- return 0;
-
- pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n",
- uv_hub_info_version(), version);
+ return uv_hub_info->hub_type;
+}
- BUG(); /* Catastrophic - cannot continue on unknown UV system */
+static inline __init void uv_hub_type_set(int uvmask)
+{
+ uv_hub_info->hub_type = uvmask;
}
-#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION)
+
/*
* HUB revision ranges for each UV HUB architecture.
* This is a software convention - NOT the hardware revision numbers in
* the hub chip.
*/
-#define UV1_HUB_REVISION_BASE 1
#define UV2_HUB_REVISION_BASE 3
#define UV3_HUB_REVISION_BASE 5
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE 9
-#ifdef UV1_HUB_IS_SUPPORTED
-static inline int is_uv1_hub(void)
-{
- return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv1_hub(void)
-{
- return 0;
-}
-#endif
+static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
+static inline int is_uv1_hub(void) { return 0; }
+static inline int is_uv2_hub(void) { return is_uv(UV2); }
+static inline int is_uv3_hub(void) { return is_uv(UV3); }
+static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
+static inline int is_uv4_hub(void) { return is_uv(UV4); }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }
-#ifdef UV2_HUB_IS_SUPPORTED
-static inline int is_uv2_hub(void)
-{
- return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv2_hub(void)
-{
- return 0;
-}
-#endif
-
-#ifdef UV3_HUB_IS_SUPPORTED
-static inline int is_uv3_hub(void)
-{
- return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv3_hub(void)
-{
- return 0;
-}
-#endif
-
-/* First test "is UV4A", then "is UV4" */
-#ifdef UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
- return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
-static inline int is_uv4a_hub(void)
-{
- return 0;
-}
-#endif
+/*
+ * UV4A is a revision of UV4. So on UV4A, both is_uv4_hub() and
+ * is_uv4a_hub() return true, While on UV4, only is_uv4_hub()
+ * returns true. So to get true results, first test if is UV4A,
+ * then test if is UV4.
+ */
-#ifdef UV4_HUB_IS_SUPPORTED
-static inline int is_uv4_hub(void)
-{
- return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv4_hub(void)
-{
- return 0;
-}
-#endif
+/* UVX class: UV2,3,4 */
+static inline int is_uvx_hub(void) { return is_uv(UVX); }
-static inline int is_uvx_hub(void)
-{
- if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
- return uv_hub_info->hub_revision;
+/* UVY class: UV5,..? */
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
- return 0;
-}
-
-static inline int is_uv_hub(void)
-{
-#ifdef UV1_HUB_IS_SUPPORTED
- return uv_hub_info->hub_revision;
-#endif
- return is_uvx_hub();
-}
+/* Any UV Hubbed System */
+static inline int is_uv_hub(void) { return is_uv(UV_ANY); }
union uvh_apicid {
unsigned long v;
@@ -342,14 +275,11 @@ union uvh_apicid {
* g - GNODE (full 15-bit global nasid, right shifted 1)
* p - PNODE (local part of nsids, right shifted 1)
*/
-#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n) \
+ (((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1)
-
-#define UV1_LOCAL_MMR_BASE 0xf4000000UL
-#define UV1_GLOBAL_MMR32_BASE 0xf8000000UL
-#define UV1_LOCAL_MMR_SIZE (64UL * 1024 * 1024)
-#define UV1_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024)
+#define UV_PNODE_TO_NASID(p) \
+ (UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)
#define UV2_LOCAL_MMR_BASE 0xfa000000UL
#define UV2_GLOBAL_MMR32_BASE 0xfc000000UL
@@ -362,33 +292,42 @@ union uvh_apicid {
#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
#define UV4_LOCAL_MMR_BASE 0xfa000000UL
-#define UV4_GLOBAL_MMR32_BASE 0xfc000000UL
+#define UV4_GLOBAL_MMR32_BASE 0
#define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE (16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE 0
+
+#define UV5_LOCAL_MMR_BASE 0xfa000000UL
+#define UV5_GLOBAL_MMR32_BASE 0
+#define UV5_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE 0
#define UV_LOCAL_MMR_BASE ( \
- is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
- is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
- is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
- /*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+ is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+ is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+ is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+ is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+ 0)
#define UV_GLOBAL_MMR32_BASE ( \
- is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \
- is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
- is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
- /*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+ is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+ is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+ is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+ is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+ 0)
#define UV_LOCAL_MMR_SIZE ( \
- is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
- is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
- is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
- /*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+ is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+ is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \
+ is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \
+ is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \
+ 0)
#define UV_GLOBAL_MMR32_SIZE ( \
- is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \
- is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
- is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
- /*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+ is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \
+ is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \
+ is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \
+ is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \
+ 0)
#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
@@ -406,8 +345,6 @@ union uvh_apicid {
#define UVH_APICID 0x002D0E00L
#define UV_APIC_PNODE_SHIFT 6
-#define UV_APICID_HIBIT_MASK 0xffff0000
-
/* Local Bus from cpu's perspective */
#define LOCAL_BUS_BASE 0x1c00000
#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
@@ -417,7 +354,7 @@ union uvh_apicid {
*
* Note there are NO leds on a UV system. This register is only
* used by the system controller to monitor system-wide operation.
- * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * There are 64 regs per node. With Nehalem cpus (2 cores per node,
* 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
* a node.
*
@@ -583,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
}
+static inline int uv_pnode_to_socket(int pnode)
+{
+ unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+ return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
unsigned int m_val = uv_hub_info->m_val;
unsigned long base;
- unsigned short sockid, node, *p2s;
+ unsigned short sockid;
if (m_val)
return __va(((unsigned long)pnode << m_val) | offset);
- p2s = uv_hub_info->pnode_to_socket;
- sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
- node = uv_socket_to_node(sockid);
+ sockid = uv_pnode_to_socket(pnode);
/* limit address of previous socket is our base, except node 0 is 0 */
- if (!node)
+ if (sockid == 0)
return __va((unsigned long)offset);
- base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+ base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
return __va(base << UV_GAM_RANGE_SHFT | offset);
}
@@ -614,15 +556,6 @@ static inline int uv_apicid_to_pnode(int apicid)
return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode;
}
-/* Convert an apicid to the socket number on the blade */
-static inline int uv_apicid_to_socket(int apicid)
-{
- if (is_uv1_hub())
- return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1;
- else
- return 0;
-}
-
/*
* Access global MMRs using the low memory MMR32 space. This region supports
* faster MMR access but not all MMRs are accessible in this space.
@@ -713,15 +646,14 @@ static inline int uv_cpu_blade_processor_id(int cpu)
{
return uv_cpu_info_per(cpu)->blade_cpu_id;
}
-#define _uv_cpu_blade_processor_id 1 /* indicate function available */
-/* Blade number to Node number (UV1..UV4 is 1:1) */
+/* Blade number to Node number (UV2..UV4 is 1:1) */
static inline int uv_blade_to_node(int blade)
{
- return blade;
+ return uv_socket_to_node(blade);
}
-/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
static inline int uv_numa_blade_id(void)
{
return uv_hub_info->numa_blade_id;
@@ -729,24 +661,28 @@ static inline int uv_numa_blade_id(void)
/*
* Convert linux node number to the UV blade number.
- * .. Currently for UV1 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. Currently for UV2 thru UV4 the node and the blade are identical.
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
*/
static inline int uv_node_to_blade_id(int nid)
{
- return nid;
+ unsigned short *n2s = uv_hub_info->node_to_socket;
+
+ return n2s ? n2s[nid] : nid;
}
-/* Convert a cpu number to the the UV blade number */
+/* Convert a CPU number to the UV blade number */
static inline int uv_cpu_to_blade_id(int cpu)
{
- return uv_node_to_blade_id(cpu_to_node(cpu));
+ return uv_cpu_hub_info(cpu)->numa_blade_id;
}
/* Convert a blade id to the PNODE of the blade */
static inline int uv_blade_to_pnode(int bid)
{
- return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
+ unsigned short *s2p = uv_hub_info->socket_to_pnode;
+
+ return s2p ? s2p[bid] : bid;
}
/* Nid of memory node on blade. -1 if no blade-local memory */
@@ -801,7 +737,7 @@ extern void uv_nmi_setup_hubless(void);
#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */
#define UVH_TSC_SYNC_MASK 3 /* 0011 */
#define UVH_TSC_SYNC_VALID 3 /* 0011 */
-#define UVH_TSC_SYNC_INVALID 2 /* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN 0 /* 0000 */
/* BMC sets a bit this MMR non-zero before sending an NMI */
#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR
@@ -809,19 +745,6 @@ extern void uv_nmi_setup_hubless(void);
#define UVH_NMI_MMR_SHIFT 63
#define UVH_NMI_MMR_TYPE "SCRATCH5"
-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT 62
-
struct uv_hub_nmi_s {
raw_spinlock_t nmi_lock;
atomic_t in_nmi; /* flag this node in UV NMI IRQ */
@@ -853,51 +776,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
#define UV_NMI_STATE_DUMP 2
#define UV_NMI_STATE_DUMP_DONE 3
-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
- if (uv_scir_info->state != value) {
- uv_scir_info->state = value;
- uv_write_local_mmr8(uv_scir_info->offset, value);
- }
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
- return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
- if (uv_cpu_scir_info(cpu)->state != value) {
- uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
- uv_cpu_scir_info(cpu)->offset, value);
- uv_cpu_scir_info(cpu)->state = value;
- }
-}
-
-extern unsigned int uv_apicid_hibits;
-static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
-{
- apicid |= uv_apicid_hibits;
- return (1UL << UVH_IPI_INT_SEND_SHFT) |
- ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
- (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
- (vector << UVH_IPI_INT_VECTOR_SHFT);
-}
-
-static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
-{
- unsigned long val;
- unsigned long dmode = dest_Fixed;
-
- if (vector == NMI_VECTOR)
- dmode = dest_NMI;
-
- val = uv_hub_ipi_value(apicid, vector, dmode);
- uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
-
/*
* Get the minimum revision number of the hub chips within the partition.
* (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
UV_AFFINITY_CPU
};
-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 62c79e26a59a..bb45812889dd 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3,8 +3,9 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * SGI UV MMR definitions
+ * HPE UV MMR definitions
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
*/
@@ -18,58 +19,43 @@
* grouped by architecture types.
*
* UVH - definitions common to all UV hub types.
- * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4).
- * UV1H - definitions specific to UV type 1 hub.
- * UV2H - definitions specific to UV type 2 hub.
- * UV3H - definitions specific to UV type 3 hub.
+ * UVXH - definitions common to UVX class (2, 3, 4).
+ * UVYH - definitions common to UVY class (5).
+ * UV5H - definitions specific to UV type 5 hub.
+ * UV4AH - definitions specific to UV type 4A hub.
* UV4H - definitions specific to UV type 4 hub.
- *
- * So in general, MMR addresses and structures are identical on all hubs types.
- * These MMRs are identified as:
- * #define UVH_xxx <address>
- * union uvh_xxx {
- * unsigned long v;
- * struct uvh_int_cmpd_s {
- * } s;
- * };
+ * UV3H - definitions specific to UV type 3 hub.
+ * UV2H - definitions specific to UV type 2 hub.
*
* If the MMR exists on all hub types but have different addresses,
- * use a conditional operator to define the value at runtime.
- * #define UV1Hxxx a
- * #define UV2Hxxx b
- * #define UV3Hxxx c
- * #define UV4Hxxx d
- * #define UV4AHxxx e
- * #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
- * (is_uv2_hub() ? UV2Hxxx :
- * (is_uv3_hub() ? UV3Hxxx :
- * (is_uv4a_hub() ? UV4AHxxx :
- * UV4Hxxx))
+ * use a conditional operator to define the value at runtime. Any
+ * that are not defined are blank.
+ * (UV4A variations only generated if different from uv4)
+ * #define UVHxxx (
+ * is_uv(UV5) ? UV5Hxxx value :
+ * is_uv(UV4A) ? UV4AHxxx value :
+ * is_uv(UV4) ? UV4Hxxx value :
+ * is_uv(UV3) ? UV3Hxxx value :
+ * is_uv(UV2) ? UV2Hxxx value :
+ * <ucv> or <undef value>)
*
- * If the MMR exists on all hub types > 1 but have different addresses, the
- * variation using "UVX" as the prefix exists.
- * #define UV2Hxxx b
- * #define UV3Hxxx c
- * #define UV4Hxxx d
- * #define UV4AHxxx e
- * #define UVHxxx (is_uv2_hub() ? UV2Hxxx :
- * (is_uv3_hub() ? UV3Hxxx :
- * (is_uv4a_hub() ? UV4AHxxx :
- * UV4Hxxx))
+ * Class UVX has UVs (2|3|4|4A).
+ * Class UVY has UVs (5).
*
* union uvh_xxx {
* unsigned long v;
* struct uvh_xxx_s { # Common fields only
* } s;
- * struct uv1h_xxx_s { # Full UV1 definition (*)
- * } s1;
- * struct uv2h_xxx_s { # Full UV2 definition (*)
- * } s2;
- * struct uv3h_xxx_s { # Full UV3 definition (*)
- * } s3;
- * (NOTE: No struct uv4ah_xxx_s members exist)
+ * struct uv5h_xxx_s { # Full UV5 definition (*)
+ * } s5;
+ * struct uv4ah_xxx_s { # Full UV4A definition (*)
+ * } s4a;
* struct uv4h_xxx_s { # Full UV4 definition (*)
* } s4;
+ * struct uv3h_xxx_s { # Full UV3 definition (*)
+ * } s3;
+ * struct uv2h_xxx_s { # Full UV2 definition (*)
+ * } s2;
* };
* (* - if present and different than the common struct)
*
@@ -78,18 +64,22 @@
* if the contents is the same for all hubs, only the "s" structure is
* generated.
*
- * If the MMR exists on ONLY 1 type of hub, no generic definition is
- * generated:
- * #define UVnH_xxx <uvn address>
- * union uvnh_xxx {
- * unsigned long v;
- * struct uvh_int_cmpd_s {
- * } sn;
- * };
- *
- * (GEN Flags: mflags_opt= undefs=function UV234=UVXH)
+ * (GEN Flags: undefs=function)
*/
+ /* UV bit masks */
+#define UV2 (1 << 0)
+#define UV3 (1 << 1)
+#define UV4 (1 << 2)
+#define UV4A (1 << 3)
+#define UV5 (1 << 4)
+#define UVX (UV2|UV3|UV4)
+#define UVY (UV5)
+#define UV_ANY (~0)
+
+
+
+
#define UV_MMR_ENABLE (1UL << 63)
#define UV1_HUB_PART_NUMBER 0x88a5
@@ -98,533 +88,475 @@
#define UV3_HUB_PART_NUMBER 0x9578
#define UV3_HUB_PART_NUMBER_X 0x4321
#define UV4_HUB_PART_NUMBER 0x99a1
-
-/* Compat: Indicate which UV Hubs are supported. */
-#define UV1_HUB_IS_SUPPORTED 1
-#define UV2_HUB_IS_SUPPORTED 1
-#define UV3_HUB_IS_SUPPORTED 1
-#define UV4_HUB_IS_SUPPORTED 1
-#define UV4A_HUB_IS_SUPPORTED 1
+#define UV5_HUB_PART_NUMBER 0xa171
/* Error function to catch undefined references */
extern unsigned long uv_undefined(char *str);
/* ========================================================================= */
-/* UVH_BAU_DATA_BROADCAST */
-/* ========================================================================= */
-#define UVH_BAU_DATA_BROADCAST 0x61688UL
-
-#define UV1H_BAU_DATA_BROADCAST_32 0x440
-#define UV2H_BAU_DATA_BROADCAST_32 0x440
-#define UV3H_BAU_DATA_BROADCAST_32 0x440
-#define UV4H_BAU_DATA_BROADCAST_32 0x360
-#define UVH_BAU_DATA_BROADCAST_32 ( \
- is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 : \
- is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \
- is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \
- /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32)
-
-#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
-#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
-
-
-union uvh_bau_data_broadcast_u {
- unsigned long v;
- struct uvh_bau_data_broadcast_s {
- unsigned long enable:1; /* RW */
- unsigned long rsvd_1_63:63;
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_BAU_DATA_CONFIG */
-/* ========================================================================= */
-#define UVH_BAU_DATA_CONFIG 0x61680UL
-
-#define UV1H_BAU_DATA_CONFIG_32 0x438
-#define UV2H_BAU_DATA_CONFIG_32 0x438
-#define UV3H_BAU_DATA_CONFIG_32 0x438
-#define UV4H_BAU_DATA_CONFIG_32 0x358
-#define UVH_BAU_DATA_CONFIG_32 ( \
- is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 : \
- is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \
- is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 : \
- /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32)
-
-#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
-#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
-#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
-#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
-#define UVH_BAU_DATA_CONFIG_P_SHFT 13
-#define UVH_BAU_DATA_CONFIG_T_SHFT 15
-#define UVH_BAU_DATA_CONFIG_M_SHFT 16
-#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
-#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
-
-
-union uvh_bau_data_config_u {
- unsigned long v;
- struct uvh_bau_data_config_s {
- unsigned long vector_:8; /* RW */
- unsigned long dm:3; /* RW */
- unsigned long destmode:1; /* RW */
- unsigned long status:1; /* RO */
- unsigned long p:1; /* RO */
- unsigned long rsvd_14:1;
- unsigned long t:1; /* RO */
- unsigned long m:1; /* RW */
- unsigned long rsvd_17_31:15;
- unsigned long apic_id:32; /* RW */
- } s;
-};
-
-/* ========================================================================= */
/* UVH_EVENT_OCCURRED0 */
/* ========================================================================= */
#define UVH_EVENT_OCCURRED0 0x70000UL
-#define UVH_EVENT_OCCURRED0_32 0x5e8
+/* UVH common defines*/
#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
-
-#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
-#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
-#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3
-#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4
-#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5
-#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6
-#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
-#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
-#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
-#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
-#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
-#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
-#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
-#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
-#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
-#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
-#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
-#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
-#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
-#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
-#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
-#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
-#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
-#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
-#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
-#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43
-#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
-#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
-#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
-#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51
-#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52
-#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53
-#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54
-#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55
-#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
-#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
-#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
-#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
-#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
-#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
-#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
-#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
-#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
-#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
-#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
-#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
-#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
-#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
-#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
-#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
-#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
-#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
-#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
-#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
-#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
-#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
-#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
-#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
-#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
-#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
-#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
-#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
-#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
-#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
-#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
-#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
-#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
-#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
-#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
-#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
-#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+/* UVXH common defines */
#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
-#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
-#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
-#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
-#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
-#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
-#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
-#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
-#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
-
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
-
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT 1
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT 2
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT 3
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT 4
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT 5
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT 6
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT 7
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT 8
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT 9
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT 10
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT 11
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT 12
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT 13
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT 14
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT 15
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 16
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 17
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT 18
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT 19
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT 20
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT 21
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT 22
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT 23
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 24
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 25
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT 26
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT 27
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT 28
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT 29
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 30
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 31
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT 32
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT 33
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK 0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT 34
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT 35
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK 0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT 36
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK 0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT 37
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK 0x0000002000000000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 38
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000004000000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 39
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000008000000000UL
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 40
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000010000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 41
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000020000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 42
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000040000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 43
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000080000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 44
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000100000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 45
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000200000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 46
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000400000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 47
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000800000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 48
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0001000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 49
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0002000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 50
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0004000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 51
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0008000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 52
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0010000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 53
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0020000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 54
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0040000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 55
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0080000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 56
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0100000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 57
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0200000000000000UL
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 58
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0400000000000000UL
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 59
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0800000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 60
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x1000000000000000UL
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 61
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x2000000000000000UL
+
+/* UV4 unique defines */
#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1
-#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22
-#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23
-#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24
-#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25
-#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26
-#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27
-#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28
-#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29
-#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36
-#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53
-#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54
-#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55
-#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56
-#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57
-#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58
-#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63
#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10
#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17
#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18
#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19
#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20
#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21
#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22
#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23
#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24
#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25
#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26
#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27
#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28
#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29
#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30
#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31
#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32
#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33
#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34
#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35
#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36
#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37
#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54
#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55
#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56
#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57
#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58
#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL
+#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59
#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60
#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61
#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62
#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63
#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK ( \
+ is_uv(UV4) ? 0x1000000000000000UL : \
+ is_uv(UV3) ? 0x0040000000000000UL : \
+ is_uv(UV2) ? 0x0040000000000000UL : \
+ 0)
#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \
- is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+ is_uv(UV4) ? 60 : \
+ is_uv(UV3) ? 54 : \
+ is_uv(UV2) ? 54 : \
+ -1)
union uvh_event_occurred0_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_event_occurred0_s {
- unsigned long lb_hcerr:1; /* RW, W1C */
- unsigned long rsvd_1_10:10;
- unsigned long rh_aoerr0:1; /* RW, W1C */
- unsigned long rsvd_12_63:52;
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long rsvd_1_63:63;
} s;
+
+ /* UVXH common struct */
struct uvxh_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
unsigned long rsvd_1:1;
@@ -645,6 +577,142 @@ union uvh_event_occurred0_u {
unsigned long xb_aoerr0:1; /* RW */
unsigned long rsvd_17_63:47;
} sx;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long kt_hcerr:1; /* RW */
+ unsigned long rh0_hcerr:1; /* RW */
+ unsigned long rh1_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long lh2_hcerr:1; /* RW */
+ unsigned long lh3_hcerr:1; /* RW */
+ unsigned long xb_hcerr:1; /* RW */
+ unsigned long rdm_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long kt_aoerr0:1; /* RW */
+ unsigned long rh0_aoerr0:1; /* RW */
+ unsigned long rh1_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long lh2_aoerr0:1; /* RW */
+ unsigned long lh3_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rdm_aoerr0:1; /* RW */
+ unsigned long rt0_aoerr0:1; /* RW */
+ unsigned long rt1_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long kt_aoerr1:1; /* RW */
+ unsigned long rh0_aoerr1:1; /* RW */
+ unsigned long rh1_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long lh2_aoerr1:1; /* RW */
+ unsigned long lh3_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rdm_aoerr1:1; /* RW */
+ unsigned long rt0_aoerr1:1; /* RW */
+ unsigned long rt1_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long rsvd_62_63:2;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long kt_hcerr:1; /* RW */
+ unsigned long rh0_hcerr:1; /* RW */
+ unsigned long rh1_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long lh2_hcerr:1; /* RW */
+ unsigned long lh3_hcerr:1; /* RW */
+ unsigned long xb_hcerr:1; /* RW */
+ unsigned long rdm_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long kt_aoerr0:1; /* RW */
+ unsigned long rh0_aoerr0:1; /* RW */
+ unsigned long rh1_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long lh2_aoerr0:1; /* RW */
+ unsigned long lh3_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rdm_aoerr0:1; /* RW */
+ unsigned long rt0_aoerr0:1; /* RW */
+ unsigned long rt1_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long kt_aoerr1:1; /* RW */
+ unsigned long rh0_aoerr1:1; /* RW */
+ unsigned long rh1_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long lh2_aoerr1:1; /* RW */
+ unsigned long lh3_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rdm_aoerr1:1; /* RW */
+ unsigned long rt0_aoerr1:1; /* RW */
+ unsigned long rt1_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long rsvd_62_63:2;
+ } s5;
+
+ /* UV4 unique struct */
struct uv4h_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
unsigned long kt_hcerr:1; /* RW */
@@ -711,13 +779,1355 @@ union uvh_event_occurred0_u {
unsigned long extio_int2:1; /* RW */
unsigned long extio_int3:1; /* RW */
} s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long qp_hcerr:1; /* RW */
+ unsigned long rh_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long gr0_hcerr:1; /* RW */
+ unsigned long gr1_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long qp_aoerr0:1; /* RW */
+ unsigned long rh_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long gr0_aoerr0:1; /* RW */
+ unsigned long gr1_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rt_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long qp_aoerr1:1; /* RW */
+ unsigned long rh_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long gr0_aoerr1:1; /* RW */
+ unsigned long gr1_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rt_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long rsvd_59_63:5;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long qp_hcerr:1; /* RW */
+ unsigned long rh_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long gr0_hcerr:1; /* RW */
+ unsigned long gr1_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long qp_aoerr0:1; /* RW */
+ unsigned long rh_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long gr0_aoerr0:1; /* RW */
+ unsigned long gr1_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rt_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long qp_aoerr1:1; /* RW */
+ unsigned long rh_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long gr0_aoerr1:1; /* RW */
+ unsigned long gr1_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rt_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long rsvd_59_63:5;
+ } s2;
};
/* ========================================================================= */
/* UVH_EVENT_OCCURRED0_ALIAS */
/* ========================================================================= */
#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
-#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1 0x70080UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT 1
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT 2
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT 3
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT 4
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT 5
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT 6
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT 7
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT 8
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT 9
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT 10
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT 11
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT 12
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT 13
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT 14
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT 15
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT 16
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT 17
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT 18
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT 19
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT 20
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT 21
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT 22
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT 23
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT 24
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT 25
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT 26
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT 27
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT 28
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT 29
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT 30
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT 31
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT 32
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT 33
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK 0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT 34
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK 0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT 35
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK 0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT 36
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK 0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT 37
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK 0x0000002000000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT 0
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT 1
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT 2
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 3
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 4
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 5
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 6
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 7
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 8
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 9
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 10
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 11
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 12
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 13
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 14
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 15
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 16
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 17
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 18
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT 19
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT 20
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT 21
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT 22
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT 23
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT 24
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT 25
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT 26
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 27
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 28
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 29
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 30
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 31
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 32
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 33
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 34
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 35
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 36
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 37
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 38
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 39
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 40
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 41
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 42
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT 43
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT 44
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT 45
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT 46
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT 47
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT 48
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT 49
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK 0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT 50
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK 0x0004000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK ( \
+ is_uv(UV5) ? 0x0000000000000002UL : \
+ 0)
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT ( \
+ is_uv(UV5) ? 1 : \
+ -1)
+
+union uvyh_event_occurred1_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred1_s {
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long xh_tlb_int0:1; /* RW */
+ unsigned long xh_tlb_int1:1; /* RW */
+ unsigned long xh_tlb_int2:1; /* RW */
+ unsigned long xh_tlb_int3:1; /* RW */
+ unsigned long xh_tlb_int4:1; /* RW */
+ unsigned long xh_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int0:1; /* RW */
+ unsigned long rdm_tlb_int1:1; /* RW */
+ unsigned long rdm_tlb_int2:1; /* RW */
+ unsigned long rdm_tlb_int3:1; /* RW */
+ unsigned long rdm_tlb_int4:1; /* RW */
+ unsigned long rdm_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int6:1; /* RW */
+ unsigned long rdm_tlb_int7:1; /* RW */
+ unsigned long rdm_tlb_int8:1; /* RW */
+ unsigned long rdm_tlb_int9:1; /* RW */
+ unsigned long rdm_tlb_int10:1; /* RW */
+ unsigned long rdm_tlb_int11:1; /* RW */
+ unsigned long rdm_tlb_int12:1; /* RW */
+ unsigned long rdm_tlb_int13:1; /* RW */
+ unsigned long rdm_tlb_int14:1; /* RW */
+ unsigned long rdm_tlb_int15:1; /* RW */
+ unsigned long rdm_tlb_int16:1; /* RW */
+ unsigned long rdm_tlb_int17:1; /* RW */
+ unsigned long rdm_tlb_int18:1; /* RW */
+ unsigned long rdm_tlb_int19:1; /* RW */
+ unsigned long rdm_tlb_int20:1; /* RW */
+ unsigned long rdm_tlb_int21:1; /* RW */
+ unsigned long rdm_tlb_int22:1; /* RW */
+ unsigned long rdm_tlb_int23:1; /* RW */
+ unsigned long rsvd_38_63:26;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred1_s {
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long xh_tlb_int0:1; /* RW */
+ unsigned long xh_tlb_int1:1; /* RW */
+ unsigned long xh_tlb_int2:1; /* RW */
+ unsigned long xh_tlb_int3:1; /* RW */
+ unsigned long xh_tlb_int4:1; /* RW */
+ unsigned long xh_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int0:1; /* RW */
+ unsigned long rdm_tlb_int1:1; /* RW */
+ unsigned long rdm_tlb_int2:1; /* RW */
+ unsigned long rdm_tlb_int3:1; /* RW */
+ unsigned long rdm_tlb_int4:1; /* RW */
+ unsigned long rdm_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int6:1; /* RW */
+ unsigned long rdm_tlb_int7:1; /* RW */
+ unsigned long rdm_tlb_int8:1; /* RW */
+ unsigned long rdm_tlb_int9:1; /* RW */
+ unsigned long rdm_tlb_int10:1; /* RW */
+ unsigned long rdm_tlb_int11:1; /* RW */
+ unsigned long rdm_tlb_int12:1; /* RW */
+ unsigned long rdm_tlb_int13:1; /* RW */
+ unsigned long rdm_tlb_int14:1; /* RW */
+ unsigned long rdm_tlb_int15:1; /* RW */
+ unsigned long rdm_tlb_int16:1; /* RW */
+ unsigned long rdm_tlb_int17:1; /* RW */
+ unsigned long rdm_tlb_int18:1; /* RW */
+ unsigned long rdm_tlb_int19:1; /* RW */
+ unsigned long rdm_tlb_int20:1; /* RW */
+ unsigned long rdm_tlb_int21:1; /* RW */
+ unsigned long rdm_tlb_int22:1; /* RW */
+ unsigned long rdm_tlb_int23:1; /* RW */
+ unsigned long rsvd_38_63:26;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_event_occurred1_s {
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr0_tlb_int16:1; /* RW */
+ unsigned long gr0_tlb_int17:1; /* RW */
+ unsigned long gr0_tlb_int18:1; /* RW */
+ unsigned long gr0_tlb_int19:1; /* RW */
+ unsigned long gr0_tlb_int20:1; /* RW */
+ unsigned long gr0_tlb_int21:1; /* RW */
+ unsigned long gr0_tlb_int22:1; /* RW */
+ unsigned long gr0_tlb_int23:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int16:1; /* RW */
+ unsigned long gr1_tlb_int17:1; /* RW */
+ unsigned long gr1_tlb_int18:1; /* RW */
+ unsigned long gr1_tlb_int19:1; /* RW */
+ unsigned long gr1_tlb_int20:1; /* RW */
+ unsigned long gr1_tlb_int21:1; /* RW */
+ unsigned long gr1_tlb_int22:1; /* RW */
+ unsigned long gr1_tlb_int23:1; /* RW */
+ unsigned long rsvd_51_63:13;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred1_s {
+ unsigned long bau_data:1; /* RW */
+ unsigned long power_management_req:1; /* RW */
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rsvd_52_63:12;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred1_s {
+ unsigned long bau_data:1; /* RW */
+ unsigned long power_management_req:1; /* RW */
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rsvd_52_63:12;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2 0x70100UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 1
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT 2
+#define UVYH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT 3
+#define UVYH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT 4
+#define UVYH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT 5
+#define UVYH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT 6
+#define UVYH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT 7
+#define UVYH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT 8
+#define UVYH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT 9
+#define UVYH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT 10
+#define UVYH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT 11
+#define UVYH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT 12
+#define UVYH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT 13
+#define UVYH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT 14
+#define UVYH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT 15
+#define UVYH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT 16
+#define UVYH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT 17
+#define UVYH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT 18
+#define UVYH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT 19
+#define UVYH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT 20
+#define UVYH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT 21
+#define UVYH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT 22
+#define UVYH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT 23
+#define UVYH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT 24
+#define UVYH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT 25
+#define UVYH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT 26
+#define UVYH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT 27
+#define UVYH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT 28
+#define UVYH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT 29
+#define UVYH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT 30
+#define UVYH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT 31
+#define UVYH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT 32
+#define UVYH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT 33
+#define UVYH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000200000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UVH_EVENT_OCCURRED2_RTC_1_MASK ( \
+ is_uv(UV5) ? 0x0000000000000008UL : \
+ is_uv(UV4) ? 0x0000000000080000UL : \
+ is_uv(UV3) ? 0x0000000000000002UL : \
+ is_uv(UV2) ? 0x0000000000000002UL : \
+ 0)
+#define UVH_EVENT_OCCURRED2_RTC_1_SHFT ( \
+ is_uv(UV5) ? 3 : \
+ is_uv(UV4) ? 19 : \
+ is_uv(UV3) ? 1 : \
+ is_uv(UV2) ? 1 : \
+ -1)
+
+union uvyh_event_occurred2_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred2_s {
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_34_63:30;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred2_s {
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_34_63:30;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_event_occurred2_s {
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_50_63:14;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred2_s {
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_32_63:32;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred2_s {
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_32_63:32;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL
/* ========================================================================= */
@@ -725,53 +2135,148 @@ union uvh_event_occurred0_u {
/* ========================================================================= */
#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
-#define UVH_EXTIO_INT0_BROADCAST_32 ( \
- is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 : \
- is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \
- is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \
- /*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
-
+/* UVH common defines*/
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
union uvh_extio_int0_broadcast_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_extio_int0_broadcast_s {
unsigned long enable:1; /* RW */
unsigned long rsvd_1_63:63;
} s;
+
+ /* UV5 unique struct */
+ struct uv5h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_GR0_GAM_GR_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_GAM_GR_CONFIG ( \
+ is_uv(UV5) ? 0x600028UL : \
+ is_uv(UV4) ? 0x600028UL : \
+ is_uv(UV3) ? 0xc00028UL : \
+ is_uv(UV2) ? 0xc00028UL : \
+ 0)
+
+
+
+/* UVYH common defines */
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV4 unique defines */
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV3 unique defines */
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV2 unique defines */
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT 0
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK 0x000000000000000fUL
+
+
+union uvyh_gr0_gam_gr_config_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr0_gam_gr_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long undef_6_9:4; /* Undefined */
+ unsigned long subspace:1; /* RW */
+ unsigned long reserved:53;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr0_gam_gr_config_s {
+ unsigned long n_gr:4; /* RW */
+ unsigned long reserved:60;
+ } s2;
};
/* ========================================================================= */
/* UVH_GR0_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
-
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT0_CONFIG ( \
+ is_uv(UV4) ? 0x61b00UL : \
+ is_uv(UV3) ? 0x61b00UL : \
+ is_uv(UV2) ? 0x61b00UL : \
+ uv_undefined("UVH_GR0_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr0_tlb_int0_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr0_tlb_int0_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -784,33 +2289,97 @@ union uvh_gr0_tlb_int0_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
+
+ /* UVXH common struct */
+ struct uvxh_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s2;
};
/* ========================================================================= */
/* UVH_GR0_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
-
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT1_CONFIG ( \
+ is_uv(UV4) ? 0x61b40UL : \
+ is_uv(UV3) ? 0x61b40UL : \
+ is_uv(UV2) ? 0x61b40UL : \
+ uv_undefined("UVH_GR0_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr0_tlb_int1_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr0_tlb_int1_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -823,466 +2392,97 @@ union uvh_gr0_tlb_int1_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_CONTROL */
-/* ========================================================================= */
-#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
-#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL
-#define UVH_GR0_TLB_MMR_CONTROL ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL)
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-
-#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60
-#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
-#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
-
-#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT)
-
-union uvh_gr0_tlb_mmr_control_u {
- unsigned long v;
- struct uvh_gr0_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_48:17;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
- } s;
- struct uv1h_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_47:16;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53:1;
- unsigned long mmr_inj_tlbpgsize:1; /* RW */
- unsigned long rsvd_55:1;
- unsigned long mmr_inj_tlbrreg:1; /* RW */
- unsigned long rsvd_57_59:3;
- unsigned long mmr_inj_tlblruv:1; /* RW */
- unsigned long rsvd_61_63:3;
- } s1;
- struct uvxh_gr0_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long rsvd_48:1;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
+ /* UVXH common struct */
+ struct uvxh_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} sx;
- struct uv2h_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53_63:11;
- } s2;
- struct uv3h_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long undef_52:1; /* Undefined */
- unsigned long rsvd_53_63:11;
- } s3;
- struct uv4h_gr0_tlb_mmr_control_s {
- unsigned long index:13; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_15:1;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_58:7;
- unsigned long page_size:5; /* RW */
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s4;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_READ_DATA_HI */
-/* ========================================================================= */
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_hi_u {
- unsigned long v;
- struct uv1h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s1;
- struct uv2h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s2;
- struct uv3h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_46_54:9; /* Undefined */
- unsigned long way_ecc:9; /* RO */
+ /* UV3 unique struct */
+ struct uv3h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s3;
- struct uv4h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:34; /* RO */
- unsigned long pnid:15; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_54:1; /* Undefined */
- unsigned long way_ecc:9; /* RO */
- } s4;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_READ_DATA_LO */
-/* ========================================================================= */
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \
- is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_lo_u {
- unsigned long v;
- struct uvh_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s;
- struct uv1h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s1;
- struct uvxh_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } sx;
- struct uv2h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
+ /* UV2 unique struct */
+ struct uv2h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s3;
- struct uv4h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s4;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL
#define UVH_GR1_TLB_INT0_CONFIG ( \
- is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG : \
- is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \
- is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG)
-
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+ is_uv(UV4) ? 0x62100UL : \
+ is_uv(UV3) ? 0x61f00UL : \
+ is_uv(UV2) ? 0x61f00UL : \
+ uv_undefined("UVH_GR1_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr1_tlb_int0_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr1_tlb_int0_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -1295,41 +2495,97 @@ union uvh_gr1_tlb_int0_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
+
+ /* UVXH common struct */
+ struct uvxh_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s2;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL
#define UVH_GR1_TLB_INT1_CONFIG ( \
- is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG : \
- is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \
- is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG)
-
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+ is_uv(UV4) ? 0x62140UL : \
+ is_uv(UV3) ? 0x61f40UL : \
+ is_uv(UV2) ? 0x61f40UL : \
+ uv_undefined("UVH_GR1_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13
+#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15
+#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16
+#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr1_tlb_int1_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr1_tlb_int1_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -1342,416 +2598,62 @@ union uvh_gr1_tlb_int1_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_CONTROL */
-/* ========================================================================= */
-#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
-#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL
-#define UVH_GR1_TLB_MMR_CONTROL ( \
- is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL)
-
-#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-
-#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60
-#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
-#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
-
-#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
-
-
-union uvh_gr1_tlb_mmr_control_u {
- unsigned long v;
- struct uvh_gr1_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_48:17;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
- } s;
- struct uv1h_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_47:16;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53:1;
- unsigned long mmr_inj_tlbpgsize:1; /* RW */
- unsigned long rsvd_55:1;
- unsigned long mmr_inj_tlbrreg:1; /* RW */
- unsigned long rsvd_57_59:3;
- unsigned long mmr_inj_tlblruv:1; /* RW */
- unsigned long rsvd_61_63:3;
- } s1;
- struct uvxh_gr1_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long rsvd_48:1;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
+ /* UVXH common struct */
+ struct uvxh_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} sx;
- struct uv2h_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53_63:11;
- } s2;
- struct uv3h_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long undef_52:1; /* Undefined */
- unsigned long rsvd_53_63:11;
- } s3;
- struct uv4h_gr1_tlb_mmr_control_s {
- unsigned long index:13; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_15:1;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_58:7;
- unsigned long page_size:5; /* RW */
+
+ /* UV4 unique struct */
+ struct uv4h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s4;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_READ_DATA_HI */
-/* ========================================================================= */
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \
- is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_hi_u {
- unsigned long v;
- struct uv1h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s1;
- struct uv2h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s2;
- struct uv3h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_46_54:9; /* Undefined */
- unsigned long way_ecc:9; /* RO */
+ /* UV3 unique struct */
+ struct uv3h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s3;
- struct uv4h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:34; /* RO */
- unsigned long pnid:15; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_54:1; /* Undefined */
- unsigned long way_ecc:9; /* RO */
- } s4;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_READ_DATA_LO */
-/* ========================================================================= */
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \
- is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_lo_u {
- unsigned long v;
- struct uvh_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s;
- struct uv1h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s1;
- struct uvxh_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } sx;
- struct uv2h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
+ /* UV2 unique struct */
+ struct uv2h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s3;
- struct uv4h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s4;
};
/* ========================================================================= */
@@ -1759,58 +2661,43 @@ union uvh_gr1_tlb_mmr_read_data_lo_u {
/* ========================================================================= */
#define UVH_INT_CMPB 0x22080UL
+/* UVH common defines*/
#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
union uvh_int_cmpb_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_int_cmpb_s {
unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
} s;
-};
-
-/* ========================================================================= */
-/* UVH_INT_CMPC */
-/* ========================================================================= */
-#define UVH_INT_CMPC 0x22100UL
-
-
-#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0
-#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
-
-
-union uvh_int_cmpc_u {
- unsigned long v;
- struct uvh_int_cmpc_s {
- unsigned long real_time_cmpc:56; /* RW */
+ /* UV5 unique struct */
+ struct uv5h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_INT_CMPD */
-/* ========================================================================= */
-#define UVH_INT_CMPD 0x22180UL
-
-
-#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0
-#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+ } s5;
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
+ /* UV4 unique struct */
+ struct uv4h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s4;
+ /* UV3 unique struct */
+ struct uv3h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s3;
-union uvh_int_cmpd_u {
- unsigned long v;
- struct uvh_int_cmpd_s {
- unsigned long real_time_cmpd:56; /* RW */
+ /* UV2 unique struct */
+ struct uv2h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
- } s;
+ } s2;
};
/* ========================================================================= */
@@ -1818,30 +2705,23 @@ union uvh_int_cmpd_u {
/* ========================================================================= */
#define UVH_IPI_INT 0x60500UL
-#define UV1H_IPI_INT_32 0x348
-#define UV2H_IPI_INT_32 0x348
-#define UV3H_IPI_INT_32 0x348
-#define UV4H_IPI_INT_32 0x268
-#define UVH_IPI_INT_32 ( \
- is_uv1_hub() ? UV1H_IPI_INT_32 : \
- is_uv2_hub() ? UV2H_IPI_INT_32 : \
- is_uv3_hub() ? UV3H_IPI_INT_32 : \
- /*is_uv4_hub*/ UV4H_IPI_INT_32)
-
+/* UVH common defines*/
#define UVH_IPI_INT_VECTOR_SHFT 0
-#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
-#define UVH_IPI_INT_DESTMODE_SHFT 11
-#define UVH_IPI_INT_APIC_ID_SHFT 16
-#define UVH_IPI_INT_SEND_SHFT 63
#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
#define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL
+#define UVH_IPI_INT_DESTMODE_SHFT 11
#define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_IPI_INT_APIC_ID_SHFT 16
#define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL
+#define UVH_IPI_INT_SEND_SHFT 63
#define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL
union uvh_ipi_int_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_ipi_int_s {
unsigned long vector_:8; /* RW */
unsigned long delivery_mode:3; /* RW */
@@ -1851,1243 +2731,733 @@ union uvh_ipi_int_u {
unsigned long rsvd_48_62:15;
unsigned long send:1; /* WP */
} s;
+
+ /* UV5 unique struct */
+ struct uv5h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s2;
};
/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
+/* UVH_NODE_ID */
/* ========================================================================= */
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \
- is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
+#define UVH_NODE_ID 0x0UL
+/* UVH common defines*/
+#define UVH_NODE_ID_FORCE1_SHFT 0
+#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVH_NODE_ID_MANUFACTURER_SHFT 1
+#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVH_NODE_ID_PART_NUMBER_SHFT 12
+#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVH_NODE_ID_REVISION_SHFT 28
+#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UVH_NODE_ID_NODE_ID_SHFT 32
+#define UVH_NODE_ID_NI_PORT_SHFT 57
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+/* UVXH common defines */
+#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+/* UVYH common defines */
+#define UVYH_NODE_ID_NODE_ID_MASK 0x0000007f00000000UL
+#define UVYH_NODE_ID_NI_PORT_MASK 0x7e00000000000000UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+/* UV4 unique defines */
+#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV4H_NODE_ID_RESERVED_2_SHFT 49
+#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+/* UV3 unique defines */
+#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV3H_NODE_ID_RESERVED_2_SHFT 49
+#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-union uvh_lb_bau_intd_payload_queue_first_u {
+union uvh_node_id_u {
unsigned long v;
- struct uv1h_lb_bau_intd_payload_queue_first_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s1;
- struct uv2h_lb_bau_intd_payload_queue_first_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
+
+ /* UVH common struct */
+ struct uvh_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long rsvd_32_63:32;
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47_49:3;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } sx;
+
+ /* UVYH common struct */
+ struct uvyh_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:7; /* RW */
+ unsigned long rsvd_39_56:18;
+ unsigned long ni_port:6; /* RO */
unsigned long rsvd_63:1;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_first_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:7; /* RW */
+ unsigned long rsvd_39_56:18;
+ unsigned long ni_port:6; /* RO */
unsigned long rsvd_63:1;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47:1;
+ unsigned long router_select:1; /* RO */
+ unsigned long rsvd_49:1;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47:1;
+ unsigned long router_select:1; /* RO */
+ unsigned long rsvd_49:1;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
} s3;
+
+ /* UV2 unique struct */
+ struct uv2h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47_49:3;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } s2;
};
/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
+/* UVH_NODE_PRESENT_0 */
/* ========================================================================= */
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \
- is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
+#define UVH_NODE_PRESENT_0 ( \
+ is_uv(UV5) ? 0x1400UL : \
+ 0)
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_0_NODES_SHFT 0
+#define UVYH_NODE_PRESENT_0_NODES_MASK 0xffffffffffffffffUL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+union uvh_node_present_0_u {
+ unsigned long v;
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+ /* UVH common struct */
+ struct uvh_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } s;
+ /* UVYH common struct */
+ struct uvyh_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } sy;
-union uvh_lb_bau_intd_payload_queue_last_u {
- unsigned long v;
- struct uv1h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s1;
- struct uv2h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s3;
+ /* UV5 unique struct */
+ struct uv5h_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
+/* UVH_NODE_PRESENT_1 */
/* ========================================================================= */
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \
- is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
+#define UVH_NODE_PRESENT_1 ( \
+ is_uv(UV5) ? 0x1408UL : \
+ 0)
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_1_NODES_SHFT 0
+#define UVYH_NODE_PRESENT_1_NODES_MASK 0xffffffffffffffffUL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+union uvh_node_present_1_u {
+ unsigned long v;
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+ /* UVH common struct */
+ struct uvh_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } s;
+ /* UVYH common struct */
+ struct uvyh_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } sy;
-union uvh_lb_bau_intd_payload_queue_tail_u {
- unsigned long v;
- struct uv1h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s1;
- struct uv2h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s3;
+ /* UV5 unique struct */
+ struct uv5h_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
+/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \
- is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
-
-
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-
-union uvh_lb_bau_intd_software_acknowledge_u {
+#define UVH_NODE_PRESENT_TABLE ( \
+ is_uv(UV4) ? 0x1400UL : \
+ is_uv(UV3) ? 0x1400UL : \
+ is_uv(UV2) ? 0x1400UL : \
+ 0)
+
+#define UVH_NODE_PRESENT_TABLE_DEPTH ( \
+ is_uv(UV4) ? 4 : \
+ is_uv(UV3) ? 16 : \
+ is_uv(UV2) ? 16 : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0
+#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
+
+
+union uvh_node_present_table_u {
unsigned long v;
- struct uv1h_lb_bau_intd_software_acknowledge_s {
- unsigned long pending_0:1; /* RW, W1C */
- unsigned long pending_1:1; /* RW, W1C */
- unsigned long pending_2:1; /* RW, W1C */
- unsigned long pending_3:1; /* RW, W1C */
- unsigned long pending_4:1; /* RW, W1C */
- unsigned long pending_5:1; /* RW, W1C */
- unsigned long pending_6:1; /* RW, W1C */
- unsigned long pending_7:1; /* RW, W1C */
- unsigned long timeout_0:1; /* RW, W1C */
- unsigned long timeout_1:1; /* RW, W1C */
- unsigned long timeout_2:1; /* RW, W1C */
- unsigned long timeout_3:1; /* RW, W1C */
- unsigned long timeout_4:1; /* RW, W1C */
- unsigned long timeout_5:1; /* RW, W1C */
- unsigned long timeout_6:1; /* RW, W1C */
- unsigned long timeout_7:1; /* RW, W1C */
- unsigned long rsvd_16_63:48;
- } s1;
- struct uv2h_lb_bau_intd_software_acknowledge_s {
- unsigned long pending_0:1; /* RW */
- unsigned long pending_1:1; /* RW */
- unsigned long pending_2:1; /* RW */
- unsigned long pending_3:1; /* RW */
- unsigned long pending_4:1; /* RW */
- unsigned long pending_5:1; /* RW */
- unsigned long pending_6:1; /* RW */
- unsigned long pending_7:1; /* RW */
- unsigned long timeout_0:1; /* RW */
- unsigned long timeout_1:1; /* RW */
- unsigned long timeout_2:1; /* RW */
- unsigned long timeout_3:1; /* RW */
- unsigned long timeout_4:1; /* RW */
- unsigned long timeout_5:1; /* RW */
- unsigned long timeout_6:1; /* RW */
- unsigned long timeout_7:1; /* RW */
- unsigned long rsvd_16_63:48;
- } s2;
- struct uv3h_lb_bau_intd_software_acknowledge_s {
- unsigned long pending_0:1; /* RW */
- unsigned long pending_1:1; /* RW */
- unsigned long pending_2:1; /* RW */
- unsigned long pending_3:1; /* RW */
- unsigned long pending_4:1; /* RW */
- unsigned long pending_5:1; /* RW */
- unsigned long pending_6:1; /* RW */
- unsigned long pending_7:1; /* RW */
- unsigned long timeout_0:1; /* RW */
- unsigned long timeout_1:1; /* RW */
- unsigned long timeout_2:1; /* RW */
- unsigned long timeout_3:1; /* RW */
- unsigned long timeout_4:1; /* RW */
- unsigned long timeout_5:1; /* RW */
- unsigned long timeout_6:1; /* RW */
- unsigned long timeout_7:1; /* RW */
- unsigned long rsvd_16_63:48;
+
+ /* UVH common struct */
+ struct uvh_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
} s3;
+
+ /* UV2 unique struct */
+ struct uv2h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s2;
};
/* ========================================================================= */
-/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
+/* UVH_RH10_GAM_ADDR_MAP_CONFIG */
/* ========================================================================= */
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \
- is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
+#define UVH_RH10_GAM_ADDR_MAP_CONFIG ( \
+ is_uv(UV5) ? 0x470000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000001c0UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT 12
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK 0x0000000000001000UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL
+
+
+union uvh_rh10_gam_addr_map_config_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ unsigned long rsvd_20_63:44;
+ } s;
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ unsigned long rsvd_20_63:44;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ } s5;
+};
/* ========================================================================= */
-/* UVH_LB_BAU_MISC_CONTROL */
+/* UVH_RH10_GAM_GRU_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL
-#define UVH_LB_BAU_MISC_CONTROL ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL)
-
-#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18
-#define UVH_LB_BAU_MISC_CONTROL_32 ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32)
-
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \
- is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT)
-
-union uvh_lb_bau_misc_control_u {
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG ( \
+ is_uv(UV5) ? 0x4700b0UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 25
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x0070000000000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffe000000UL : \
+ 0)
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV5) ? 25 : \
+ -1)
+
+union uvh_rh10_gam_gru_overlay_config_u {
unsigned long v;
- struct uvh_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long rsvd_29_47:19;
- unsigned long fun:16; /* RW */
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
+ unsigned long enable:1; /* RW */
} s;
- struct uv1h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long rsvd_29_47:19;
- unsigned long fun:16; /* RW */
- } s1;
- struct uvxh_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long rsvd_36_47:12;
- unsigned long fun:16; /* RW */
- } sx;
- struct uv2h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long rsvd_36_47:12;
- unsigned long fun:16; /* RW */
- } s2;
- struct uv3h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
- unsigned long enable_intd_prefetch_hint:1; /* RW */
- unsigned long thread_kill_timebase:8; /* RW */
- unsigned long rsvd_46_47:2;
- unsigned long fun:16; /* RW */
- } s3;
- struct uv4h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
- unsigned long rsvd_37:1;
- unsigned long thread_kill_timebase:8; /* RW */
- unsigned long address_interleave_select:1; /* RW */
- unsigned long rsvd_47:1;
- unsigned long fun:16; /* RW */
- } s4;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
+/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 */
/* ========================================================================= */
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL)
-
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
-
-union uvh_lb_bau_sb_activation_control_u {
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 ( \
+ is_uv(UV5) ? 0x473000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffc000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \
+ is_uv(UV5) ? 26 : \
+ -1)
+
+union uvh_rh10_gam_mmioh_overlay_config0_u {
unsigned long v;
- struct uvh_lb_bau_sb_activation_control_s {
- unsigned long index:6; /* RW */
- unsigned long rsvd_6_61:56;
- unsigned long push:1; /* WP */
- unsigned long init:1; /* WP */
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
} s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
+/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 */
/* ========================================================================= */
-#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0)
-
-#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_0_u {
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 ( \
+ is_uv(UV5) ? 0x474000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffc000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \
+ is_uv(UV5) ? 26 : \
+ -1)
+
+union uvh_rh10_gam_mmioh_overlay_config1_u {
unsigned long v;
- struct uvh_lb_bau_sb_activation_status_0_s {
- unsigned long status:64; /* RW */
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
} s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
+/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 */
/* ========================================================================= */
-#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1)
-
-#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_1_u {
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 ( \
+ is_uv(UV5) ? 0x473800UL : \
+ 0)
+
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \
+ is_uv(UV5) ? 128 : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x000000000000007fUL
+
+
+union uvh_rh10_gam_mmioh_redirect_config0_u {
unsigned long v;
- struct uvh_lb_bau_sb_activation_status_1_s {
- unsigned long status:64; /* RW */
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
} s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s5;
};
/* ========================================================================= */
-/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
+/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 */
/* ========================================================================= */
-#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE)
-
-#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
-
-#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \
- is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK)
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 ( \
+ is_uv(UV5) ? 0x474800UL : \
+ 0)
+
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \
+ is_uv(UV5) ? 128 : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x000000000000007fUL
+
+
+union uvh_rh10_gam_mmioh_redirect_config1_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s5;
+};
/* ========================================================================= */
-/* UVH_NODE_ID */
+/* UVH_RH10_GAM_MMR_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UVH_NODE_ID 0x0UL
-#define UV1H_NODE_ID 0x0UL
-#define UV2H_NODE_ID 0x0UL
-#define UV3H_NODE_ID 0x0UL
-#define UV4H_NODE_ID 0x0UL
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG ( \
+ is_uv(UV5) ? 0x470090UL : \
+ 0)
-#define UVH_NODE_ID_FORCE1_SHFT 0
-#define UVH_NODE_ID_MANUFACTURER_SHFT 1
-#define UVH_NODE_ID_PART_NUMBER_SHFT 12
-#define UVH_NODE_ID_REVISION_SHFT 28
-#define UVH_NODE_ID_NODE_ID_SHFT 32
-#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-
-#define UV1H_NODE_ID_FORCE1_SHFT 0
-#define UV1H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV1H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV1H_NODE_ID_REVISION_SHFT 28
-#define UV1H_NODE_ID_NODE_ID_SHFT 32
-#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48
-#define UV1H_NODE_ID_NI_PORT_SHFT 56
-#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
-#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
-
-#define UVXH_NODE_ID_FORCE1_SHFT 0
-#define UVXH_NODE_ID_MANUFACTURER_SHFT 1
-#define UVXH_NODE_ID_PART_NUMBER_SHFT 12
-#define UVXH_NODE_ID_REVISION_SHFT 28
-#define UVXH_NODE_ID_NODE_ID_SHFT 32
-#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UVXH_NODE_ID_NI_PORT_SHFT 57
-#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-#define UV2H_NODE_ID_FORCE1_SHFT 0
-#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV2H_NODE_ID_REVISION_SHFT 28
-#define UV2H_NODE_ID_NODE_ID_SHFT 32
-#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV2H_NODE_ID_NI_PORT_SHFT 57
-#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-
-#define UV3H_NODE_ID_FORCE1_SHFT 0
-#define UV3H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV3H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV3H_NODE_ID_REVISION_SHFT 28
-#define UV3H_NODE_ID_NODE_ID_SHFT 32
-#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
-#define UV3H_NODE_ID_RESERVED_2_SHFT 49
-#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV3H_NODE_ID_NI_PORT_SHFT 57
-#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
-#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-
-#define UV4H_NODE_ID_FORCE1_SHFT 0
-#define UV4H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV4H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV4H_NODE_ID_REVISION_SHFT 28
-#define UV4H_NODE_ID_NODE_ID_SHFT 32
-#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48
-#define UV4H_NODE_ID_RESERVED_2_SHFT 49
-#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV4H_NODE_ID_NI_PORT_SHFT 57
-#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
-#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 25
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffe000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV5) ? 25 : \
+ -1)
-union uvh_node_id_u {
+union uvh_rh10_gam_mmr_overlay_config_u {
unsigned long v;
- struct uvh_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47_63:17;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
} s;
- struct uv1h_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47:1;
- unsigned long nodes_per_bit:7; /* RW */
- unsigned long rsvd_55:1;
- unsigned long ni_port:4; /* RO */
- unsigned long rsvd_60_63:4;
- } s1;
- struct uvxh_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47_49:3;
- unsigned long nodes_per_bit:7; /* RO */
- unsigned long ni_port:5; /* RO */
- unsigned long rsvd_62_63:2;
- } sx;
- struct uv2h_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47_49:3;
- unsigned long nodes_per_bit:7; /* RO */
- unsigned long ni_port:5; /* RO */
- unsigned long rsvd_62_63:2;
- } s2;
- struct uv3h_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47:1;
- unsigned long router_select:1; /* RO */
- unsigned long rsvd_49:1;
- unsigned long nodes_per_bit:7; /* RO */
- unsigned long ni_port:5; /* RO */
- unsigned long rsvd_62_63:2;
- } s3;
- struct uv4h_node_id_s {
- unsigned long force1:1; /* RO */
- unsigned long manufacturer:11; /* RO */
- unsigned long part_number:16; /* RO */
- unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47:1;
- unsigned long router_select:1; /* RO */
- unsigned long rsvd_49:1;
- unsigned long nodes_per_bit:7; /* RO */
- unsigned long ni_port:5; /* RO */
- unsigned long rsvd_62_63:2;
- } s4;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_NODE_PRESENT_TABLE */
+/* UVH_RH_GAM_ADDR_MAP_CONFIG */
/* ========================================================================= */
-#define UVH_NODE_PRESENT_TABLE 0x1400UL
+#define UVH_RH_GAM_ADDR_MAP_CONFIG ( \
+ is_uv(UV4) ? 0x480000UL : \
+ is_uv(UV3) ? 0x1600000UL : \
+ is_uv(UV2) ? 0x1600000UL : \
+ 0)
-#define UV1H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV2H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
-#define UVH_NODE_PRESENT_TABLE_DEPTH ( \
- is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH : \
- is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \
- is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \
- /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH)
-#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000003c0UL
+/* UV3 unique defines */
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
-union uvh_node_present_table_u {
+/* UV2 unique defines */
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
+
+
+union uvh_rh_gam_addr_map_config_u {
unsigned long v;
- struct uvh_node_present_table_s {
- unsigned long nodes:64; /* RW */
+
+ /* UVH common struct */
+ struct uvh_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
} s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_addr_map_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_addr_map_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
+/* UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
+#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800c8UL : \
+ is_uv(UV3) ? 0x16000c8UL : \
+ is_uv(UV2) ? 0x16000c8UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+
+union uvh_rh_gam_alias_0_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3095,15 +3465,9 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
- struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
- } s1;
- struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3111,15 +3475,19 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3127,183 +3495,96 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
+/* UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
+#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800d0UL : \
+ is_uv(UV3) ? 0x16000d0UL : \
+ is_uv(UV2) ? 0x16000d0UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
+
+
+union uvh_rh_gam_alias_0_redirect_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_0_redirect_config_s {
unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
} s;
- struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
- } s1;
- struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_0_redirect_config_s {
unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
} sx;
- struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_0_redirect_config_s {
unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_0_redirect_config_s {
unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
} s3;
- struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_0_redirect_config_s {
unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
- } s4;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
+/* UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
+#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800d8UL : \
+ is_uv(UV3) ? 0x16000d8UL : \
+ is_uv(UV2) ? 0x16000d8UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+
+union uvh_rh_gam_alias_1_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3311,15 +3592,9 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
- struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
- unsigned long enable:1; /* RW */
- } s1;
- struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3327,15 +3602,19 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -3343,390 +3622,289 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
+/* UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR)
-
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800e0UL : \
+ is_uv(UV3) ? 0x16000e0UL : \
+ is_uv(UV2) ? 0x16000e0UL : \
+ 0)
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-
-union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+union uvh_rh_gam_alias_1_redirect_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
- struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s1;
- struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
+/* UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR)
-
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800e8UL : \
+ is_uv(UV3) ? 0x16000e8UL : \
+ is_uv(UV2) ? 0x16000e8UL : \
+ 0)
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-
-union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+union uvh_rh_gam_alias_2_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} s;
- struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s1;
- struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s4;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
+/* UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR)
+#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800f0UL : \
+ is_uv(UV3) ? 0x16000f0UL : \
+ is_uv(UV2) ? 0x16000f0UL : \
+ 0)
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-
-union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
+union uvh_rh_gam_alias_2_redirect_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
- struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s1;
- struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s4;
-};
-
-/* ========================================================================= */
-/* UVH_RH_GAM_CONFIG_MMR */
-/* ========================================================================= */
-#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL
-#define UVH_RH_GAM_CONFIG_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR)
-
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
-#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12
-#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
-#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
-
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-
-union uvh_rh_gam_config_mmr_u {
- unsigned long v;
- struct uvh_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s;
- struct uv1h_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_11:2;
- unsigned long mmiol_cfg:1; /* RW */
- unsigned long rsvd_13_63:51;
- } s1;
- struct uvxh_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } sx;
- struct uv2h_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
} s2;
- struct uv3h_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s3;
- struct uv4h_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s4;
};
/* ========================================================================= */
-/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
+/* UVH_RH_GAM_GRU_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \
- is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK)
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \
- is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT)
-
-union uvh_rh_gam_gru_overlay_config_mmr_u {
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x480010UL : \
+ is_uv(UV3) ? 0x1600010UL : \
+ is_uv(UV2) ? 0x1600010UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x00f0000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffffc000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT 62
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK 0x4000000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffff0000000UL : \
+ is_uv(UV2) ? 0x00003ffff0000000UL : \
+ 0)
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 28 : \
+ is_uv(UV2) ? 28 : \
+ -1)
+
+union uvh_rh_gam_gru_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_51:52;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_45:46;
+ unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} s;
- struct uv1h_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_27:28;
- unsigned long base:18; /* RW */
- unsigned long rsvd_46_47:2;
- unsigned long gr4:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long n_gru:4; /* RW */
- unsigned long rsvd_56_62:7;
- unsigned long enable:1; /* RW */
- } s1;
- struct uvxh_rh_gam_gru_overlay_config_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_gru_overlay_config_s {
unsigned long rsvd_0_45:46;
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_27:28;
- unsigned long base:18; /* RW */
+
+ /* UV4A unique struct */
+ struct uv4ah_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_24:25;
+ unsigned long undef_25:1; /* Undefined */
+ unsigned long base:26; /* RW */
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_24:25;
+ unsigned long undef_25:1; /* Undefined */
+ unsigned long base:20; /* RW */
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_gru_overlay_config_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_gru_overlay_config_s {
unsigned long rsvd_0_27:28;
unsigned long base:18; /* RW */
unsigned long rsvd_46_51:6;
@@ -3735,88 +3913,141 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long mode:1; /* RW */
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_24:25;
- unsigned long undef_25:1; /* Undefined */
- unsigned long base:20; /* RW */
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_27:28;
+ unsigned long base:18; /* RW */
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG ( \
+ is_uv(UV2) ? 0x1600030UL : \
+ 0)
+
+
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT 27
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK 0x00003ffff8000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT 46
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK 0x000fc00000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT 52
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK 0x00f0000000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV2) ? 27 : \
+ uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT"))
+
+union uvh_rh_gam_mmioh_overlay_config_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } sx;
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 ( \
+ is_uv(UV4) ? 0x483000UL : \
+ is_uv(UV3) ? 0x1603000UL : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmioh_overlay_config0_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config0_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s3;
- struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config0_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } sx;
+
+ /* UV4A unique struct */
struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s {
unsigned long rsvd_0_25:26;
unsigned long base:26; /* RW */
@@ -3825,73 +4056,94 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 */
/* ========================================================================= */
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 ( \
+ is_uv(UV4) ? 0x484000UL : \
+ is_uv(UV3) ? 0x1604000UL : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmioh_overlay_config1_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config1_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s3;
- struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config1_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } sx;
+
+ /* UV4A unique struct */
struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s {
unsigned long rsvd_0_25:26;
unsigned long base:26; /* RW */
@@ -3900,278 +4152,289 @@ union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
} s4a;
-};
-/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
-/* ========================================================================= */
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR)
-
-
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
-#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_mmioh_overlay_config_mmr_u {
- unsigned long v;
- struct uv1h_rh_gam_mmioh_overlay_config_mmr_s {
- unsigned long rsvd_0_29:30;
- unsigned long base:16; /* RW */
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
- unsigned long n_io:4; /* RW */
+ unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s1;
- struct uv2h_rh_gam_mmioh_overlay_config_mmr_s {
- unsigned long rsvd_0_26:27;
- unsigned long base:19; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
- unsigned long n_io:4; /* RW */
+ unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s2;
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 */
/* ========================================================================= */
-#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR)
-
-#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH)
-
-
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
-
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
-
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL
-
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK)
-
-union uvh_rh_gam_mmioh_redirect_config0_mmr_u {
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 ( \
+ is_uv(UV4) ? 0x483800UL : \
+ is_uv(UV3) ? 0x1603800UL : \
+ 0)
+
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \
+ is_uv(UV4) ? 128 : \
+ is_uv(UV3) ? 128 : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL
+
+/* UVH common defines */
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \
+ is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ 0)
+
+
+union uvh_rh_gam_mmioh_redirect_config0_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s3;
- struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s4;
- struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s {
+ } sx;
+
+ struct uv4ah_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:12; /* RW */
unsigned long rsvd_12_63:52;
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 */
/* ========================================================================= */
-#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR)
-
-#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH)
-
-
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
-
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
-
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL
-
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK)
-
-union uvh_rh_gam_mmioh_redirect_config1_mmr_u {
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 ( \
+ is_uv(UV4) ? 0x484800UL : \
+ is_uv(UV3) ? 0x1604800UL : \
+ 0)
+
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \
+ is_uv(UV4) ? 128 : \
+ is_uv(UV3) ? 128 : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL
+
+/* UVH common defines */
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \
+ is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ 0)
+
+
+union uvh_rh_gam_mmioh_redirect_config1_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s3;
- struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s4;
- struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s {
+ } sx;
+
+ struct uv4ah_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:12; /* RW */
unsigned long rsvd_12_63:52;
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
+/* UVH_RH_GAM_MMR_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \
- is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
- is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
-#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_mmr_overlay_config_mmr_u {
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x480028UL : \
+ is_uv(UV3) ? 0x1600028UL : \
+ is_uv(UV2) ? 0x1600028UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 26
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ is_uv(UV2) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ is_uv(UV2) ? 0x00003ffffc000000UL : \
+ 0)
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ is_uv(UV2) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmr_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s;
- struct uv1h_rh_gam_mmr_overlay_config_mmr_s {
- unsigned long rsvd_0_25:26;
- unsigned long base:20; /* RW */
- unsigned long dual_hub:1; /* RW */
- unsigned long rsvd_47_62:16;
- unsigned long enable:1; /* RW */
- } s1;
- struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
/* UVH_RTC */
/* ========================================================================= */
-#define UV1H_RTC 0x340000UL
-#define UV2H_RTC 0x340000UL
-#define UV3H_RTC 0x340000UL
-#define UV4H_RTC 0xe0000UL
#define UVH_RTC ( \
- is_uv1_hub() ? UV1H_RTC : \
- is_uv2_hub() ? UV2H_RTC : \
- is_uv3_hub() ? UV3H_RTC : \
- /*is_uv4_hub*/ UV4H_RTC)
+ is_uv(UV5) ? 0xe0000UL : \
+ is_uv(UV4) ? 0xe0000UL : \
+ is_uv(UV3) ? 0x340000UL : \
+ is_uv(UV2) ? 0x340000UL : \
+ 0)
+/* UVH common defines*/
#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
union uvh_rtc_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_rtc_s {
unsigned long real_time_clock:56; /* RW */
unsigned long rsvd_56_63:8;
} s;
+
+ /* UV5 unique struct */
+ struct uv5h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s2;
};
/* ========================================================================= */
@@ -4179,26 +4442,29 @@ union uvh_rtc_u {
/* ========================================================================= */
#define UVH_RTC1_INT_CONFIG 0x615c0UL
+/* UVH common defines*/
#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
-#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
-#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
-#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
-#define UVH_RTC1_INT_CONFIG_P_SHFT 13
-#define UVH_RTC1_INT_CONFIG_T_SHFT 15
-#define UVH_RTC1_INT_CONFIG_M_SHFT 16
-#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT 13
#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT 15
#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT 16
#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_rtc1_int_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_rtc1_int_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -4211,618 +4477,175 @@ union uvh_rtc1_int_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-
-/* ========================================================================= */
-/* UVH_SCRATCH5 */
-/* ========================================================================= */
-#define UV1H_SCRATCH5 0x2d0200UL
-#define UV2H_SCRATCH5 0x2d0200UL
-#define UV3H_SCRATCH5 0x2d0200UL
-#define UV4H_SCRATCH5 0xb0200UL
-#define UVH_SCRATCH5 ( \
- is_uv1_hub() ? UV1H_SCRATCH5 : \
- is_uv2_hub() ? UV2H_SCRATCH5 : \
- is_uv3_hub() ? UV3H_SCRATCH5 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5)
-
-#define UV1H_SCRATCH5_32 0x778
-#define UV2H_SCRATCH5_32 0x778
-#define UV3H_SCRATCH5_32 0x778
-#define UV4H_SCRATCH5_32 0x798
-#define UVH_SCRATCH5_32 ( \
- is_uv1_hub() ? UV1H_SCRATCH5_32 : \
- is_uv2_hub() ? UV2H_SCRATCH5_32 : \
- is_uv3_hub() ? UV3H_SCRATCH5_32 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_32)
-
-#define UVH_SCRATCH5_SCRATCH5_SHFT 0
-#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-
-union uvh_scratch5_u {
- unsigned long v;
- struct uvh_scratch5_s {
- unsigned long scratch5:64; /* RW, W1CS */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_SCRATCH5_ALIAS */
-/* ========================================================================= */
-#define UV1H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV4H_SCRATCH5_ALIAS 0xb0208UL
-#define UVH_SCRATCH5_ALIAS ( \
- is_uv1_hub() ? UV1H_SCRATCH5_ALIAS : \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS)
-
-#define UV1H_SCRATCH5_ALIAS_32 0x780
-#define UV2H_SCRATCH5_ALIAS_32 0x780
-#define UV3H_SCRATCH5_ALIAS_32 0x780
-#define UV4H_SCRATCH5_ALIAS_32 0x7a0
-#define UVH_SCRATCH5_ALIAS_32 ( \
- is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 : \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32)
-
-
-/* ========================================================================= */
-/* UVH_SCRATCH5_ALIAS_2 */
-/* ========================================================================= */
-#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
-#define UVH_SCRATCH5_ALIAS_2 ( \
- is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 : \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2)
-#define UVH_SCRATCH5_ALIAS_2_32 0x788
-
-
-/* ========================================================================= */
-/* UVXH_EVENT_OCCURRED2 */
-/* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2 0x70100UL
-
-#define UV2H_EVENT_OCCURRED2_32 0xb68
-#define UV3H_EVENT_OCCURRED2_32 0xb68
-#define UV4H_EVENT_OCCURRED2_32 0x608
-#define UVH_EVENT_OCCURRED2_32 ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32)
-
-
-#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
-
-#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+ /* UV5 unique struct */
+ struct uv5h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s5;
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
-#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
-#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
-#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
-#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
-#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
-#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
-#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
-#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
-#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
-#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
-#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
-#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
-#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
-#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
-#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
-#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
-#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
-#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
-#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
-#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
-#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
-#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
-#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
-#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
-#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
-#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
-#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
-#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
-#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
-#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
-#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
-#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
-#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
-#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
-#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
-#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
-#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
-#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
-#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+ /* UV4 unique struct */
+ struct uv4h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
-#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK)
+ /* UV3 unique struct */
+ struct uv3h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
-union uvh_event_occurred2_u {
- unsigned long v;
- struct uv2h_event_occurred2_s {
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_32_63:32;
+ /* UV2 unique struct */
+ struct uv2h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_event_occurred2_s {
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_32_63:32;
- } s3;
- struct uv4h_event_occurred2_s {
- unsigned long message_accelerator_int0:1; /* RW */
- unsigned long message_accelerator_int1:1; /* RW */
- unsigned long message_accelerator_int2:1; /* RW */
- unsigned long message_accelerator_int3:1; /* RW */
- unsigned long message_accelerator_int4:1; /* RW */
- unsigned long message_accelerator_int5:1; /* RW */
- unsigned long message_accelerator_int6:1; /* RW */
- unsigned long message_accelerator_int7:1; /* RW */
- unsigned long message_accelerator_int8:1; /* RW */
- unsigned long message_accelerator_int9:1; /* RW */
- unsigned long message_accelerator_int10:1; /* RW */
- unsigned long message_accelerator_int11:1; /* RW */
- unsigned long message_accelerator_int12:1; /* RW */
- unsigned long message_accelerator_int13:1; /* RW */
- unsigned long message_accelerator_int14:1; /* RW */
- unsigned long message_accelerator_int15:1; /* RW */
- unsigned long rtc_interval_int:1; /* RW */
- unsigned long bau_dashboard_int:1; /* RW */
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_50_63:14;
- } s4;
};
/* ========================================================================= */
-/* UVXH_EVENT_OCCURRED2_ALIAS */
-/* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
-
-#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610
-#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32)
-
-
-/* ========================================================================= */
-/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
+/* UVH_SCRATCH5 */
/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32)
+#define UVH_SCRATCH5 ( \
+ is_uv(UV5) ? 0xb0200UL : \
+ is_uv(UV4) ? 0xb0200UL : \
+ is_uv(UV3) ? 0x2d0200UL : \
+ is_uv(UV2) ? 0x2d0200UL : \
+ 0)
+#define UV5H_SCRATCH5 0xb0200UL
+#define UV4H_SCRATCH5 0xb0200UL
+#define UV3H_SCRATCH5 0x2d0200UL
+#define UV2H_SCRATCH5 0x2d0200UL
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UVH common defines*/
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UVXH common defines */
+#define UVXH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UVYH common defines */
+#define UVYH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVYH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UV5 unique defines */
+#define UV5H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV5H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+/* UV4 unique defines */
+#define UV4H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV4H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-union uvxh_lb_bau_sb_activation_status_2_u {
- unsigned long v;
- struct uvxh_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } sx;
- struct uv2h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s2;
- struct uv3h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s3;
- struct uv4h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s4;
-};
+/* UV3 unique defines */
+#define UV3H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-/* ========================================================================= */
-/* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */
-/* ========================================================================= */
-#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
-#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0
+/* UV2 unique defines */
+#define UV2H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
-#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
-union uv1h_lb_target_physical_apic_id_mask_u {
+union uvh_scratch5_u {
unsigned long v;
- struct uv1h_lb_target_physical_apic_id_mask_s {
- unsigned long bit_enables:32; /* RW */
- unsigned long rsvd_32_63:32;
- } s1;
-};
-/* ========================================================================= */
-/* UV3H_GR0_GAM_GR_CONFIG */
-/* ========================================================================= */
-#define UV3H_GR0_GAM_GR_CONFIG 0xc00028UL
-
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+ /* UVH common struct */
+ struct uvh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s;
-union uv3h_gr0_gam_gr_config_u {
- unsigned long v;
- struct uv3h_gr0_gam_gr_config_s {
- unsigned long m_skt:6; /* RW */
- unsigned long undef_6_9:4; /* Undefined */
- unsigned long subspace:1; /* RW */
- unsigned long reserved:53;
- } s3;
-};
+ /* UVXH common struct */
+ struct uvxh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } sx;
-/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_QUEUE_FIRST */
-/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL
+ /* UVYH common struct */
+ struct uvyh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } sy;
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL
+ /* UV5 unique struct */
+ struct uv5h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s5;
-union uv4h_lb_proc_intd_queue_first_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_queue_first_s {
- unsigned long undef_0_5:6; /* Undefined */
- unsigned long first_payload_address:40; /* RW */
+ /* UV4 unique struct */
+ struct uv4h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
} s4;
-};
-
-/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_QUEUE_LAST */
-/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL
+ /* UV3 unique struct */
+ struct uv3h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s3;
-union uv4h_lb_proc_intd_queue_last_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_queue_last_s {
- unsigned long undef_0_4:5; /* Undefined */
- unsigned long last_payload_address:41; /* RW */
- } s4;
+ /* UV2 unique struct */
+ struct uv2h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s2;
};
/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */
+/* UVH_SCRATCH5_ALIAS */
/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL
-
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL
+#define UVH_SCRATCH5_ALIAS ( \
+ is_uv(UV5) ? 0xb0208UL : \
+ is_uv(UV4) ? 0xb0208UL : \
+ is_uv(UV3) ? 0x2d0208UL : \
+ is_uv(UV2) ? 0x2d0208UL : \
+ 0)
+#define UV5H_SCRATCH5_ALIAS 0xb0208UL
+#define UV4H_SCRATCH5_ALIAS 0xb0208UL
+#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
-union uv4h_lb_proc_intd_soft_ack_clear_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_soft_ack_clear_s {
- unsigned long soft_ack_pending_flags:8; /* WP */
- } s4;
-};
/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */
+/* UVH_SCRATCH5_ALIAS_2 */
/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL
-
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL
+#define UVH_SCRATCH5_ALIAS_2 ( \
+ is_uv(UV5) ? 0xb0210UL : \
+ is_uv(UV4) ? 0xb0210UL : \
+ is_uv(UV3) ? 0x2d0210UL : \
+ is_uv(UV2) ? 0x2d0210UL : \
+ 0)
+#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
-union uv4h_lb_proc_intd_soft_ack_pending_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_soft_ack_pending_s {
- unsigned long soft_ack_flags:8; /* RW */
- } s4;
-};
#endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..b7253ef3205a 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -15,36 +15,29 @@ struct vdso_image {
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long alt, alt_len;
+ unsigned long extable_base, extable_len;
+ const void *extable;
- long sym_vvar_start; /* Negative offset to the vvar area */
-
- long sym_vvar_page;
- long sym_hpet_page;
- long sym_pvclock_page;
- long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
long sym___kernel_vsyscall;
long sym_int80_landing_pad;
+ long sym_vdso32_sigreturn_landing_pad;
+ long sym_vdso32_rt_sigreturn_landing_pad;
};
-#ifdef CONFIG_X86_64
extern const struct vdso_image vdso_image_64;
-#endif
-
-#ifdef CONFIG_X86_X32
extern const struct vdso_image vdso_image_x32;
-#endif
-
-#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
extern const struct vdso_image vdso_image_32;
-#endif
-extern void __init init_vdso_image(const struct vdso_image *image);
+extern int __init init_vdso_image(const struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
+extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..136e5e57cfe1
--- /dev/null
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ VDSO_CLOCKMODE_TSC, \
+ VDSO_CLOCKMODE_PVCLOCK, \
+ VDSO_CLOCKMODE_HVCLOCK
+
+#define HAVE_VDSO_CLOCKMODE_HVCLOCK
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..ff1c11b9fa27
--- /dev/null
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/unistd.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret) :
+ "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
+ "rcx", "r11", "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..73b2e7ee8f0f
--- /dev/null
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * Copyright (C) 2019 ARM Limited.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLER__
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <asm/pvclock.h>
+#include <clocksource/hyperv_timer.h>
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+/*
+ * Declare the memory-mapped vclock data pages. These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const. The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+/*
+ * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifdef CONFIG_HYPERV_TIMER
+extern struct ms_hyperv_tsc_page hvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifndef BUILD_VDSO32
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+#else
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
+ : "memory", "edx");
+
+ return ret;
+}
+
+static __always_inline long
+clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+static u64 vread_pvclock(void)
+{
+ const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
+ u32 version;
+ u64 ret;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+
+ do {
+ version = pvclock_read_begin(pvti);
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
+
+ ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
+ } while (pvclock_read_retry(pvti, version));
+
+ return ret & S64_MAX;
+}
+#endif
+
+#ifdef CONFIG_HYPERV_TIMER
+static u64 vread_hvclock(void)
+{
+ u64 tsc, time;
+
+ if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time))
+ return time & S64_MAX;
+
+ return U64_MAX;
+}
+#endif
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_time_data *vd)
+{
+ if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
+ return (u64)rdtsc_ordered() & S64_MAX;
+ /*
+ * For any memory-mapped vclock type, we need to make sure that gcc
+ * doesn't cleverly hoist a load before the mode check. Otherwise we
+ * might end up touching the memory-mapped page even if the vclock in
+ * question isn't enabled, which will segfault. Hence the barriers.
+ */
+#ifdef CONFIG_PARAVIRT_CLOCK
+ if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) {
+ barrier();
+ return vread_pvclock();
+ }
+#endif
+#ifdef CONFIG_HYPERV_TIMER
+ if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) {
+ barrier();
+ return vread_hvclock();
+ }
+#endif
+ return U64_MAX;
+}
+
+static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc)
+{
+ return true;
+}
+#define vdso_clocksource_ok arch_vdso_clocksource_ok
+
+/*
+ * Clocksource read value validation to handle PV and HyperV clocksources
+ * which can be invalidated asynchronously and indicate invalidation by
+ * returning U64_MAX, which can be effectively tested by checking for a
+ * negative value after casting it to s64.
+ *
+ * This effectively forces a S64_MAX mask on the calculations, unlike the
+ * U64_MAX mask normally used by x86 clocksources.
+ */
+static inline bool arch_vdso_cycles_ok(u64 cycles)
+{
+ return (s64)cycles >= 0;
+}
+#define vdso_cycles_ok arch_vdso_cycles_ok
+
+/*
+ * x86 specific calculation of nanoseconds for the current cycle count
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @vd->cycles_last. If not then use @vd->cycles_last, which is the base
+ * time of the current conversion period.
+ *
+ * This variant also uses a custom mask because while the clocksource mask of
+ * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
+ * U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above
+ * declares everything with the MSB/Sign-bit set as invalid. Therefore the
+ * effective mask is S64_MAX.
+ */
+static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base)
+{
+ u64 delta = cycles - vc->cycle_last;
+
+ /*
+ * Negative motion and deltas which can cause multiplication
+ * overflow require special treatment. This check covers both as
+ * negative motion is guaranteed to be greater than @vc::max_cycles
+ * due to unsigned comparison.
+ *
+ * Due to the MSB/Sign-bit being used as invalid marker (see
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that
+ * case is also unlikely and will also take the unlikely path here.
+ */
+ if (unlikely(delta > vc->max_cycles)) {
+ /*
+ * Due to the above mentioned TSC wobbles, filter out
+ * negative motion. Per the above masking, the effective
+ * sign bit is now bit 62.
+ */
+ if (delta & (1ULL << 62))
+ return base >> vc->shift;
+
+ /* Handle multiplication overflow gracefully */
+ return mul_u64_u32_add_u64_shr(delta & S64_MAX, vc->mult, base, vc->shift);
+ }
+
+ return ((delta * vc->mult) + base) >> vc->shift;
+}
+#define vdso_calc_ns vdso_calc_ns
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..7000aeb59aa2
--- /dev/null
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+
+/* PAUSE is a good thing to insert into busy-wait loops. */
+static __always_inline void native_pause(void)
+{
+ asm volatile("pause" ::: "memory");
+}
+
+static __always_inline void cpu_relax(void)
+{
+ native_pause();
+}
+
+struct getcpu_cache;
+
+notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..4aa311a923f2
--- /dev/null
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#define __VDSO_PAGES 6
+
+#define VDSO_NR_VCLOCK_PAGES 2
+#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE)
+#define VDSO_PAGE_PVCLOCK_OFFSET 0
+#define VDSO_PAGE_HVCLOCK_OFFSET 1
+
+#ifndef __ASSEMBLER__
+
+#include <vdso/datapage.h>
+#include <asm/vgtod.h>
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
new file mode 100644
index 000000000000..5d471253c755
--- /dev/null
+++ b/arch/x86/include/asm/vermagic.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#ifdef CONFIG_X86_64
+/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M486SX
+#define MODULE_PROC_FAMILY "486SX "
+#elif defined CONFIG_M486
+#define MODULE_PROC_FAMILY "486 "
+#elif defined CONFIG_M586
+#define MODULE_PROC_FAMILY "586 "
+#elif defined CONFIG_M586TSC
+#define MODULE_PROC_FAMILY "586TSC "
+#elif defined CONFIG_M586MMX
+#define MODULE_PROC_FAMILY "586MMX "
+#elif defined CONFIG_MATOM
+#define MODULE_PROC_FAMILY "ATOM "
+#elif defined CONFIG_M686
+#define MODULE_PROC_FAMILY "686 "
+#elif defined CONFIG_MPENTIUMII
+#define MODULE_PROC_FAMILY "PENTIUMII "
+#elif defined CONFIG_MPENTIUMIII
+#define MODULE_PROC_FAMILY "PENTIUMIII "
+#elif defined CONFIG_MPENTIUMM
+#define MODULE_PROC_FAMILY "PENTIUMM "
+#elif defined CONFIG_MPENTIUM4
+#define MODULE_PROC_FAMILY "PENTIUM4 "
+#elif defined CONFIG_MK6
+#define MODULE_PROC_FAMILY "K6 "
+#elif defined CONFIG_MK7
+#define MODULE_PROC_FAMILY "K7 "
+#elif defined CONFIG_MELAN
+#define MODULE_PROC_FAMILY "ELAN "
+#elif defined CONFIG_MCRUSOE
+#define MODULE_PROC_FAMILY "CRUSOE "
+#elif defined CONFIG_MEFFICEON
+#define MODULE_PROC_FAMILY "EFFICEON "
+#elif defined CONFIG_MWINCHIPC6
+#define MODULE_PROC_FAMILY "WINCHIPC6 "
+#elif defined CONFIG_MWINCHIP3D
+#define MODULE_PROC_FAMILY "WINCHIP3D "
+#elif defined CONFIG_MCYRIXIII
+#define MODULE_PROC_FAMILY "CYRIXIII "
+#elif defined CONFIG_MVIAC3_2
+#define MODULE_PROC_FAMILY "VIAC3-2 "
+#elif defined CONFIG_MVIAC7
+#define MODULE_PROC_FAMILY "VIAC7 "
+#elif defined CONFIG_MGEODEGX1
+#define MODULE_PROC_FAMILY "GEODEGX1 "
+#elif defined CONFIG_MGEODE_LX
+#define MODULE_PROC_FAMILY "GEODE "
+#else
+#error unknown processor family
+#endif
+
+#ifdef CONFIG_X86_32
+# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+#else
+# define MODULE_ARCH_VERMAGIC ""
+#endif
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..a0ce291abcae 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -2,92 +2,18 @@
#ifndef _ASM_X86_VGTOD_H
#define _ASM_X86_VGTOD_H
-#include <linux/compiler.h>
-#include <linux/clocksource.h>
-
-#include <uapi/linux/time.h>
-
-#ifdef BUILD_VDSO32_64
-typedef u64 gtod_long_t;
-#else
-typedef unsigned long gtod_long_t;
-#endif
-
-/*
- * There is one of these objects in the vvar page for each
- * vDSO-accelerated clockid. For high-resolution clocks, this encodes
- * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
- * clocks, this encodes the actual time.
- *
- * To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
- */
-struct vgtod_ts {
- u64 sec;
- u64 nsec;
-};
-
-#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
-#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
-
/*
- * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
- * so be carefull by modifying this structure.
+ * This check is required to prevent ARCH=um to include
+ * unwanted headers.
*/
-struct vsyscall_gtod_data {
- unsigned int seq;
-
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- struct vgtod_ts basetime[VGTOD_BASES];
-
- int tz_minuteswest;
- int tz_dsttime;
-};
-extern struct vsyscall_gtod_data vsyscall_gtod_data;
-
-extern int vclocks_used;
-static inline bool vclock_was_used(int vclock)
-{
- return READ_ONCE(vclocks_used) & (1 << vclock);
-}
-
-static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
-{
- unsigned int ret;
-
-repeat:
- ret = READ_ONCE(s->seq);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
- smp_rmb();
- return ret;
-}
-
-static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
- unsigned int start)
-{
- smp_rmb();
- return unlikely(s->seq != start);
-}
+#ifdef CONFIG_GENERIC_GETTIMEOFDAY
+#include <linux/compiler.h>
+#include <asm/clocksource.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
-static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
-{
- ++s->seq;
- smp_wmb();
-}
+#include <uapi/linux/time.h>
-static inline void gtod_write_end(struct vsyscall_gtod_data *s)
-{
- smp_wmb();
- ++s->seq;
-}
+#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h
new file mode 100644
index 000000000000..08ec328203ef
--- /dev/null
+++ b/arch/x86/include/asm/video.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VIDEO_H
+#define _ASM_X86_VIDEO_H
+
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+struct device;
+
+pgprot_t pgprot_framebuffer(pgprot_t prot,
+ unsigned long vm_start, unsigned long vm_end,
+ unsigned long offset);
+#define pgprot_framebuffer pgprot_framebuffer
+
+#ifdef CONFIG_VIDEO
+bool video_is_primary_device(struct device *dev);
+#define video_is_primary_device video_is_primary_device
+#endif
+
+#include <asm-generic/video.h>
+
+#endif /* _ASM_X86_VIDEO_H */
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644
index 1fc7a0d1e877..000000000000
--- a/arch/x86/include/asm/virtext.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
- unsigned long ecx = cpuid_ecx(1);
- return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/** Disable VMX on the current CPU
- *
- * vmxoff causes a undefined-opcode exception if vmxon was not run
- * on the CPU previously. Only call this function if you know VMX
- * is enabled.
- */
-static inline void cpu_vmxoff(void)
-{
- asm volatile ("vmxoff");
- cr4_clear_bits(X86_CR4_VMXE);
-}
-
-static inline int cpu_vmx_enabled(void)
-{
- return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
- if (cpu_vmx_enabled())
- cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
- if (msg)
- *msg = "not amd or hygon";
- return 0;
- }
-
- if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
- if (msg)
- *msg = "can't execute cpuid_8000000a";
- return 0;
- }
-
- if (!boot_cpu_has(X86_FEATURE_SVM)) {
- if (msg)
- *msg = "svm not available";
- return 0;
- }
- return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
- if (cpu_has_svm(NULL))
- cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index 26efbec94448..62ee19909903 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -36,7 +36,6 @@ struct vm86 {
unsigned long saved_sp0;
unsigned long flags;
- unsigned long screen_bitmap;
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
@@ -85,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
-#define free_vm86(t) do { } while(0)
+#define free_vm86(task) do { (void)(task); } while(0)
#endif /* CONFIG_VM86 */
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..49ce331f3ac6
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/cpufeature.h>
+#include <asm/page.h>
+#include <asm/pgtable_areas.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#ifdef CONFIG_X86_64
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
+}
+#endif
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_PSE);
+}
+
+#endif
+
+#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..c9cf43d5ef23
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+#ifndef _ASM_X86_VMWARE_H
+#define _ASM_X86_VMWARE_H
+
+#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
+#include <linux/stringify.h>
+
+/*
+ * VMware hypercall ABI.
+ *
+ * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall)
+ * have up to 6 input and 6 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg2 - Hypercall command
+ * arg3 bits [15:0] - Port number, LB and direction flags
+ *
+ * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB
+ * hypercalls. They also have up to 6 input and 6 output on registers
+ * arguments, with different argument to register mapping:
+ * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3),
+ * %rsi (arg4), %rdi (arg5).
+ *
+ * - High bandwidth (HB) hypercalls are I/O port based only. They have
+ * up to 7 input and 7 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5), %ebp (arg6).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg1 - Hypercall command
+ * arg3 bits [15:0] - Port number, HB and direction flags
+ *
+ * For compatibility purposes, x86_64 systems use only lower 32 bits
+ * for input and output arguments.
+ *
+ * The hypercall definitions differ in the low word of the %edx (arg3)
+ * in the following way: the old I/O port based interface uses the port
+ * number to distinguish between high- and low bandwidth versions, and
+ * uses IN/OUT instructions to define transfer direction.
+ *
+ * The new vmcall interface instead uses a set of flags to select
+ * bandwidth mode and transfer direction. The flags should be loaded
+ * into arg3 by any user and are automatically replaced by the port
+ * number if the I/O port method is used.
+ */
+
+#define VMWARE_HYPERVISOR_HB BIT(0)
+#define VMWARE_HYPERVISOR_OUT BIT(1)
+
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \
+ VMWARE_HYPERVISOR_HB)
+
+#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U
+
+#define VMWARE_CMD_GETVERSION 10
+#define VMWARE_CMD_GETHZ 45
+#define VMWARE_CMD_GETVCPU_INFO 68
+#define VMWARE_CMD_STEALCLOCK 91
+/*
+ * Hypercall command mask:
+ * bits [6:0] command, range [0, 127]
+ * bits [19:16] sub-command, range [0, 15]
+ */
+#define VMWARE_CMD_MASK 0xf007fU
+
+#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
+#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
+
+extern unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
+
+#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL
+#define VMWARE_TDX_HCALL_FUNC 1
+
+extern unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
+
+/*
+ * The low bandwidth call. The low word of %edx is presumed to have OUT bit
+ * set. The high word of %edx may contain input data from the caller.
+ */
+#define VMWARE_HYPERCALL \
+ ALTERNATIVE_2("movw %[port], %%dx\n\t" \
+ "inl (%%dx), %%eax", \
+ "vmcall", X86_FEATURE_VMCALL, \
+ "vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+static inline
+unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1,
+ unsigned long in3, u32 *out2,
+ u32 *out3, u32 *out4, u32 *out5)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4),
+ "=D" (*out5)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out1,
+ u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+#ifdef CONFIG_X86_64
+#define VMW_BP_CONSTRAINT "r"
+#else
+#define VMW_BP_CONSTRAINT "m"
+#endif
+
+/*
+ * High bandwidth calls are not supported on encrypted memory guests.
+ * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use
+ * low bandwidth hypercall if memory encryption is set.
+ * This assumption simplifies HB hypercall implementation to just I/O port
+ * based approach without alternative patching.
+ */
+static inline
+unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep outsb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep insb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+#undef VMW_BP_CONSTRAINT
+#undef VMWARE_HYPERCALL
+
+#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4e4133e86484..c85c50019523 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -1,93 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* vmx.h: VMX Architecture related definitions
* Copyright (c) 2004, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* A few random additions are:
* Copyright (C) 2006 Qumranet
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
- *
*/
#ifndef VMX_H
#define VMX_H
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/types.h>
+
#include <uapi/asm/vmx.h>
+#include <asm/trapnr.h>
+#include <asm/vmxfeatures.h>
+
+#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f)
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
-#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
-#define CPU_BASED_HLT_EXITING 0x00000080
-#define CPU_BASED_INVLPG_EXITING 0x00000200
-#define CPU_BASED_MWAIT_EXITING 0x00000400
-#define CPU_BASED_RDPMC_EXITING 0x00000800
-#define CPU_BASED_RDTSC_EXITING 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
-#define CPU_BASED_CR3_STORE_EXITING 0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
-#define CPU_BASED_CR8_STORE_EXITING 0x00100000
-#define CPU_BASED_TPR_SHADOW 0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
-#define CPU_BASED_MOV_DR_EXITING 0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
-#define CPU_BASED_USE_IO_BITMAPS 0x02000000
-#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
-#define CPU_BASED_MONITOR_EXITING 0x20000000
-#define CPU_BASED_PAUSE_EXITING 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING)
+#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING)
+#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING)
+#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING)
+#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING)
+#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING)
+#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING)
+#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
+#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING)
+#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS)
+#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
+#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
+#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
+#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING)
+#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING)
+#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING)
+#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS)
+#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG)
+#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS)
+#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING)
+#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING)
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS)
#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
/*
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
-#define SECONDARY_EXEC_DESC 0x00000004
-#define SECONDARY_EXEC_RDTSCP 0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
-#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
-#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
-#define SECONDARY_EXEC_ENABLE_PML 0x00020000
-#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000
-#define SECONDARY_EXEC_XSAVES 0x00100000
-#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
-#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
-#define SECONDARY_EXEC_TSC_SCALING 0x02000000
-
-#define PIN_BASED_EXT_INTR_MASK 0x00000001
-#define PIN_BASED_NMI_EXITING 0x00000008
-#define PIN_BASED_VIRTUAL_NMIS 0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
-#define PIN_BASED_POSTED_INTR 0x00000080
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES)
+#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT)
+#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING)
+#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC)
+#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID)
+#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING)
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST)
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT)
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY)
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING)
+#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING)
+#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID)
+#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC)
+#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS)
+#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
+#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
+#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
+#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
+#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
+#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
+#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
+#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION)
+#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING)
+
+/*
+ * Definitions of Tertiary Processor-Based VM-Execution Controls.
+ */
+#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT)
+
+#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
+#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
+#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS)
+#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER)
+#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR)
#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
@@ -103,6 +106,7 @@
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
+#define VM_EXIT_LOAD_CET_STATE 0x10000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -116,18 +120,22 @@
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
+#define VM_ENTRY_LOAD_CET_STATE 0x00100000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA 0x00000020
-#define VMX_MISC_ACTIVITY_HLT 0x00000040
-#define VMX_MISC_ZERO_LEN_INS 0x40000000
-
/* VMFUNC functions */
-#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
+
+#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
#define VMFUNC_EPTP_ENTRIES 512
+#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48)
+#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
+#define VMX_BASIC_INOUT BIT_ULL(54)
+#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -138,9 +146,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
}
+static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(53, 50)) >> 50;
+}
+
+static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype)
+{
+ return revision | ((u64)size << 32) | ((u64)memtype << 50);
+}
+
+#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5)
+#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6)
+#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7)
+#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8)
+#define VMX_MISC_INTEL_PT BIT_ULL(14)
+#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15)
+#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28)
+#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29)
+#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30)
+#define VMX_MISC_MSR_LIST_MULTIPLIER 512
+
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
{
- return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ return vmx_misc & GENMASK_ULL(4, 0);
}
static inline int vmx_misc_cr3_count(u64 vmx_misc)
@@ -162,6 +191,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc)
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
POSTED_INTR_NV = 0x00000002,
+ LAST_PID_POINTER_INDEX = 0x00000008,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@@ -219,12 +249,19 @@ enum vmcs_field {
VMREAD_BITMAP_HIGH = 0x00002027,
VMWRITE_BITMAP = 0x00002028,
VMWRITE_BITMAP_HIGH = 0x00002029,
+ VE_INFORMATION_ADDRESS = 0x0000202A,
+ VE_INFORMATION_ADDRESS_HIGH = 0x0000202B,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
ENCLS_EXITING_BITMAP = 0x0000202E,
ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
+ TERTIARY_VM_EXEC_CONTROL = 0x00002034,
+ TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035,
+ SHARED_EPT_POINTER = 0x0000203C,
+ PID_POINTER_TABLE = 0x00002042,
+ PID_POINTER_TABLE_HIGH = 0x00002043,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@@ -273,6 +310,7 @@ enum vmcs_field {
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
PLE_GAP = 0x00004020,
PLE_WINDOW = 0x00004022,
+ NOTIFY_WINDOW = 0x00004024,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
@@ -300,7 +338,7 @@ enum vmcs_field {
GUEST_LDTR_AR_BYTES = 0x00004820,
GUEST_TR_AR_BYTES = 0x00004822,
GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
- GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_ACTIVITY_STATE = 0x00004826,
GUEST_SYSENTER_CS = 0x0000482A,
VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
HOST_IA32_SYSENTER_CS = 0x00004c00,
@@ -334,6 +372,9 @@ enum vmcs_field {
GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
GUEST_SYSENTER_ESP = 0x00006824,
GUEST_SYSENTER_EIP = 0x00006826,
+ GUEST_S_CET = 0x00006828,
+ GUEST_SSP = 0x0000682a,
+ GUEST_INTR_SSP_TABLE = 0x0000682c,
HOST_CR0 = 0x00006c00,
HOST_CR3 = 0x00006c02,
HOST_CR4 = 0x00006c04,
@@ -346,6 +387,9 @@ enum vmcs_field {
HOST_IA32_SYSENTER_EIP = 0x00006c12,
HOST_RSP = 0x00006c14,
HOST_RIP = 0x00006c16,
+ HOST_S_CET = 0x00006c18,
+ HOST_SSP = 0x00006c1a,
+ HOST_INTR_SSP_TABLE = 0x00006c1c
};
/*
@@ -363,20 +407,21 @@ enum vmcs_field {
#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
-#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
-#define INTR_TYPE_RESERVED (1 << 8) /* reserved */
-#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
-#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
-#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
-#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
-#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */
+#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */
+#define INTR_TYPE_NMI_INTR (EVENT_TYPE_NMI << 8) /* NMI */
+#define INTR_TYPE_HARD_EXCEPTION (EVENT_TYPE_HWEXC << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR (EVENT_TYPE_SWINT << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (EVENT_TYPE_PRIV_SWEXC << 8) /* ICE breakpoint */
+#define INTR_TYPE_SOFT_EXCEPTION (EVENT_TYPE_SWEXC << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT (EVENT_TYPE_OTHER << 8) /* other event */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
#define GUEST_INTR_STATE_MOV_SS 0x00000002
#define GUEST_INTR_STATE_SMI 0x00000004
#define GUEST_INTR_STATE_NMI 0x00000008
+#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010
/* GUEST_ACTIVITY_STATE flags */
#define GUEST_ACTIVITY_ACTIVE 0
@@ -492,20 +537,34 @@ enum vmcs_field {
#define VMX_EPTP_PWL_4 0x18ull
#define VMX_EPTP_PWL_5 0x20ull
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
+/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */
#define VMX_EPTP_MT_MASK 0x7ull
-#define VMX_EPTP_MT_WB 0x6ull
-#define VMX_EPTP_MT_UC 0x0ull
+#define VMX_EPTP_MT_WB X86_MEMTYPE_WB
+#define VMX_EPTP_MT_UC X86_MEMTYPE_UC
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_ACCESS_BIT (1ull << 8)
#define VMX_EPT_DIRTY_BIT (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
+static inline u8 vmx_eptp_page_walk_level(u64 eptp)
+{
+ u64 encoded_level = eptp & VMX_EPTP_PWL_MASK;
+
+ if (encoded_level == VMX_EPTP_PWL_5)
+ return 5;
+
+ /* @eptp must be pre-validated by the caller. */
+ WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4);
+ return 4;
+}
+
/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
@@ -521,28 +580,38 @@ struct vmx_msr_entry {
/*
* Exit Qualifications for entry failure during or after loading guest state
*/
-#define ENTRY_FAIL_DEFAULT 0
-#define ENTRY_FAIL_PDPTE 2
-#define ENTRY_FAIL_NMI 3
-#define ENTRY_FAIL_VMCS_LINK_PTR 4
+enum vm_entry_failure_code {
+ ENTRY_FAIL_DEFAULT = 0,
+ ENTRY_FAIL_PDPTE = 2,
+ ENTRY_FAIL_NMI = 3,
+ ENTRY_FAIL_VMCS_LINK_PTR = 4,
+};
/*
* Exit Qualifications for EPT Violations
*/
-#define EPT_VIOLATION_ACC_READ_BIT 0
-#define EPT_VIOLATION_ACC_WRITE_BIT 1
-#define EPT_VIOLATION_ACC_INSTR_BIT 2
-#define EPT_VIOLATION_READABLE_BIT 3
-#define EPT_VIOLATION_WRITABLE_BIT 4
-#define EPT_VIOLATION_EXECUTABLE_BIT 5
-#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
-#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
-#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
-#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
-#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
-#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
-#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
-#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
+#define EPT_VIOLATION_ACC_READ BIT(0)
+#define EPT_VIOLATION_ACC_WRITE BIT(1)
+#define EPT_VIOLATION_ACC_INSTR BIT(2)
+#define EPT_VIOLATION_PROT_READ BIT(3)
+#define EPT_VIOLATION_PROT_WRITE BIT(4)
+#define EPT_VIOLATION_PROT_EXEC BIT(5)
+#define EPT_VIOLATION_EXEC_FOR_RING3_LIN BIT(6)
+#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \
+ EPT_VIOLATION_PROT_WRITE | \
+ EPT_VIOLATION_PROT_EXEC)
+#define EPT_VIOLATION_GVA_IS_VALID BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
+
+#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
+
+static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
+ (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
+
+/*
+ * Exit Qualifications for NOTIFY VM EXIT
+ */
+#define NOTIFY_VM_CONTEXT_INVALID BIT(0)
/*
* VM-instruction error numbers
@@ -575,6 +644,20 @@ enum vm_instruction_error_number {
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
};
+/*
+ * VM-instruction errors that can be encountered on VM-Enter, used to trace
+ * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter
+ * from a SMI Transfer Monitor are not included as things have gone seriously
+ * sideways if we get one of those...
+ */
+#define VMX_VMENTER_INSTRUCTION_ERRORS \
+ { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \
+ { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \
+ { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \
+ { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \
+ { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \
+ { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
+
enum vmx_l1d_flush_state {
VMENTER_L1D_FLUSH_AUTO,
VMENTER_L1D_FLUSH_NEVER,
@@ -586,4 +669,13 @@ enum vmx_l1d_flush_state {
extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+struct vmx_ve_information {
+ u32 exit_reason;
+ u32 delivery;
+ u64 exit_qualification;
+ u64 guest_linear_address;
+ u64 guest_physical_address;
+ u16 eptp_index;
+};
+
#endif
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
new file mode 100644
index 000000000000..09b1d7e607c1
--- /dev/null
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VMXFEATURES_H
+#define _ASM_X86_VMXFEATURES_H
+
+/*
+ * Defines VMX CPU feature bits
+ */
+#define NVMXINTS 5 /* N 32-bit words worth of info */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit
+ * is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
+#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */
+#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */
+#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */
+
+/* EPT/VPID features, scattered to bits 16-23 */
+#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */
+#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
+#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */
+
+/* Aggregated APIC features 24-27 */
+#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+
+/* VM-Functions, shifted to bits 28-31 */
+#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */
+
+/* Primary Processor-Based VM-Execution Controls, word 1 */
+#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
+#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */
+#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */
+#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
+#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/
+#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */
+#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
+#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */
+
+/* Secondary Processor-Based VM-Execution Controls, word 2 */
+#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */
+#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
+#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */
+#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
+#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/
+#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */
+#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
+#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
+
+/* Tertiary Processor-Based VM-Execution Controls, word 3 */
+#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
+#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
@@ -13,13 +14,24 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
return false;
}
#endif
+/*
+ * The (legacy) vsyscall page is the long page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+ return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 3f32dfc2ab73..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
- */
-#define DECLARE_VVAR(offset, type, name) \
- EMIT_VVAR(name, offset)
-
-#else
-
-extern char __vvar_page;
-
-#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name __attribute__((visibility("hidden")));
-
-#define VVAR(name) (vvar_ ## name)
-
-#define DEFINE_VVAR(type, name) \
- type name \
- __attribute__((section(".vvar_" #name), aligned(16))) __visible
-
-#endif
-
-/* DECLARE_VVAR(offset, type, name) */
-
-DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
-
-#undef DECLARE_VVAR
-
-#endif
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 06006b0351f3..422a47746657 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -2,47 +2,15 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
struct word_at_a_time {
const unsigned long one_bits, high_bits;
};
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
-#ifdef CONFIG_64BIT
-
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
-{
- return mask*0x0001020304050608ul >> 56;
-}
-
-#else /* 32-bit case */
-
-/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-static inline long count_masked_bytes(long mask)
-{
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001+mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
-}
-
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -56,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
return bits;
}
+#ifdef CONFIG_64BIT
+
+/* Keep the initial has_zero() value for both bitmask and size calc */
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+#else
+
+/* Create the final mask for both bytemask and size */
static inline unsigned long create_zero_mask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
@@ -65,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline unsigned long find_zero(unsigned long mask)
{
- return count_masked_bytes(mask);
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
}
+#endif
+
/*
* Load an unaligned word from kernel space.
*
@@ -79,27 +69,15 @@ static inline unsigned long find_zero(unsigned long mask)
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
- unsigned long ret, dummy;
+ unsigned long ret;
- asm(
- "1:\tmov %2,%0\n"
+ asm volatile(
+ "1: mov %[mem], %[ret]\n"
"2:\n"
- ".section .fixup,\"ax\"\n"
- "3:\t"
- "lea %2,%1\n\t"
- "and %3,%1\n\t"
- "mov (%1),%0\n\t"
- "leal %2,%%ecx\n\t"
- "andl %4,%%ecx\n\t"
- "shll $3,%%ecx\n\t"
- "shr %%cl,%0\n\t"
- "jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- :"=&r" (ret),"=&c" (dummy)
- :"m" (*(unsigned long *)addr),
- "i" (-sizeof(unsigned long)),
- "i" (sizeof(unsigned long)-1));
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD)
+ : [ret] "=r" (ret)
+ : [mem] "m" (*(unsigned long *)addr));
+
return ret;
}
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b85a7c54c6a1..6c8a6ead84f6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -2,33 +2,26 @@
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H
-#include <asm/bootparam.h>
-
+struct ghcb;
struct mpc_bus;
struct mpc_cpu;
+struct pt_regs;
struct mpc_table;
struct cpuinfo_x86;
+struct irq_domain;
/**
* struct x86_init_mpparse - platform specific mpparse ops
- * @mpc_record: platform specific mpc record accounting
* @setup_ioapic_ids: platform specific ioapic id override
- * @mpc_apic_id: platform specific mpc apic id assignment
- * @smp_read_mpc_oem: platform specific oem mpc table setup
- * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL)
- * @mpc_oem_bus_info: platform specific mpc bus info
- * @find_smp_config: find the smp configuration
- * @get_smp_config: get the smp configuration
+ * @find_mptable: Find MPTABLE early to reserve the memory region
+ * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init()
+ * @parse_smp_cfg: Parse the SMP configuration data
*/
struct x86_init_mpparse {
- void (*mpc_record)(unsigned int mode);
void (*setup_ioapic_ids)(void);
- int (*mpc_apic_id)(struct mpc_cpu *m);
- void (*smp_read_mpc_oem)(struct mpc_table *mpc);
- void (*mpc_oem_pci_bus)(struct mpc_bus *m);
- void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
- void (*find_smp_config)(void);
- void (*get_smp_config)(unsigned int early);
+ void (*find_mptable)(void);
+ void (*early_parse_smp_cfg)(void);
+ void (*parse_smp_cfg)(void);
};
/**
@@ -37,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
@@ -50,14 +44,16 @@ struct x86_init_resources {
* @pre_vector_init: init code to run before interrupt vectors
* are set up.
* @intr_init: interrupt init code
- * @trap_init: platform specific trap setup
+ * @intr_mode_select: interrupt delivery mode selection
* @intr_mode_init: interrupt delivery mode setup
+ * @create_pci_msi_domain: Create the PCI/MSI interrupt domain
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
- void (*trap_init)(void);
+ void (*intr_mode_select)(void);
void (*intr_mode_init)(void);
+ struct irq_domain *(*create_pci_msi_domain)(void);
};
/**
@@ -83,7 +79,7 @@ struct x86_init_paging {
/**
* struct x86_init_timers - platform specific timer setup
- * @setup_perpcu_clockev: set up the per cpu clock event device for the
+ * @setup_percpu_clockev: set up the per cpu clock event device for the
* boot cpu
* @timer_init: initialize the platform timer (default PIT/HPET)
* @wallclock_init: init the wallclock device
@@ -121,6 +117,7 @@ struct x86_init_pci {
* @init_platform: platform setup
* @guest_late_init: guest late init
* @x2apic_available: X2APIC detection
+ * @msi_ext_dest_id: MSI supports 15-bit APIC IDs
* @init_mem_mapping: setup early mappings during init_mem_mapping()
* @init_after_bootmem: guest init after boot allocator is finished
*/
@@ -128,21 +125,49 @@ struct x86_hyper_init {
void (*init_platform)(void);
void (*guest_late_init)(void);
bool (*x2apic_available)(void);
+ bool (*msi_ext_dest_id)(void);
void (*init_mem_mapping)(void);
void (*init_after_bootmem)(void);
};
/**
* struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_pointer: set RSDP address
* @get_root_pointer: get RSDP address
* @reduced_hw_early_init: hardware reduced platform early init
*/
struct x86_init_acpi {
+ void (*set_root_pointer)(u64 addr);
u64 (*get_root_pointer)(void);
void (*reduced_hw_early_init)(void);
};
/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare: Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish: Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required: Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required: Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin: Begin the two-step process of converting shared memory back
+ * to private. It stops the new conversions from being started
+ * and waits in-flight conversions to finish, if possible.
+ * @enc_kexec_finish: Finish the two-step process of converting shared memory to
+ * private. All memory is private after the call when
+ * the function returns.
+ * It is called on only one CPU while the others are shut down
+ * and with interrupts disabled.
+ */
+struct x86_guest {
+ int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_tlb_flush_required)(bool enc);
+ bool (*enc_cache_flush_required)(void);
+ void (*enc_kexec_begin)(void);
+ void (*enc_kexec_finish)(void);
+};
+
+/**
* struct x86_init_ops - functions for platform specific setup
*
*/
@@ -163,11 +188,14 @@ struct x86_init_ops {
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::topo.pkg_id
+ * @parallel_bringup: Parallel bringup control
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
};
struct timespec64;
@@ -201,7 +229,7 @@ struct x86_legacy_devices {
* given platform/subarch.
* @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
* is absent.
- * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
+ * @X86_LEGACY_I8042_EXPECTED_PRESENT: the controller is likely to be
* present, the i8042 driver should probe for controller existence.
*/
enum x86_legacy_i8042_state {
@@ -216,6 +244,8 @@ enum x86_legacy_i8042_state {
* @i8042: indicated if we expect the device to have i8042 controller
* present.
* @rtc: this device has a CMOS real-time clock present
+ * @warm_reset: 1 if platform allows warm reset, else 0
+ * @no_vga: 1 if (FADT.boot_flags & ACPI_FADT_NO_VGA) is set, else 0
* @reserve_bios_regions: boot code will search for the EBDA address and the
* start of the 640k - 1M BIOS region. If false, the platform must
* ensure that its memory map correctly reserves sub-1MB regions as needed.
@@ -234,10 +264,26 @@ struct x86_legacy_features {
/**
* struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
*
- * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
+ * @pin_vcpu: pin current vcpu to specified physical
+ * cpu (run rarely)
+ * @sev_es_hcall_prepare: Load additional hypervisor-specific
+ * state into the GHCB when doing a VMMCALL under
+ * SEV-ES. Called from the #VC exception handler.
+ * @sev_es_hcall_finish: Copies state from the GHCB back into the
+ * processor (or pt_regs). Also runs checks on the
+ * state returned from the hypervisor after a
+ * VMMCALL under SEV-ES. Needs to return 'false'
+ * if the checks fail. Called from the #VC
+ * exception handler.
+ * @is_private_mmio: For CoCo VMs, must map MMIO address as private.
+ * Used when device is emulated by a paravisor
+ * layer in the VM context.
*/
struct x86_hyper_runtime {
void (*pin_vcpu)(int cpu);
+ void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
+ bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
+ bool (*is_private_mmio)(u64 addr);
};
/**
@@ -246,8 +292,10 @@ struct x86_hyper_runtime {
* @calibrate_tsc: calibrate TSC, if different from CPU
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
- * @is_untracked_pat_range exclude from PAT logic
- * @nmi_init enable NMI on cpus
+ * @iommu_shutdown: set by an IOMMU driver for shutdown if necessary
+ * @is_untracked_pat_range: exclude from PAT logic
+ * @nmi_init: enable NMI on cpus
+ * @get_nmi_reason: get the reason an NMI was received
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
* @apic_post_init: adjust apic if needed
@@ -259,7 +307,10 @@ struct x86_hyper_runtime {
* possible in x86_early_init_platform_quirks() by
* only using the current x86_hardware_subarch
* semantics.
+ * @realmode_reserve: reserve memory for realmode trampoline
+ * @realmode_init: initialize realmode trampoline
* @hyper: x86 hypervisor specific runtime callbacks
+ * @guest: guest incarnations callbacks
*/
struct x86_platform_ops {
unsigned long (*calibrate_cpu)(void);
@@ -275,16 +326,10 @@ struct x86_platform_ops {
void (*apic_post_init)(void);
struct x86_legacy_features legacy;
void (*set_legacy_features)(void);
+ void (*realmode_reserve)(void);
+ void (*realmode_init)(void);
struct x86_hyper_runtime hyper;
-};
-
-struct pci_dev;
-
-struct x86_msi_ops {
- int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
- void (*teardown_msi_irq)(unsigned int irq);
- void (*teardown_msi_irqs)(struct pci_dev *dev);
- void (*restore_msi_irqs)(struct pci_dev *dev);
+ struct x86_guest guest;
};
struct x86_apic_ops {
@@ -301,6 +346,10 @@ extern struct x86_apic_ops x86_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
+extern bool bool_x86_init_noop(void);
+extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
+extern int set_rtc_noop(const struct timespec64 *now);
+extern void get_rtc_noop(struct timespec64 *now);
#endif
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..a3c29b1496c8 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,17 +89,40 @@
* Sub-leaf 2: EAX: host tsc frequency in kHz
*/
+#define XEN_CPUID_TSC_EMULATED (1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE (1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL (1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT (0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE (1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE (2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP (3u)
+
/*
* Leaf 5 (0x40000x04)
* HVM-specific features
* Sub-leaf 0: EAX: Features
* Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
*/
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+/*
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
+#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6)
/*
* Leaf 6 (0x40000x05)
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 068d9b067c83..62bdceb594f1 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -23,7 +23,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
/* No need for a barrier -- XCHG is a barrier on x86. */
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
-extern int xen_have_vector_callback;
+extern bool xen_have_vector_callback;
/*
* Events delivered via platform PCI interrupts are always
@@ -34,4 +34,5 @@ static inline bool xen_support_evtchn_rebind(void)
return (!xen_hvm_domain() || xen_have_vector_callback);
}
+extern bool xen_percpu_upcall;
#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index ef05bea7010d..a16d4631547c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -38,11 +38,13 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/pgtable.h>
+#include <linux/instrumentation.h>
#include <trace/events/xen.h>
+#include <asm/alternative.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/smap.h>
#include <asm/nospec-branch.h>
@@ -82,15 +84,25 @@ struct xen_dm_op_buf;
* - clobber the rest
*
* The result certainly isn't pretty, and it really shows up cpp's
- * weakness as as macro language. Sorry. (But let's just give thanks
+ * weakness as a macro language. Sorry. (But let's just give thanks
* there aren't more than 5 arguments...)
*/
-extern struct { char _entry[32]; } hypercall_page[];
+void xen_hypercall_func(void);
+DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
-#define __HYPERCALL "call hypercall_page+%c[offset]"
-#define __HYPERCALL_ENTRY(x) \
- [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+#ifdef MODULE
+#define __ADDRESSABLE_xen_hypercall
+#else
+#define __ADDRESSABLE_xen_hypercall \
+ __stringify(.global STATIC_CALL_KEY(xen_hypercall);)
+#endif
+
+#define __HYPERCALL \
+ __ADDRESSABLE_xen_hypercall \
+ __stringify(call STATIC_CALL_TRAMP(xen_hypercall))
+
+#define __HYPERCALL_ENTRY(x) "a" (x)
#ifdef CONFIG_X86_32
#define __HYPERCALL_RETREG "eax"
@@ -148,7 +160,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_0ARG(); \
asm volatile (__HYPERCALL \
: __HYPERCALL_0PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER0); \
(type)__res; \
})
@@ -159,7 +171,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_1ARG(a1); \
asm volatile (__HYPERCALL \
: __HYPERCALL_1PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER1); \
(type)__res; \
})
@@ -170,7 +182,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_2ARG(a1, a2); \
asm volatile (__HYPERCALL \
: __HYPERCALL_2PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER2); \
(type)__res; \
})
@@ -181,7 +193,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_3ARG(a1, a2, a3); \
asm volatile (__HYPERCALL \
: __HYPERCALL_3PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER3); \
(type)__res; \
})
@@ -192,7 +204,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_4ARG(a1, a2, a3, a4); \
asm volatile (__HYPERCALL \
: __HYPERCALL_4PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER4); \
(type)__res; \
})
@@ -206,14 +218,28 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
- asm volatile(CALL_NOSPEC
+ asm volatile(__HYPERCALL
: __HYPERCALL_5PARAM
- : [thunk_target] "a" (&hypercall_page[call])
+ : __HYPERCALL_ENTRY(call)
: __HYPERCALL_CLOBBER5);
return (long)__res;
}
+static __always_inline void __xen_stac(void)
+{
+ /*
+ * Suppress objtool seeing the STAC/CLAC and getting confused about it
+ * calling random code with AC=1.
+ */
+ asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags");
+}
+
+static __always_inline void __xen_clac(void)
+{
+ asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags");
+}
+
static inline long
privcmd_call(unsigned int call,
unsigned long a1, unsigned long a2,
@@ -222,13 +248,14 @@ privcmd_call(unsigned int call,
{
long res;
- stac();
+ __xen_stac();
res = xen_single_call(call, a1, a2, a3, a4, a5);
- clac();
+ __xen_clac();
return res;
}
+#ifdef CONFIG_XEN_PV
static inline int
HYPERVISOR_set_trap_table(struct trap_info *table)
{
@@ -261,7 +288,108 @@ HYPERVISOR_callback_op(int cmd, void *arg)
return _hypercall2(int, callback_op, cmd, arg);
}
+static __always_inline int
+HYPERVISOR_set_debugreg(int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static __always_inline unsigned long
+HYPERVISOR_get_debugreg(int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
static inline int
+HYPERVISOR_update_descriptor(u64 ma, u64 desc)
+{
+ return _hypercall2(int, update_descriptor, ma, desc);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
+ unsigned long flags)
+{
+ return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
+}
+
+static inline int
+HYPERVISOR_set_segment_base(int reg, unsigned long value)
+{
+ return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline void
+MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
+{
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = set;
+
+ trace_xen_mc_entry(mcl, 1);
+}
+
+static inline void
+MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
+ pte_t new_val, unsigned long flags)
+{
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = va;
+ mcl->args[1] = new_val.pte;
+ mcl->args[2] = flags;
+
+ trace_xen_mc_entry(mcl, 3);
+}
+
+static inline void
+MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
+ struct desc_struct desc)
+{
+ mcl->op = __HYPERVISOR_update_descriptor;
+ mcl->args[0] = maddr;
+ mcl->args[1] = *(unsigned long *)&desc;
+
+ trace_xen_mc_entry(mcl, 2);
+}
+
+static inline void
+MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
+ int count, int *success_count, domid_t domid)
+{
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)req;
+ mcl->args[1] = count;
+ mcl->args[2] = (unsigned long)success_count;
+ mcl->args[3] = domid;
+
+ trace_xen_mc_entry(mcl, 4);
+}
+
+static inline void
+MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
+ int *success_count, domid_t domid)
+{
+ mcl->op = __HYPERVISOR_mmuext_op;
+ mcl->args[0] = (unsigned long)op;
+ mcl->args[1] = count;
+ mcl->args[2] = (unsigned long)success_count;
+ mcl->args[3] = domid;
+
+ trace_xen_mc_entry(mcl, 4);
+}
+
+static inline void
+MULTI_stack_switch(struct multicall_entry *mcl,
+ unsigned long ss, unsigned long esp)
+{
+ mcl->op = __HYPERVISOR_stack_switch;
+ mcl->args[0] = ss;
+ mcl->args[1] = esp;
+
+ trace_xen_mc_entry(mcl, 2);
+}
+#endif
+
+static __always_inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
@@ -289,26 +417,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
return _hypercall1(int, platform_op, op);
}
-static inline int
-HYPERVISOR_set_debugreg(int reg, unsigned long value)
-{
- return _hypercall2(int, set_debugreg, reg, value);
-}
-
-static inline unsigned long
-HYPERVISOR_get_debugreg(int reg)
-{
- return _hypercall1(unsigned long, get_debugreg, reg);
-}
-
-static inline int
-HYPERVISOR_update_descriptor(u64 ma, u64 desc)
-{
- if (sizeof(u64) == sizeof(long))
- return _hypercall2(int, update_descriptor, ma, desc);
- return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
-}
-
static inline long
HYPERVISOR_memory_op(unsigned int cmd, void *arg)
{
@@ -322,28 +430,12 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
}
static inline int
-HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
- unsigned long flags)
-{
- if (sizeof(new_val) == sizeof(long))
- return _hypercall3(int, update_va_mapping, va,
- new_val.pte, flags);
- else
- return _hypercall4(int, update_va_mapping, va,
- new_val.pte, new_val.pte >> 32, flags);
-}
-extern int __must_check xen_event_channel_op_compat(int, void *);
-
-static inline int
HYPERVISOR_event_channel_op(int cmd, void *arg)
{
- int rc = _hypercall2(int, event_channel_op, cmd, arg);
- if (unlikely(rc == -ENOSYS))
- rc = xen_event_channel_op_compat(cmd, arg);
- return rc;
+ return _hypercall2(int, event_channel_op, cmd, arg);
}
-static inline int
+static __always_inline int
HYPERVISOR_xen_version(int cmd, void *arg)
{
return _hypercall2(int, xen_version, cmd, arg);
@@ -355,15 +447,10 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
return _hypercall3(int, console_io, cmd, count, str);
}
-extern int __must_check xen_physdev_op_compat(int, void *);
-
static inline int
HYPERVISOR_physdev_op(int cmd, void *arg)
{
- int rc = _hypercall2(int, physdev_op, cmd, arg);
- if (unlikely(rc == -ENOSYS))
- rc = xen_physdev_op_compat(cmd, arg);
- return rc;
+ return _hypercall2(int, physdev_op, cmd, arg);
}
static inline int
@@ -384,14 +471,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
}
-#ifdef CONFIG_X86_64
-static inline int
-HYPERVISOR_set_segment_base(int reg, unsigned long value)
-{
- return _hypercall2(int, set_segment_base, reg, value);
-}
-#endif
-
static inline int
HYPERVISOR_suspend(unsigned long start_info_mfn)
{
@@ -413,13 +492,6 @@ HYPERVISOR_hvm_op(int op, void *arg)
}
static inline int
-HYPERVISOR_tmem_op(
- struct tmem_op *op)
-{
- return _hypercall1(int, tmem_op, op);
-}
-
-static inline int
HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
{
return _hypercall2(int, xenpmu_op, op, arg);
@@ -430,94 +502,10 @@ HYPERVISOR_dm_op(
domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs)
{
int ret;
- stac();
+ __xen_stac();
ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs);
- clac();
+ __xen_clac();
return ret;
}
-static inline void
-MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
-{
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = set;
-
- trace_xen_mc_entry(mcl, 1);
-}
-
-static inline void
-MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
- pte_t new_val, unsigned long flags)
-{
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = va;
- if (sizeof(new_val) == sizeof(long)) {
- mcl->args[1] = new_val.pte;
- mcl->args[2] = flags;
- } else {
- mcl->args[1] = new_val.pte;
- mcl->args[2] = new_val.pte >> 32;
- mcl->args[3] = flags;
- }
-
- trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4);
-}
-
-static inline void
-MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
- struct desc_struct desc)
-{
- mcl->op = __HYPERVISOR_update_descriptor;
- if (sizeof(maddr) == sizeof(long)) {
- mcl->args[0] = maddr;
- mcl->args[1] = *(unsigned long *)&desc;
- } else {
- u32 *p = (u32 *)&desc;
-
- mcl->args[0] = maddr;
- mcl->args[1] = maddr >> 32;
- mcl->args[2] = *p++;
- mcl->args[3] = *p;
- }
-
- trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
-}
-
-static inline void
-MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
- int count, int *success_count, domid_t domid)
-{
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)req;
- mcl->args[1] = count;
- mcl->args[2] = (unsigned long)success_count;
- mcl->args[3] = domid;
-
- trace_xen_mc_entry(mcl, 4);
-}
-
-static inline void
-MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
- int *success_count, domid_t domid)
-{
- mcl->op = __HYPERVISOR_mmuext_op;
- mcl->args[0] = (unsigned long)op;
- mcl->args[1] = count;
- mcl->args[2] = (unsigned long)success_count;
- mcl->args[3] = domid;
-
- trace_xen_mc_entry(mcl, 4);
-}
-
-static inline void
-MULTI_stack_switch(struct multicall_entry *mcl,
- unsigned long ss, unsigned long esp)
-{
- mcl->op = __HYPERVISOR_stack_switch;
- mcl->args[0] = ss;
- mcl->args[1] = esp;
-
- trace_xen_mc_entry(mcl, 2);
-}
-
#endif /* _ASM_X86_XEN_HYPERCALL_H */
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 39171b3646bb..c2fc7869b996 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -36,25 +36,22 @@
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
+#include <asm/bug.h>
#include <asm/processor.h>
+#define XEN_SIGNATURE "XenVMMXenVMM"
+
static inline uint32_t xen_cpuid_base(void)
{
- return hypervisor_cpuid_base("XenVMMXenVMM", 2);
+ return cpuid_base_hypervisor(XEN_SIGNATURE, 2);
}
-#ifdef CONFIG_XEN
-extern bool xen_hvm_need_lapic(void);
+struct pci_dev;
-static inline bool xen_x2apic_para_available(void)
-{
- return xen_hvm_need_lapic();
-}
+#ifdef CONFIG_XEN_PV_DOM0
+bool xen_initdom_restore_msi(struct pci_dev *dev);
#else
-static inline bool xen_x2apic_para_available(void)
-{
- return (xen_cpuid_base() != 0);
-}
+static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return true; }
#endif
#ifdef CONFIG_HOTPLUG_CPU
@@ -62,6 +59,43 @@ void xen_arch_register_cpu(int num);
void xen_arch_unregister_cpu(int num);
#endif
-extern void xen_set_iopl_mask(unsigned mask);
+#ifdef CONFIG_PVH
+void __init xen_pvh_init(struct boot_params *boot_params);
+void __init mem_map_via_hcall(struct boot_params *boot_params_p);
+#endif
+
+/* Lazy mode for batching updates / context switch */
+enum xen_lazy_mode {
+ XEN_LAZY_NONE,
+ XEN_LAZY_MMU,
+ XEN_LAZY_CPU,
+};
+
+DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode);
+
+static inline void enter_lazy(enum xen_lazy_mode mode)
+{
+ BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE);
+
+ this_cpu_write(xen_lazy_mode, mode);
+}
+
+static inline void leave_lazy(enum xen_lazy_mode mode)
+{
+ BUG_ON(this_cpu_read(xen_lazy_mode) != mode);
+
+ this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE);
+}
+
+enum xen_lazy_mode xen_get_lazy_mode(void);
+
+#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
+void xen_sanitize_proc_cap_bits(uint32_t *buf);
+#else
+static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
+{
+ BUG();
+}
+#endif
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 62ca03ef5c65..a078a2b0f032 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -72,7 +72,7 @@
#endif
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Explicitly size integers that represent pfns in the public interface
* with Xen so that on ARM we can have one ABI that works for 32 and 64
* bit guests. */
@@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct trap_info {
uint8_t vector; /* exception vector */
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
@@ -182,8 +182,11 @@ struct arch_shared_info {
unsigned long p2m_cr3; /* cr3 value of the p2m address space */
unsigned long p2m_vaddr; /* virtual address of the p2m list */
unsigned long p2m_generation; /* generation count of p2m mapping */
+#ifdef CONFIG_X86_32
+ uint32_t wc_sec_hi;
+#endif
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_32
#include <asm/xen/interface_32.h>
@@ -193,7 +196,7 @@ struct arch_shared_info {
#include <asm/pvclock-abi.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -373,18 +376,15 @@ struct xen_pmu_arch {
} c;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Prefix forces emulation of some non-trapping instructions.
* Currently only CPUID.
*/
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
#endif /* _ASM_X86_XEN_INTERFACE_H */
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index dc40578abded..74d9768a9cf7 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -44,7 +44,7 @@
*/
#define __HYPERVISOR_VIRT_START 0xF5800000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct cpu_user_regs {
uint32_t ebx;
@@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t;
#define XEN_CALLBACK(__cs, __eip) \
((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) })
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index c599ec269a25..38a19edb81a3 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -61,7 +61,7 @@
* RING1 -> RING3 kernel mode.
* RING2 -> RING3 kernel mode.
* RING3 -> RING3 user mode.
- * However RING0 indicates that the guest kernel should return to iteself
+ * However RING0 indicates that the guest kernel should return to itself
* directly with
* orb $3,1*8(%rsp)
* iretq
@@ -77,7 +77,7 @@
#define VGCF_in_syscall (1<<_VGCF_in_syscall)
#define VGCF_IN_SYSCALL VGCF_in_syscall
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct iret_context {
/* Top of stack (%rsp at point of hypercall). */
@@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t;
#define XEN_CALLBACK(__cs, __rip) \
((unsigned long)(__rip))
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_XEN_INTERFACE_64_H */
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
deleted file mode 100644
index 116777e7f387..000000000000
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
-#define _ASM_X86_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-mapping.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- void *vstart = (void*)__get_free_pages(flags, get_order(size));
- *dma_handle = virt_to_phys(vstart);
- return vstart;
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
- dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, unsigned long attrs) { }
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs) { }
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 790ce08e41f2..59f642a94b9d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -11,11 +11,10 @@
#include <asm/extable.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
-#include <xen/features.h>
/* Xen machine address */
typedef struct xmaddr {
@@ -97,11 +96,7 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
asm volatile("1: mov %[val], %[ptr]\n"
"2:\n"
- ".section .fixup, \"ax\"\n"
- "3: sub $1, %[ret]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret])
: [ret] "+r" (ret), [ptr] "=m" (*addr)
: [val] "r" (val));
@@ -111,16 +106,12 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
static inline int xen_safe_read_ulong(const unsigned long *addr,
unsigned long *val)
{
- int ret = 0;
unsigned long rval = ~0ul;
+ int ret = 0;
asm volatile("1: mov %[ptr], %[rval]\n"
"2:\n"
- ".section .fixup, \"ax\"\n"
- "3: sub $1, %[ret]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret])
: [ret] "+r" (ret), [rval] "+r" (rval)
: [ptr] "m" (*addr));
*val = rval;
@@ -171,7 +162,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
* pfn_to_mfn. This will have to be removed when we figured
* out which call.
*/
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return pfn;
mfn = __pfn_to_mfn(pfn);
@@ -184,7 +175,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return 1;
return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
@@ -219,7 +210,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
* gfn_to_pfn. This will have to be removed when we figure
* out which call.
*/
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return mfn;
pfn = mfn_to_pfn_no_overrides(mfn);
@@ -251,7 +242,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
/* Pseudo-physical <-> Guest conversion */
static inline unsigned long pfn_to_gfn(unsigned long pfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return pfn;
else
return pfn_to_mfn(pfn);
@@ -259,7 +250,7 @@ static inline unsigned long pfn_to_gfn(unsigned long pfn)
static inline unsigned long gfn_to_pfn(unsigned long gfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return gfn;
else
return mfn_to_pfn(gfn);
@@ -293,7 +284,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn;
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return mfn;
pfn = mfn_to_pfn(mfn);
@@ -304,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_pfn(v) (PFN_DOWN(__pa(v)))
+static inline unsigned long virt_to_pfn(const void *v)
+{
+ return PFN_DOWN(__pa(v));
+}
#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
@@ -356,9 +350,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
-#define xen_remap(cookie, size) ioremap((cookie), (size));
-#define xen_unmap(cookie) iounmap((cookie))
-
static inline bool xen_arch_need_swiotlb(struct device *dev,
phys_addr_t phys,
dma_addr_t dev_addr)
@@ -366,9 +357,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
return false;
}
-static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
-{
- return __get_free_pages(__GFP_NOWARN, order);
-}
-
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 3506d8c598c1..9015b888edd6 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -14,29 +14,13 @@ static inline int pci_xen_hvm_init(void)
return -1;
}
#endif
-#if defined(CONFIG_XEN_DOM0)
+#ifdef CONFIG_XEN_PV_DOM0
int __init pci_xen_initial_domain(void);
-int xen_find_device_domain_owner(struct pci_dev *dev);
-int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
-int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline int __init pci_xen_initial_domain(void)
{
return -1;
}
-static inline int xen_find_device_domain_owner(struct pci_dev *dev)
-{
- return -1;
-}
-static inline int xen_register_device_domain_owner(struct pci_dev *dev,
- uint16_t domain)
-{
- return -1;
-}
-static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
-{
- return -1;
-}
#endif
#if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 6b56d0d45d15..abde0f44df57 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -2,16 +2,10 @@
#ifndef _ASM_X86_SWIOTLB_XEN_H
#define _ASM_X86_SWIOTLB_XEN_H
-#ifdef CONFIG_SWIOTLB_XEN
-extern int xen_swiotlb;
-extern int __init pci_xen_swiotlb_detect(void);
-extern void __init pci_xen_swiotlb_init(void);
-extern int pci_xen_swiotlb_init_late(void);
-#else
-#define xen_swiotlb (0)
-static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
-static inline void __init pci_xen_swiotlb_init(void) { }
-static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
-#endif
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
+int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+ unsigned int address_bits,
+ dma_addr_t *dma_handle);
+void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
#endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 45c8605467f1..7b0307acc410 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_XOR_H
#define _ASM_X86_XOR_H
/*
* Optimized RAID-5 checksumming functions for SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -65,7 +57,8 @@
op(i + 3, 3)
static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -116,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -150,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -209,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -246,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -312,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -351,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
@@ -424,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 635eac543922..7a6b9474591e 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_XOR_32_H
#define _ASM_X86_XOR_32_H
/*
* Optimized RAID-5 checksumming functions for MMX.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -29,7 +21,8 @@
#include <asm/fpu/api.h>
static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 7;
@@ -72,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 7;
@@ -121,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 7;
@@ -176,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 7;
@@ -256,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
#undef BLOCK
static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 6;
@@ -303,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 6;
@@ -360,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 6;
@@ -426,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 6;
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 22a7b1870a31..7f81dd5897f4 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H
@@ -8,15 +9,8 @@
* Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
-#ifdef CONFIG_AS_AVX
-
#include <linux/compiler.h>
#include <asm/fpu/api.h>
@@ -32,7 +26,8 @@
BLOCK4(8) \
BLOCK4(12)
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1)
{
unsigned long lines = bytes >> 9;
@@ -58,8 +53,9 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 9;
@@ -88,8 +84,10 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 9;
@@ -121,8 +119,11 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 9;
@@ -174,11 +175,4 @@ do { \
#define AVX_SELECT(FASTEST) \
(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
-#else
-
-#define AVX_XOR_SPEED {}
-
-#define AVX_SELECT(FASTEST) (FASTEST)
-
-#endif
#endif
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index f6648e9928b3..39606a856d3b 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
-include include/uapi/asm-generic/Kbuild.asm
-
+# SPDX-License-Identifier: GPL-2.0
generated-y += unistd_32.h
generated-y += unistd_64.h
generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
new file mode 100644
index 000000000000..92d8f256d096
--- /dev/null
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_X86_AMD_HSMP_H_
+#define _UAPI_ASM_X86_AMD_HSMP_H_
+
+#include <linux/types.h>
+
+#pragma pack(4)
+
+#define HSMP_MAX_MSG_LEN 8
+
+/*
+ * HSMP Messages supported
+ */
+enum hsmp_message_ids {
+ HSMP_TEST = 1, /* 01h Increments input value by 1 */
+ HSMP_GET_SMU_VER, /* 02h SMU FW version */
+ HSMP_GET_PROTO_VER, /* 03h HSMP interface version */
+ HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */
+ HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */
+ HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */
+ HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */
+ HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */
+ HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */
+ HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */
+ HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */
+ HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */
+ HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */
+ HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */
+ HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */
+ HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */
+ HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */
+ HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */
+ HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */
+ HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */
+ HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */
+ HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */
+ HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */
+ HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */
+ HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */
+ HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */
+ HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */
+ HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */
+ HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */
+ HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */
+ HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */
+ HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */
+ HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */
+ HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */
+ HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */
+ HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */
+ HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */
+ HSMP_SET_XGMI_PSTATE_RANGE, /* 26h Set xGMI P-state range */
+ HSMP_CPU_RAIL_ISO_FREQ_POLICY, /* 27h Get/Set Cpu Iso frequency policy */
+ HSMP_DFC_ENABLE_CTRL, /* 28h Enable/Disable DF C-state */
+ HSMP_GET_RAPL_UNITS = 0x30, /* 30h Get scaling factor for energy */
+ HSMP_GET_RAPL_CORE_COUNTER, /* 31h Get core energy counter value */
+ HSMP_GET_RAPL_PACKAGE_COUNTER, /* 32h Get package energy counter value */
+ HSMP_MSG_ID_MAX,
+};
+
+struct hsmp_message {
+ __u32 msg_id; /* Message ID */
+ __u16 num_args; /* Number of input argument words in message */
+ __u16 response_sz; /* Number of expected output/response words */
+ __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */
+ __u16 sock_ind; /* socket number */
+};
+
+enum hsmp_msg_type {
+ HSMP_RSVD = -1,
+ HSMP_SET = 0,
+ HSMP_GET = 1,
+ HSMP_SET_GET = 2,
+};
+
+enum hsmp_proto_versions {
+ HSMP_PROTO_VER2 = 2,
+ HSMP_PROTO_VER3,
+ HSMP_PROTO_VER4,
+ HSMP_PROTO_VER5,
+ HSMP_PROTO_VER6,
+ HSMP_PROTO_VER7
+};
+
+struct hsmp_msg_desc {
+ int num_args;
+ int response_sz;
+ enum hsmp_msg_type type;
+};
+
+/*
+ * User may use these comments as reference, please find the
+ * supported list of messages and message definition in the
+ * HSMP chapter of respective family/model PPR.
+ *
+ * Not supported messages would return -ENOMSG.
+ */
+static const struct hsmp_msg_desc hsmp_msg_desc_table[]
+ __attribute__((unused)) = {
+ /* RESERVED */
+ {0, 0, HSMP_RSVD},
+
+ /*
+ * HSMP_TEST, num_args = 1, response_sz = 1
+ * input: args[0] = xx
+ * output: args[0] = xx + 1
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1
+ * output: args[0] = smu fw ver
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1
+ * output: args[0] = proto version
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1
+ * output: args[0] = socket power in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0
+ * input: args[0] = power limit value in mWatts
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = socket power limit value in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1
+ * output: args[0] = maximuam socket power limit in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0
+ * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0
+ * input: args[0] = boost limit value in MHz
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1
+ * input: args[0] = apic id
+ * output: args[0] = boost limit value in MHz
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1
+ * output: args[0] = proc hot status
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0
+ * input: args[0] = min link width[15:8] + max link width[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0
+ * input: args[0] = df pstate[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */
+ {0, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2
+ * output: args[0] = fclk in MHz, args[1] = mclk in MHz
+ */
+ {0, 2, HSMP_GET},
+
+ /*
+ * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = core clock in MHz
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1
+ * output: args[0] = average c0 residency
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0
+ * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
+ * input: args[0] = nbioid[23:16]
+ * output: args[0] = max dpm level[15:8] + min dpm level[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
+ * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] +
+ * bw in percentage[7:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1
+ * output: args[0] = temperature in degree celsius. [15:8] integer part +
+ * [7:5] fractional part
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = refresh rate[3] + temperature range[2:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
+ * input: args[0] = apic id [31:0]
+ * output: args[0] = frequency in MHz[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
+ * output: args[0] = power in mW[31:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
+ * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = io bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = xgmi bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
+ * input: args[0] = min link width[15:8] + max link width[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
+ * input: args[0] = link rate control value
+ * output: args[0] = previous link rate control value
+ */
+ {1, 1, HSMP_SET},
+
+ /*
+ * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
+ * input: args[0] = power efficiency mode[2:0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /*
+ * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
+ * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
+ * output: args[0] = metrics table version
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
+ */
+ {0, 0, HSMP_GET},
+
+ /*
+ * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
+ * output: args[0] = lower 32 bits of the address
+ * output: args[1] = upper 32 bits of the address
+ */
+ {0, 2, HSMP_GET},
+
+ /*
+ * HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0
+ * input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] +
+ * disable/enable independent control[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /*
+ * HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] + enable/disable DFC[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /* RESERVED(0x29-0x2f) */
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+
+ /*
+ * HSMP_GET_RAPL_UNITS, response_sz = 1
+ * output: args[0] = tu value[19:16] + esu value[12:8]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 1
+ * input: args[0] = apic id[15:0]
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {1, 2, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 1
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {0, 2, HSMP_GET},
+
+};
+
+/* Metrics table (supported only with proto version 6) */
+struct hsmp_metric_table {
+ __u32 accumulation_counter;
+
+ /* TEMPERATURE */
+ __u32 max_socket_temperature;
+ __u32 max_vr_temperature;
+ __u32 max_hbm_temperature;
+ __u64 max_socket_temperature_acc;
+ __u64 max_vr_temperature_acc;
+ __u64 max_hbm_temperature_acc;
+
+ /* POWER */
+ __u32 socket_power_limit;
+ __u32 max_socket_power_limit;
+ __u32 socket_power;
+
+ /* ENERGY */
+ __u64 timestamp;
+ __u64 socket_energy_acc;
+ __u64 ccd_energy_acc;
+ __u64 xcd_energy_acc;
+ __u64 aid_energy_acc;
+ __u64 hbm_energy_acc;
+
+ /* FREQUENCY */
+ __u32 cclk_frequency_limit;
+ __u32 gfxclk_frequency_limit;
+ __u32 fclk_frequency;
+ __u32 uclk_frequency;
+ __u32 socclk_frequency[4];
+ __u32 vclk_frequency[4];
+ __u32 dclk_frequency[4];
+ __u32 lclk_frequency[4];
+ __u64 gfxclk_frequency_acc[8];
+ __u64 cclk_frequency_acc[96];
+
+ /* FREQUENCY RANGE */
+ __u32 max_cclk_frequency;
+ __u32 min_cclk_frequency;
+ __u32 max_gfxclk_frequency;
+ __u32 min_gfxclk_frequency;
+ __u32 fclk_frequency_table[4];
+ __u32 uclk_frequency_table[4];
+ __u32 socclk_frequency_table[4];
+ __u32 vclk_frequency_table[4];
+ __u32 dclk_frequency_table[4];
+ __u32 lclk_frequency_table[4];
+ __u32 max_lclk_dpm_range;
+ __u32 min_lclk_dpm_range;
+
+ /* XGMI */
+ __u32 xgmi_width;
+ __u32 xgmi_bitrate;
+ __u64 xgmi_read_bandwidth_acc[8];
+ __u64 xgmi_write_bandwidth_acc[8];
+
+ /* ACTIVITY */
+ __u32 socket_c0_residency;
+ __u32 socket_gfx_busy;
+ __u32 dram_bandwidth_utilization;
+ __u64 socket_c0_residency_acc;
+ __u64 socket_gfx_busy_acc;
+ __u64 dram_bandwidth_acc;
+ __u32 max_dram_bandwidth;
+ __u64 dram_bandwidth_utilization_acc;
+ __u64 pcie_bandwidth_acc[4];
+
+ /* THROTTLERS */
+ __u32 prochot_residency_acc;
+ __u32 ppt_residency_acc;
+ __u32 socket_thm_residency_acc;
+ __u32 vr_thm_residency_acc;
+ __u32 hbm_thm_residency_acc;
+ __u32 spare;
+
+ /* New items at the end to maintain driver compatibility */
+ __u32 gfxclk_frequency[8];
+};
+
+/* Reset to default packing */
+#pragma pack()
+
+/* Define unique ioctl command for hsmp msgs using generic _IOWR */
+#define HSMP_BASE_IOCTL_NR 0xF8
+#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message)
+
+#endif /*_ASM_X86_AMD_HSMP_H_*/
diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h
index 580e3c567046..6beb55bbefa4 100644
--- a/arch/x86/include/uapi/asm/auxvec.h
+++ b/arch/x86/include/uapi/asm/auxvec.h
@@ -12,9 +12,9 @@
/* entries in ARCH_DLINFO: */
#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
-# define AT_VECTOR_SIZE_ARCH 2
+# define AT_VECTOR_SIZE_ARCH 3
#else /* else it's non-compat x86-64 */
-# define AT_VECTOR_SIZE_ARCH 1
+# define AT_VECTOR_SIZE_ARCH 2
#endif
#endif /* _ASM_X86_AUXVEC_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 60733f137e9a..dafbf581c515 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -2,14 +2,7 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
-/* setup_data types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-#define SETUP_EFI 4
-#define SETUP_APPLE_PROPERTIES 5
-#define SETUP_JAILHOUSE 6
+#include <asm/setup_data.h>
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -29,8 +22,11 @@
#define XLF_EFI_HANDOVER_32 (1<<2)
#define XLF_EFI_HANDOVER_64 (1<<3)
#define XLF_EFI_KEXEC (1<<4)
+#define XLF_5LEVEL (1<<5)
+#define XLF_5LEVEL_ENABLED (1<<6)
+#define XLF_MEM_ENCRYPTION (1<<7)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/screen_info.h>
@@ -39,14 +35,6 @@
#include <asm/ist.h>
#include <video/edid.h>
-/* extensible setup data list node */
-struct setup_data {
- __u64 next;
- __u32 type;
- __u32 len;
- __u8 data[0];
-};
-
struct setup_header {
__u8 setup_sects;
__u16 root_flags;
@@ -86,6 +74,7 @@ struct setup_header {
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
+ __u32 kernel_info_offset;
} __attribute__((packed));
struct sys_desc_table {
@@ -119,35 +108,10 @@ struct efi_info {
#define E820_MAX_ENTRIES_ZEROPAGE 128
/*
- * The E820 memory region entry of the boot protocol ABI:
- */
-struct boot_e820_entry {
- __u64 addr;
- __u64 size;
- __u32 type;
-} __attribute__((packed));
-
-/*
* Smallest compatible version of jailhouse_setup_data required by this kernel.
*/
#define JAILHOUSE_SETUP_REQUIRED_VERSION 1
-/*
- * The boot loader is passing platform information via this Jailhouse-specific
- * setup data structure.
- */
-struct jailhouse_setup_data {
- __u16 version;
- __u16 compatible_version;
- __u16 pm_timer_address;
- __u16 num_cpus;
- __u64 pci_mmconfig_base;
- __u32 tsc_khz;
- __u32 apic_khz;
- __u8 standard_ioapic;
- __u8 cpu_ids[255];
-} __attribute__((packed));
-
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
@@ -164,7 +128,8 @@ struct boot_params {
__u32 ext_ramdisk_image; /* 0x0c0 */
__u32 ext_ramdisk_size; /* 0x0c4 */
__u32 ext_cmd_line_ptr; /* 0x0c8 */
- __u8 _pad4[116]; /* 0x0cc */
+ __u8 _pad4[112]; /* 0x0cc */
+ __u32 cc_blob_address; /* 0x13c */
struct edid_info edid_info; /* 0x140 */
struct efi_info efi_info; /* 0x1c0 */
__u32 alt_mem_k; /* 0x1e0 */
@@ -211,7 +176,7 @@ struct boot_params {
* handling of page tables.
*
* These enums should only ever be used by x86 code, and the code that uses
- * it should be well contained and compartamentalized.
+ * it should be well contained and compartmentalized.
*
* KVM and Xen HVM do not have a subarch as these are expected to follow
* standard x86 boot entries. If there is a genuine need for "hypervisor" type
@@ -229,10 +194,10 @@ struct boot_params {
* @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
* which start at asm startup_xen() entry point and later jump to the C
* xen_start_kernel() entry point. Both domU and dom0 type of guests are
- * currently supportd through this PV boot path.
+ * currently supported through this PV boot path.
* @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
* systems which do not have the PCI legacy interfaces.
- * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for
+ * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC
* for settop boxes and media devices, the use of a subarch for CE4100
* is more of a hack...
*/
@@ -245,6 +210,6 @@ enum x86_hardware_subarch {
X86_NR_SUBARCHS,
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/uapi/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h
index 484e3cfd7ef2..149143cab9ff 100644
--- a/arch/x86/include/uapi/asm/byteorder.h
+++ b/arch/x86/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_BYTEORDER_H
#define _ASM_X86_BYTEORDER_H
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index d95d080b30e3..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
@@ -24,6 +43,7 @@
#define DR_TRAP3 (0x8) /* db3 */
#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
+#define DR_BUS_LOCK (0x800) /* bus_lock */
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 2f491efe3a12..55bc66867156 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -54,7 +54,7 @@
*/
#define E820_RESERVED_KERN 128
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct e820entry {
__u64 addr; /* start of memory segment */
@@ -76,7 +76,7 @@ struct e820map {
#define BIOS_ROM_BASE 0xffe00000
#define BIOS_ROM_END 0xffffffff
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_E820_H */
diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h
new file mode 100644
index 000000000000..468e135fa285
--- /dev/null
+++ b/arch/x86/include/uapi/asm/elf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_ELF_H
+#define _UAPI_ASM_X86_ELF_H
+
+#include <linux/types.h>
+
+struct x86_xfeat_component {
+ __u32 type;
+ __u32 size;
+ __u32 offset;
+ __u32 flags;
+} __packed;
+
+_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned");
+
+#endif /* _UAPI_ASM_X86_ELF_H */
diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/x86/include/uapi/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/x86/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 6ebaae90e207..054604aba9f0 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -1,8 +1,13 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H
+#include <linux/const.h>
+
/* MONITOR/MWAIT enabled in Ring 3 */
-#define HWCAP2_RING3MWAIT (1 << 0)
+#define HWCAP2_RING3MWAIT _BITUL(0)
+
+/* Kernel allows FSGSBASE instructions available in Ring 3 */
+#define HWCAP2_FSGSBASE _BITUL(1)
#endif
diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/x86/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/x86/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/x86/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index dabfcf7c3941..7ceff6583652 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -7,11 +7,15 @@
*
*/
+#include <linux/const.h>
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/stddef.h>
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define DE_VECTOR 0
#define DB_VECTOR 1
@@ -31,6 +35,11 @@
#define MC_VECTOR 18
#define XM_VECTOR 19
#define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
@@ -38,7 +47,6 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
@@ -47,19 +55,10 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
-struct kvm_memory_alias {
- __u32 slot; /* this has a different namespace than memory slots */
- __u32 flags;
- __u64 guest_phys_addr;
- __u64 memory_size;
- __u64 target_phys_addr;
-};
-
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
@@ -111,6 +110,8 @@ struct kvm_ioapic_state {
#define KVM_NR_IRQCHIPS 3
#define KVM_RUN_X86_SMM (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -157,6 +158,19 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
+struct kvm_sregs2 {
+ /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 flags;
+ __u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
@@ -183,15 +197,40 @@ struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
- struct kvm_msr_entry entries[0];
+ struct kvm_msr_entry entries[];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
- __u32 indices[0];
+ __u32 indices[];
+};
+
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
+#define KVM_MSR_FILTER_READ (1 << 0)
+#define KVM_MSR_FILTER_WRITE (1 << 1)
+#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \
+ KVM_MSR_FILTER_WRITE)
+ __u32 flags;
+ __u32 nmsrs; /* number of msrs in bitmap */
+ __u32 base; /* MSR index the bitmap starts at */
+ __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
};
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#ifndef __KERNEL__
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#endif
+#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY)
+ __u32 flags;
+ struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
struct kvm_cpuid_entry {
__u32 function;
@@ -206,7 +245,7 @@ struct kvm_cpuid_entry {
struct kvm_cpuid {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry entries[0];
+ struct kvm_cpuid_entry entries[];
};
struct kvm_cpuid_entry2 {
@@ -228,7 +267,7 @@ struct kvm_cpuid_entry2 {
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry2 entries[0];
+ struct kvm_cpuid_entry2 entries[];
};
/* for KVM_GET_PIT and KVM_SET_PIT */
@@ -260,6 +299,7 @@ struct kvm_debug_exit_arch {
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
#define KVM_GUESTDBG_INJECT_DB 0x00040000
#define KVM_GUESTDBG_INJECT_BP 0x00080000
+#define KVM_GUESTDBG_BLOCKIRQ 0x00100000
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
@@ -270,7 +310,8 @@ struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
-#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002
struct kvm_pit_state2 {
struct kvm_pit_channel_state channels[3];
@@ -289,6 +330,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
+#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -323,7 +365,10 @@ struct kvm_vcpu_events {
__u8 smm_inside_nmi;
__u8 latched_init;
} smi;
- __u8 reserved[27];
+ struct {
+ __u8 pending;
+ } triple_fault;
+ __u8 reserved[26];
__u8 exception_has_payload;
__u64 exception_payload;
};
@@ -337,9 +382,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[];
};
#define KVM_MAX_XCRS 16
@@ -357,6 +416,35 @@ struct kvm_xcrs {
__u64 padding[16];
};
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_KVM 3
+
+#define KVM_X86_KVM_REG_SIZE(reg) \
+({ \
+ reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg) \
+({ \
+ __u64 type_size = (__u64)type << 32; \
+ \
+ type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+ type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \
+ 0; \
+ type_size; \
+})
+
+#define KVM_X86_REG_ID(type, index) \
+ (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP 0
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)
@@ -378,46 +466,581 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
+#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
+#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
+#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
+
+#define KVM_STATE_NESTED_FORMAT_VMX 0
+#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+#define KVM_STATE_NESTED_GIF_SET 0x00000100
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
+
+#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+
+/* vendor-independent attributes for system fd (group 0) */
+#define KVM_X86_GRP_SYSTEM 0
+# define KVM_X86_XCOMP_GUEST_SUPP 0
+
+/* vendor-specific groups and attributes for system fd */
+#define KVM_X86_GRP_SEV 1
+# define KVM_X86_SEV_VMSA_FEATURES 0
+# define KVM_X86_SNP_POLICY_BITS 1
+
+struct kvm_vmx_nested_state_data {
+ __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
+struct kvm_vmx_nested_state_hdr {
__u64 vmxon_pa;
- __u64 vmcs_pa;
+ __u64 vmcs12_pa;
struct {
__u16 flags;
} smm;
+
+ __u16 pad;
+
+ __u32 flags;
+ __u64 preemption_timer_deadline;
+};
+
+struct kvm_svm_nested_state_data {
+ /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
+ __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+ __u64 vmcb_pa;
};
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
- /* KVM_STATE_* flags */
__u16 flags;
-
- /* 0 for VMX, 1 for SVM. */
__u16 format;
-
- /* 128 for SVM, 128 + VMCS size for VMX. */
__u32 size;
union {
- /* VMXON, VMCS */
- struct kvm_vmx_nested_state vmx;
+ struct kvm_vmx_nested_state_hdr vmx;
+ struct kvm_svm_nested_state_hdr svm;
/* Pad the header to 128 bytes. */
__u8 pad[120];
- };
+ } hdr;
+
+ /*
+ * Define data region as 0 bytes to preserve backwards-compatability
+ * to old definition of kvm_nested_state in order to avoid changing
+ * KVM_{GET,PUT}_NESTED_STATE ioctl values.
+ */
+ union {
+ __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx);
+ __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm);
+ } data;
+};
+
+/* for KVM_CAP_PMU_EVENT_FILTER */
+struct kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[];
+};
+
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
+#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ __u8 runstate_update_flag;
+ union {
+ __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+ __u64 hva;
+ } shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
+ __u64 pad[8];
+ } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+ __u64 hva;
+ __u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
+ /* Guest Migration Extension */
+ KVM_SEV_SEND_CANCEL,
+
+ /* Second time is the charm; improved versions of the above ioctls. */
+ KVM_SEV_INIT2,
+
+ /* SNP-specific commands */
+ KVM_SEV_SNP_LAUNCH_START = 100,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ KVM_SEV_SNP_LAUNCH_FINISH,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u32 pad0;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_init {
+ __u64 vmsa_features;
+ __u32 flags;
+ __u16 ghcb_version;
+ __u16 pad1;
+ __u32 pad2[8];
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_send_start {
+ __u32 policy;
+ __u32 pad0;
+ __u64 pdh_cert_uaddr;
+ __u32 pdh_cert_len;
+ __u32 pad1;
+ __u64 plat_certs_uaddr;
+ __u32 plat_certs_len;
+ __u32 pad2;
+ __u64 amd_certs_uaddr;
+ __u32 amd_certs_len;
+ __u32 pad3;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad4;
+};
+
+struct kvm_sev_send_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_receive_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 pdh_uaddr;
+ __u32 pdh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_receive_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_snp_launch_start {
+ __u64 policy;
+ __u8 gosvw[16];
+ __u16 flags;
+ __u8 pad0[6];
+ __u64 pad1[4];
+};
+
+/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_PAGE_TYPE_INVALID 0x0
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start;
+ __u64 uaddr;
+ __u64 len;
+ __u8 type;
+ __u8 pad0;
+ __u16 flags;
+ __u32 pad1;
+ __u64 pad2[4];
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+
+struct kvm_sev_snp_launch_finish {
+ __u64 id_block_uaddr;
+ __u64 id_auth_uaddr;
+ __u8 id_block_en;
+ __u8 auth_key_en;
+ __u8 vcek_disabled;
+ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+ __u8 pad0[3];
+ __u16 flags;
+ __u64 pad1[4];
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
+/*
+ * Masked event layout.
+ * Bits Description
+ * ---- -----------
+ * 7:0 event select (low bits)
+ * 15:8 umask match
+ * 31:16 unused
+ * 35:32 event select (high bits)
+ * 36:54 unused
+ * 55 exclude bit
+ * 63:56 umask mask
+ */
+
+#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \
+ (((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \
+ (((mask) & 0xFFULL) << 56) | \
+ (((match) & 0xFFULL) << 8) | \
+ ((__u64)(!!(exclude)) << 55))
+
+#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
+ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
+
+/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
+#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
+#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+
+/* x86-specific KVM_EXIT_HYPERCALL flags. */
+#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0)
+
+#define KVM_X86_DEFAULT_VM 0
+#define KVM_X86_SW_PROTECTED_VM 1
+#define KVM_X86_SEV_VM 2
+#define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
+
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+ KVM_TDX_CAPABILITIES = 0,
+ KVM_TDX_INIT_VM,
+ KVM_TDX_INIT_VCPU,
+ KVM_TDX_INIT_MEM_REGION,
+ KVM_TDX_FINALIZE_VM,
+ KVM_TDX_GET_CPUID,
+
+ KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+ /* enum kvm_tdx_cmd_id */
+ __u32 id;
+ /* flags for sub-commend. If sub-command doesn't use this, set zero. */
+ __u32 flags;
+ /*
+ * data for each sub-command. An immediate or a pointer to the actual
+ * data in process virtual address. If sub-command doesn't use it,
+ * set zero.
+ */
+ __u64 data;
+ /*
+ * Auxiliary error code. The sub-command may return TDX SEAMCALL
+ * status code in addition to -Exxx.
+ */
+ __u64 hw_error;
+};
+
+struct kvm_tdx_capabilities {
+ __u64 supported_attrs;
+ __u64 supported_xfam;
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
+
+ /* Configurable CPUID bits for userspace */
+ struct kvm_cpuid2 cpuid;
+};
+
+struct kvm_tdx_init_vm {
+ __u64 attributes;
+ __u64 xfam;
+ __u64 mrconfigid[6]; /* sha384 digest */
+ __u64 mrowner[6]; /* sha384 digest */
+ __u64 mrownerconfig[6]; /* sha384 digest */
+
+ /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
+ __u64 reserved[12];
+
+ /*
+ * Call KVM_TDX_INIT_VM before vcpu creation, thus before
+ * KVM_SET_CPUID2.
+ * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
+ * TDX module directly virtualizes those CPUIDs without VMM. The user
+ * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
+ * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
+ * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
+ * module doesn't virtualize.
+ */
+ struct kvm_cpuid2 cpuid;
+};
+
+#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0)
- __u8 data[0];
+struct kvm_tdx_init_mem_region {
+ __u64 source_addr;
+ __u64 gpa;
+ __u64 nr_pages;
};
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec1a316..a1efa7907a0b 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -8,6 +8,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should
@@ -29,6 +30,12 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
+#define KVM_FEATURE_POLL_CONTROL 12
+#define KVM_FEATURE_PV_SCHED_YIELD 13
+#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE 16
+#define KVM_FEATURE_MIGRATION_CONTROL 17
#define KVM_HINTS_REALTIME 0
@@ -47,6 +54,10 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_POLL_CONTROL 0x4b564d05
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08
struct kvm_steal_time {
__u64 steal;
@@ -78,6 +89,21 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0)
+
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY (1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -109,9 +135,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
- __u32 reason;
- __u8 pad[60];
- __u32 enabled;
+ /* Used for 'page not present' events delivered via #PF */
+ __u32 flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
};
#define KVM_PV_EOI_BIT 0
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index d62ac5db093b..a82c039d8e6a 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -12,7 +12,7 @@
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Note on 64bit base and limit is ignored and you cannot set DS/ES/CS
* not to the default values if you still want to do syscalls. This
@@ -44,5 +44,5 @@ struct user_desc {
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_LDT_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 955c2a2e1cf9..cb6b48a7c22b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@
/*
* Fields are zero when not available. Also, this struct is shared with
* userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
*/
struct mce {
__u64 status; /* Bank's MCi_STATUS MSR */
@@ -35,6 +36,7 @@ struct mce {
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
__u32 microcode; /* Microcode revision */
+ __u64 kflags; /* Internal kernel use */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index d4a8d0424bfb..ac1e6277212b 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,28 +3,7 @@
#define _ASM_X86_MMAN_H
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-/*
- * Take the 4 protection key bits out of the vma->vm_flags
- * value and turn them in to the bits that we can put in
- * to a pte.
- *
- * Only override these if Protection Keys are available
- * (which is only on 64-bit).
- */
-#define arch_vm_get_page_prot(vm_flags) __pgprot( \
- ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \
- ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \
- ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \
- ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
-
-#define arch_calc_vm_prot_bits(prot, key) ( \
- ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
- ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
- ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
- ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
-#endif
+#define MAP_ABOVE4G 0x80 /* only map above 4GB */
#include <asm-generic/mman.h>
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 90ab9a795b49..ac83e25bbf37 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -5,19 +5,22 @@
#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/msgbuf.h>
#else
+
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
struct msqid64_ds {
struct ipc64_perm msg_perm;
- __kernel_time_t msg_stime; /* last msgsnd time */
- __kernel_time_t msg_rtime; /* last msgrcv time */
- __kernel_time_t msg_ctime; /* last change time */
+ __kernel_long_t msg_stime; /* last msgsnd time */
+ __kernel_long_t msg_rtime; /* last msgrcv time */
+ __kernel_long_t msg_ctime; /* last change time */
__kernel_ulong_t msg_cbytes; /* current number of bytes on queue */
__kernel_ulong_t msg_qnum; /* number of messages in queue */
__kernel_ulong_t msg_qbytes; /* max number of bytes on queue */
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index e7516b402a00..4b8917ca28fe 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_MSR_H
#define _UAPI_ASM_X86_MSR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/ioctl.h>
@@ -10,5 +10,5 @@
#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
index 376563f2bac1..3a8a8eb8ac3a 100644
--- a/arch/x86/include/uapi/asm/mtrr.h
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -81,14 +81,6 @@ typedef __u8 mtrr_type;
#define MTRR_NUM_FIXED_RANGES 88
#define MTRR_MAX_VAR_RANGES 256
-struct mtrr_state_type {
- struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
- mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
@@ -115,9 +107,9 @@ struct mtrr_state_type {
#define MTRR_NUM_TYPES 7
/*
- * Invalid MTRR memory type. mtrr_type_lookup() returns this value when
- * MTRRs are disabled. Note, this value is allocated from the reserved
- * values (0x7-0xff) of the MTRR memory types.
+ * Invalid MTRR memory type. No longer used outside of MTRR code.
+ * Note, this value is allocated from the reserved values (0x7-0xff) of
+ * the MTRR memory types.
*/
#define MTRR_TYPE_INVALID 0xff
diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/x86/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f3329cabce5c..7c9d2bb3833b 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,8 +27,32 @@ enum perf_event_x86_regs {
PERF_REG_X86_R13,
PERF_REG_X86_R14,
PERF_REG_X86_R15,
-
+ /* These are the limits for the GPRs. */
PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+ /* These all need two bits set because they are 128bit */
+ PERF_REG_X86_XMM0 = 32,
+ PERF_REG_X86_XMM1 = 34,
+ PERF_REG_X86_XMM2 = 36,
+ PERF_REG_X86_XMM3 = 38,
+ PERF_REG_X86_XMM4 = 40,
+ PERF_REG_X86_XMM5 = 42,
+ PERF_REG_X86_XMM6 = 44,
+ PERF_REG_X86_XMM7 = 46,
+ PERF_REG_X86_XMM8 = 48,
+ PERF_REG_X86_XMM9 = 50,
+ PERF_REG_X86_XMM10 = 52,
+ PERF_REG_X86_XMM11 = 54,
+ PERF_REG_X86_XMM12 = 56,
+ PERF_REG_X86_XMM13 = 58,
+ PERF_REG_X86_XMM14 = 60,
+ PERF_REG_X86_XMM15 = 62,
+
+ /* These include both GPRs and XMMX registers */
+ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
+
+#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 5a6aac9fa41f..384e2cc6ac19 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -2,16 +2,42 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+
+#define ARCH_XCOMP_TILECFG 17
+#define ARCH_XCOMP_TILEDATA 18
+
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
+
+/* Don't use 0x3001-0x3004 because of old glibcs */
+
+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+#define ARCH_FORCE_TAGGED_SVA 0x4004
+
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_LOCK 0x5003
+#define ARCH_SHSTK_UNLOCK 0x5004
+#define ARCH_SHSTK_STATUS 0x5005
+
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#define ARCH_SHSTK_WRSS (1ULL << 1)
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index bcba3c643e63..81d0c8bf1137 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -82,6 +82,10 @@
#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */
+#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT)
+#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */
+#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT)
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
@@ -130,6 +134,19 @@
#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT)
#define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
+#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
+#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)
+#define X86_CR4_LASS_BIT 27 /* enable Linear Address Space Separation support */
+#define X86_CR4_LASS _BITUL(X86_CR4_LASS_BIT)
+#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
+#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
+
+#ifdef __x86_64__
+#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */
+#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT)
+#else
+#define X86_CR4_FRED (0)
+#endif
/*
* x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 16074b9c93bb..5823584dea13 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,7 +25,7 @@
#else /* __i386__ */
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
@@ -57,7 +57,7 @@
#define EFLAGS 144
#define RSP 152
#define SS 160
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* top of stack page */
#define FRAME_SIZE 168
@@ -87,7 +87,7 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#endif
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..e0b5b4f6226b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -7,7 +7,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
/* this struct defines the way the registers are stored on the
@@ -81,6 +81,6 @@ struct pt_regs {
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/x86/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index 89de6cd9f0a7..71205b02a191 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SEMBUF_H
#define _ASM_X86_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The semid64_ds structure for x86 architecture.
* Note extra padding because this structure is passed back and forth
@@ -21,9 +23,9 @@ struct semid64_ds {
unsigned long sem_ctime; /* last change time */
unsigned long sem_ctime_high;
#else
- __kernel_time_t sem_otime; /* last semop time */
+ __kernel_long_t sem_otime; /* last semop time */
__kernel_ulong_t __unused1;
- __kernel_time_t sem_ctime; /* last change time */
+ __kernel_long_t sem_ctime; /* last change time */
__kernel_ulong_t __unused2;
#endif
__kernel_ulong_t sem_nsems; /* no. of semaphores in array */
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
new file mode 100644
index 000000000000..2671c4e1b3a0
--- /dev/null
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_SETUP_DATA_H
+#define _UAPI_ASM_X86_SETUP_DATA_H
+
+/* setup_data/setup_indirect types */
+#define SETUP_NONE 0
+#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
+#define SETUP_PCI 3
+#define SETUP_EFI 4
+#define SETUP_APPLE_PROPERTIES 5
+#define SETUP_JAILHOUSE 6
+#define SETUP_CC_BLOB 7
+#define SETUP_IMA 8
+#define SETUP_RNG_SEED 9
+#define SETUP_KEXEC_KHO 10
+#define SETUP_ENUM_MAX SETUP_KEXEC_KHO
+
+#define SETUP_INDIRECT (1<<31)
+#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+
+/* extensible setup data list node */
+struct setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
+};
+
+/* extensible setup indirect data node */
+struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+};
+
+/*
+ * The E820 memory region entry of the boot protocol ABI:
+ */
+struct boot_e820_entry {
+ __u64 addr;
+ __u64 size;
+ __u32 type;
+} __attribute__((packed));
+
+/*
+ * The boot loader is passing platform information via this Jailhouse-specific
+ * setup data structure.
+ */
+struct jailhouse_setup_data {
+ struct {
+ __u16 version;
+ __u16 compatible_version;
+ } __attribute__((packed)) hdr;
+ struct {
+ __u16 pm_timer_address;
+ __u16 num_cpus;
+ __u64 pci_mmconfig_base;
+ __u32 tsc_khz;
+ __u32 apic_khz;
+ __u8 standard_ioapic;
+ __u8 cpu_ids[255];
+ } __attribute__((packed)) v1;
+ struct {
+ __u32 flags;
+ } __attribute__((packed)) v2;
+} __attribute__((packed));
+
+/*
+ * IMA buffer setup data information from the previous kernel during kexec
+ */
+struct ima_setup_data {
+ __u64 addr;
+ __u64 size;
+} __attribute__((packed));
+
+/*
+ * Locations of kexec handover metadata
+ */
+struct kho_data {
+ __u64 fdt_addr;
+ __u64 fdt_size;
+ __u64 scratch_addr;
+ __u64 scratch_size;
+} __attribute__((packed));
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
new file mode 100644
index 000000000000..3c4d52072189
--- /dev/null
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+#ifndef _UAPI_ASM_X86_SGX_H
+#define _UAPI_ASM_X86_SGX_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/**
+ * enum sgx_page_flags - page control flags
+ * @SGX_PAGE_MEASURE: Measure the page contents with a sequence of
+ * ENCLS[EEXTEND] operations.
+ */
+enum sgx_page_flags {
+ SGX_PAGE_MEASURE = 0x01,
+};
+
+#define SGX_MAGIC 0xA4
+
+#define SGX_IOC_ENCLAVE_CREATE \
+ _IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+#define SGX_IOC_ENCLAVE_ADD_PAGES \
+ _IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
+#define SGX_IOC_ENCLAVE_INIT \
+ _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_PROVISION \
+ _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
+#define SGX_IOC_VEPC_REMOVE_ALL \
+ _IO(SGX_MAGIC, 0x04)
+#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
+ _IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
+#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
+ _IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
+#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
+ _IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
+
+/**
+ * struct sgx_enclave_create - parameter structure for the
+ * %SGX_IOC_ENCLAVE_CREATE ioctl
+ * @src: address for the SECS page data
+ */
+struct sgx_enclave_create {
+ __u64 src;
+};
+
+/**
+ * struct sgx_enclave_add_pages - parameter structure for the
+ * %SGX_IOC_ENCLAVE_ADD_PAGE ioctl
+ * @src: start address for the page data
+ * @offset: starting page offset
+ * @length: length of the data (multiple of the page size)
+ * @secinfo: address for the SECINFO data
+ * @flags: page control flags
+ * @count: number of bytes added (multiple of the page size)
+ */
+struct sgx_enclave_add_pages {
+ __u64 src;
+ __u64 offset;
+ __u64 length;
+ __u64 secinfo;
+ __u64 flags;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_init - parameter structure for the
+ * %SGX_IOC_ENCLAVE_INIT ioctl
+ * @sigstruct: address for the SIGSTRUCT data
+ */
+struct sgx_enclave_init {
+ __u64 sigstruct;
+};
+
+/**
+ * struct sgx_enclave_provision - parameter structure for the
+ * %SGX_IOC_ENCLAVE_PROVISION ioctl
+ * @fd: file handle of /dev/sgx_provision
+ */
+struct sgx_enclave_provision {
+ __u64 fd;
+};
+
+/**
+ * struct sgx_enclave_restrict_permissions - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @permissions:new permission bits for pages in range described by @offset
+ * and @length
+ * @result: (output) SGX result code of ENCLS[EMODPR] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_restrict_permissions {
+ __u64 offset;
+ __u64 length;
+ __u64 permissions;
+ __u64 result;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_modify_types - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @page_type: new type for pages in range described by @offset and @length
+ * @result: (output) SGX result code of ENCLS[EMODT] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_modify_types {
+ __u64 offset;
+ __u64 length;
+ __u64 page_type;
+ __u64 result;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @count: (output) bytes successfully changed (multiple of page size)
+ *
+ * Regular (PT_REG) or TCS (PT_TCS) can be removed from an initialized
+ * enclave if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * ioctl() should be used to change the page type to PT_TRIM. After that
+ * succeeds ENCLU[EACCEPT] should be run from within the enclave and then
+ * %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal.
+ */
+struct sgx_enclave_remove_pages {
+ __u64 offset;
+ __u64 length;
+ __u64 count;
+};
+
+struct sgx_enclave_run;
+
+/**
+ * typedef sgx_enclave_user_handler_t - Exit handler function accepted by
+ * __vdso_sgx_enter_enclave()
+ * @rdi: RDI at the time of EEXIT, undefined on AEX
+ * @rsi: RSI at the time of EEXIT, undefined on AEX
+ * @rdx: RDX at the time of EEXIT, undefined on AEX
+ * @rsp: RSP (untrusted) at the time of EEXIT or AEX
+ * @r8: R8 at the time of EEXIT, undefined on AEX
+ * @r9: R9 at the time of EEXIT, undefined on AEX
+ * @run: The run instance given by the caller
+ *
+ * The register parameters contain the snapshot of their values at enclave
+ * exit. An invalid ENCLU function number will cause -EINVAL to be returned
+ * to the caller.
+ *
+ * Return:
+ * - <= 0: The given value is returned back to the caller.
+ * - > 0: ENCLU function to invoke, either EENTER or ERESUME.
+ */
+typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx,
+ long rsp, long r8, long r9,
+ struct sgx_enclave_run *run);
+
+/**
+ * struct sgx_enclave_run - the execution context of __vdso_sgx_enter_enclave()
+ * @tcs: TCS used to enter the enclave
+ * @function: The last seen ENCLU function (EENTER, ERESUME or EEXIT)
+ * @exception_vector: The interrupt vector of the exception
+ * @exception_error_code: The exception error code pulled out of the stack
+ * @exception_addr: The address that triggered the exception
+ * @user_handler: User provided callback run on exception
+ * @user_data: Data passed to the user handler
+ * @reserved: Reserved for future extensions
+ *
+ * If @user_handler is provided, the handler will be invoked on all return paths
+ * of the normal flow. The user handler may transfer control, e.g. via a
+ * longjmp() call or a C++ exception, without returning to
+ * __vdso_sgx_enter_enclave().
+ */
+struct sgx_enclave_run {
+ __u64 tcs;
+ __u32 function;
+ __u16 exception_vector;
+ __u16 exception_error_code;
+ __u64 exception_addr;
+ __u64 user_handler;
+ __u64 user_data;
+ __u8 reserved[216];
+};
+
+/**
+ * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(),
+ * a vDSO function to enter an SGX enclave.
+ * @rdi: Pass-through value for RDI
+ * @rsi: Pass-through value for RSI
+ * @rdx: Pass-through value for RDX
+ * @function: ENCLU function, must be EENTER or ERESUME
+ * @r8: Pass-through value for R8
+ * @r9: Pass-through value for R9
+ * @run: struct sgx_enclave_run, must be non-NULL
+ *
+ * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
+ * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile
+ * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting
+ * state in accordance with the x86-64 ABI is the responsibility of the enclave
+ * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C
+ * code without careful consideration by both the enclave and its runtime.
+ *
+ * All general purpose registers except RAX, RBX and RCX are passed as-is to the
+ * enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are loaded
+ * with @function, asynchronous exit pointer, and @run.tcs respectively.
+ *
+ * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
+ * pre-enclave state, e.g. to retrieve @run.exception and @run.user_handler
+ * after an enclave exit. All other registers are available for use by the
+ * enclave and its runtime, e.g. an enclave can push additional data onto the
+ * stack (and modify RSP) to pass information to the optional user handler (see
+ * below).
+ *
+ * Most exceptions reported on ENCLU, including those that occur within the
+ * enclave, are fixed up and reported synchronously instead of being delivered
+ * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
+ * never fixed up and are always delivered via standard signals. On synchronously
+ * reported exceptions, -EFAULT is returned and details about the exception are
+ * recorded in @run.exception, the optional sgx_enclave_exception struct.
+ *
+ * Return:
+ * - 0: ENCLU function was successfully executed.
+ * - -EINVAL: Invalid ENCL number (neither EENTER nor ERESUME).
+ */
+typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
+ unsigned long rdx, unsigned int function,
+ unsigned long r8, unsigned long r9,
+ struct sgx_enclave_run *run);
+
+#endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 644421f3823b..13775bfdfee2 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -5,20 +5,24 @@
#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/shmbuf.h>
#else
+
+#include <asm/ipcbuf.h>
+#include <asm/posix_types.h>
+
/*
* The shmid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
struct shmid64_ds {
struct ipc64_perm shm_perm; /* operation perms */
- size_t shm_segsz; /* size of segment (bytes) */
- __kernel_time_t shm_atime; /* last attach time */
- __kernel_time_t shm_dtime; /* last detach time */
- __kernel_time_t shm_ctime; /* last change time */
+ __kernel_size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_long_t shm_atime; /* last attach time */
+ __kernel_long_t shm_dtime; /* last detach time */
+ __kernel_long_t shm_ctime; /* last change time */
__kernel_pid_t shm_cpid; /* pid of creator */
__kernel_pid_t shm_lpid; /* pid of last operator */
__kernel_ulong_t shm_nattch; /* no. of current attaches */
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 844d60eb1882..d0d9b331d3a1 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -139,7 +139,7 @@ struct _fpstate_32 {
* The 64-bit FPU frame. (FXSAVE format and later)
*
* Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is
- * larger: 'struct _xstate'. Note that 'struct _xstate' embedds
+ * larger: 'struct _xstate'. Note that 'struct _xstate' embeds
* 'struct _fpstate' so that you can always assume the _fpstate portion
* exists so that you can check the magic value.
*
diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index 6b18e88de8a6..7114801d0499 100644
--- a/arch/x86/include/uapi/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_SIGCONTEXT32_H
#define _ASM_X86_SIGCONTEXT32_H
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index e5745d593dc7..1067efabf18b 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -2,9 +2,8 @@
#ifndef _UAPI_ASM_X86_SIGNAL_H
#define _UAPI_ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
-#include <linux/time.h>
#include <linux/compiler.h>
/* Avoid too many header ordering problems. */
@@ -17,7 +16,7 @@ struct siginfo;
typedef unsigned long sigset_t;
#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define SIGHUP 1
@@ -62,30 +61,6 @@ typedef unsigned long sigset_t;
#define SIGRTMIN 32
#define SIGRTMAX _NSIG
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP 0x00000001u
-#define SA_NOCLDWAIT 0x00000002u
-#define SA_SIGINFO 0x00000004u
-#define SA_ONSTACK 0x08000000u
-#define SA_RESTART 0x10000000u
-#define SA_NODEFER 0x40000000u
-#define SA_RESETHAND 0x80000000u
-
-#define SA_NOMASK SA_NODEFER
-#define SA_ONESHOT SA_RESETHAND
-
#define SA_RESTORER 0x04000000
#define MINSIGSTKSZ 2048
@@ -93,7 +68,7 @@ typedef unsigned long sigset_t;
#include <asm-generic/signal-defs.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
# ifndef __KERNEL__
@@ -128,9 +103,9 @@ struct sigaction {
typedef struct sigaltstack {
void __user *ss_sp;
int ss_flags;
- size_t ss_size;
+ __kernel_size_t ss_size;
} stack_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/x86/include/uapi/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/x86/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index a9731f8a480f..650e3256ea7d 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -29,6 +29,7 @@
#define SVM_EXIT_WRITE_DR6 0x036
#define SVM_EXIT_WRITE_DR7 0x037
#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_LAST_EXCP 0x05f
#define SVM_EXIT_INTR 0x060
#define SVM_EXIT_NMI 0x061
#define SVM_EXIT_SMI 0x062
@@ -75,9 +76,63 @@
#define SVM_EXIT_MWAIT 0x08b
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
+#define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP 0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP 0x090
+#define SVM_EXIT_CR1_WRITE_TRAP 0x091
+#define SVM_EXIT_CR2_WRITE_TRAP 0x092
+#define SVM_EXIT_CR3_WRITE_TRAP 0x093
+#define SVM_EXIT_CR4_WRITE_TRAP 0x094
+#define SVM_EXIT_CR5_WRITE_TRAP 0x095
+#define SVM_EXIT_CR6_WRITE_TRAP 0x096
+#define SVM_EXIT_CR7_WRITE_TRAP 0x097
+#define SVM_EXIT_CR8_WRITE_TRAP 0x098
+#define SVM_EXIT_CR9_WRITE_TRAP 0x099
+#define SVM_EXIT_CR10_WRITE_TRAP 0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP 0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP 0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP 0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
+#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_BUS_LOCK 0x0a5
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+#define SVM_EXIT_VMGEXIT 0x403
+
+/* SEV-ES software-defined VMGEXIT events */
+#define SVM_VMGEXIT_MMIO_READ 0x80000001
+#define SVM_VMGEXIT_MMIO_WRITE 0x80000002
+#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003
+#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004
+#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
+#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
+#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_PSC 0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
+#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
+#define SVM_VMGEXIT_AP_CREATE 1
+#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
+#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
+#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
+#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
+#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
+ /* SW_EXITINFO1[3:0] */ \
+ (((((u64)reason_set) & 0xf)) | \
+ /* SW_EXITINFO1[11:4] */ \
+ ((((u64)reason_code) & 0xff) << 4))
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW 0xf0000000
#define SVM_EXIT_ERR -1
@@ -170,9 +225,27 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
+ { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \
+ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
+ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
+ { SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_BUS_LOCK, "buslock" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
+ { SVM_EXIT_VMGEXIT, "vmgexit" }, \
+ { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \
+ { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \
+ { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
+ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \
+ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \
+ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \
+ { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \
+ { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \
+ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/x86/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/x86/include/uapi/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h
deleted file mode 100644
index df55e1ddb0c9..000000000000
--- a/arch/x86/include/uapi/asm/types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
-
-#include <asm-generic/types.h>
-
-#endif /* _ASM_X86_TYPES_H */
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
index 30d7d04d72d6..be5e2e747f50 100644
--- a/arch/x86/include/uapi/asm/unistd.h
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -2,7 +2,14 @@
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H
-/* x32 syscall flag bit */
+/*
+ * x32 syscall flag bit. Some user programs expect syscall NR macros
+ * and __X32_SYSCALL_BIT to have type int, even though syscall numbers
+ * are, for practical purposes, unsigned long.
+ *
+ * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right
+ * thing regardless.
+ */
#define __X32_SYSCALL_BIT 0x40000000
#ifndef __KERNEL__
diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h
index d2ee4e307ef8..18909b8050bc 100644
--- a/arch/x86/include/uapi/asm/vm86.h
+++ b/arch/x86/include/uapi/asm/vm86.h
@@ -97,7 +97,7 @@ struct revectored_struct {
struct vm86_struct {
struct vm86_regs regs;
unsigned long flags;
- unsigned long screen_bitmap;
+ unsigned long screen_bitmap; /* unused, preserved by vm86() */
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
@@ -106,7 +106,7 @@ struct vm86_struct {
/*
* flags masks
*/
-#define VM86_SCREEN_BITMAP 0x0001
+#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */
struct vm86plus_info_struct {
unsigned long force_return_for_pic:1;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..1baa86dfe029 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -27,12 +27,16 @@
#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_INIT_SIGNAL 3
+#define EXIT_REASON_SIPI_SIGNAL 4
+#define EXIT_REASON_OTHER_SMI 6
-#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_INTERRUPT_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
#define EXIT_REASON_TASK_SWITCH 9
#define EXIT_REASON_CPUID 10
@@ -85,12 +89,22 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
+#define EXIT_REASON_UMWAIT 67
+#define EXIT_REASON_TPAUSE 68
+#define EXIT_REASON_BUS_LOCK 74
+#define EXIT_REASON_NOTIFY 75
+#define EXIT_REASON_SEAMCALL 76
+#define EXIT_REASON_TDCALL 77
+#define EXIT_REASON_MSR_READ_IMM 84
+#define EXIT_REASON_MSR_WRITE_IMM 85
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
+ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
+ { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \
+ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
{ EXIT_REASON_CPUID, "CPUID" }, \
@@ -142,7 +156,17 @@
{ EXIT_REASON_RDSEED, "RDSEED" }, \
{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }, \
+ { EXIT_REASON_UMWAIT, "UMWAIT" }, \
+ { EXIT_REASON_TPAUSE, "TPAUSE" }, \
+ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
+ { EXIT_REASON_NOTIFY, "NOTIFY" }, \
+ { EXIT_REASON_TDCALL, "TDCALL" }, \
+ { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+ { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
+
+#define VMX_EXIT_REASON_FLAGS \
+ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2