diff options
Diffstat (limited to 'arch/x86')
238 files changed, 2990 insertions, 4568 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f2aed8012e9c..9c9bc348c412 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -73,7 +73,6 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY @@ -932,36 +931,6 @@ config GART_IOMMU If unsure, say Y. -config CALGARY_IOMMU - bool "IBM Calgary IOMMU support" - select IOMMU_HELPER - select SWIOTLB - depends on X86_64 && PCI - ---help--- - Support for hardware IOMMUs in IBM's xSeries x366 and x460 - systems. Needed to run systems with more than 3GB of memory - properly with 32-bit PCI devices that do not support DAC - (Double Address Cycle). Calgary also supports bus level - isolation, where all DMAs pass through the IOMMU. This - prevents them from going anywhere except their intended - destination. This catches hard-to-find kernel bugs and - mis-behaving drivers and devices that do not use the DMA-API - properly to set up their DMA buffers. The IOMMU can be - turned off at boot time with the iommu=off parameter. - Normally the kernel will make the right choice by itself. - If unsure, say Y. - -config CALGARY_IOMMU_ENABLED_BY_DEFAULT - def_bool y - prompt "Should Calgary be enabled by default?" - depends on CALGARY_IOMMU - ---help--- - Should Calgary be enabled by default? if you choose 'y', Calgary - will be used (if it exists). If you choose 'n', Calgary will not be - used even if it exists. If you choose 'n' and would like to use - Calgary anyway, pass 'iommu=calgary' on the kernel command line. - If unsure, say Y. - config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL @@ -1000,8 +969,8 @@ config NR_CPUS_RANGE_END config NR_CPUS_RANGE_END int depends on X86_64 - default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK) - default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK) + default 8192 if SMP && CPUMASK_OFFSTACK + default 512 if SMP && !CPUMASK_OFFSTACK default 1 if !SMP config NR_CPUS_DEFAULT @@ -1254,6 +1223,24 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. +config X86_IOPL_IOPERM + bool "IOPERM and IOPL Emulation" + default y + ---help--- + This enables the ioperm() and iopl() syscalls which are necessary + for legacy applications. + + Legacy IOPL support is an overbroad mechanism which allows user + space aside of accessing all 65536 I/O ports also to disable + interrupts. To gain this access the caller needs CAP_SYS_RAWIO + capabilities and permission from potentially active security + modules. + + The emulation restricts the functionality of the syscall to + only allowing the full range I/O port access, but prevents the + ability to disable interrupts from user space which would be + granted if the hardware IOPL mechanism would be used. + config TOSHIBA tristate "Toshiba Laptop support" depends on X86_32 @@ -1492,6 +1479,7 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" + default y select DYNAMIC_MEMORY_LAYOUT select SPARSEMEM_VMEMMAP depends on X86_64 @@ -1751,7 +1739,7 @@ config X86_RESERVE_LOW config MATH_EMULATION bool depends on MODIFY_LDT_SYSCALL - prompt "Math emulation" if X86_32 + prompt "Math emulation" if X86_32 && (M486SX || MELAN) ---help--- Linux can emulate a math coprocessor (used for floating point operations) if you don't have one. 486DX and Pentium processors have @@ -1880,16 +1868,16 @@ config X86_SMAP If unsure, say Y. -config X86_INTEL_UMIP +config X86_UMIP def_bool y - depends on CPU_SUP_INTEL - prompt "Intel User Mode Instruction Prevention" if EXPERT + depends on CPU_SUP_INTEL || CPU_SUP_AMD + prompt "User Mode Instruction Prevention" if EXPERT ---help--- - The User Mode Instruction Prevention (UMIP) is a security - feature in newer Intel processors. If enabled, a general - protection fault is issued if the SGDT, SLDT, SIDT, SMSW - or STR instructions are executed in user mode. These instructions - unnecessarily expose information about the hardware state. + User Mode Instruction Prevention (UMIP) is a security feature in + some x86 processors. If enabled, a general protection fault is + issued if the SGDT, SLDT, SIDT, SMSW or STR instructions are + executed in user mode. These instructions unnecessarily expose + information about the hardware state. The vast majority of applications do not use these instructions. For the very few that do, software emulation is provided in diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8e29c991ba3e..af9c967782f6 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -50,12 +50,19 @@ choice See each option's help text for additional details. If you don't know what to do, choose "486". +config M486SX + bool "486SX" + depends on X86_32 + ---help--- + Select this for an 486-class CPU without an FPU such as + AMD/Cyrix/IBM/Intel SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5S. + config M486 - bool "486" + bool "486DX" depends on X86_32 ---help--- Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel - 486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + 486DX/DX2/DX4 and UMC U5D. config M586 bool "586/K5/5x86/6x86/6x86MX" @@ -312,20 +319,20 @@ config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU - default "4" if MELAN || M486 || MGEODEGX1 + default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX config X86_F00F_BUG def_bool y - depends on M586MMX || M586TSC || M586 || M486 + depends on M586MMX || M586TSC || M586 || M486SX || M486 config X86_INVD_BUG def_bool y - depends on M486 + depends on M486SX || M486 config X86_ALIGNMENT_16 def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1 config X86_INTEL_USERCOPY def_bool y @@ -378,7 +385,7 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y - depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML + depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT @@ -402,7 +409,7 @@ config CPU_SUP_INTEL config CPU_SUP_CYRIX_32 default y bool "Support Cyrix processors" if PROCESSOR_SELECT - depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT) + depends on M486SX || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT) ---help--- This enables detection, tunings and quirks for Cyrix processors @@ -470,7 +477,7 @@ config CPU_SUP_TRANSMETA_32 config CPU_SUP_UMC_32 default y bool "Support UMC processors" if PROCESSOR_SELECT - depends on M486 || (EXPERT && !64BIT) + depends on M486SX || M486 || (EXPERT && !64BIT) ---help--- This enables detection, tunings and quirks for UMC processors diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1f5faf8606b4..cd3056759880 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -10,6 +10,7 @@ else tune = $(call cc-option,-mcpu=$(1),$(2)) endif +cflags-$(CONFIG_M486SX) += -march=i486 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 cflags-$(CONFIG_M586TSC) += -march=i586 diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index e2839b5c246c..95410d6ee2ff 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -67,6 +67,7 @@ clean-files += cpustr.h KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) GCOV_PROFILE := n UBSAN_SANITIZE := n @@ -87,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6b84afdd7538..aa976adb7094 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -38,6 +38,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += -Wno-pointer-sign +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -72,8 +73,8 @@ $(obj)/../voffset.h: vmlinux FORCE $(obj)/misc.o: $(obj)/../voffset.h -vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ - $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ +vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \ + $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ $(obj)/piggy.o $(obj)/cpuflags.o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 82bc60c8acb2..72b08fde6de6 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -554,7 +554,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - e820_type = E820_TYPE_RAM; + if (efi_soft_reserve_enabled() && + (d->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else + e820_type = E820_TYPE_RAM; break; case EFI_ACPI_MEMORY_NVS: @@ -782,6 +786,9 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) /* Ask the firmware to clear memory on unclean shutdown */ efi_enable_reset_attack_mitigation(sys_table); + + efi_random_get_seed(sys_table); + efi_retrieve_tpm2_eventlog(sys_table); setup_graphics(boot_params); diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S index 257e341fd2c8..ed6c351d34ed 100644 --- a/arch/x86/boot/compressed/efi_stub_32.S +++ b/arch/x86/boot/compressed/efi_stub_32.S @@ -24,7 +24,7 @@ */ .text -ENTRY(efi_call_phys) +SYM_FUNC_START(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found @@ -77,7 +77,7 @@ ENTRY(efi_call_phys) movl saved_return_addr(%edx), %ecx pushl %ecx ret -ENDPROC(efi_call_phys) +SYM_FUNC_END(efi_call_phys) .previous .data diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index bff9ab7c6317..593913692d16 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -23,7 +23,7 @@ .code64 .text -ENTRY(efi64_thunk) +SYM_FUNC_START(efi64_thunk) push %rbp push %rbx @@ -97,14 +97,14 @@ ENTRY(efi64_thunk) pop %rbx pop %rbp ret -ENDPROC(efi64_thunk) +SYM_FUNC_END(efi64_thunk) -ENTRY(efi_exit32) +SYM_FUNC_START_LOCAL(efi_exit32) movq func_rt_ptr(%rip), %rax push %rax mov %rdi, %rax ret -ENDPROC(efi_exit32) +SYM_FUNC_END(efi_exit32) .code32 /* @@ -112,7 +112,7 @@ ENDPROC(efi_exit32) * * The stack should represent the 32-bit calling convention. */ -ENTRY(efi_enter32) +SYM_FUNC_START_LOCAL(efi_enter32) movl $__KERNEL_DS, %eax movl %eax, %ds movl %eax, %es @@ -172,20 +172,23 @@ ENTRY(efi_enter32) btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 lret -ENDPROC(efi_enter32) +SYM_FUNC_END(efi_enter32) .data .balign 8 - .global efi32_boot_gdt -efi32_boot_gdt: .word 0 - .quad 0 +SYM_DATA_START(efi32_boot_gdt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_gdt) + +SYM_DATA_START_LOCAL(save_gdt) + .word 0 + .quad 0 +SYM_DATA_END(save_gdt) -save_gdt: .word 0 - .quad 0 -func_rt_ptr: .quad 0 +SYM_DATA_LOCAL(func_rt_ptr, .quad 0) - .global efi_gdt64 -efi_gdt64: +SYM_DATA_START(efi_gdt64) .word efi_gdt64_end - efi_gdt64 .long 0 /* Filled out by user */ .word 0 @@ -194,4 +197,4 @@ efi_gdt64: .quad 0x00cf92000000ffff /* __KERNEL_DS */ .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ -efi_gdt64_end: +SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5e30eaaf8576..f2dfd6d083ef 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -61,7 +61,7 @@ .hidden _egot __HEAD -ENTRY(startup_32) +SYM_FUNC_START(startup_32) cld /* * Test KEEP_SEGMENTS flag to see if the bootloader is asking @@ -142,14 +142,14 @@ ENTRY(startup_32) */ leal .Lrelocated(%ebx), %eax jmp *%eax -ENDPROC(startup_32) +SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB /* * We don't need the return address, so set up the stack so efi_main() can find * its arguments. */ -ENTRY(efi_pe_entry) +SYM_FUNC_START(efi_pe_entry) add $0x4, %esp call 1f @@ -174,9 +174,9 @@ ENTRY(efi_pe_entry) pushl %eax pushl %ecx jmp 2f /* Skip efi_config initialization */ -ENDPROC(efi_pe_entry) +SYM_FUNC_END(efi_pe_entry) -ENTRY(efi32_stub_entry) +SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp popl %ecx popl %edx @@ -205,11 +205,11 @@ fail: movl BP_code32_start(%esi), %eax leal startup_32(%eax), %eax jmp *%eax -ENDPROC(efi32_stub_entry) +SYM_FUNC_END(efi32_stub_entry) #endif .text -.Lrelocated: +SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* * Clear BSS (stack is currently empty) @@ -260,6 +260,7 @@ ENDPROC(efi32_stub_entry) */ xorl %ebx, %ebx jmp *%eax +SYM_FUNC_END(.Lrelocated) #ifdef CONFIG_EFI_STUB .data diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d98cd483377e..58a512e33d8d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -45,7 +45,7 @@ __HEAD .code32 -ENTRY(startup_32) +SYM_FUNC_START(startup_32) /* * 32bit entry is 0 and it is ABI so immutable! * If we come here directly from a bootloader, @@ -222,11 +222,11 @@ ENTRY(startup_32) /* Jump from 32bit compatibility mode into 64bit mode. */ lret -ENDPROC(startup_32) +SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_MIXED .org 0x190 -ENTRY(efi32_stub_entry) +SYM_FUNC_START(efi32_stub_entry) add $0x4, %esp /* Discard return address */ popl %ecx popl %edx @@ -245,12 +245,12 @@ ENTRY(efi32_stub_entry) movl %eax, efi_config(%ebp) jmp startup_32 -ENDPROC(efi32_stub_entry) +SYM_FUNC_END(efi32_stub_entry) #endif .code64 .org 0x200 -ENTRY(startup_64) +SYM_CODE_START(startup_64) /* * 64bit entry is 0x200 and it is ABI so immutable! * We come here either from startup_32 or directly from a @@ -442,11 +442,12 @@ trampoline_return: */ leaq .Lrelocated(%rbx), %rax jmp *%rax +SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB /* The entry point for the PE/COFF executable is efi_pe_entry. */ -ENTRY(efi_pe_entry) +SYM_FUNC_START(efi_pe_entry) movq %rcx, efi64_config(%rip) /* Handle */ movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ @@ -495,10 +496,10 @@ fail: movl BP_code32_start(%esi), %eax leaq startup_64(%rax), %rax jmp *%rax -ENDPROC(efi_pe_entry) +SYM_FUNC_END(efi_pe_entry) .org 0x390 -ENTRY(efi64_stub_entry) +SYM_FUNC_START(efi64_stub_entry) movq %rdi, efi64_config(%rip) /* Handle */ movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ @@ -507,11 +508,11 @@ ENTRY(efi64_stub_entry) movq %rdx, %rsi jmp handover_entry -ENDPROC(efi64_stub_entry) +SYM_FUNC_END(efi64_stub_entry) #endif .text -.Lrelocated: +SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* * Clear BSS (stack is currently empty) @@ -540,6 +541,7 @@ ENDPROC(efi64_stub_entry) * Jump to the decompressed kernel. */ jmp *%rax +SYM_FUNC_END(.Lrelocated) /* * Adjust the global offset table @@ -570,7 +572,7 @@ ENDPROC(efi64_stub_entry) * ECX contains the base address of the trampoline memory. * Non zero RDX means trampoline needs to enable 5-level paging. */ -ENTRY(trampoline_32bit_src) +SYM_CODE_START(trampoline_32bit_src) /* Set up data and stack segments */ movl $__KERNEL_DS, %eax movl %eax, %ds @@ -633,11 +635,13 @@ ENTRY(trampoline_32bit_src) movl %eax, %cr0 lret +SYM_CODE_END(trampoline_32bit_src) .code64 -.Lpaging_enabled: +SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) /* Return from the trampoline */ jmp *%rdi +SYM_FUNC_END(.Lpaging_enabled) /* * The trampoline code has a size limit. @@ -647,20 +651,22 @@ ENTRY(trampoline_32bit_src) .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE .code32 -.Lno_longmode: +SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: hlt jmp 1b +SYM_FUNC_END(.Lno_longmode) #include "../../kernel/verify_cpu.S" .data -gdt64: +SYM_DATA_START_LOCAL(gdt64) .word gdt_end - gdt .quad 0 +SYM_DATA_END(gdt64) .balign 8 -gdt: +SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt .long gdt .word 0 @@ -669,25 +675,24 @@ gdt: .quad 0x00cf92000000ffff /* __KERNEL_DS */ .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ -gdt_end: +SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) #ifdef CONFIG_EFI_STUB -efi_config: - .quad 0 +SYM_DATA_LOCAL(efi_config, .quad 0) #ifdef CONFIG_EFI_MIXED - .global efi32_config -efi32_config: +SYM_DATA_START(efi32_config) .fill 5,8,0 .quad efi64_thunk .byte 0 +SYM_DATA_END(efi32_config) #endif - .global efi64_config -efi64_config: +SYM_DATA_START(efi64_config) .fill 5,8,0 .quad efi_call .byte 1 +SYM_DATA_END(efi64_config) #endif /* CONFIG_EFI_STUB */ /* @@ -695,23 +700,21 @@ efi64_config: */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 -boot_stack: +SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) + +SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 -boot_stack_end: +SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end) /* * Space for page tables (not in .bss so not zeroed) */ .section ".pgtable","a",@nobits .balign 4096 -pgtable: - .fill BOOT_PGT_SIZE, 1, 0 +SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0) /* * The page table is going to be used instead of page table in the trampoline * memory. */ -top_pgtable: - .fill PAGE_SIZE, 1, 0 +SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 2e53c056ba20..d7408af55738 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -132,8 +132,14 @@ char *skip_spaces(const char *str) #include "../../../../lib/ctype.c" #include "../../../../lib/cmdline.c" +enum parse_mode { + PARSE_MEMMAP, + PARSE_EFI, +}; + static int -parse_memmap(char *p, unsigned long long *start, unsigned long long *size) +parse_memmap(char *p, unsigned long long *start, unsigned long long *size, + enum parse_mode mode) { char *oldp; @@ -156,8 +162,29 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) *start = memparse(p + 1, &p); return 0; case '@': - /* memmap=nn@ss specifies usable region, should be skipped */ - *size = 0; + if (mode == PARSE_MEMMAP) { + /* + * memmap=nn@ss specifies usable region, should + * be skipped + */ + *size = 0; + } else { + unsigned long long flags; + + /* + * efi_fake_mem=nn@ss:attr the attr specifies + * flags that might imply a soft-reservation. + */ + *start = memparse(p + 1, &p); + if (p && *p == ':') { + p++; + if (kstrtoull(p, 0, &flags) < 0) + *size = 0; + else if (flags & EFI_MEMORY_SP) + return 0; + } + *size = 0; + } /* Fall through */ default: /* @@ -172,7 +199,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) return -EINVAL; } -static void mem_avoid_memmap(char *str) +static void mem_avoid_memmap(enum parse_mode mode, char *str) { static int i; @@ -187,7 +214,7 @@ static void mem_avoid_memmap(char *str) if (k) *k++ = 0; - rc = parse_memmap(str, &start, &size); + rc = parse_memmap(str, &start, &size, mode); if (rc < 0) break; str = k; @@ -238,7 +265,6 @@ static void parse_gb_huge_pages(char *param, char *val) } } - static void handle_mem_options(void) { char *args = (char *)get_cmd_line_ptr(); @@ -271,7 +297,7 @@ static void handle_mem_options(void) } if (!strcmp(param, "memmap")) { - mem_avoid_memmap(val); + mem_avoid_memmap(PARSE_MEMMAP, val); } else if (strstr(param, "hugepages")) { parse_gb_huge_pages(param, val); } else if (!strcmp(param, "mem")) { @@ -284,6 +310,8 @@ static void handle_mem_options(void) goto out; mem_limit = mem_size; + } else if (!strcmp(param, "efi_fake_mem")) { + mem_avoid_memmap(PARSE_EFI, val); } } @@ -459,6 +487,18 @@ static bool mem_avoid_overlap(struct mem_vector *img, is_overlapping = true; } + if (ptr->type == SETUP_INDIRECT && + ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) { + avoid.start = ((struct setup_indirect *)ptr->data)->addr; + avoid.size = ((struct setup_indirect *)ptr->data)->len; + + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { + *overlap = avoid; + earliest = overlap->start; + is_overlapping = true; + } + } + ptr = (struct setup_data *)(unsigned long)ptr->next; } @@ -760,6 +800,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size) if (md->type != EFI_CONVENTIONAL_MEMORY) continue; + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + if (efi_mirror_found && !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) continue; diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S new file mode 100644 index 000000000000..f818ee8fba38 --- /dev/null +++ b/arch/x86/boot/compressed/kernel_info.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm/bootparam.h> + + .section ".rodata.kernel_info", "a" + + .global kernel_info + +kernel_info: + /* Header, Linux top (structure). */ + .ascii "LToP" + /* Size. */ + .long kernel_info_var_len_data - kernel_info + /* Size total. */ + .long kernel_info_end - kernel_info + + /* Maximal allowed type for setup_data and setup_indirect structs. */ + .long SETUP_TYPE_MAX + +kernel_info_var_len_data: + /* Empty for time being... */ +kernel_info_end: diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 6afb7130a387..dd07e7b41b11 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -15,7 +15,7 @@ .text .code32 -ENTRY(get_sev_encryption_bit) +SYM_FUNC_START(get_sev_encryption_bit) xor %eax, %eax #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -65,10 +65,10 @@ ENTRY(get_sev_encryption_bit) #endif /* CONFIG_AMD_MEM_ENCRYPT */ ret -ENDPROC(get_sev_encryption_bit) +SYM_FUNC_END(get_sev_encryption_bit) .code64 -ENTRY(set_sev_encryption_mask) +SYM_FUNC_START(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp push %rdx @@ -90,12 +90,11 @@ ENTRY(set_sev_encryption_mask) xor %rax, %rax ret -ENDPROC(set_sev_encryption_mask) +SYM_FUNC_END(set_sev_encryption_mask) .data #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -GLOBAL(sme_me_mask) - .quad 0 +SYM_DATA(sme_me_mask, .quad 0) #endif diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 4c5f4f4ad035..6afd05e819d2 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -15,7 +15,7 @@ .code16 .text -GLOBAL(memcpy) +SYM_FUNC_START_NOALIGN(memcpy) pushw %si pushw %di movw %ax, %di @@ -29,9 +29,9 @@ GLOBAL(memcpy) popw %di popw %si retl -ENDPROC(memcpy) +SYM_FUNC_END(memcpy) -GLOBAL(memset) +SYM_FUNC_START_NOALIGN(memset) pushw %di movw %ax, %di movzbl %dl, %eax @@ -44,22 +44,22 @@ GLOBAL(memset) rep; stosb popw %di retl -ENDPROC(memset) +SYM_FUNC_END(memset) -GLOBAL(copy_from_fs) +SYM_FUNC_START_NOALIGN(copy_from_fs) pushw %ds pushw %fs popw %ds calll memcpy popw %ds retl -ENDPROC(copy_from_fs) +SYM_FUNC_END(copy_from_fs) -GLOBAL(copy_to_fs) +SYM_FUNC_START_NOALIGN(copy_to_fs) pushw %es pushw %fs popw %es calll memcpy popw %es retl -ENDPROC(copy_to_fs) +SYM_FUNC_END(copy_to_fs) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 2c11c0f45d49..97d9b6d6c1af 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -300,7 +300,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x020d # header version number (>= 0x0105) + .word 0x020f # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -567,6 +567,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c +kernel_info_offset: .long 0 # Filled in by build.c # End of setup header ##################################################### diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index c22f9a7d1aeb..cbec8bd0841f 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -21,7 +21,7 @@ /* * void protected_mode_jump(u32 entrypoint, u32 bootparams); */ -GLOBAL(protected_mode_jump) +SYM_FUNC_START_NOALIGN(protected_mode_jump) movl %edx, %esi # Pointer to boot_params table xorl %ebx, %ebx @@ -40,13 +40,13 @@ GLOBAL(protected_mode_jump) # Transition to 32-bit mode .byte 0x66, 0xea # ljmpl opcode -2: .long in_pm32 # offset +2: .long .Lin_pm32 # offset .word __BOOT_CS # segment -ENDPROC(protected_mode_jump) +SYM_FUNC_END(protected_mode_jump) .code32 .section ".text32","ax" -GLOBAL(in_pm32) +SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32) # Set up data segments for flat 32-bit mode movl %ecx, %ds movl %ecx, %es @@ -72,4 +72,4 @@ GLOBAL(in_pm32) lldt %cx jmpl *%eax # Jump to the 32-bit entrypoint -ENDPROC(in_pm32) +SYM_FUNC_END(.Lin_pm32) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a93d44e58f9c..55e669d29e54 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -56,6 +56,7 @@ u8 buf[SETUP_SECT_MAX*512]; unsigned long efi32_stub_entry; unsigned long efi64_stub_entry; unsigned long efi_pe_entry; +unsigned long kernel_info; unsigned long startup_64; /*----------------------------------------------------------------------*/ @@ -321,6 +322,7 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi32_stub_entry); PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); + PARSE_ZOFS(p, kernel_info); PARSE_ZOFS(p, startup_64); p = strchr(p, '\n'); @@ -410,6 +412,9 @@ int main(int argc, char ** argv) efi_stub_entry_update(); + /* Update kernel_info offset. */ + put_unaligned_le32(kernel_info, &buf[0x268]); + crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) die("Writing setup failed"); diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index d0a5ffeae8df..0b9654c7a05c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -25,7 +25,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_SMP=y -CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_PREEMPT_VOLUNTARY=y diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 4434607e366d..51d46d93efbc 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -71,7 +71,7 @@ * %r8 * %r9 */ -__load_partial: +SYM_FUNC_START_LOCAL(__load_partial) xor %r9d, %r9d pxor MSG, MSG @@ -123,7 +123,7 @@ __load_partial: .Lld_partial_8: ret -ENDPROC(__load_partial) +SYM_FUNC_END(__load_partial) /* * __store_partial: internal ABI @@ -137,7 +137,7 @@ ENDPROC(__load_partial) * %r9 * %r10 */ -__store_partial: +SYM_FUNC_START_LOCAL(__store_partial) mov LEN, %r8 mov DST, %r9 @@ -181,12 +181,12 @@ __store_partial: .Lst_partial_1: ret -ENDPROC(__store_partial) +SYM_FUNC_END(__store_partial) /* * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv); */ -ENTRY(crypto_aegis128_aesni_init) +SYM_FUNC_START(crypto_aegis128_aesni_init) FRAME_BEGIN /* load IV: */ @@ -226,13 +226,13 @@ ENTRY(crypto_aegis128_aesni_init) FRAME_END ret -ENDPROC(crypto_aegis128_aesni_init) +SYM_FUNC_END(crypto_aegis128_aesni_init) /* * void crypto_aegis128_aesni_ad(void *state, unsigned int length, * const void *data); */ -ENTRY(crypto_aegis128_aesni_ad) +SYM_FUNC_START(crypto_aegis128_aesni_ad) FRAME_BEGIN cmp $0x10, LEN @@ -378,7 +378,7 @@ ENTRY(crypto_aegis128_aesni_ad) .Lad_out: FRAME_END ret -ENDPROC(crypto_aegis128_aesni_ad) +SYM_FUNC_END(crypto_aegis128_aesni_ad) .macro encrypt_block a s0 s1 s2 s3 s4 i movdq\a (\i * 0x10)(SRC), MSG @@ -402,7 +402,7 @@ ENDPROC(crypto_aegis128_aesni_ad) * void crypto_aegis128_aesni_enc(void *state, unsigned int length, * const void *src, void *dst); */ -ENTRY(crypto_aegis128_aesni_enc) +SYM_FUNC_START(crypto_aegis128_aesni_enc) FRAME_BEGIN cmp $0x10, LEN @@ -493,13 +493,13 @@ ENTRY(crypto_aegis128_aesni_enc) .Lenc_out: FRAME_END ret -ENDPROC(crypto_aegis128_aesni_enc) +SYM_FUNC_END(crypto_aegis128_aesni_enc) /* * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, * const void *src, void *dst); */ -ENTRY(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_BEGIN /* load the state: */ @@ -533,7 +533,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail) FRAME_END ret -ENDPROC(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i movdq\a (\i * 0x10)(SRC), MSG @@ -556,7 +556,7 @@ ENDPROC(crypto_aegis128_aesni_enc_tail) * void crypto_aegis128_aesni_dec(void *state, unsigned int length, * const void *src, void *dst); */ -ENTRY(crypto_aegis128_aesni_dec) +SYM_FUNC_START(crypto_aegis128_aesni_dec) FRAME_BEGIN cmp $0x10, LEN @@ -647,13 +647,13 @@ ENTRY(crypto_aegis128_aesni_dec) .Ldec_out: FRAME_END ret -ENDPROC(crypto_aegis128_aesni_dec) +SYM_FUNC_END(crypto_aegis128_aesni_dec) /* * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, * const void *src, void *dst); */ -ENTRY(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) FRAME_BEGIN /* load the state: */ @@ -697,13 +697,13 @@ ENTRY(crypto_aegis128_aesni_dec_tail) FRAME_END ret -ENDPROC(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) /* * void crypto_aegis128_aesni_final(void *state, void *tag_xor, * u64 assoclen, u64 cryptlen); */ -ENTRY(crypto_aegis128_aesni_final) +SYM_FUNC_START(crypto_aegis128_aesni_final) FRAME_BEGIN /* load the state: */ @@ -744,4 +744,4 @@ ENTRY(crypto_aegis128_aesni_final) FRAME_END ret -ENDPROC(crypto_aegis128_aesni_final) +SYM_FUNC_END(crypto_aegis128_aesni_final) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 5f6a5af9c489..ec437db1fa54 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -544,11 +544,11 @@ ddq_add_8: * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, * unsigned int num_bytes) */ -ENTRY(aes_ctr_enc_128_avx_by8) +SYM_FUNC_START(aes_ctr_enc_128_avx_by8) /* call the aes main loop */ do_aes_ctrmain KEY_128 -ENDPROC(aes_ctr_enc_128_avx_by8) +SYM_FUNC_END(aes_ctr_enc_128_avx_by8) /* * routine to do AES192 CTR enc/decrypt "by8" @@ -557,11 +557,11 @@ ENDPROC(aes_ctr_enc_128_avx_by8) * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, * unsigned int num_bytes) */ -ENTRY(aes_ctr_enc_192_avx_by8) +SYM_FUNC_START(aes_ctr_enc_192_avx_by8) /* call the aes main loop */ do_aes_ctrmain KEY_192 -ENDPROC(aes_ctr_enc_192_avx_by8) +SYM_FUNC_END(aes_ctr_enc_192_avx_by8) /* * routine to do AES256 CTR enc/decrypt "by8" @@ -570,8 +570,8 @@ ENDPROC(aes_ctr_enc_192_avx_by8) * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, * unsigned int num_bytes) */ -ENTRY(aes_ctr_enc_256_avx_by8) +SYM_FUNC_START(aes_ctr_enc_256_avx_by8) /* call the aes main loop */ do_aes_ctrmain KEY_256 -ENDPROC(aes_ctr_enc_256_avx_by8) +SYM_FUNC_END(aes_ctr_enc_256_avx_by8) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index e40bdf024ba7..d28503f99f58 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1592,7 +1592,7 @@ _esb_loop_\@: * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ -ENTRY(aesni_gcm_dec) +SYM_FUNC_START(aesni_gcm_dec) FUNC_SAVE GCM_INIT %arg6, arg7, arg8, arg9 @@ -1600,7 +1600,7 @@ ENTRY(aesni_gcm_dec) GCM_COMPLETE arg10, arg11 FUNC_RESTORE ret -ENDPROC(aesni_gcm_dec) +SYM_FUNC_END(aesni_gcm_dec) /***************************************************************************** @@ -1680,7 +1680,7 @@ ENDPROC(aesni_gcm_dec) * * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ -ENTRY(aesni_gcm_enc) +SYM_FUNC_START(aesni_gcm_enc) FUNC_SAVE GCM_INIT %arg6, arg7, arg8, arg9 @@ -1689,7 +1689,7 @@ ENTRY(aesni_gcm_enc) GCM_COMPLETE arg10, arg11 FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc) +SYM_FUNC_END(aesni_gcm_enc) /***************************************************************************** * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. @@ -1702,12 +1702,12 @@ ENDPROC(aesni_gcm_enc) * const u8 *aad, // Additional Authentication Data (AAD) * u64 aad_len) // Length of AAD in bytes. */ -ENTRY(aesni_gcm_init) +SYM_FUNC_START(aesni_gcm_init) FUNC_SAVE GCM_INIT %arg3, %arg4,%arg5, %arg6 FUNC_RESTORE ret -ENDPROC(aesni_gcm_init) +SYM_FUNC_END(aesni_gcm_init) /***************************************************************************** * void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. @@ -1717,12 +1717,12 @@ ENDPROC(aesni_gcm_init) * const u8 *in, // Plaintext input * u64 plaintext_len, // Length of data in bytes for encryption. */ -ENTRY(aesni_gcm_enc_update) +SYM_FUNC_START(aesni_gcm_enc_update) FUNC_SAVE GCM_ENC_DEC enc FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc_update) +SYM_FUNC_END(aesni_gcm_enc_update) /***************************************************************************** * void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. @@ -1732,12 +1732,12 @@ ENDPROC(aesni_gcm_enc_update) * const u8 *in, // Plaintext input * u64 plaintext_len, // Length of data in bytes for encryption. */ -ENTRY(aesni_gcm_dec_update) +SYM_FUNC_START(aesni_gcm_dec_update) FUNC_SAVE GCM_ENC_DEC dec FUNC_RESTORE ret -ENDPROC(aesni_gcm_dec_update) +SYM_FUNC_END(aesni_gcm_dec_update) /***************************************************************************** * void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. @@ -1747,19 +1747,18 @@ ENDPROC(aesni_gcm_dec_update) * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), * // 12 or 8. */ -ENTRY(aesni_gcm_finalize) +SYM_FUNC_START(aesni_gcm_finalize) FUNC_SAVE GCM_COMPLETE %arg3 %arg4 FUNC_RESTORE ret -ENDPROC(aesni_gcm_finalize) +SYM_FUNC_END(aesni_gcm_finalize) #endif -.align 4 -_key_expansion_128: -_key_expansion_256a: +SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) +SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -1769,11 +1768,10 @@ _key_expansion_256a: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret -ENDPROC(_key_expansion_128) -ENDPROC(_key_expansion_256a) +SYM_FUNC_END(_key_expansion_256a) +SYM_FUNC_END_ALIAS(_key_expansion_128) -.align 4 -_key_expansion_192a: +SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -1795,10 +1793,9 @@ _key_expansion_192a: movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP ret -ENDPROC(_key_expansion_192a) +SYM_FUNC_END(_key_expansion_192a) -.align 4 -_key_expansion_192b: +SYM_FUNC_START_LOCAL(_key_expansion_192b) pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -1815,10 +1812,9 @@ _key_expansion_192b: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret -ENDPROC(_key_expansion_192b) +SYM_FUNC_END(_key_expansion_192b) -.align 4 -_key_expansion_256b: +SYM_FUNC_START_LOCAL(_key_expansion_256b) pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 pxor %xmm4, %xmm2 @@ -1828,13 +1824,13 @@ _key_expansion_256b: movaps %xmm2, (TKEYP) add $0x10, TKEYP ret -ENDPROC(_key_expansion_256b) +SYM_FUNC_END(_key_expansion_256b) /* * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, * unsigned int key_len) */ -ENTRY(aesni_set_key) +SYM_FUNC_START(aesni_set_key) FRAME_BEGIN #ifndef __x86_64__ pushl KEYP @@ -1943,12 +1939,12 @@ ENTRY(aesni_set_key) #endif FRAME_END ret -ENDPROC(aesni_set_key) +SYM_FUNC_END(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ -ENTRY(aesni_enc) +SYM_FUNC_START(aesni_enc) FRAME_BEGIN #ifndef __x86_64__ pushl KEYP @@ -1967,7 +1963,7 @@ ENTRY(aesni_enc) #endif FRAME_END ret -ENDPROC(aesni_enc) +SYM_FUNC_END(aesni_enc) /* * _aesni_enc1: internal ABI @@ -1981,8 +1977,7 @@ ENDPROC(aesni_enc) * KEY * TKEYP (T1) */ -.align 4 -_aesni_enc1: +SYM_FUNC_START_LOCAL(_aesni_enc1) movaps (KEYP), KEY # key mov KEYP, TKEYP pxor KEY, STATE # round 0 @@ -2025,7 +2020,7 @@ _aesni_enc1: movaps 0x70(TKEYP), KEY AESENCLAST KEY STATE ret -ENDPROC(_aesni_enc1) +SYM_FUNC_END(_aesni_enc1) /* * _aesni_enc4: internal ABI @@ -2045,8 +2040,7 @@ ENDPROC(_aesni_enc1) * KEY * TKEYP (T1) */ -.align 4 -_aesni_enc4: +SYM_FUNC_START_LOCAL(_aesni_enc4) movaps (KEYP), KEY # key mov KEYP, TKEYP pxor KEY, STATE1 # round 0 @@ -2134,12 +2128,12 @@ _aesni_enc4: AESENCLAST KEY STATE3 AESENCLAST KEY STATE4 ret -ENDPROC(_aesni_enc4) +SYM_FUNC_END(_aesni_enc4) /* * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ -ENTRY(aesni_dec) +SYM_FUNC_START(aesni_dec) FRAME_BEGIN #ifndef __x86_64__ pushl KEYP @@ -2159,7 +2153,7 @@ ENTRY(aesni_dec) #endif FRAME_END ret -ENDPROC(aesni_dec) +SYM_FUNC_END(aesni_dec) /* * _aesni_dec1: internal ABI @@ -2173,8 +2167,7 @@ ENDPROC(aesni_dec) * KEY * TKEYP (T1) */ -.align 4 -_aesni_dec1: +SYM_FUNC_START_LOCAL(_aesni_dec1) movaps (KEYP), KEY # key mov KEYP, TKEYP pxor KEY, STATE # round 0 @@ -2217,7 +2210,7 @@ _aesni_dec1: movaps 0x70(TKEYP), KEY AESDECLAST KEY STATE ret -ENDPROC(_aesni_dec1) +SYM_FUNC_END(_aesni_dec1) /* * _aesni_dec4: internal ABI @@ -2237,8 +2230,7 @@ ENDPROC(_aesni_dec1) * KEY * TKEYP (T1) */ -.align 4 -_aesni_dec4: +SYM_FUNC_START_LOCAL(_aesni_dec4) movaps (KEYP), KEY # key mov KEYP, TKEYP pxor KEY, STATE1 # round 0 @@ -2326,13 +2318,13 @@ _aesni_dec4: AESDECLAST KEY STATE3 AESDECLAST KEY STATE4 ret -ENDPROC(_aesni_dec4) +SYM_FUNC_END(_aesni_dec4) /* * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * size_t len) */ -ENTRY(aesni_ecb_enc) +SYM_FUNC_START(aesni_ecb_enc) FRAME_BEGIN #ifndef __x86_64__ pushl LEN @@ -2386,13 +2378,13 @@ ENTRY(aesni_ecb_enc) #endif FRAME_END ret -ENDPROC(aesni_ecb_enc) +SYM_FUNC_END(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * size_t len); */ -ENTRY(aesni_ecb_dec) +SYM_FUNC_START(aesni_ecb_dec) FRAME_BEGIN #ifndef __x86_64__ pushl LEN @@ -2447,13 +2439,13 @@ ENTRY(aesni_ecb_dec) #endif FRAME_END ret -ENDPROC(aesni_ecb_dec) +SYM_FUNC_END(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * size_t len, u8 *iv) */ -ENTRY(aesni_cbc_enc) +SYM_FUNC_START(aesni_cbc_enc) FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2491,13 +2483,13 @@ ENTRY(aesni_cbc_enc) #endif FRAME_END ret -ENDPROC(aesni_cbc_enc) +SYM_FUNC_END(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * size_t len, u8 *iv) */ -ENTRY(aesni_cbc_dec) +SYM_FUNC_START(aesni_cbc_dec) FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2584,7 +2576,7 @@ ENTRY(aesni_cbc_dec) #endif FRAME_END ret -ENDPROC(aesni_cbc_dec) +SYM_FUNC_END(aesni_cbc_dec) #ifdef __x86_64__ .pushsection .rodata @@ -2604,8 +2596,7 @@ ENDPROC(aesni_cbc_dec) * INC: == 1, in little endian * BSWAP_MASK == endian swapping mask */ -.align 4 -_aesni_inc_init: +SYM_FUNC_START_LOCAL(_aesni_inc_init) movaps .Lbswap_mask, BSWAP_MASK movaps IV, CTR PSHUFB_XMM BSWAP_MASK CTR @@ -2613,7 +2604,7 @@ _aesni_inc_init: MOVQ_R64_XMM TCTR_LOW INC MOVQ_R64_XMM CTR TCTR_LOW ret -ENDPROC(_aesni_inc_init) +SYM_FUNC_END(_aesni_inc_init) /* * _aesni_inc: internal ABI @@ -2630,8 +2621,7 @@ ENDPROC(_aesni_inc_init) * CTR: == output IV, in little endian * TCTR_LOW: == lower qword of CTR */ -.align 4 -_aesni_inc: +SYM_FUNC_START_LOCAL(_aesni_inc) paddq INC, CTR add $1, TCTR_LOW jnc .Linc_low @@ -2642,13 +2632,13 @@ _aesni_inc: movaps CTR, IV PSHUFB_XMM BSWAP_MASK IV ret -ENDPROC(_aesni_inc) +SYM_FUNC_END(_aesni_inc) /* * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * size_t len, u8 *iv) */ -ENTRY(aesni_ctr_enc) +SYM_FUNC_START(aesni_ctr_enc) FRAME_BEGIN cmp $16, LEN jb .Lctr_enc_just_ret @@ -2705,7 +2695,7 @@ ENTRY(aesni_ctr_enc) .Lctr_enc_just_ret: FRAME_END ret -ENDPROC(aesni_ctr_enc) +SYM_FUNC_END(aesni_ctr_enc) /* * _aesni_gf128mul_x_ble: internal ABI @@ -2729,7 +2719,7 @@ ENDPROC(aesni_ctr_enc) * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, * bool enc, u8 *iv) */ -ENTRY(aesni_xts_crypt8) +SYM_FUNC_START(aesni_xts_crypt8) FRAME_BEGIN cmpb $0, %cl movl $0, %ecx @@ -2833,6 +2823,6 @@ ENTRY(aesni_xts_crypt8) FRAME_END ret -ENDPROC(aesni_xts_crypt8) +SYM_FUNC_END(aesni_xts_crypt8) #endif diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 91c039ab5699..bfa1c0b3e5b4 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -1775,12 +1775,12 @@ _initial_blocks_done\@: # const u8 *aad, /* Additional Authentication Data (AAD)*/ # u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ ############################################################# -ENTRY(aesni_gcm_init_avx_gen2) +SYM_FUNC_START(aesni_gcm_init_avx_gen2) FUNC_SAVE INIT GHASH_MUL_AVX, PRECOMPUTE_AVX FUNC_RESTORE ret -ENDPROC(aesni_gcm_init_avx_gen2) +SYM_FUNC_END(aesni_gcm_init_avx_gen2) ############################################################################### #void aesni_gcm_enc_update_avx_gen2( @@ -1790,7 +1790,7 @@ ENDPROC(aesni_gcm_init_avx_gen2) # const u8 *in, /* Plaintext input */ # u64 plaintext_len) /* Length of data in Bytes for encryption. */ ############################################################################### -ENTRY(aesni_gcm_enc_update_avx_gen2) +SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2) FUNC_SAVE mov keysize, %eax cmp $32, %eax @@ -1809,7 +1809,7 @@ key_256_enc_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc_update_avx_gen2) +SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) ############################################################################### #void aesni_gcm_dec_update_avx_gen2( @@ -1819,7 +1819,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen2) # const u8 *in, /* Ciphertext input */ # u64 plaintext_len) /* Length of data in Bytes for encryption. */ ############################################################################### -ENTRY(aesni_gcm_dec_update_avx_gen2) +SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2) FUNC_SAVE mov keysize,%eax cmp $32, %eax @@ -1838,7 +1838,7 @@ key_256_dec_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 FUNC_RESTORE ret -ENDPROC(aesni_gcm_dec_update_avx_gen2) +SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) ############################################################################### #void aesni_gcm_finalize_avx_gen2( @@ -1848,7 +1848,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen2) # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. # Valid values are 16 (most likely), 12 or 8. */ ############################################################################### -ENTRY(aesni_gcm_finalize_avx_gen2) +SYM_FUNC_START(aesni_gcm_finalize_avx_gen2) FUNC_SAVE mov keysize,%eax cmp $32, %eax @@ -1867,7 +1867,7 @@ key_256_finalize: GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 FUNC_RESTORE ret -ENDPROC(aesni_gcm_finalize_avx_gen2) +SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) #endif /* CONFIG_AS_AVX */ @@ -2746,12 +2746,12 @@ _initial_blocks_done\@: # const u8 *aad, /* Additional Authentication Data (AAD)*/ # u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ ############################################################# -ENTRY(aesni_gcm_init_avx_gen4) +SYM_FUNC_START(aesni_gcm_init_avx_gen4) FUNC_SAVE INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 FUNC_RESTORE ret -ENDPROC(aesni_gcm_init_avx_gen4) +SYM_FUNC_END(aesni_gcm_init_avx_gen4) ############################################################################### #void aesni_gcm_enc_avx_gen4( @@ -2761,7 +2761,7 @@ ENDPROC(aesni_gcm_init_avx_gen4) # const u8 *in, /* Plaintext input */ # u64 plaintext_len) /* Length of data in Bytes for encryption. */ ############################################################################### -ENTRY(aesni_gcm_enc_update_avx_gen4) +SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4) FUNC_SAVE mov keysize,%eax cmp $32, %eax @@ -2780,7 +2780,7 @@ key_256_enc_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc_update_avx_gen4) +SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) ############################################################################### #void aesni_gcm_dec_update_avx_gen4( @@ -2790,7 +2790,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen4) # const u8 *in, /* Ciphertext input */ # u64 plaintext_len) /* Length of data in Bytes for encryption. */ ############################################################################### -ENTRY(aesni_gcm_dec_update_avx_gen4) +SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4) FUNC_SAVE mov keysize,%eax cmp $32, %eax @@ -2809,7 +2809,7 @@ key_256_dec_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 FUNC_RESTORE ret -ENDPROC(aesni_gcm_dec_update_avx_gen4) +SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) ############################################################################### #void aesni_gcm_finalize_avx_gen4( @@ -2819,7 +2819,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen4) # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. # Valid values are 16 (most likely), 12 or 8. */ ############################################################################### -ENTRY(aesni_gcm_finalize_avx_gen4) +SYM_FUNC_START(aesni_gcm_finalize_avx_gen4) FUNC_SAVE mov keysize,%eax cmp $32, %eax @@ -2838,6 +2838,6 @@ key_256_finalize4: GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 FUNC_RESTORE ret -ENDPROC(aesni_gcm_finalize_avx_gen4) +SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) #endif /* CONFIG_AS_AVX2 */ diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S index 8591938eee26..24910b766bdd 100644 --- a/arch/x86/crypto/blake2s-core.S +++ b/arch/x86/crypto/blake2s-core.S @@ -47,7 +47,7 @@ SIGMA2: .text #ifdef CONFIG_AS_SSSE3 -ENTRY(blake2s_compress_ssse3) +SYM_FUNC_START(blake2s_compress_ssse3) testq %rdx,%rdx je .Lendofloop movdqu (%rdi),%xmm0 @@ -173,11 +173,11 @@ ENTRY(blake2s_compress_ssse3) movdqu %xmm14,0x20(%rdi) .Lendofloop: ret -ENDPROC(blake2s_compress_ssse3) +SYM_FUNC_END(blake2s_compress_ssse3) #endif /* CONFIG_AS_SSSE3 */ #ifdef CONFIG_AS_AVX512 -ENTRY(blake2s_compress_avx512) +SYM_FUNC_START(blake2s_compress_avx512) vmovdqu (%rdi),%xmm0 vmovdqu 0x10(%rdi),%xmm1 vmovdqu 0x20(%rdi),%xmm4 @@ -254,5 +254,5 @@ ENTRY(blake2s_compress_avx512) vmovdqu %xmm4,0x20(%rdi) vzeroupper retq -ENDPROC(blake2s_compress_avx512) +SYM_FUNC_END(blake2s_compress_avx512) #endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 330db7a48af8..4222ac6d6584 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -103,7 +103,7 @@ bswapq RX0; \ xorq RX0, (RIO); -ENTRY(__blowfish_enc_blk) +SYM_FUNC_START(__blowfish_enc_blk) /* input: * %rdi: ctx * %rsi: dst @@ -139,9 +139,9 @@ ENTRY(__blowfish_enc_blk) .L__enc_xor: xor_block(); ret; -ENDPROC(__blowfish_enc_blk) +SYM_FUNC_END(__blowfish_enc_blk) -ENTRY(blowfish_dec_blk) +SYM_FUNC_START(blowfish_dec_blk) /* input: * %rdi: ctx * %rsi: dst @@ -171,7 +171,7 @@ ENTRY(blowfish_dec_blk) movq %r11, %r12; ret; -ENDPROC(blowfish_dec_blk) +SYM_FUNC_END(blowfish_dec_blk) /********************************************************************** 4-way blowfish, four blocks parallel @@ -283,7 +283,7 @@ ENDPROC(blowfish_dec_blk) bswapq RX3; \ xorq RX3, 24(RIO); -ENTRY(__blowfish_enc_blk_4way) +SYM_FUNC_START(__blowfish_enc_blk_4way) /* input: * %rdi: ctx * %rsi: dst @@ -330,9 +330,9 @@ ENTRY(__blowfish_enc_blk_4way) popq %rbx; popq %r12; ret; -ENDPROC(__blowfish_enc_blk_4way) +SYM_FUNC_END(__blowfish_enc_blk_4way) -ENTRY(blowfish_dec_blk_4way) +SYM_FUNC_START(blowfish_dec_blk_4way) /* input: * %rdi: ctx * %rsi: dst @@ -365,4 +365,4 @@ ENTRY(blowfish_dec_blk_4way) popq %r12; ret; -ENDPROC(blowfish_dec_blk_4way) +SYM_FUNC_END(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index a14af6eb09cb..d01ddd73de65 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -189,20 +189,20 @@ * larger and would only be 0.5% faster (on sandy-bridge). */ .align 8 -roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: +SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); ret; -ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) +SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 -roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: +SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); ret; -ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) +SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* * IN/OUT: @@ -722,7 +722,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text .align 8 -__camellia_enc_blk16: +SYM_FUNC_START_LOCAL(__camellia_enc_blk16) /* input: * %rdi: ctx, CTX * %rax: temporary storage, 256 bytes @@ -806,10 +806,10 @@ __camellia_enc_blk16: %xmm15, %rax, %rcx, 24); jmp .Lenc_done; -ENDPROC(__camellia_enc_blk16) +SYM_FUNC_END(__camellia_enc_blk16) .align 8 -__camellia_dec_blk16: +SYM_FUNC_START_LOCAL(__camellia_dec_blk16) /* input: * %rdi: ctx, CTX * %rax: temporary storage, 256 bytes @@ -891,9 +891,9 @@ __camellia_dec_blk16: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; -ENDPROC(__camellia_dec_blk16) +SYM_FUNC_END(__camellia_dec_blk16) -ENTRY(camellia_ecb_enc_16way) +SYM_FUNC_START(camellia_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -916,9 +916,9 @@ ENTRY(camellia_ecb_enc_16way) FRAME_END ret; -ENDPROC(camellia_ecb_enc_16way) +SYM_FUNC_END(camellia_ecb_enc_16way) -ENTRY(camellia_ecb_dec_16way) +SYM_FUNC_START(camellia_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -946,9 +946,9 @@ ENTRY(camellia_ecb_dec_16way) FRAME_END ret; -ENDPROC(camellia_ecb_dec_16way) +SYM_FUNC_END(camellia_ecb_dec_16way) -ENTRY(camellia_cbc_dec_16way) +SYM_FUNC_START(camellia_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -997,7 +997,7 @@ ENTRY(camellia_cbc_dec_16way) FRAME_END ret; -ENDPROC(camellia_cbc_dec_16way) +SYM_FUNC_END(camellia_cbc_dec_16way) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -1005,7 +1005,7 @@ ENDPROC(camellia_cbc_dec_16way) vpslldq $8, tmp, tmp; \ vpsubq tmp, x, x; -ENTRY(camellia_ctr_16way) +SYM_FUNC_START(camellia_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1110,7 +1110,7 @@ ENTRY(camellia_ctr_16way) FRAME_END ret; -ENDPROC(camellia_ctr_16way) +SYM_FUNC_END(camellia_ctr_16way) #define gf128mul_x_ble(iv, mask, tmp) \ vpsrad $31, iv, tmp; \ @@ -1120,7 +1120,7 @@ ENDPROC(camellia_ctr_16way) vpxor tmp, iv, iv; .align 8 -camellia_xts_crypt_16way: +SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1254,9 +1254,9 @@ camellia_xts_crypt_16way: FRAME_END ret; -ENDPROC(camellia_xts_crypt_16way) +SYM_FUNC_END(camellia_xts_crypt_16way) -ENTRY(camellia_xts_enc_16way) +SYM_FUNC_START(camellia_xts_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1268,9 +1268,9 @@ ENTRY(camellia_xts_enc_16way) leaq __camellia_enc_blk16, %r9; jmp camellia_xts_crypt_16way; -ENDPROC(camellia_xts_enc_16way) +SYM_FUNC_END(camellia_xts_enc_16way) -ENTRY(camellia_xts_dec_16way) +SYM_FUNC_START(camellia_xts_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1286,4 +1286,4 @@ ENTRY(camellia_xts_dec_16way) leaq __camellia_dec_blk16, %r9; jmp camellia_xts_crypt_16way; -ENDPROC(camellia_xts_dec_16way) +SYM_FUNC_END(camellia_xts_dec_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 4be4c7c3ba27..563ef6e83cdd 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -223,20 +223,20 @@ * larger and would only marginally faster. */ .align 8 -roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: +SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, %rcx, (%r9)); ret; -ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) +SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 -roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: +SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, %rax, (%r9)); ret; -ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) +SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* * IN/OUT: @@ -760,7 +760,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text .align 8 -__camellia_enc_blk32: +SYM_FUNC_START_LOCAL(__camellia_enc_blk32) /* input: * %rdi: ctx, CTX * %rax: temporary storage, 512 bytes @@ -844,10 +844,10 @@ __camellia_enc_blk32: %ymm15, %rax, %rcx, 24); jmp .Lenc_done; -ENDPROC(__camellia_enc_blk32) +SYM_FUNC_END(__camellia_enc_blk32) .align 8 -__camellia_dec_blk32: +SYM_FUNC_START_LOCAL(__camellia_dec_blk32) /* input: * %rdi: ctx, CTX * %rax: temporary storage, 512 bytes @@ -929,9 +929,9 @@ __camellia_dec_blk32: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; -ENDPROC(__camellia_dec_blk32) +SYM_FUNC_END(__camellia_dec_blk32) -ENTRY(camellia_ecb_enc_32way) +SYM_FUNC_START(camellia_ecb_enc_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -958,9 +958,9 @@ ENTRY(camellia_ecb_enc_32way) FRAME_END ret; -ENDPROC(camellia_ecb_enc_32way) +SYM_FUNC_END(camellia_ecb_enc_32way) -ENTRY(camellia_ecb_dec_32way) +SYM_FUNC_START(camellia_ecb_dec_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -992,9 +992,9 @@ ENTRY(camellia_ecb_dec_32way) FRAME_END ret; -ENDPROC(camellia_ecb_dec_32way) +SYM_FUNC_END(camellia_ecb_dec_32way) -ENTRY(camellia_cbc_dec_32way) +SYM_FUNC_START(camellia_cbc_dec_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -1060,7 +1060,7 @@ ENTRY(camellia_cbc_dec_32way) FRAME_END ret; -ENDPROC(camellia_cbc_dec_32way) +SYM_FUNC_END(camellia_cbc_dec_32way) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -1076,7 +1076,7 @@ ENDPROC(camellia_cbc_dec_32way) vpslldq $8, tmp1, tmp1; \ vpsubq tmp1, x, x; -ENTRY(camellia_ctr_32way) +SYM_FUNC_START(camellia_ctr_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -1200,7 +1200,7 @@ ENTRY(camellia_ctr_32way) FRAME_END ret; -ENDPROC(camellia_ctr_32way) +SYM_FUNC_END(camellia_ctr_32way) #define gf128mul_x_ble(iv, mask, tmp) \ vpsrad $31, iv, tmp; \ @@ -1222,7 +1222,7 @@ ENDPROC(camellia_ctr_32way) vpxor tmp1, iv, iv; .align 8 -camellia_xts_crypt_32way: +SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -1367,9 +1367,9 @@ camellia_xts_crypt_32way: FRAME_END ret; -ENDPROC(camellia_xts_crypt_32way) +SYM_FUNC_END(camellia_xts_crypt_32way) -ENTRY(camellia_xts_enc_32way) +SYM_FUNC_START(camellia_xts_enc_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -1382,9 +1382,9 @@ ENTRY(camellia_xts_enc_32way) leaq __camellia_enc_blk32, %r9; jmp camellia_xts_crypt_32way; -ENDPROC(camellia_xts_enc_32way) +SYM_FUNC_END(camellia_xts_enc_32way) -ENTRY(camellia_xts_dec_32way) +SYM_FUNC_START(camellia_xts_dec_32way) /* input: * %rdi: ctx, CTX * %rsi: dst (32 blocks) @@ -1400,4 +1400,4 @@ ENTRY(camellia_xts_dec_32way) leaq __camellia_dec_blk32, %r9; jmp camellia_xts_crypt_32way; -ENDPROC(camellia_xts_dec_32way) +SYM_FUNC_END(camellia_xts_dec_32way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 23528bc18fc6..1372e6408850 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -175,7 +175,7 @@ bswapq RAB0; \ movq RAB0, 4*2(RIO); -ENTRY(__camellia_enc_blk) +SYM_FUNC_START(__camellia_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -220,9 +220,9 @@ ENTRY(__camellia_enc_blk) movq RR12, %r12; ret; -ENDPROC(__camellia_enc_blk) +SYM_FUNC_END(__camellia_enc_blk) -ENTRY(camellia_dec_blk) +SYM_FUNC_START(camellia_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -258,7 +258,7 @@ ENTRY(camellia_dec_blk) movq RR12, %r12; ret; -ENDPROC(camellia_dec_blk) +SYM_FUNC_END(camellia_dec_blk) /********************************************************************** 2-way camellia @@ -409,7 +409,7 @@ ENDPROC(camellia_dec_blk) bswapq RAB1; \ movq RAB1, 12*2(RIO); -ENTRY(__camellia_enc_blk_2way) +SYM_FUNC_START(__camellia_enc_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -456,9 +456,9 @@ ENTRY(__camellia_enc_blk_2way) movq RR12, %r12; popq %rbx; ret; -ENDPROC(__camellia_enc_blk_2way) +SYM_FUNC_END(__camellia_enc_blk_2way) -ENTRY(camellia_dec_blk_2way) +SYM_FUNC_START(camellia_dec_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -496,4 +496,4 @@ ENTRY(camellia_dec_blk_2way) movq RR12, %r12; movq RXOR, %rbx; ret; -ENDPROC(camellia_dec_blk_2way) +SYM_FUNC_END(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index dc55c3332fcc..8a6181b08b59 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -209,7 +209,7 @@ .text .align 16 -__cast5_enc_blk16: +SYM_FUNC_START_LOCAL(__cast5_enc_blk16) /* input: * %rdi: ctx * RL1: blocks 1 and 2 @@ -280,10 +280,10 @@ __cast5_enc_blk16: outunpack_blocks(RR4, RL4, RTMP, RX, RKM); ret; -ENDPROC(__cast5_enc_blk16) +SYM_FUNC_END(__cast5_enc_blk16) .align 16 -__cast5_dec_blk16: +SYM_FUNC_START_LOCAL(__cast5_dec_blk16) /* input: * %rdi: ctx * RL1: encrypted blocks 1 and 2 @@ -357,9 +357,9 @@ __cast5_dec_blk16: .L__skip_dec: vpsrldq $4, RKR, RKR; jmp .L__dec_tail; -ENDPROC(__cast5_dec_blk16) +SYM_FUNC_END(__cast5_dec_blk16) -ENTRY(cast5_ecb_enc_16way) +SYM_FUNC_START(cast5_ecb_enc_16way) /* input: * %rdi: ctx * %rsi: dst @@ -394,9 +394,9 @@ ENTRY(cast5_ecb_enc_16way) popq %r15; FRAME_END ret; -ENDPROC(cast5_ecb_enc_16way) +SYM_FUNC_END(cast5_ecb_enc_16way) -ENTRY(cast5_ecb_dec_16way) +SYM_FUNC_START(cast5_ecb_dec_16way) /* input: * %rdi: ctx * %rsi: dst @@ -432,9 +432,9 @@ ENTRY(cast5_ecb_dec_16way) popq %r15; FRAME_END ret; -ENDPROC(cast5_ecb_dec_16way) +SYM_FUNC_END(cast5_ecb_dec_16way) -ENTRY(cast5_cbc_dec_16way) +SYM_FUNC_START(cast5_cbc_dec_16way) /* input: * %rdi: ctx * %rsi: dst @@ -484,9 +484,9 @@ ENTRY(cast5_cbc_dec_16way) popq %r12; FRAME_END ret; -ENDPROC(cast5_cbc_dec_16way) +SYM_FUNC_END(cast5_cbc_dec_16way) -ENTRY(cast5_ctr_16way) +SYM_FUNC_START(cast5_ctr_16way) /* input: * %rdi: ctx * %rsi: dst @@ -560,4 +560,4 @@ ENTRY(cast5_ctr_16way) popq %r12; FRAME_END ret; -ENDPROC(cast5_ctr_16way) +SYM_FUNC_END(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 4f0a7cdb94d9..932a3ce32a88 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -247,7 +247,7 @@ .text .align 8 -__cast6_enc_blk8: +SYM_FUNC_START_LOCAL(__cast6_enc_blk8) /* input: * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks @@ -292,10 +292,10 @@ __cast6_enc_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; -ENDPROC(__cast6_enc_blk8) +SYM_FUNC_END(__cast6_enc_blk8) .align 8 -__cast6_dec_blk8: +SYM_FUNC_START_LOCAL(__cast6_dec_blk8) /* input: * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks @@ -339,9 +339,9 @@ __cast6_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; -ENDPROC(__cast6_dec_blk8) +SYM_FUNC_END(__cast6_dec_blk8) -ENTRY(cast6_ecb_enc_8way) +SYM_FUNC_START(cast6_ecb_enc_8way) /* input: * %rdi: ctx * %rsi: dst @@ -362,9 +362,9 @@ ENTRY(cast6_ecb_enc_8way) popq %r15; FRAME_END ret; -ENDPROC(cast6_ecb_enc_8way) +SYM_FUNC_END(cast6_ecb_enc_8way) -ENTRY(cast6_ecb_dec_8way) +SYM_FUNC_START(cast6_ecb_dec_8way) /* input: * %rdi: ctx * %rsi: dst @@ -385,9 +385,9 @@ ENTRY(cast6_ecb_dec_8way) popq %r15; FRAME_END ret; -ENDPROC(cast6_ecb_dec_8way) +SYM_FUNC_END(cast6_ecb_dec_8way) -ENTRY(cast6_cbc_dec_8way) +SYM_FUNC_START(cast6_cbc_dec_8way) /* input: * %rdi: ctx * %rsi: dst @@ -411,9 +411,9 @@ ENTRY(cast6_cbc_dec_8way) popq %r12; FRAME_END ret; -ENDPROC(cast6_cbc_dec_8way) +SYM_FUNC_END(cast6_cbc_dec_8way) -ENTRY(cast6_ctr_8way) +SYM_FUNC_START(cast6_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -439,9 +439,9 @@ ENTRY(cast6_ctr_8way) popq %r12; FRAME_END ret; -ENDPROC(cast6_ctr_8way) +SYM_FUNC_END(cast6_ctr_8way) -ENTRY(cast6_xts_enc_8way) +SYM_FUNC_START(cast6_xts_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -466,9 +466,9 @@ ENTRY(cast6_xts_enc_8way) popq %r15; FRAME_END ret; -ENDPROC(cast6_xts_enc_8way) +SYM_FUNC_END(cast6_xts_enc_8way) -ENTRY(cast6_xts_dec_8way) +SYM_FUNC_START(cast6_xts_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -493,4 +493,4 @@ ENTRY(cast6_xts_dec_8way) popq %r15; FRAME_END ret; -ENDPROC(cast6_xts_dec_8way) +SYM_FUNC_END(cast6_xts_dec_8way) diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S index 831e4434fc20..ee9a40ab4109 100644 --- a/arch/x86/crypto/chacha-avx2-x86_64.S +++ b/arch/x86/crypto/chacha-avx2-x86_64.S @@ -34,7 +34,7 @@ CTR4BL: .octa 0x00000000000000000000000000000002 .text -ENTRY(chacha_2block_xor_avx2) +SYM_FUNC_START(chacha_2block_xor_avx2) # %rdi: Input state matrix, s # %rsi: up to 2 data blocks output, o # %rdx: up to 2 data blocks input, i @@ -224,9 +224,9 @@ ENTRY(chacha_2block_xor_avx2) lea -8(%r10),%rsp jmp .Ldone2 -ENDPROC(chacha_2block_xor_avx2) +SYM_FUNC_END(chacha_2block_xor_avx2) -ENTRY(chacha_4block_xor_avx2) +SYM_FUNC_START(chacha_4block_xor_avx2) # %rdi: Input state matrix, s # %rsi: up to 4 data blocks output, o # %rdx: up to 4 data blocks input, i @@ -529,9 +529,9 @@ ENTRY(chacha_4block_xor_avx2) lea -8(%r10),%rsp jmp .Ldone4 -ENDPROC(chacha_4block_xor_avx2) +SYM_FUNC_END(chacha_4block_xor_avx2) -ENTRY(chacha_8block_xor_avx2) +SYM_FUNC_START(chacha_8block_xor_avx2) # %rdi: Input state matrix, s # %rsi: up to 8 data blocks output, o # %rdx: up to 8 data blocks input, i @@ -1018,4 +1018,4 @@ ENTRY(chacha_8block_xor_avx2) jmp .Ldone8 -ENDPROC(chacha_8block_xor_avx2) +SYM_FUNC_END(chacha_8block_xor_avx2) diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 848f9c75fd4f..bb193fde123a 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -24,7 +24,7 @@ CTR8BL: .octa 0x00000003000000020000000100000000 .text -ENTRY(chacha_2block_xor_avx512vl) +SYM_FUNC_START(chacha_2block_xor_avx512vl) # %rdi: Input state matrix, s # %rsi: up to 2 data blocks output, o # %rdx: up to 2 data blocks input, i @@ -187,9 +187,9 @@ ENTRY(chacha_2block_xor_avx512vl) jmp .Ldone2 -ENDPROC(chacha_2block_xor_avx512vl) +SYM_FUNC_END(chacha_2block_xor_avx512vl) -ENTRY(chacha_4block_xor_avx512vl) +SYM_FUNC_START(chacha_4block_xor_avx512vl) # %rdi: Input state matrix, s # %rsi: up to 4 data blocks output, o # %rdx: up to 4 data blocks input, i @@ -453,9 +453,9 @@ ENTRY(chacha_4block_xor_avx512vl) jmp .Ldone4 -ENDPROC(chacha_4block_xor_avx512vl) +SYM_FUNC_END(chacha_4block_xor_avx512vl) -ENTRY(chacha_8block_xor_avx512vl) +SYM_FUNC_START(chacha_8block_xor_avx512vl) # %rdi: Input state matrix, s # %rsi: up to 8 data blocks output, o # %rdx: up to 8 data blocks input, i @@ -833,4 +833,4 @@ ENTRY(chacha_8block_xor_avx512vl) jmp .Ldone8 -ENDPROC(chacha_8block_xor_avx512vl) +SYM_FUNC_END(chacha_8block_xor_avx512vl) diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S index 2d86c7d6dc88..a38ab2512a6f 100644 --- a/arch/x86/crypto/chacha-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha-ssse3-x86_64.S @@ -33,7 +33,7 @@ CTRINC: .octa 0x00000003000000020000000100000000 * * Clobbers: %r8d, %xmm4-%xmm7 */ -chacha_permute: +SYM_FUNC_START_LOCAL(chacha_permute) movdqa ROT8(%rip),%xmm4 movdqa ROT16(%rip),%xmm5 @@ -109,9 +109,9 @@ chacha_permute: jnz .Ldoubleround ret -ENDPROC(chacha_permute) +SYM_FUNC_END(chacha_permute) -ENTRY(chacha_block_xor_ssse3) +SYM_FUNC_START(chacha_block_xor_ssse3) # %rdi: Input state matrix, s # %rsi: up to 1 data block output, o # %rdx: up to 1 data block input, i @@ -197,9 +197,9 @@ ENTRY(chacha_block_xor_ssse3) lea -8(%r10),%rsp jmp .Ldone -ENDPROC(chacha_block_xor_ssse3) +SYM_FUNC_END(chacha_block_xor_ssse3) -ENTRY(hchacha_block_ssse3) +SYM_FUNC_START(hchacha_block_ssse3) # %rdi: Input state matrix, s # %rsi: output (8 32-bit words) # %edx: nrounds @@ -218,9 +218,9 @@ ENTRY(hchacha_block_ssse3) FRAME_END ret -ENDPROC(hchacha_block_ssse3) +SYM_FUNC_END(hchacha_block_ssse3) -ENTRY(chacha_4block_xor_ssse3) +SYM_FUNC_START(chacha_4block_xor_ssse3) # %rdi: Input state matrix, s # %rsi: up to 4 data blocks output, o # %rdx: up to 4 data blocks input, i @@ -788,4 +788,4 @@ ENTRY(chacha_4block_xor_ssse3) jmp .Ldone4 -ENDPROC(chacha_4block_xor_ssse3) +SYM_FUNC_END(chacha_4block_xor_ssse3) diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 1c099dc08cc3..9fd28ff65bc2 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -103,7 +103,7 @@ * size_t len, uint crc32) */ -ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ +SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ movdqa (BUF), %xmm1 movdqa 0x10(BUF), %xmm2 movdqa 0x20(BUF), %xmm3 @@ -238,4 +238,4 @@ fold_64: PEXTRD 0x01, %xmm1, %eax ret -ENDPROC(crc32_pclmul_le_16) +SYM_FUNC_END(crc32_pclmul_le_16) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index d9b734d0c8cc..0e6690e3618c 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -74,7 +74,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); .text -ENTRY(crc_pcl) +SYM_FUNC_START(crc_pcl) #define bufp %rdi #define bufp_dw %edi #define bufp_w %di @@ -311,7 +311,7 @@ do_return: popq %rdi popq %rbx ret -ENDPROC(crc_pcl) +SYM_FUNC_END(crc_pcl) .section .rodata, "a", @progbits ################################################################ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 3d873e67749d..b2533d63030e 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -95,7 +95,7 @@ # Assumes len >= 16. # .align 16 -ENTRY(crc_t10dif_pcl) +SYM_FUNC_START(crc_t10dif_pcl) movdqa .Lbswap_mask(%rip), BSWAP_MASK @@ -280,7 +280,7 @@ ENTRY(crc_t10dif_pcl) jge .Lfold_16_bytes_loop # 32 <= len <= 255 add $16, len jmp .Lhandle_partial_segment # 17 <= len <= 31 -ENDPROC(crc_t10dif_pcl) +SYM_FUNC_END(crc_t10dif_pcl) .section .rodata, "a", @progbits .align 16 diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 7fca43099a5f..fac0fdc3f25d 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -162,7 +162,7 @@ movl left##d, (io); \ movl right##d, 4(io); -ENTRY(des3_ede_x86_64_crypt_blk) +SYM_FUNC_START(des3_ede_x86_64_crypt_blk) /* input: * %rdi: round keys, CTX * %rsi: dst @@ -244,7 +244,7 @@ ENTRY(des3_ede_x86_64_crypt_blk) popq %rbx; ret; -ENDPROC(des3_ede_x86_64_crypt_blk) +SYM_FUNC_END(des3_ede_x86_64_crypt_blk) /*********************************************************************** * 3-way 3DES @@ -418,7 +418,7 @@ ENDPROC(des3_ede_x86_64_crypt_blk) #define __movq(src, dst) \ movq src, dst; -ENTRY(des3_ede_x86_64_crypt_blk_3way) +SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way) /* input: * %rdi: ctx, round keys * %rsi: dst (3 blocks) @@ -529,7 +529,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) popq %rbx; ret; -ENDPROC(des3_ede_x86_64_crypt_blk_3way) +SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way) .section .rodata, "a", @progbits .align 16 diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 5d53effe8abe..bb9735fbb865 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -44,7 +44,7 @@ * T2 * T3 */ -__clmul_gf128mul_ble: +SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) movaps DATA, T1 pshufd $0b01001110, DATA, T2 pshufd $0b01001110, SHASH, T3 @@ -87,10 +87,10 @@ __clmul_gf128mul_ble: pxor T2, T1 pxor T1, DATA ret -ENDPROC(__clmul_gf128mul_ble) +SYM_FUNC_END(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const u128 *shash) */ -ENTRY(clmul_ghash_mul) +SYM_FUNC_START(clmul_ghash_mul) FRAME_BEGIN movups (%rdi), DATA movups (%rsi), SHASH @@ -101,13 +101,13 @@ ENTRY(clmul_ghash_mul) movups DATA, (%rdi) FRAME_END ret -ENDPROC(clmul_ghash_mul) +SYM_FUNC_END(clmul_ghash_mul) /* * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, * const u128 *shash); */ -ENTRY(clmul_ghash_update) +SYM_FUNC_START(clmul_ghash_update) FRAME_BEGIN cmp $16, %rdx jb .Lupdate_just_ret # check length @@ -130,4 +130,4 @@ ENTRY(clmul_ghash_update) .Lupdate_just_ret: FRAME_END ret -ENDPROC(clmul_ghash_update) +SYM_FUNC_END(clmul_ghash_update) diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index f7946ea1b704..b22c7b936272 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -69,7 +69,7 @@ * * It's guaranteed that message_len % 16 == 0. */ -ENTRY(nh_avx2) +SYM_FUNC_START(nh_avx2) vmovdqu 0x00(KEY), K0 vmovdqu 0x10(KEY), K1 @@ -154,4 +154,4 @@ ENTRY(nh_avx2) vpaddq T4, T0, T0 vmovdqu T0, (HASH) ret -ENDPROC(nh_avx2) +SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S index 51f52d4ab4bb..d7ae22dd6683 100644 --- a/arch/x86/crypto/nh-sse2-x86_64.S +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -71,7 +71,7 @@ * * It's guaranteed that message_len % 16 == 0. */ -ENTRY(nh_sse2) +SYM_FUNC_START(nh_sse2) movdqu 0x00(KEY), K0 movdqu 0x10(KEY), K1 @@ -120,4 +120,4 @@ ENTRY(nh_sse2) movdqu T0, 0x00(HASH) movdqu T1, 0x10(HASH) ret -ENDPROC(nh_sse2) +SYM_FUNC_END(nh_sse2) diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 8b341bc29d41..d6063feda9da 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -79,7 +79,7 @@ ORMASK: .octa 0x00000000010000000000000001000000 #define d3 %r12 #define d4 %r13 -ENTRY(poly1305_4block_avx2) +SYM_FUNC_START(poly1305_4block_avx2) # %rdi: Accumulator h[5] # %rsi: 64 byte input block m # %rdx: Poly1305 key r[5] @@ -387,4 +387,4 @@ ENTRY(poly1305_4block_avx2) pop %r12 pop %rbx ret -ENDPROC(poly1305_4block_avx2) +SYM_FUNC_END(poly1305_4block_avx2) diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 5578f846e622..d8ea29b96640 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -46,7 +46,7 @@ ORMASK: .octa 0x00000000010000000000000001000000 #define d3 %r11 #define d4 %r12 -ENTRY(poly1305_block_sse2) +SYM_FUNC_START(poly1305_block_sse2) # %rdi: Accumulator h[5] # %rsi: 16 byte input block m # %rdx: Poly1305 key r[5] @@ -276,7 +276,7 @@ ENTRY(poly1305_block_sse2) pop %r12 pop %rbx ret -ENDPROC(poly1305_block_sse2) +SYM_FUNC_END(poly1305_block_sse2) #define u0 0x00(%r8) @@ -301,7 +301,7 @@ ENDPROC(poly1305_block_sse2) #undef d0 #define d0 %r13 -ENTRY(poly1305_2block_sse2) +SYM_FUNC_START(poly1305_2block_sse2) # %rdi: Accumulator h[5] # %rsi: 16 byte input block m # %rdx: Poly1305 key r[5] @@ -587,4 +587,4 @@ ENTRY(poly1305_2block_sse2) pop %r12 pop %rbx ret -ENDPROC(poly1305_2block_sse2) +SYM_FUNC_END(poly1305_2block_sse2) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index ddc51dbba3af..ba9e4c1e7f5c 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -555,7 +555,7 @@ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -__serpent_enc_blk8_avx: +SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) /* input: * %rdi: ctx, CTX * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks @@ -606,10 +606,10 @@ __serpent_enc_blk8_avx: write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -ENDPROC(__serpent_enc_blk8_avx) +SYM_FUNC_END(__serpent_enc_blk8_avx) .align 8 -__serpent_dec_blk8_avx: +SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) /* input: * %rdi: ctx, CTX * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks @@ -660,9 +660,9 @@ __serpent_dec_blk8_avx: write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; -ENDPROC(__serpent_dec_blk8_avx) +SYM_FUNC_END(__serpent_dec_blk8_avx) -ENTRY(serpent_ecb_enc_8way_avx) +SYM_FUNC_START(serpent_ecb_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -678,9 +678,9 @@ ENTRY(serpent_ecb_enc_8way_avx) FRAME_END ret; -ENDPROC(serpent_ecb_enc_8way_avx) +SYM_FUNC_END(serpent_ecb_enc_8way_avx) -ENTRY(serpent_ecb_dec_8way_avx) +SYM_FUNC_START(serpent_ecb_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -696,9 +696,9 @@ ENTRY(serpent_ecb_dec_8way_avx) FRAME_END ret; -ENDPROC(serpent_ecb_dec_8way_avx) +SYM_FUNC_END(serpent_ecb_dec_8way_avx) -ENTRY(serpent_cbc_dec_8way_avx) +SYM_FUNC_START(serpent_cbc_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -714,9 +714,9 @@ ENTRY(serpent_cbc_dec_8way_avx) FRAME_END ret; -ENDPROC(serpent_cbc_dec_8way_avx) +SYM_FUNC_END(serpent_cbc_dec_8way_avx) -ENTRY(serpent_ctr_8way_avx) +SYM_FUNC_START(serpent_ctr_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -734,9 +734,9 @@ ENTRY(serpent_ctr_8way_avx) FRAME_END ret; -ENDPROC(serpent_ctr_8way_avx) +SYM_FUNC_END(serpent_ctr_8way_avx) -ENTRY(serpent_xts_enc_8way_avx) +SYM_FUNC_START(serpent_xts_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -756,9 +756,9 @@ ENTRY(serpent_xts_enc_8way_avx) FRAME_END ret; -ENDPROC(serpent_xts_enc_8way_avx) +SYM_FUNC_END(serpent_xts_enc_8way_avx) -ENTRY(serpent_xts_dec_8way_avx) +SYM_FUNC_START(serpent_xts_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -778,4 +778,4 @@ ENTRY(serpent_xts_dec_8way_avx) FRAME_END ret; -ENDPROC(serpent_xts_dec_8way_avx) +SYM_FUNC_END(serpent_xts_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 37bc1d48106c..c9648aeae705 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -561,7 +561,7 @@ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -__serpent_enc_blk16: +SYM_FUNC_START_LOCAL(__serpent_enc_blk16) /* input: * %rdi: ctx, CTX * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext @@ -612,10 +612,10 @@ __serpent_enc_blk16: write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -ENDPROC(__serpent_enc_blk16) +SYM_FUNC_END(__serpent_enc_blk16) .align 8 -__serpent_dec_blk16: +SYM_FUNC_START_LOCAL(__serpent_dec_blk16) /* input: * %rdi: ctx, CTX * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext @@ -666,9 +666,9 @@ __serpent_dec_blk16: write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; -ENDPROC(__serpent_dec_blk16) +SYM_FUNC_END(__serpent_dec_blk16) -ENTRY(serpent_ecb_enc_16way) +SYM_FUNC_START(serpent_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -688,9 +688,9 @@ ENTRY(serpent_ecb_enc_16way) FRAME_END ret; -ENDPROC(serpent_ecb_enc_16way) +SYM_FUNC_END(serpent_ecb_enc_16way) -ENTRY(serpent_ecb_dec_16way) +SYM_FUNC_START(serpent_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -710,9 +710,9 @@ ENTRY(serpent_ecb_dec_16way) FRAME_END ret; -ENDPROC(serpent_ecb_dec_16way) +SYM_FUNC_END(serpent_ecb_dec_16way) -ENTRY(serpent_cbc_dec_16way) +SYM_FUNC_START(serpent_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -733,9 +733,9 @@ ENTRY(serpent_cbc_dec_16way) FRAME_END ret; -ENDPROC(serpent_cbc_dec_16way) +SYM_FUNC_END(serpent_cbc_dec_16way) -ENTRY(serpent_ctr_16way) +SYM_FUNC_START(serpent_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -758,9 +758,9 @@ ENTRY(serpent_ctr_16way) FRAME_END ret; -ENDPROC(serpent_ctr_16way) +SYM_FUNC_END(serpent_ctr_16way) -ENTRY(serpent_xts_enc_16way) +SYM_FUNC_START(serpent_xts_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -784,9 +784,9 @@ ENTRY(serpent_xts_enc_16way) FRAME_END ret; -ENDPROC(serpent_xts_enc_16way) +SYM_FUNC_END(serpent_xts_enc_16way) -ENTRY(serpent_xts_dec_16way) +SYM_FUNC_START(serpent_xts_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -810,4 +810,4 @@ ENTRY(serpent_xts_dec_16way) FRAME_END ret; -ENDPROC(serpent_xts_dec_16way) +SYM_FUNC_END(serpent_xts_dec_16way) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index e5c4a4690ca9..6379b99cb722 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -497,7 +497,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -ENTRY(__serpent_enc_blk_4way) +SYM_FUNC_START(__serpent_enc_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -559,9 +559,9 @@ ENTRY(__serpent_enc_blk_4way) xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; -ENDPROC(__serpent_enc_blk_4way) +SYM_FUNC_END(__serpent_enc_blk_4way) -ENTRY(serpent_dec_blk_4way) +SYM_FUNC_START(serpent_dec_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -613,4 +613,4 @@ ENTRY(serpent_dec_blk_4way) write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); ret; -ENDPROC(serpent_dec_blk_4way) +SYM_FUNC_END(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 5e0b3a3e97af..efb6dc17dc90 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -619,7 +619,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -ENTRY(__serpent_enc_blk_8way) +SYM_FUNC_START(__serpent_enc_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -682,9 +682,9 @@ ENTRY(__serpent_enc_blk_8way) xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -ENDPROC(__serpent_enc_blk_8way) +SYM_FUNC_END(__serpent_enc_blk_8way) -ENTRY(serpent_dec_blk_8way) +SYM_FUNC_START(serpent_dec_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -736,4 +736,4 @@ ENTRY(serpent_dec_blk_8way) write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; -ENDPROC(serpent_dec_blk_8way) +SYM_FUNC_END(serpent_dec_blk_8way) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 9f712a7dfd79..6decc85ef7b7 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -634,7 +634,7 @@ _loop3: * param: function's name */ .macro SHA1_VECTOR_ASM name - ENTRY(\name) + SYM_FUNC_START(\name) push %rbx push %r12 @@ -676,7 +676,7 @@ _loop3: ret - ENDPROC(\name) + SYM_FUNC_END(\name) .endm .section .rodata diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index ebbdba72ae07..11efe3a45a1f 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -95,7 +95,7 @@ */ .text .align 32 -ENTRY(sha1_ni_transform) +SYM_FUNC_START(sha1_ni_transform) mov %rsp, RSPSAVE sub $FRAME_SIZE, %rsp and $~0xF, %rsp @@ -291,7 +291,7 @@ ENTRY(sha1_ni_transform) mov RSPSAVE, %rsp ret -ENDPROC(sha1_ni_transform) +SYM_FUNC_END(sha1_ni_transform) .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 .align 16 diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 99c5b8c4dc38..5d03c1173690 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -67,7 +67,7 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - ENTRY(\name) + SYM_FUNC_START(\name) push %rbx push %r12 @@ -101,7 +101,7 @@ pop %rbx ret - ENDPROC(\name) + SYM_FUNC_END(\name) .endm /* diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 001bbcf93c79..22e14c8dd2e4 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -347,7 +347,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -ENTRY(sha256_transform_avx) +SYM_FUNC_START(sha256_transform_avx) .align 32 pushq %rbx pushq %r12 @@ -460,7 +460,7 @@ done_hash: popq %r12 popq %rbx ret -ENDPROC(sha256_transform_avx) +SYM_FUNC_END(sha256_transform_avx) .section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 1420db15dcdd..519b551ad576 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -526,7 +526,7 @@ STACK_SIZE = _RSP + _RSP_SIZE ## arg 3 : Num blocks ######################################################################## .text -ENTRY(sha256_transform_rorx) +SYM_FUNC_START(sha256_transform_rorx) .align 32 pushq %rbx pushq %r12 @@ -713,7 +713,7 @@ done_hash: popq %r12 popq %rbx ret -ENDPROC(sha256_transform_rorx) +SYM_FUNC_END(sha256_transform_rorx) .section .rodata.cst512.K256, "aM", @progbits, 512 .align 64 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c6c05ed2c16a..69cc2f91dc4c 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -353,7 +353,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -ENTRY(sha256_transform_ssse3) +SYM_FUNC_START(sha256_transform_ssse3) .align 32 pushq %rbx pushq %r12 @@ -471,7 +471,7 @@ done_hash: popq %rbx ret -ENDPROC(sha256_transform_ssse3) +SYM_FUNC_END(sha256_transform_ssse3) .section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index fb58f58ecfbc..7abade04a3a3 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -97,7 +97,7 @@ .text .align 32 -ENTRY(sha256_ni_transform) +SYM_FUNC_START(sha256_ni_transform) shl $6, NUM_BLKS /* convert to bytes */ jz .Ldone_hash @@ -327,7 +327,7 @@ ENTRY(sha256_ni_transform) .Ldone_hash: ret -ENDPROC(sha256_ni_transform) +SYM_FUNC_END(sha256_ni_transform) .section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 39235fefe6f7..3704ddd7e5d5 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -277,7 +277,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # message blocks. # L is the message length in SHA512 blocks ######################################################################## -ENTRY(sha512_transform_avx) +SYM_FUNC_START(sha512_transform_avx) cmp $0, msglen je nowork @@ -365,7 +365,7 @@ updateblock: nowork: ret -ENDPROC(sha512_transform_avx) +SYM_FUNC_END(sha512_transform_avx) ######################################################################## ### Binary Data diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index b16d56005162..80d830e7ee09 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -569,7 +569,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # message blocks. # L is the message length in SHA512 blocks ######################################################################## -ENTRY(sha512_transform_rorx) +SYM_FUNC_START(sha512_transform_rorx) # Allocate Stack Space mov %rsp, %rax sub $frame_size, %rsp @@ -682,7 +682,7 @@ done_hash: # Restore Stack Pointer mov frame_RSPSAVE(%rsp), %rsp ret -ENDPROC(sha512_transform_rorx) +SYM_FUNC_END(sha512_transform_rorx) ######################################################################## ### Binary Data diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 66bbd9058a90..838f984e95d9 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -275,7 +275,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # message blocks. # L is the message length in SHA512 blocks. ######################################################################## -ENTRY(sha512_transform_ssse3) +SYM_FUNC_START(sha512_transform_ssse3) cmp $0, msglen je nowork @@ -364,7 +364,7 @@ updateblock: nowork: ret -ENDPROC(sha512_transform_ssse3) +SYM_FUNC_END(sha512_transform_ssse3) ######################################################################## ### Binary Data diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 698b8f2a56e2..a5151393bb2f 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -234,7 +234,7 @@ vpxor x3, wkey, x3; .align 8 -__twofish_enc_blk8: +SYM_FUNC_START_LOCAL(__twofish_enc_blk8) /* input: * %rdi: ctx, CTX * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks @@ -273,10 +273,10 @@ __twofish_enc_blk8: outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; -ENDPROC(__twofish_enc_blk8) +SYM_FUNC_END(__twofish_enc_blk8) .align 8 -__twofish_dec_blk8: +SYM_FUNC_START_LOCAL(__twofish_dec_blk8) /* input: * %rdi: ctx, CTX * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks @@ -313,9 +313,9 @@ __twofish_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; -ENDPROC(__twofish_dec_blk8) +SYM_FUNC_END(__twofish_dec_blk8) -ENTRY(twofish_ecb_enc_8way) +SYM_FUNC_START(twofish_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -333,9 +333,9 @@ ENTRY(twofish_ecb_enc_8way) FRAME_END ret; -ENDPROC(twofish_ecb_enc_8way) +SYM_FUNC_END(twofish_ecb_enc_8way) -ENTRY(twofish_ecb_dec_8way) +SYM_FUNC_START(twofish_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -353,9 +353,9 @@ ENTRY(twofish_ecb_dec_8way) FRAME_END ret; -ENDPROC(twofish_ecb_dec_8way) +SYM_FUNC_END(twofish_ecb_dec_8way) -ENTRY(twofish_cbc_dec_8way) +SYM_FUNC_START(twofish_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -378,9 +378,9 @@ ENTRY(twofish_cbc_dec_8way) FRAME_END ret; -ENDPROC(twofish_cbc_dec_8way) +SYM_FUNC_END(twofish_cbc_dec_8way) -ENTRY(twofish_ctr_8way) +SYM_FUNC_START(twofish_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -405,9 +405,9 @@ ENTRY(twofish_ctr_8way) FRAME_END ret; -ENDPROC(twofish_ctr_8way) +SYM_FUNC_END(twofish_ctr_8way) -ENTRY(twofish_xts_enc_8way) +SYM_FUNC_START(twofish_xts_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -429,9 +429,9 @@ ENTRY(twofish_xts_enc_8way) FRAME_END ret; -ENDPROC(twofish_xts_enc_8way) +SYM_FUNC_END(twofish_xts_enc_8way) -ENTRY(twofish_xts_dec_8way) +SYM_FUNC_START(twofish_xts_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -453,4 +453,4 @@ ENTRY(twofish_xts_dec_8way) FRAME_END ret; -ENDPROC(twofish_xts_dec_8way) +SYM_FUNC_END(twofish_xts_dec_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 290cc4e9a6fe..a6f09e4f2e46 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -207,7 +207,7 @@ xor %esi, d ## D;\ ror $1, d ## D; -ENTRY(twofish_enc_blk) +SYM_FUNC_START(twofish_enc_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -261,9 +261,9 @@ ENTRY(twofish_enc_blk) pop %ebp mov $1, %eax ret -ENDPROC(twofish_enc_blk) +SYM_FUNC_END(twofish_enc_blk) -ENTRY(twofish_dec_blk) +SYM_FUNC_START(twofish_dec_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -318,4 +318,4 @@ ENTRY(twofish_dec_blk) pop %ebp mov $1, %eax ret -ENDPROC(twofish_dec_blk) +SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index e495e07c7f1b..fc23552afe37 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -220,7 +220,7 @@ rorq $32, RAB2; \ outunpack3(mov, RIO, 2, RAB, 2); -ENTRY(__twofish_enc_blk_3way) +SYM_FUNC_START(__twofish_enc_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -267,9 +267,9 @@ ENTRY(__twofish_enc_blk_3way) popq %r12; popq %r13; ret; -ENDPROC(__twofish_enc_blk_3way) +SYM_FUNC_END(__twofish_enc_blk_3way) -ENTRY(twofish_dec_blk_3way) +SYM_FUNC_START(twofish_dec_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -302,4 +302,4 @@ ENTRY(twofish_dec_blk_3way) popq %r12; popq %r13; ret; -ENDPROC(twofish_dec_blk_3way) +SYM_FUNC_END(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index ecef2cb9f43f..d2e56232494a 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -202,7 +202,7 @@ xor %r8d, d ## D;\ ror $1, d ## D; -ENTRY(twofish_enc_blk) +SYM_FUNC_START(twofish_enc_blk) pushq R1 /* %rdi contains the ctx address */ @@ -253,9 +253,9 @@ ENTRY(twofish_enc_blk) popq R1 movl $1,%eax ret -ENDPROC(twofish_enc_blk) +SYM_FUNC_END(twofish_enc_blk) -ENTRY(twofish_dec_blk) +SYM_FUNC_START(twofish_dec_blk) pushq R1 /* %rdi contains the ctx address */ @@ -305,4 +305,4 @@ ENTRY(twofish_dec_blk) popq R1 movl $1,%eax ret -ENDPROC(twofish_dec_blk) +SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 515c0ceeb4a3..0789e13ece90 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with .macro CALL_enter_from_user_mode #ifdef CONFIG_CONTEXT_TRACKING #ifdef CONFIG_JUMP_LABEL - STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 + STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0 #endif call enter_from_user_mode .Lafter_call_\@: diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 3f8e22615812..9747876980b5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -33,6 +33,7 @@ #include <asm/cpufeature.h> #include <asm/fpu/api.h> #include <asm/nospec-branch.h> +#include <asm/io_bitmap.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) /* Reload ti->flags; we may have rescheduled above. */ cached_flags = READ_ONCE(ti->flags); + if (unlikely(cached_flags & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + fpregs_assert_state_consistent(); if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f83ca5aa8b77..5832b11f01bb 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -172,7 +172,7 @@ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI .if \no_user_check == 0 /* coming from usermode? */ - testl $SEGMENT_RPL_MASK, PT_CS(%esp) + testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) jz .Lend_\@ .endif /* On user-cr3? */ @@ -205,64 +205,76 @@ #define CS_FROM_ENTRY_STACK (1 << 31) #define CS_FROM_USER_CR3 (1 << 30) #define CS_FROM_KERNEL (1 << 29) +#define CS_FROM_ESPFIX (1 << 28) .macro FIXUP_FRAME /* * The high bits of the CS dword (__csh) are used for CS_FROM_*. * Clear them in case hardware didn't do this for us. */ - andl $0x0000ffff, 3*4(%esp) + andl $0x0000ffff, 4*4(%esp) #ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, 4*4(%esp) + testl $X86_EFLAGS_VM, 5*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ #endif - testl $SEGMENT_RPL_MASK, 3*4(%esp) + testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ - orl $CS_FROM_KERNEL, 3*4(%esp) + orl $CS_FROM_KERNEL, 4*4(%esp) /* * When we're here from kernel mode; the (exception) stack looks like: * - * 5*4(%esp) - <previous context> - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 6*4(%esp) - <previous context> + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs * * Lets build a 5 entry IRET frame after that, such that struct pt_regs * is complete and in particular regs->sp is correct. This gives us - * the original 5 enties as gap: + * the original 6 enties as gap: * - * 12*4(%esp) - <previous context> - * 11*4(%esp) - gap / flags - * 10*4(%esp) - gap / cs - * 9*4(%esp) - gap / ip - * 8*4(%esp) - gap / orig_eax - * 7*4(%esp) - gap / gs / function - * 6*4(%esp) - ss - * 5*4(%esp) - sp - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 14*4(%esp) - <previous context> + * 13*4(%esp) - gap / flags + * 12*4(%esp) - gap / cs + * 11*4(%esp) - gap / ip + * 10*4(%esp) - gap / orig_eax + * 9*4(%esp) - gap / gs / function + * 8*4(%esp) - gap / fs + * 7*4(%esp) - ss + * 6*4(%esp) - sp + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs */ pushl %ss # ss pushl %esp # sp (points at ss) - addl $6*4, (%esp) # point sp back at the previous context - pushl 6*4(%esp) # flags - pushl 6*4(%esp) # cs - pushl 6*4(%esp) # ip - pushl 6*4(%esp) # orig_eax - pushl 6*4(%esp) # gs / function + addl $7*4, (%esp) # point sp back at the previous context + pushl 7*4(%esp) # flags + pushl 7*4(%esp) # cs + pushl 7*4(%esp) # ip + pushl 7*4(%esp) # orig_eax + pushl 7*4(%esp) # gs / function + pushl 7*4(%esp) # fs .Lfrom_usermode_no_fixup_\@: .endm .macro IRET_FRAME + /* + * We're called with %ds, %es, %fs, and %gs from the interrupted + * frame, so we shouldn't use them. Also, we may be in ESPFIX + * mode and therefore have a nonzero SS base and an offset ESP, + * so any attempt to access the stack needs to use SS. (except for + * accesses through %esp, which automatically use SS.) + */ testl $CS_FROM_KERNEL, 1*4(%esp) jz .Lfinished_frame_\@ @@ -276,31 +288,40 @@ movl 5*4(%esp), %eax # (modified) regs->sp movl 4*4(%esp), %ecx # flags - movl %ecx, -4(%eax) + movl %ecx, %ss:-1*4(%eax) movl 3*4(%esp), %ecx # cs andl $0x0000ffff, %ecx - movl %ecx, -8(%eax) + movl %ecx, %ss:-2*4(%eax) movl 2*4(%esp), %ecx # ip - movl %ecx, -12(%eax) + movl %ecx, %ss:-3*4(%eax) movl 1*4(%esp), %ecx # eax - movl %ecx, -16(%eax) + movl %ecx, %ss:-4*4(%eax) popl %ecx - lea -16(%eax), %esp + lea -4*4(%eax), %esp popl %eax .Lfinished_frame_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 cld .if \skip_gs == 0 PUSH_GS .endif - FIXUP_FRAME pushl %fs + + pushl %eax + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs +.if \unwind_espfix > 0 + UNWIND_ESPFIX_STACK +.endif + popl %eax + + FIXUP_FRAME pushl %es pushl %ds pushl \pt_regs_ax @@ -313,8 +334,6 @@ movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs .if \skip_gs == 0 SET_KERNEL_GS %edx .endif @@ -324,8 +343,8 @@ .endif .endm -.macro SAVE_ALL_NMI cr3_reg:req - SAVE_ALL +.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 + SAVE_ALL unwind_espfix=\unwind_espfix BUG_IF_WRONG_CR3 @@ -357,6 +376,7 @@ 2: popl %es 3: popl %fs POP_GS \pop + IRET_FRAME .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -395,7 +415,8 @@ .macro CHECK_AND_APPLY_ESPFIX #ifdef CONFIG_X86_ESPFIX32 -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX @@ -709,7 +730,7 @@ * %eax: prev task * %edx: next task */ -ENTRY(__switch_to_asm) +SYM_CODE_START(__switch_to_asm) /* * Save callee-saved registers * This must match the order in struct inactive_task_frame @@ -718,6 +739,11 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + /* + * Flags are saved to prevent AC leakage. This could go + * away if objtool would have 32bit support to verify + * the STAC/CLAC correctness. + */ pushfl /* switch stack */ @@ -740,15 +766,16 @@ ENTRY(__switch_to_asm) FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif - /* restore callee-saved registers */ + /* Restore flags or the incoming task to restore AC state. */ popfl + /* restore callee-saved registers */ popl %esi popl %edi popl %ebx popl %ebp jmp __switch_to -END(__switch_to_asm) +SYM_CODE_END(__switch_to_asm) /* * The unwinder expects the last frame on the stack to always be at the same @@ -757,7 +784,7 @@ END(__switch_to_asm) * asmlinkage function so its argument has to be pushed on the stack. This * wrapper creates a proper "end of stack" frame header before the call. */ -ENTRY(schedule_tail_wrapper) +SYM_FUNC_START(schedule_tail_wrapper) FRAME_BEGIN pushl %eax @@ -766,7 +793,7 @@ ENTRY(schedule_tail_wrapper) FRAME_END ret -ENDPROC(schedule_tail_wrapper) +SYM_FUNC_END(schedule_tail_wrapper) /* * A newly forked process directly context switches into this address. * @@ -774,7 +801,7 @@ ENDPROC(schedule_tail_wrapper) * ebx: kernel thread func (NULL for user thread) * edi: kernel thread arg */ -ENTRY(ret_from_fork) +SYM_CODE_START(ret_from_fork) call schedule_tail_wrapper testl %ebx, %ebx @@ -797,7 +824,7 @@ ENTRY(ret_from_fork) */ movl $0, PT_EAX(%esp) jmp 2b -END(ret_from_fork) +SYM_CODE_END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, @@ -807,8 +834,7 @@ END(ret_from_fork) */ # userspace resumption stub bypassing syscall exit tracing - ALIGN -ret_from_exception: +SYM_CODE_START_LOCAL(ret_from_exception) preempt_stop(CLBR_ANY) ret_from_intr: #ifdef CONFIG_VM86 @@ -825,15 +851,14 @@ ret_from_intr: cmpl $USER_RPL, %eax jb restore_all_kernel # not returning to v8086 or userspace -ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl %esp, %eax call prepare_exit_to_usermode jmp restore_all -END(ret_from_exception) +SYM_CODE_END(ret_from_exception) -GLOBAL(__begin_SYSENTER_singlestep_region) +SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) /* * All code from here through __end_SYSENTER_singlestep_region is subject * to being single-stepped if a user program sets TF and executes SYSENTER. @@ -848,9 +873,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. */ -ENTRY(xen_sysenter_target) +SYM_CODE_START(xen_sysenter_target) addl $5*4, %esp /* remove xen-provided frame */ jmp .Lsysenter_past_esp +SYM_CODE_END(xen_sysenter_target) #endif /* @@ -885,7 +911,7 @@ ENTRY(xen_sysenter_target) * ebp user stack * 0(%ebp) arg6 */ -ENTRY(entry_SYSENTER_32) +SYM_FUNC_START(entry_SYSENTER_32) /* * On entry-stack with all userspace-regs live - save and * restore eflags and %eax to use it as scratch-reg for the cr3 @@ -1012,8 +1038,8 @@ ENTRY(entry_SYSENTER_32) pushl $X86_EFLAGS_FIXED popfl jmp .Lsysenter_flags_fixed -GLOBAL(__end_SYSENTER_singlestep_region) -ENDPROC(entry_SYSENTER_32) +SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) +SYM_FUNC_END(entry_SYSENTER_32) /* * 32-bit legacy system call entry. @@ -1043,7 +1069,7 @@ ENDPROC(entry_SYSENTER_32) * edi arg5 * ebp arg6 */ -ENTRY(entry_INT80_32) +SYM_FUNC_START(entry_INT80_32) ASM_CLAC pushl %eax /* pt_regs->orig_ax */ @@ -1075,7 +1101,6 @@ restore_all: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code .Lirq_return: - IRET_FRAME /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler and when returning from @@ -1100,7 +1125,7 @@ restore_all_kernel: jmp .Lirq_return .section .fixup, "ax" -ENTRY(iret_exc ) +SYM_CODE_START(iret_exc) pushl $0 # no error code pushl $do_iret_error @@ -1117,9 +1142,10 @@ ENTRY(iret_exc ) #endif jmp common_exception +SYM_CODE_END(iret_exc) .previous _ASM_EXTABLE(.Lirq_return, iret_exc) -ENDPROC(entry_INT80_32) +SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK /* @@ -1128,30 +1154,43 @@ ENDPROC(entry_INT80_32) * We can't call C functions using the ESPFIX stack. This code reads * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. + * + * We might be on user CR3 here, so percpu data is not mapped and we can't + * access the GDT through the percpu segment. Instead, use SGDT to find + * the cpu_entry_area alias of the GDT. */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ + pushl %ecx + subl $2*4, %esp + sgdt (%esp) + movl 2(%esp), %ecx /* GDT address */ + /* + * Careful: ECX is a linear pointer, so we need to force base + * zero. %cs is the only known-linear segment we have right now. + */ + mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ + mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ shl $16, %eax + addl $2*4, %esp + popl %ecx addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS pushl %eax lss (%esp), %esp /* switch to the normal stack segment */ #endif .endm + .macro UNWIND_ESPFIX_STACK + /* It's safe to clobber %eax, all other regs need to be preserved */ #ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es + jne .Lno_fixup_\@ /* switch to normal stack */ FIXUP_ESPFIX_STACK -27: +.Lno_fixup_\@: #endif .endm @@ -1160,7 +1199,7 @@ ENDPROC(entry_INT80_32) * We pack 1 stub into every 8-byte block. */ .align 8 -ENTRY(irq_entries_start) +SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) pushl $(~vector+0x80) /* Note: always in signed byte range */ @@ -1168,11 +1207,11 @@ ENTRY(irq_entries_start) jmp common_interrupt .align 8 .endr -END(irq_entries_start) +SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC .align 8 -ENTRY(spurious_entries_start) +SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) pushl $(~vector+0x80) /* Note: always in signed byte range */ @@ -1180,9 +1219,9 @@ ENTRY(spurious_entries_start) jmp common_spurious .align 8 .endr -END(spurious_entries_start) +SYM_CODE_END(spurious_entries_start) -common_spurious: +SYM_CODE_START_LOCAL(common_spurious) ASM_CLAC addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ SAVE_ALL switch_stacks=1 @@ -1191,7 +1230,7 @@ common_spurious: movl %esp, %eax call smp_spurious_interrupt jmp ret_from_intr -ENDPROC(common_spurious) +SYM_CODE_END(common_spurious) #endif /* @@ -1199,7 +1238,7 @@ ENDPROC(common_spurious) * so IRQ-flags tracing has to follow that: */ .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: +SYM_CODE_START_LOCAL(common_interrupt) ASM_CLAC addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ @@ -1209,10 +1248,10 @@ common_interrupt: movl %esp, %eax call do_IRQ jmp ret_from_intr -ENDPROC(common_interrupt) +SYM_CODE_END(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ -ENTRY(name) \ +SYM_FUNC_START(name) \ ASM_CLAC; \ pushl $~(nr); \ SAVE_ALL switch_stacks=1; \ @@ -1221,7 +1260,7 @@ ENTRY(name) \ movl %esp, %eax; \ call fn; \ jmp ret_from_intr; \ -ENDPROC(name) +SYM_FUNC_END(name) #define BUILD_INTERRUPT(name, nr) \ BUILD_INTERRUPT3(name, nr, smp_##name); \ @@ -1229,14 +1268,14 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include <asm/entry_arch.h> -ENTRY(coprocessor_error) +SYM_CODE_START(coprocessor_error) ASM_CLAC pushl $0 pushl $do_coprocessor_error jmp common_exception -END(coprocessor_error) +SYM_CODE_END(coprocessor_error) -ENTRY(simd_coprocessor_error) +SYM_CODE_START(simd_coprocessor_error) ASM_CLAC pushl $0 #ifdef CONFIG_X86_INVD_BUG @@ -1248,104 +1287,99 @@ ENTRY(simd_coprocessor_error) pushl $do_simd_coprocessor_error #endif jmp common_exception -END(simd_coprocessor_error) +SYM_CODE_END(simd_coprocessor_error) -ENTRY(device_not_available) +SYM_CODE_START(device_not_available) ASM_CLAC pushl $-1 # mark this as an int pushl $do_device_not_available jmp common_exception -END(device_not_available) +SYM_CODE_END(device_not_available) #ifdef CONFIG_PARAVIRT -ENTRY(native_iret) +SYM_CODE_START(native_iret) iret _ASM_EXTABLE(native_iret, iret_exc) -END(native_iret) +SYM_CODE_END(native_iret) #endif -ENTRY(overflow) +SYM_CODE_START(overflow) ASM_CLAC pushl $0 pushl $do_overflow jmp common_exception -END(overflow) +SYM_CODE_END(overflow) -ENTRY(bounds) +SYM_CODE_START(bounds) ASM_CLAC pushl $0 pushl $do_bounds jmp common_exception -END(bounds) +SYM_CODE_END(bounds) -ENTRY(invalid_op) +SYM_CODE_START(invalid_op) ASM_CLAC pushl $0 pushl $do_invalid_op jmp common_exception -END(invalid_op) +SYM_CODE_END(invalid_op) -ENTRY(coprocessor_segment_overrun) +SYM_CODE_START(coprocessor_segment_overrun) ASM_CLAC pushl $0 pushl $do_coprocessor_segment_overrun jmp common_exception -END(coprocessor_segment_overrun) +SYM_CODE_END(coprocessor_segment_overrun) -ENTRY(invalid_TSS) +SYM_CODE_START(invalid_TSS) ASM_CLAC pushl $do_invalid_TSS jmp common_exception -END(invalid_TSS) +SYM_CODE_END(invalid_TSS) -ENTRY(segment_not_present) +SYM_CODE_START(segment_not_present) ASM_CLAC pushl $do_segment_not_present jmp common_exception -END(segment_not_present) +SYM_CODE_END(segment_not_present) -ENTRY(stack_segment) +SYM_CODE_START(stack_segment) ASM_CLAC pushl $do_stack_segment jmp common_exception -END(stack_segment) +SYM_CODE_END(stack_segment) -ENTRY(alignment_check) +SYM_CODE_START(alignment_check) ASM_CLAC pushl $do_alignment_check jmp common_exception -END(alignment_check) +SYM_CODE_END(alignment_check) -ENTRY(divide_error) +SYM_CODE_START(divide_error) ASM_CLAC pushl $0 # no error code pushl $do_divide_error jmp common_exception -END(divide_error) +SYM_CODE_END(divide_error) #ifdef CONFIG_X86_MCE -ENTRY(machine_check) +SYM_CODE_START(machine_check) ASM_CLAC pushl $0 pushl machine_check_vector jmp common_exception -END(machine_check) +SYM_CODE_END(machine_check) #endif -ENTRY(spurious_interrupt_bug) +SYM_CODE_START(spurious_interrupt_bug) ASM_CLAC pushl $0 pushl $do_spurious_interrupt_bug jmp common_exception -END(spurious_interrupt_bug) +SYM_CODE_END(spurious_interrupt_bug) #ifdef CONFIG_XEN_PV -ENTRY(xen_hypervisor_callback) - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - +SYM_FUNC_START(xen_hypervisor_callback) /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1353,22 +1387,23 @@ ENTRY(xen_hypervisor_callback) * iret instruction's behaviour where it delivers a * pending interrupt when enabling interrupts: */ - movl PT_EIP(%esp), %eax - cmpl $xen_iret_start_crit, %eax + cmpl $xen_iret_start_crit, (%esp) jb 1f - cmpl $xen_iret_end_crit, %eax + cmpl $xen_iret_end_crit, (%esp) jae 1f - - jmp xen_iret_crit_fixup - -ENTRY(xen_do_upcall) -1: mov %esp, %eax + call xen_iret_crit_fixup +1: + pushl $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + mov %esp, %eax call xen_evtchn_do_upcall #ifndef CONFIG_PREEMPTION call xen_maybe_preempt_hcall #endif jmp ret_from_intr -ENDPROC(xen_hypervisor_callback) +SYM_FUNC_END(xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1382,7 +1417,7 @@ ENDPROC(xen_hypervisor_callback) * to pop the stack frame we end up in an infinite loop of failsafe callbacks. * We distinguish between categories by maintaining a status value in EAX. */ -ENTRY(xen_failsafe_callback) +SYM_FUNC_START(xen_failsafe_callback) pushl %eax movl $1, %eax 1: mov 4(%esp), %ds @@ -1419,7 +1454,7 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(2b, 7b) _ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(4b, 9b) -ENDPROC(xen_failsafe_callback) +SYM_FUNC_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ #ifdef CONFIG_XEN_PVHVM @@ -1441,18 +1476,17 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, #endif /* CONFIG_HYPERV */ -ENTRY(page_fault) +SYM_CODE_START(page_fault) ASM_CLAC pushl $do_page_fault jmp common_exception_read_cr2 -END(page_fault) +SYM_CODE_END(page_fault) -common_exception_read_cr2: +SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1470,13 +1504,12 @@ common_exception_read_cr2: movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi jmp ret_from_exception -END(common_exception_read_cr2) +SYM_CODE_END(common_exception_read_cr2) -common_exception: +SYM_CODE_START_LOCAL_NOALIGN(common_exception) /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1492,9 +1525,9 @@ common_exception: movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi jmp ret_from_exception -END(common_exception) +SYM_CODE_END(common_exception) -ENTRY(debug) +SYM_CODE_START(debug) /* * Entry from sysenter is now handled in common_exception */ @@ -1502,7 +1535,7 @@ ENTRY(debug) pushl $-1 # mark this as an int pushl $do_debug jmp common_exception -END(debug) +SYM_CODE_END(debug) /* * NMI is doubly nasty. It can happen on the first instruction of @@ -1511,10 +1544,14 @@ END(debug) * switched stacks. We handle both conditions by simply checking whether we * interrupted kernel code running on the SYSENTER stack. */ -ENTRY(nmi) +SYM_CODE_START(nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 + /* + * ESPFIX_SS is only ever set on the return to user path + * after we've switched to the entry stack. + */ pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1550,6 +1587,11 @@ ENTRY(nmi) movl %ebx, %esp .Lnmi_return: +#ifdef CONFIG_X86_ESPFIX32 + testl $CS_FROM_ESPFIX, PT_CS(%esp) + jnz .Lnmi_from_espfix +#endif + CHECK_AND_APPLY_ESPFIX RESTORE_ALL_NMI cr3_reg=%edi pop=4 jmp .Lirq_return @@ -1557,28 +1599,47 @@ ENTRY(nmi) #ifdef CONFIG_X86_ESPFIX32 .Lnmi_espfix_stack: /* - * create the pointer to lss back + * Create the pointer to LSS back */ pushl %ss pushl %esp addl $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - .endr - pushl %eax - SAVE_ALL_NMI cr3_reg=%edi + + /* Copy the (short) IRET frame */ + pushl 4*4(%esp) # flags + pushl 4*4(%esp) # cs + pushl 4*4(%esp) # ip + + pushl %eax # orig_ax + + SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 ENCODE_FRAME_POINTER - FIXUP_ESPFIX_STACK # %eax == %esp + + /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ + xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) + xorl %edx, %edx # zero error code - call do_nmi + movl %esp, %eax # pt_regs pointer + jmp .Lnmi_from_sysenter_stack + +.Lnmi_from_espfix: RESTORE_ALL_NMI cr3_reg=%edi - lss 12+4(%esp), %esp # back to espfix stack + /* + * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to + * fix up the gap and long frame: + * + * 3 - original frame (exception) + * 2 - ESPFIX block (above) + * 6 - gap (FIXUP_FRAME) + * 5 - long frame (FIXUP_FRAME) + * 1 - orig_ax + */ + lss (1+5+6)*4(%esp), %esp # back to espfix stack jmp .Lirq_return #endif -END(nmi) +SYM_CODE_END(nmi) -ENTRY(int3) +SYM_CODE_START(int3) ASM_CLAC pushl $-1 # mark this as an int @@ -1589,22 +1650,22 @@ ENTRY(int3) movl %esp, %eax # pt_regs pointer call do_int3 jmp ret_from_exception -END(int3) +SYM_CODE_END(int3) -ENTRY(general_protection) +SYM_CODE_START(general_protection) pushl $do_general_protection jmp common_exception -END(general_protection) +SYM_CODE_END(general_protection) #ifdef CONFIG_KVM_GUEST -ENTRY(async_page_fault) +SYM_CODE_START(async_page_fault) ASM_CLAC pushl $do_async_page_fault jmp common_exception_read_cr2 -END(async_page_fault) +SYM_CODE_END(async_page_fault) #endif -ENTRY(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1613,4 +1674,4 @@ ENTRY(rewind_stack_do_exit) call do_exit 1: jmp 1b -END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b7c3ea4cb19d..76942cbd95a1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -15,7 +15,7 @@ * at the top of the kernel process stack. * * Some macro usage: - * - ENTRY/END: Define functions in the symbol table. + * - SYM_FUNC_START/END:Define functions in the symbol table. * - TRACE_IRQ_*: Trace hardirq state for lock debugging. * - idtentry: Define exception entry points. */ @@ -46,11 +46,11 @@ .section .entry.text, "ax" #ifdef CONFIG_PARAVIRT -ENTRY(native_usergs_sysret64) +SYM_CODE_START(native_usergs_sysret64) UNWIND_HINT_EMPTY swapgs sysretq -END(native_usergs_sysret64) +SYM_CODE_END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ .macro TRACE_IRQS_FLAGS flags:req @@ -142,7 +142,7 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ -ENTRY(entry_SYSCALL_64) +SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* * Interrupts are off on entry. @@ -162,7 +162,7 @@ ENTRY(entry_SYSCALL_64) pushq %r11 /* pt_regs->flags */ pushq $__USER_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ -GLOBAL(entry_SYSCALL_64_after_hwframe) +SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) pushq %rax /* pt_regs->orig_ax */ PUSH_AND_CLEAR_REGS rax=$-ENOSYS @@ -273,13 +273,13 @@ syscall_return_via_sysret: popq %rdi popq %rsp USERGS_SYSRET64 -END(entry_SYSCALL_64) +SYM_CODE_END(entry_SYSCALL_64) /* * %rdi: prev task * %rsi: next task */ -ENTRY(__switch_to_asm) +SYM_CODE_START(__switch_to_asm) UNWIND_HINT_FUNC /* * Save callee-saved registers @@ -321,7 +321,7 @@ ENTRY(__switch_to_asm) popq %rbp jmp __switch_to -END(__switch_to_asm) +SYM_CODE_END(__switch_to_asm) /* * A newly forked process directly context switches into this address. @@ -330,7 +330,7 @@ END(__switch_to_asm) * rbx: kernel thread func (NULL for user thread) * r12: kernel thread arg */ -ENTRY(ret_from_fork) +SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -357,14 +357,14 @@ ENTRY(ret_from_fork) */ movq $0, RAX(%rsp) jmp 2b -END(ret_from_fork) +SYM_CODE_END(ret_from_fork) /* * Build the entry stubs with some assembler magic. * We pack 1 stub into every 8-byte block. */ .align 8 -ENTRY(irq_entries_start) +SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) UNWIND_HINT_IRET_REGS @@ -373,10 +373,10 @@ ENTRY(irq_entries_start) .align 8 vector=vector+1 .endr -END(irq_entries_start) +SYM_CODE_END(irq_entries_start) .align 8 -ENTRY(spurious_entries_start) +SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) UNWIND_HINT_IRET_REGS @@ -385,7 +385,7 @@ ENTRY(spurious_entries_start) .align 8 vector=vector+1 .endr -END(spurious_entries_start) +SYM_CODE_END(spurious_entries_start) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY @@ -511,7 +511,7 @@ END(spurious_entries_start) * | return address | * +----------------------------------------------------+ */ -ENTRY(interrupt_entry) +SYM_CODE_START(interrupt_entry) UNWIND_HINT_FUNC ASM_CLAC cld @@ -579,7 +579,7 @@ ENTRY(interrupt_entry) TRACE_IRQS_OFF ret -END(interrupt_entry) +SYM_CODE_END(interrupt_entry) _ASM_NOKPROBE(interrupt_entry) @@ -589,18 +589,18 @@ _ASM_NOKPROBE(interrupt_entry) * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_spurious/interrupt. */ -common_spurious: +SYM_CODE_START_LOCAL(common_spurious) addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 call smp_spurious_interrupt /* rdi points to pt_regs */ jmp ret_from_intr -END(common_spurious) +SYM_CODE_END(common_spurious) _ASM_NOKPROBE(common_spurious) /* common_interrupt is a hotpath. Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: +SYM_CODE_START_LOCAL(common_interrupt) addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -616,12 +616,12 @@ ret_from_intr: jz retint_kernel /* Interrupt came from user space */ -GLOBAL(retint_user) +.Lretint_user: mov %rsp,%rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ -GLOBAL(swapgs_restore_regs_and_return_to_usermode) +SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -679,7 +679,7 @@ retint_kernel: */ TRACE_IRQS_IRETQ -GLOBAL(restore_regs_and_return_to_kernel) +SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates kernel mode. */ testb $3, CS(%rsp) @@ -695,7 +695,7 @@ GLOBAL(restore_regs_and_return_to_kernel) */ INTERRUPT_RETURN -ENTRY(native_iret) +SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -706,8 +706,7 @@ ENTRY(native_iret) jnz native_irq_return_ldt #endif -.global native_irq_return_iret -native_irq_return_iret: +SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -789,14 +788,14 @@ native_irq_return_ldt: */ jmp native_irq_return_iret #endif -END(common_interrupt) +SYM_CODE_END(common_interrupt) _ASM_NOKPROBE(common_interrupt) /* * APIC interrupts. */ .macro apicinterrupt3 num sym do_sym -ENTRY(\sym) +SYM_CODE_START(\sym) UNWIND_HINT_IRET_REGS pushq $~(\num) .Lcommon_\sym: @@ -804,7 +803,7 @@ ENTRY(\sym) UNWIND_HINT_REGS indirect=1 call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr -END(\sym) +SYM_CODE_END(\sym) _ASM_NOKPROBE(\sym) .endm @@ -969,7 +968,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 -ENTRY(\sym) +SYM_CODE_START(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ @@ -1019,7 +1018,7 @@ ENTRY(\sym) .endif _ASM_NOKPROBE(\sym) -END(\sym) +SYM_CODE_END(\sym) .endm idtentry divide_error do_divide_error has_error_code=0 @@ -1041,7 +1040,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 * Reload gs selector with exception handling * edi: new selector */ -ENTRY(native_load_gs_index) +SYM_FUNC_START(native_load_gs_index) FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) @@ -1055,13 +1054,13 @@ ENTRY(native_load_gs_index) popfq FRAME_END ret -ENDPROC(native_load_gs_index) +SYM_FUNC_END(native_load_gs_index) EXPORT_SYMBOL(native_load_gs_index) _ASM_EXTABLE(.Lgs_change, .Lbad_gs) .section .fixup, "ax" /* running with kernelgs */ -.Lbad_gs: +SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) SWAPGS /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ @@ -1072,10 +1071,11 @@ EXPORT_SYMBOL(native_load_gs_index) xorl %eax, %eax movl %eax, %gs jmp 2b +SYM_CODE_END(.Lbad_gs) .previous /* Call softirq on interrupt stack. Interrupts are off. */ -ENTRY(do_softirq_own_stack) +SYM_FUNC_START(do_softirq_own_stack) pushq %rbp mov %rsp, %rbp ENTER_IRQ_STACK regs=0 old_rsp=%r11 @@ -1083,7 +1083,7 @@ ENTRY(do_softirq_own_stack) LEAVE_IRQ_STACK regs=0 leaveq ret -ENDPROC(do_softirq_own_stack) +SYM_FUNC_END(do_softirq_own_stack) #ifdef CONFIG_XEN_PV idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 @@ -1101,7 +1101,8 @@ idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 * existing activation in its critical region -- if so, we pop the current * activation and restart the handler using the previous one. */ -ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ +/* do_hypervisor_callback(struct *pt_regs) */ +SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -1119,7 +1120,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ call xen_maybe_preempt_hcall #endif jmp error_exit -END(xen_do_hypervisor_callback) +SYM_CODE_END(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1134,7 +1135,7 @@ END(xen_do_hypervisor_callback) * We distinguish between categories by comparing each saved segment register * with its current contents: any discrepancy means we in category 1. */ -ENTRY(xen_failsafe_callback) +SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY movl %ds, %ecx cmpw %cx, 0x10(%rsp) @@ -1164,7 +1165,7 @@ ENTRY(xen_failsafe_callback) PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER jmp error_exit -END(xen_failsafe_callback) +SYM_CODE_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ #ifdef CONFIG_XEN_PVHVM @@ -1214,7 +1215,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 * Use slow, but surefire "are we in kernel?" check. * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ -ENTRY(paranoid_entry) +SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC cld PUSH_AND_CLEAR_REGS save_ret=1 @@ -1248,7 +1249,7 @@ ENTRY(paranoid_entry) FENCE_SWAPGS_KERNEL_ENTRY ret -END(paranoid_entry) +SYM_CODE_END(paranoid_entry) /* * "Paranoid" exit path from exception stack. This is invoked @@ -1262,7 +1263,7 @@ END(paranoid_entry) * * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ -ENTRY(paranoid_exit) +SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF_DEBUG @@ -1272,19 +1273,18 @@ ENTRY(paranoid_exit) /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK - jmp .Lparanoid_exit_restore + jmp restore_regs_and_return_to_kernel .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 -.Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel -END(paranoid_exit) +SYM_CODE_END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. */ -ENTRY(error_entry) +SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld PUSH_AND_CLEAR_REGS save_ret=1 @@ -1364,16 +1364,16 @@ ENTRY(error_entry) call fixup_bad_iret mov %rax, %rsp jmp .Lerror_entry_from_usermode_after_swapgs -END(error_entry) +SYM_CODE_END(error_entry) -ENTRY(error_exit) +SYM_CODE_START_LOCAL(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF testb $3, CS(%rsp) jz retint_kernel - jmp retint_user -END(error_exit) + jmp .Lretint_user +SYM_CODE_END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, @@ -1383,7 +1383,7 @@ END(error_exit) * %r14: Used to save/restore the CR3 of the interrupted context * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ -ENTRY(nmi) +SYM_CODE_START(nmi) UNWIND_HINT_IRET_REGS /* @@ -1718,21 +1718,21 @@ nmi_restore: * about espfix64 on the way back to kernel mode. */ iretq -END(nmi) +SYM_CODE_END(nmi) #ifndef CONFIG_IA32_EMULATION /* * This handles SYSCALL from 32-bit code. There is no way to program * MSRs to fully disable 32-bit SYSCALL. */ -ENTRY(ignore_sysret) +SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret -END(ignore_sysret) +SYM_CODE_END(ignore_sysret) #endif -ENTRY(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_do_exit) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1742,4 +1742,4 @@ ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE call do_exit -END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 39913770a44d..f1d3ccae5dd5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -46,7 +46,7 @@ * ebp user stack * 0(%ebp) arg6 */ -ENTRY(entry_SYSENTER_compat) +SYM_FUNC_START(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS @@ -146,8 +146,8 @@ ENTRY(entry_SYSENTER_compat) pushq $X86_EFLAGS_FIXED popfq jmp .Lsysenter_flags_fixed -GLOBAL(__end_entry_SYSENTER_compat) -ENDPROC(entry_SYSENTER_compat) +SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) +SYM_FUNC_END(entry_SYSENTER_compat) /* * 32-bit SYSCALL entry. @@ -196,7 +196,7 @@ ENDPROC(entry_SYSENTER_compat) * esp user stack * 0(%esp) arg6 */ -ENTRY(entry_SYSCALL_compat) +SYM_CODE_START(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs @@ -215,7 +215,7 @@ ENTRY(entry_SYSCALL_compat) pushq %r11 /* pt_regs->flags */ pushq $__USER32_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ -GLOBAL(entry_SYSCALL_compat_after_hwframe) +SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax /* discard orig_ax high bits */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ @@ -311,7 +311,7 @@ sysret32_from_system_call: xorl %r10d, %r10d swapgs sysretl -END(entry_SYSCALL_compat) +SYM_CODE_END(entry_SYSCALL_compat) /* * 32-bit legacy system call entry. @@ -339,7 +339,7 @@ END(entry_SYSCALL_compat) * edi arg5 * ebp arg6 */ -ENTRY(entry_INT80_compat) +SYM_CODE_START(entry_INT80_compat) /* * Interrupts are off on entry. */ @@ -416,4 +416,4 @@ ENTRY(entry_INT80_compat) /* Go back to user mode. */ TRACE_IRQS_ON jmp swapgs_restore_regs_and_return_to_usermode -END(entry_INT80_compat) +SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index aa3336a7cb15..7d17b3addbbb 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,13 +10,11 @@ #ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); - -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); - +#define __sys_ni_syscall __ia32_sys_ni_syscall #else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __sys_ni_syscall sys_ni_syscall #endif /* CONFIG_IA32_EMULATION */ #include <asm/syscalls_32.h> @@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_compat_max] = &sys_ni_syscall, + [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include <asm/syscalls_32.h> }; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index b1bf31713374..adf619a856e8 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -4,11 +4,17 @@ #include <linux/linkage.h> #include <linux/sys.h> #include <linux/cache.h> +#include <linux/syscalls.h> #include <asm/asm-offsets.h> #include <asm/syscall.h> -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +extern asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); #define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include <asm/syscalls_64.h> @@ -23,7 +29,7 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall, #include <asm/syscalls_64.h> }; @@ -40,7 +46,7 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_x32_max] = &sys_ni_syscall, + [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall, #include <asm/syscalls_64.h> }; diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3fe02546aed3..15908eb9b17e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -124,13 +124,13 @@ 110 i386 iopl sys_iopl __ia32_sys_iopl 111 i386 vhangup sys_vhangup __ia32_sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys_ni_syscall +113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall 114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4 115 i386 swapoff sys_swapoff __ia32_sys_swapoff 116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo 117 i386 ipc sys_ipc __ia32_compat_sys_ipc 118 i386 fsync sys_fsync __ia32_sys_fsync -119 i386 sigreturn sys_sigreturn sys32_sigreturn +119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn 120 i386 clone sys_clone __ia32_compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname @@ -177,14 +177,14 @@ 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 -166 i386 vm86 sys_vm86 sys_ni_syscall +166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll __ia32_sys_poll 169 i386 nfsservctl 170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16 171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16 172 i386 prctl sys_prctl __ia32_sys_prctl -173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 2713490611a3..e010d4ae11f1 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -10,8 +10,7 @@ /* put return address in eax (arg1) */ .macro THUNK name, func, put_ret_addr_in_eax=0 - .globl \name -\name: +SYM_CODE_START_NOALIGN(\name) pushl %eax pushl %ecx pushl %edx @@ -27,6 +26,7 @@ popl %eax ret _ASM_NOKPROBE(\name) +SYM_CODE_END(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index ea5c4167086c..c5c3b6e86e62 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -12,7 +12,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 - ENTRY(\name) +SYM_FUNC_START_NOALIGN(\name) pushq %rbp movq %rsp, %rbp @@ -33,7 +33,7 @@ call \func jmp .L_restore - ENDPROC(\name) +SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm @@ -56,7 +56,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPTION) -.L_restore: +SYM_CODE_START_LOCAL_NOALIGN(.L_restore) popq %r11 popq %r10 popq %r9 @@ -69,4 +69,5 @@ popq %rbp ret _ASM_NOKPROBE(.L_restore) +SYM_CODE_END(.L_restore) #endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 0f2154106d01..2b75e80f6b41 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -87,11 +87,9 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. # -CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg -CFLAGS_REMOVE_vvar.o = -pg # # X32 processes use x32 vDSO to access 64bit kernel data. diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 263d7433dea8..de1fff7188aa 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -62,7 +62,7 @@ __kernel_vsyscall: /* Enter using int $0x80 */ int $0x80 -GLOBAL(int80_landing_pad) +SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) /* * Restore EDX and ECX in case they were clobbered. EBP is not diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6e3f0c18908e..9a89d98c55bd 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -49,6 +49,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); u64 __read_mostly hw_cache_event_ids @@ -2181,21 +2182,26 @@ static ssize_t set_attr_rdpmc(struct device *cdev, if (x86_pmu.attr_rdpmc_broken) return -ENOTSUPP; - if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) { + if (val != x86_pmu.attr_rdpmc) { /* - * Changing into or out of always available, aka - * perf-event-bypassing mode. This path is extremely slow, + * Changing into or out of never available or always available, + * aka perf-event-bypassing mode. This path is extremely slow, * but only root can trigger it, so it's okay. */ + if (val == 0) + static_branch_inc(&rdpmc_never_available_key); + else if (x86_pmu.attr_rdpmc == 0) + static_branch_dec(&rdpmc_never_available_key); + if (val == 2) static_branch_inc(&rdpmc_always_available_key); - else + else if (x86_pmu.attr_rdpmc == 2) static_branch_dec(&rdpmc_always_available_key); + on_each_cpu(refresh_pce, NULL, 1); + x86_pmu.attr_rdpmc = val; } - x86_pmu.attr_rdpmc = val; - return count; } diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index e01078e93dd3..40e0e322161d 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -194,10 +194,20 @@ do_ex_hypercall: static bool __send_ipi_one(int cpu, int vector) { - struct cpumask mask = CPU_MASK_NONE; + int vp = hv_cpu_number_to_vp_number(cpu); - cpumask_set_cpu(cpu, &mask); - return __send_ipi_mask(&mask, vector); + trace_hyperv_send_ipi_one(cpu, vector); + + if (!hv_hypercall_pg || (vp == VP_INVAL)) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + if (vp >= 64) + return __send_ipi_mask_ex(cpumask_of(cpu), vector); + + return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); } static void hv_send_ipi(int cpu, int vector) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2db3972c0e0f..50ff030d9224 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -311,6 +311,12 @@ void __init hyperv_init(void) hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + /* + * Ignore any errors in setting up stimer clockevents + * as we can run with the LAPIC timer as a fallback. + */ + (void)hv_stimer_alloc(); + hv_apic_init(); x86_init.pci.arch_init = hv_pci_init; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 1cee10091b9f..30416d7f19d4 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,6 +21,7 @@ #include <linux/personality.h> #include <linux/compat.h> #include <linux/binfmts.h> +#include <linux/syscalls.h> #include <asm/ucontext.h> #include <linux/uaccess.h> #include <asm/fpu/internal.h> @@ -118,7 +119,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, return err; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); @@ -144,7 +145,7 @@ badframe: return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 1b563f9167ea..cd339b88d5d4 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -141,9 +141,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ @@ -172,9 +169,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h deleted file mode 100644 index facd374a1bf7..000000000000 --- a/arch/x86/include/asm/calgary.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Derived from include/asm-powerpc/iommu.h - * - * Copyright IBM Corporation, 2006-2007 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - */ - -#ifndef _ASM_X86_CALGARY_H -#define _ASM_X86_CALGARY_H - -#include <linux/spinlock.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <linux/timer.h> -#include <asm/types.h> - -struct iommu_table { - const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ - unsigned long it_base; /* mapped address of tce table */ - unsigned long it_hint; /* Hint for next alloc */ - unsigned long *it_map; /* A simple allocation bitmap for now */ - void __iomem *bbar; /* Bridge BAR */ - u64 tar_val; /* Table Address Register */ - struct timer_list watchdog_timer; - spinlock_t it_lock; /* Protects it_map */ - unsigned int it_size; /* Size of iommu table in entries */ - unsigned char it_busno; /* Bus number this table belongs to */ -}; - -struct cal_chipset_ops { - void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); - void (*tce_cache_blast)(struct iommu_table *tbl); - void (*dump_error_regs)(struct iommu_table *tbl); -}; - -#define TCE_TABLE_SIZE_UNSPECIFIED ~0 -#define TCE_TABLE_SIZE_64K 0 -#define TCE_TABLE_SIZE_128K 1 -#define TCE_TABLE_SIZE_256K 2 -#define TCE_TABLE_SIZE_512K 3 -#define TCE_TABLE_SIZE_1M 4 -#define TCE_TABLE_SIZE_2M 5 -#define TCE_TABLE_SIZE_4M 6 -#define TCE_TABLE_SIZE_8M 7 - -extern int use_calgary; - -#ifdef CONFIG_CALGARY_IOMMU -extern int detect_calgary(void); -#else -static inline int detect_calgary(void) { return -ENODEV; } -#endif - -#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 8348f7d69fd5..ea866c7bf31d 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -78,8 +78,12 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; /* @@ -94,7 +98,6 @@ struct cpu_entry_area { */ struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -105,11 +108,13 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. */ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); @@ -117,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); +/* Single page reserved for the readonly IDT mapping: */ #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) #define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) extern struct cpu_entry_area *get_cpu_entry_area(int cpu); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c4fbe379cc0b..e9b62498fe75 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -292,6 +292,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21..f58de66091e5 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,10 +2,17 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); -int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); void crash_smp_send_stop(void); +#ifdef CONFIG_KEXEC_CORE +void __init crash_reserve_low_1M(void); +#else +static inline void __init crash_reserve_low_1M(void) { } +#endif + #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index a5ea841cc6d2..8e1d0bb46361 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -22,7 +22,7 @@ # define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) #endif -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP # define DISABLE_UMIP 0 #else # define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h index c3aa4b5e49e2..314f75d886d0 100644 --- a/arch/x86/include/asm/e820/types.h +++ b/arch/x86/include/asm/e820/types.h @@ -29,6 +29,14 @@ enum e820_type { E820_TYPE_PRAM = 12, /* + * Special-purpose memory is indicated to the system via the + * EFI_MEMORY_SP attribute. Define an e820 translation of this + * memory type for the purpose of reserving this range and + * marking it with the IORES_DESC_SOFT_RESERVED designation. + */ + E820_TYPE_SOFT_RESERVED = 0xefffffff, + + /* * Reserved RAM used by the kernel itself if * CONFIG_INTEL_TXT=y is enabled, memory of this type * will be included in the S3 integrity calculation diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 43a82e59c59d..d028e9acdf1c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -extern void efi_reserve_boot_services(void); struct efi_setup_data { u64 fw_vendor; @@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); +extern void efi_find_mirror(void); +extern void efi_reserve_boot_services(void); #else static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline bool efi_reboot_required(void) @@ -254,6 +255,20 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } +static inline void efi_find_mirror(void) +{ +} +static inline void efi_reserve_boot_services(void) +{ +} #endif /* CONFIG_EFI */ +#ifdef CONFIG_EFI_FAKE_MEMMAP +extern void __init efi_fake_memmap_early(void); +#else +static inline void efi_fake_memmap_early(void) +{ +} +#endif + #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 0c47aa82e2e2..28183ee3cc42 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -156,7 +156,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 7741e211f7f5..5f10f7f2098d 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -86,6 +86,8 @@ #define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) /* AccessReenlightenmentControls privilege */ #define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) +/* AccessTscInvariantControls privilege */ +#define HV_X64_ACCESS_TSC_INVARIANT BIT(15) /* * Feature identification: indicates which flags were specified at partition @@ -278,6 +280,9 @@ #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +/* TSC invariant control */ +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + /* * Declare the MSR used to setup pages used to communicate with the hypervisor. */ diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..02c6ef8f7667 --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include <linux/refcount.h> +#include <asm/processor.h> + +struct io_bitmap { + u64 sequence; + refcount_t refcnt; + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(void); + +void tss_update_io_bitmap(void); +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(void) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5e7d6b46de97..6802c59e8252 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -66,10 +66,6 @@ struct kimage; # define KEXEC_ARCH KEXEC_ARCH_X86_64 #endif -/* Memory to backup during crash kdump */ -#define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ - /* * This function is responsible for capturing register states if coming * via panic otherwise just fix up the ss and sp if coming via kernel @@ -154,12 +150,6 @@ struct kimage_arch { pud_t *pud; pmd_t *pmd; pte_t *pte; - /* Details of backup region */ - unsigned long backup_src_start; - unsigned long backup_src_sz; - - /* Physical address of backup segment */ - unsigned long backup_load_addr; /* Core ELF header buffer */ void *elf_headers; diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..365111789cc6 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,10 +13,6 @@ #ifdef __ASSEMBLY__ -#define GLOBAL(name) \ - .globl name; \ - name: - #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) #define __ALIGN .p2align 4, 0x90 #define __ALIGN_STR __stringify(__ALIGN) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 16ae821483c8..5f33924e200f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -26,12 +26,14 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #ifdef CONFIG_PERF_EVENTS +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) { if (static_branch_unlikely(&rdpmc_always_available_key) || - atomic_read(&mm->context.perf_rdpmc_allowed)) + (!static_branch_unlikely(&rdpmc_never_available_key) && + atomic_read(&mm->context.perf_rdpmc_allowed))) cr4_set_bits_irqsoff(X86_CR4_PCE); else cr4_clear_bits_irqsoff(X86_CR4_PCE); diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..c215d2762488 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -15,6 +15,8 @@ struct mod_arch_specific { #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " #elif defined CONFIG_M486 #define MODULE_PROC_FAMILY "486 " #elif defined CONFIG_M586 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 69089d46f128..86e7317eb31f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) -{ - PVOP_VCALL1(cpu.set_iopl_mask, mask); -} static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 70b654f3ffe5..84812964d3dd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,8 +140,6 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index e662f987dfa2..90d0731fdcb6 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -12,8 +12,6 @@ #include <asm/pat.h> #include <asm/x86_init.h> -#ifdef __KERNEL__ - struct pci_sysdata { int domain; /* PCI domain */ int node; /* NUMA node */ @@ -118,11 +116,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif -#endif /* __KERNEL__ */ - -#ifdef CONFIG_X86_64 -#include <asm/pci_64.h> -#endif /* generic pci stuff */ #include <asm-generic/pci.h> diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h deleted file mode 100644 index f5411de0ae11..000000000000 --- a/arch/x86/include/asm/pci_64.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PCI_64_H -#define _ASM_X86_PCI_64_H - -#ifdef __KERNEL__ - -#ifdef CONFIG_CALGARY_IOMMU -static inline void *pci_iommu(struct pci_bus *bus) -{ - struct pci_sysdata *sd = bus->sysdata; - return sd->iommu; -} - -static inline void set_pci_iommu(struct pci_bus *bus, void *val) -{ - struct pci_sysdata *sd = bus->sysdata; - sd->iommu = val; -} -#endif /* CONFIG_CALGARY_IOMMU */ - -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index e3633795fb22..5afb5e0fe903 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) #define pmd_read_atomic pmd_read_atomic /* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are + * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by GCC. Problem is, in certain places + * where pte_offset_map_lock() is called, concurrent page faults are * allowed, if the mmap_sem is hold for reading. An example is mincore * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_populate() rightfully does a set_64bit(), but if we're reading the * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pmd_offset_map_lock() while holding the + * because GCC will not read the 64-bit value of the pmd atomically. + * + * To fix this all places running pte_offset_map_lock() while holding the * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * function to know if the pmd is null or not, and in turn to know if + * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So + * Without THP if the mmap_sem is held for reading, the pmd can only + * transition from null to not null while pmd_read_atomic() runs. So * we can always return atomic pmd values with this function. * - * With THP if the mmap_sem is hold for reading, the pmd can become + * With THP if the mmap_sem is held for reading, the pmd can become * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and + * at any time under pmd_read_atomic(). We could read it truly + * atomically here with an atomic64_read() for the THP enabled case (and * it would be a whole lot simpler), but to avoid using cmpxchg8b we * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none. In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. + * found to be stable (i.e. pointing to a pte). We are also returning a + * 'none' (zero) pmdval if the low part of the pmd is zero. + * + * In some cases the high and low part of the pmdval returned may not be + * consistent if THP is enabled (the low part may point to previously + * mapped hugepage, while the high part may point to a more recently + * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only + * needs the low part of the pmd to be read atomically to decide if the + * pmd is unstable or not, with the only exception when the low part + * of the pmd is zero, in which case we return a 'none' pmd. */ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..19f5807260c3 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) #define LDT_BASE_ADDR \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 54f5d54280f6..e51afbb0cbfb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include <asm/math_emu.h> @@ -93,7 +94,15 @@ struct cpuinfo_x86 { __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; - __u32 x86_capability[NCAPINTS + NBUGINTS]; + /* + * Align to size of unsigned long because the x86_capability array + * is passed to bitops which require the alignment. Use unnamed + * union to enforce the array is aligned to size of unsigned long. + */ + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ @@ -328,10 +337,32 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. + */ +# define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { unsigned long words[64]; @@ -341,13 +372,21 @@ struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. */ - struct x86_hw_tss x86_tss; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -355,21 +394,28 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) - /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; @@ -480,10 +526,14 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; - unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; + struct io_bitmap *io_bitmap; + + /* + * IOPL. Priviledge level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. + */ + unsigned long iopl_emul; mm_segment_t addr_limit; @@ -515,25 +565,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif -} - static inline void native_load_sp0(unsigned long sp0) { @@ -573,7 +604,6 @@ static inline void load_sp0(unsigned long sp0) native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ @@ -841,7 +871,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ .addr_limit = KERNEL_DS, \ } @@ -958,7 +987,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); -extern void free_kernel_image_pages(void *begin, void *end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 332eb3525867..5057a8ed100b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -361,5 +361,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 92c34e517da1..5528e9325049 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -6,16 +6,6 @@ #include <linux/purgatory.h> extern void purgatory(void); -/* - * These forward declarations serve two purposes: - * - * 1) Make sparse happy when checking arch/purgatory - * 2) Document that these are required to be global so the symbol - * lookup in kexec works - */ -extern unsigned long purgatory_backup_dest; -extern unsigned long purgatory_backup_src; -extern unsigned long purgatory_backup_sz; #endif /* __ASSEMBLY__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index 232f856e0db0..000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include <linux/refcount.h> -#include <asm/bug.h> - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. - */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? */ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h deleted file mode 100644 index 0a21986d2238..000000000000 --- a/arch/x86/include/asm/rio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Derived from include/asm-x86/mach-summit/mach_mpparse.h - * and include/asm-x86/mach-default/bios_ebda.h - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - */ - -#ifndef _ASM_X86_RIO_H -#define _ASM_X86_RIO_H - -#define RIO_TABLE_VERSION 3 - -struct rio_table_hdr { - u8 version; /* Version number of this data structure */ - u8 num_scal_dev; /* # of Scalability devices */ - u8 num_rio_dev; /* # of RIO I/O devices */ -} __attribute__((packed)); - -struct scal_detail { - u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF = None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port2node; /* Node ID port connected to: 0xFF = None */ - u8 port2port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ - u8 type; /* Type of device */ - u8 owner_id; /* Node ID of Hurricane that owns this */ - /* node */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF=None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 first_slot; /* Lowest slot number below this Calgary */ - u8 status; /* Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie: */ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - u8 WP_index; /* instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - u8 chassis_num; /* 1 based Chassis number */ -} __attribute__((packed)); - -enum { - HURR_SCALABILTY = 0, /* Hurricane Scalability info */ - HURR_RIOIB = 2, /* Hurricane RIOIB info */ - COMPAT_CALGARY = 4, /* Compatibility Calgary */ - ALT_CALGARY = 5, /* Second Planar Calgary */ -}; - -#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 71b32f2570ab..036c360910c5 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,7 +6,6 @@ #include <asm/extable.h> extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; extern char __end_rodata_aligned[]; #if defined(CONFIG_X86_64) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..6669164abadc 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 18a4b6890fa8..0e059b73437b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task) if (static_cpu_has(X86_FEATURE_XENPV)) load_sp0(task_top_of_stack(task)); #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, + unsigned long fun, unsigned long arg) +{ + frame->bx = fun; +#ifdef CONFIG_X86_32 + frame->di = arg; +#else + frame->r12 = arg; +#endif } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..e2389ce9bf58 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,8 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +struct pt_regs; + /* Mapping of registers to parameters for syscalls on x86-64 and x32 */ #define SC_X86_64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -28,13 +30,21 @@ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this * case as well. */ +#define __IA32_COMPAT_SYS_STUB0(x, name) \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name(); \ + } + #define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #define __IA32_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ @@ -48,16 +58,23 @@ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias * named __ia32_sys_*() */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } \ + asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ + { \ + return sys_ni_syscall(); \ + } #define SYS_NI(name) \ SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ @@ -75,15 +92,24 @@ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(x, name, ...) \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name();\ + } + #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUB0(x, name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #endif /* CONFIG_X86_X32 */ @@ -94,6 +120,17 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long __se_compat_sys_##name(void); \ + static inline long __do_compat_sys_##name(void); \ + __IA32_COMPAT_SYS_STUB0(x, name) \ + __X32_COMPAT_SYS_STUB0(x, name) \ + static long __se_compat_sys_##name(void) \ + { \ + return __do_compat_sys_##name(); \ + } \ + static inline long __do_compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -181,15 +218,19 @@ * macros to work correctly. */ #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } #endif #ifndef SYS_NI @@ -201,7 +242,6 @@ * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); asmlinkage long __x64_sys_time(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h deleted file mode 100644 index 6ed2deacf1d0..000000000000 --- a/arch/x86/include/asm/tce.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file is derived from asm-powerpc/tce.h. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * Author: Jon Mason <jdmason@us.ibm.com> - */ - -#ifndef _ASM_X86_TCE_H -#define _ASM_X86_TCE_H - -extern unsigned int specified_table_size; -struct iommu_table; - -#define TCE_ENTRY_SIZE 8 /* in bytes */ - -#define TCE_READ_SHIFT 0 -#define TCE_WRITE_SHIFT 1 -#define TCE_HUBID_SHIFT 2 /* unused */ -#define TCE_RSVD_SHIFT 8 /* unused */ -#define TCE_RPN_SHIFT 12 -#define TCE_UNUSED_SHIFT 48 /* unused */ - -#define TCE_RPN_MASK 0x0000fffffffff000ULL - -extern void tce_build(struct iommu_table *tbl, unsigned long index, - unsigned int npages, unsigned long uaddr, int direction); -extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages); -extern void * __init alloc_tce_table(void); -extern void __init free_tce_table(void *tbl); -extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); - -#endif /* _ASM_X86_TCE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f9453536f9bb..d779366ce3f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -143,8 +143,8 @@ struct thread_info { _TIF_NOHZ) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* @@ -156,8 +156,14 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index ace464f09681..4d705cb4d63b 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask, __entry->ncpus, __entry->vector) ); +TRACE_EVENT(hyperv_send_ipi_one, + TP_PROTO(int cpu, + int vector), + TP_ARGS(cpu, vector), + TP_STRUCT__entry( + __field(int, cpu) + __field(int, vector) + ), + TP_fast_assign(__entry->cpu = cpu; + __entry->vector = vector; + ), + TP_printk("cpu %d vector %x", + __entry->cpu, __entry->vector) + ); + #endif /* CONFIG_HYPERV */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h index db43f2a0d92c..aeed98c3c9e1 100644 --- a/arch/x86/include/asm/umip.h +++ b/arch/x86/include/asm/umip.h @@ -4,9 +4,9 @@ #include <linux/types.h> #include <asm/ptrace.h> -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP bool fixup_umip_exception(struct pt_regs *regs); #else static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } -#endif /* CONFIG_X86_INTEL_UMIP */ +#endif /* CONFIG_X86_UMIP */ #endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 6e7caf65fa40..389174eaec79 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); -extern void uv_bios_init(void); +extern int uv_bios_init(void); extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6bc6d89d8e2a..45ea95ce79b4 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,6 +12,16 @@ struct mm_struct; #ifdef CONFIG_X86_UV #include <linux/efi.h> +#define UV_PROC_NODE "sgi_uv" + +static inline int uv(int uvtype) +{ + /* uv(0) is "any" */ + if (uvtype >= 0 && uvtype <= 30) + return 1 << uvtype; + return 1; +} + extern unsigned long uv_systab_phys; extern enum uv_system_type get_uv_system_type(void); @@ -20,7 +30,8 @@ static inline bool is_early_uv_system(void) return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; } extern int is_uv_system(void); -extern int is_uv_hubless(void); +extern int is_uv_hubbed(int uvtype); +extern int is_uv_hubless(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); @@ -32,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } -static inline int is_uv_hubless(void) { return 0; } +static inline int is_uv_hubbed(int uv) { return 0; } +static inline int is_uv_hubless(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 44cf6d6deb7a..950cd1395d5d 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -19,6 +19,7 @@ #include <linux/topology.h> #include <asm/types.h> #include <asm/percpu.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/bios.h> #include <asm/irq_vectors.h> @@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version) #define UV4_HUB_REVISION_BASE 7 #define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ -#ifdef UV1_HUB_IS_SUPPORTED +/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */ static inline int is_uv1_hub(void) { - return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; -} +#ifdef UV1_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(1)); #else -static inline int is_uv1_hub(void) -{ return 0; -} #endif +} -#ifdef UV2_HUB_IS_SUPPORTED static inline int is_uv2_hub(void) { - return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) && - (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE)); -} +#ifdef UV2_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(2)); #else -static inline int is_uv2_hub(void) -{ return 0; -} #endif +} -#ifdef UV3_HUB_IS_SUPPORTED static inline int is_uv3_hub(void) { - return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) && - (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)); -} +#ifdef UV3_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(3)); #else -static inline int is_uv3_hub(void) -{ return 0; -} #endif +} /* First test "is UV4A", then "is UV4" */ -#ifdef UV4A_HUB_IS_SUPPORTED -static inline int is_uv4a_hub(void) -{ - return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE); -} -#else static inline int is_uv4a_hub(void) { +#ifdef UV4A_HUB_IS_SUPPORTED + if (is_uv_hubbed(uv(4))) + return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE); +#endif return 0; } -#endif -#ifdef UV4_HUB_IS_SUPPORTED static inline int is_uv4_hub(void) { - return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE; -} +#ifdef UV4_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(4)); #else -static inline int is_uv4_hub(void) -{ return 0; -} #endif +} static inline int is_uvx_hub(void) { - if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) - return uv_hub_info->hub_revision; - - return 0; + return (is_uv_hubbed(-2) >= uv(2)); } static inline int is_uv_hub(void) { -#ifdef UV1_HUB_IS_SUPPORTED - return uv_hub_info->hub_revision; -#endif - return is_uvx_hub(); + return is_uv1_hub() || is_uvx_hub(); } union uvh_apicid { diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 42e1245af0d8..ff4b52e37e60 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index c895df5482c5..8669c6bdbb84 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data types */ +/* setup_data/setup_indirect types */ #define SETUP_NONE 0 #define SETUP_E820_EXT 1 #define SETUP_DTB 2 @@ -11,6 +11,11 @@ #define SETUP_APPLE_PROPERTIES 5 #define SETUP_JAILHOUSE 6 +#define SETUP_INDIRECT (1<<31) + +/* SETUP_INDIRECT | max(SETUP_*) */ +#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE) + /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 @@ -49,6 +54,14 @@ struct setup_data { __u8 data[0]; }; +/* extensible setup indirect data node */ +struct setup_indirect { + __u32 type; + __u32 reserved; /* Reserved, must be set to zero. */ + __u64 len; + __u64 addr; +}; + struct setup_header { __u8 setup_sects; __u16 root_flags; @@ -88,6 +101,7 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; + __u32 kernel_info_offset; } __attribute__((packed)); struct sys_desc_table { @@ -139,15 +153,22 @@ struct boot_e820_entry { * setup data structure. */ struct jailhouse_setup_data { - __u16 version; - __u16 compatible_version; - __u16 pm_timer_address; - __u16 num_cpus; - __u64 pci_mmconfig_base; - __u32 tsc_khz; - __u32 apic_khz; - __u8 standard_ioapic; - __u8 cpu_ids[255]; + struct { + __u16 version; + __u16 compatible_version; + } __attribute__((packed)) hdr; + struct { + __u16 pm_timer_address; + __u16 num_cpus; + __u64 pci_mmconfig_base; + __u32 tsc_khz; + __u32 apic_khz; + __u8 standard_ioapic; + __u8 cpu_ids[255]; + } __attribute__((packed)) v1; + struct { + __u32 flags; + } __attribute__((packed)) v2; } __attribute__((packed)); /* The so-called "zeropage" */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3578ad248bc9..32acb970f416 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -134,7 +134,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o -obj-$(CONFIG_X86_INTEL_UMIP) += umip.o +obj-$(CONFIG_X86_UMIP) += umip.o obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o @@ -146,7 +146,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o - obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index e95e95960156..daf88f8143c5 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -9,8 +9,7 @@ .code32 ALIGN -ENTRY(wakeup_pmode_return) -wakeup_pmode_return: +SYM_CODE_START(wakeup_pmode_return) movw $__KERNEL_DS, %ax movw %ax, %ss movw %ax, %fs @@ -39,6 +38,7 @@ wakeup_pmode_return: # jump to place where we left off movl saved_eip, %eax jmp *%eax +SYM_CODE_END(wakeup_pmode_return) bogus_magic: jmp bogus_magic @@ -72,7 +72,7 @@ restore_registers: popfl ret -ENTRY(do_suspend_lowlevel) +SYM_CODE_START(do_suspend_lowlevel) call save_processor_state call save_registers pushl $3 @@ -87,10 +87,11 @@ ret_point: call restore_registers call restore_processor_state ret +SYM_CODE_END(do_suspend_lowlevel) .data ALIGN -ENTRY(saved_magic) .long 0 +SYM_DATA(saved_magic, .long 0) saved_eip: .long 0 # saved registers diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 7f9ade13bbcf..c8daa92f38dc 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -14,7 +14,7 @@ /* * Hooray, we are in Long 64-bit mode (but still running in low memory) */ -ENTRY(wakeup_long64) +SYM_FUNC_START(wakeup_long64) movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax @@ -40,9 +40,9 @@ ENTRY(wakeup_long64) movq saved_rip, %rax jmp *%rax -ENDPROC(wakeup_long64) +SYM_FUNC_END(wakeup_long64) -ENTRY(do_suspend_lowlevel) +SYM_FUNC_START(do_suspend_lowlevel) FRAME_BEGIN subq $8, %rsp xorl %eax, %eax @@ -125,7 +125,7 @@ ENTRY(do_suspend_lowlevel) addq $8, %rsp FRAME_END jmp restore_processor_state -ENDPROC(do_suspend_lowlevel) +SYM_FUNC_END(do_suspend_lowlevel) .data saved_rbp: .quad 0 @@ -136,4 +136,4 @@ saved_rbx: .quad 0 saved_rip: .quad 0 saved_rsp: .quad 0 -ENTRY(saved_magic) .quad 0 +SYM_DATA(saved_magic, .quad 0) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 05feaec5d3d7..28446fa6bf18 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2337,7 +2337,7 @@ static int cpuid_to_apicid[] = { #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread - * @id: APIC ID to check + * @apicid: APIC ID to check */ bool apic_id_is_primary_thread(unsigned int apicid) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d6af97fd170a..913c88617848 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1725,19 +1725,20 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { - if (unlikely(masked)) { + if (unlikely(moveit)) { /* Only migrate the irq if the ack has been received. * * On rare occasions the broadcast level triggered ack gets @@ -1766,15 +1767,17 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { } #endif @@ -1783,11 +1786,11 @@ static void ioapic_ack_level(struct irq_data *irq_data) { struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; - bool masked; + bool moveit; int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(irq_data); + moveit = ioapic_prepare_move(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -1842,7 +1845,7 @@ static void ioapic_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(cfg->vector, irq_data->chip_data); } - ioapic_irqd_unmask(irq_data, masked); + ioapic_finish_move(irq_data, moveit); } static void ioapic_ir_ack_level(struct irq_data *irq_data) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e6230af19864..d5b51a740524 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -14,6 +14,8 @@ #include <linux/memory.h> #include <linux/export.h> #include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/efi.h> #include <asm/e820/api.h> #include <asm/uv/uv_mmrs.h> @@ -25,12 +27,17 @@ static DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; -static bool uv_hubless_system; +static int uv_hubbed_system; +static int uv_hubless_system; static u64 gru_start_paddr, gru_end_paddr; static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; static u64 gru_dist_lmask, gru_dist_umask; static union uvh_apicid uvh_apicid; +/* Unpack OEM/TABLE ID's to be NULL terminated strings */ +static u8 oem_id[ACPI_OEM_ID_SIZE + 1]; +static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; + /* Information derived from CPUID: */ static struct { unsigned int apicid_shift; @@ -248,17 +255,35 @@ static void __init uv_set_apicid_hibit(void) } } -static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static void __init uv_stringify(int len, char *to, char *from) +{ + /* Relies on 'to' being NULL chars so result will be NULL terminated */ + strncpy(to, from, len-1); +} + +static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id) { int pnodeid; int uv_apic; + uv_stringify(sizeof(oem_id), oem_id, _oem_id); + uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id); + if (strncmp(oem_id, "SGI", 3) != 0) { - if (strncmp(oem_id, "NSGI", 4) == 0) { - uv_hubless_system = true; - pr_info("UV: OEM IDs %s/%s, HUBLESS\n", - oem_id, oem_table_id); - } + if (strncmp(oem_id, "NSGI", 4) != 0) + return 0; + + /* UV4 Hubless, CH, (0x11:UV4+Any) */ + if (strncmp(oem_id, "NSGI4", 5) == 0) + uv_hubless_system = 0x11; + + /* UV3 Hubless, UV300/MC990X w/o hub (0x9:UV3+Any) */ + else + uv_hubless_system = 0x9; + + pr_info("UV: OEM IDs %s/%s, HUBLESS(0x%x)\n", + oem_id, oem_table_id, uv_hubless_system); + return 0; } @@ -286,6 +311,24 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (uv_hub_info->hub_revision == 0) goto badbios; + switch (uv_hub_info->hub_revision) { + case UV4_HUB_REVISION_BASE: + uv_hubbed_system = 0x11; + break; + + case UV3_HUB_REVISION_BASE: + uv_hubbed_system = 0x9; + break; + + case UV2_HUB_REVISION_BASE: + uv_hubbed_system = 0x5; + break; + + case UV1_HUB_REVISION_BASE: + uv_hubbed_system = 0x3; + break; + } + pnodeid = early_get_pnodeid(); early_get_apic_socketid_shift(); @@ -336,9 +379,15 @@ int is_uv_system(void) } EXPORT_SYMBOL_GPL(is_uv_system); -int is_uv_hubless(void) +int is_uv_hubbed(int uvtype) +{ + return (uv_hubbed_system & uvtype); +} +EXPORT_SYMBOL_GPL(is_uv_hubbed); + +int is_uv_hubless(int uvtype) { - return uv_hubless_system; + return (uv_hubless_system & uvtype); } EXPORT_SYMBOL_GPL(is_uv_hubless); @@ -1255,7 +1304,8 @@ static int __init decode_uv_systab(void) struct uv_systab *st; int i; - if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE) + /* If system is uv3 or lower, there is no extended UVsystab */ + if (is_uv_hubbed(0xfffffe) < uv(4) && is_uv_hubless(0xfffffe) < uv(4)) return 0; /* No extended UVsystab required */ st = uv_systab; @@ -1434,6 +1484,103 @@ static void __init build_socket_tables(void) } } +/* Check which reboot to use */ +static void check_efi_reboot(void) +{ + /* If EFI reboot not available, use ACPI reboot */ + if (!efi_enabled(EFI_BOOT)) + reboot_type = BOOT_ACPI; +} + +/* Setup user proc fs files */ +static int proc_hubbed_show(struct seq_file *file, void *data) +{ + seq_printf(file, "0x%x\n", uv_hubbed_system); + return 0; +} + +static int proc_hubless_show(struct seq_file *file, void *data) +{ + seq_printf(file, "0x%x\n", uv_hubless_system); + return 0; +} + +static int proc_oemid_show(struct seq_file *file, void *data) +{ + seq_printf(file, "%s/%s\n", oem_id, oem_table_id); + return 0; +} + +static int proc_hubbed_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_hubbed_show, (void *)NULL); +} + +static int proc_hubless_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_hubless_show, (void *)NULL); +} + +static int proc_oemid_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_oemid_show, (void *)NULL); +} + +/* (struct is "non-const" as open function is set at runtime) */ +static struct file_operations proc_version_fops = { + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_oemid_fops = { + .open = proc_oemid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static __init void uv_setup_proc_files(int hubless) +{ + struct proc_dir_entry *pde; + char *name = hubless ? "hubless" : "hubbed"; + + pde = proc_mkdir(UV_PROC_NODE, NULL); + proc_create("oemid", 0, pde, &proc_oemid_fops); + proc_create(name, 0, pde, &proc_version_fops); + if (hubless) + proc_version_fops.open = proc_hubless_open; + else + proc_version_fops.open = proc_hubbed_open; +} + +/* Initialize UV hubless systems */ +static __init int uv_system_init_hubless(void) +{ + int rc; + + /* Setup PCH NMI handler */ + uv_nmi_setup_hubless(); + + /* Init kernel/BIOS interface */ + rc = uv_bios_init(); + if (rc < 0) + return rc; + + /* Process UVsystab */ + rc = decode_uv_systab(); + if (rc < 0) + return rc; + + /* Create user access node */ + if (rc >= 0) + uv_setup_proc_files(1); + + check_efi_reboot(); + + return rc; +} + static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; @@ -1559,32 +1706,27 @@ static void __init uv_system_init_hub(void) uv_nmi_setup(); uv_cpu_init(); uv_scir_register_cpu_notifier(); - proc_mkdir("sgi_uv", NULL); + uv_setup_proc_files(0); /* Register Legacy VGA I/O redirection handler: */ pci_register_set_vga_state(uv_set_vga_state); - /* - * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as - * EFI is not enabled in the kdump kernel: - */ - if (is_kdump_kernel()) - reboot_type = BOOT_ACPI; + check_efi_reboot(); } /* - * There is a small amount of UV specific code needed to initialize a - * UV system that does not have a "UV HUB" (referred to as "hubless"). + * There is a different code path needed to initialize a UV system that does + * not have a "UV HUB" (referred to as "hubless"). */ void __init uv_system_init(void) { - if (likely(!is_uv_system() && !is_uv_hubless())) + if (likely(!is_uv_system() && !is_uv_hubless(1))) return; if (is_uv_system()) uv_system_init_hub(); else - uv_nmi_setup_hubless(); + uv_system_init_hubless(); } apic_driver(apic_x2apic_uv_x); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4c7b0fa15a19..8bf64899f56a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ @@ -108,6 +109,12 @@ void __init check_bugs(void) mds_select_mitigation(); taa_select_mitigation(); + /* + * As MDS and TAA mitigations are inter-related, print MDS + * mitigation until after TAA mitigation selection is done. + */ + mds_print_mitigation(); + arch_smt_update(); #ifdef CONFIG_X86_32 @@ -245,6 +252,12 @@ static void __init mds_select_mitigation(void) (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } +} + +static void __init mds_print_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + return; pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -304,8 +317,12 @@ static void __init taa_select_mitigation(void) return; } - /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ - if (taa_mitigation == TAA_MITIGATION_OFF) + /* + * TAA mitigation via VERW is turned off if both + * tsx_async_abort=off and mds=off are specified. + */ + if (taa_mitigation == TAA_MITIGATION_OFF && + mds_mitigation == MDS_MITIGATION_OFF) goto out; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) @@ -339,6 +356,15 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); + /* + * Update MDS mitigation, if necessary, as the mds_user_clear is + * now enabled for TAA mitigation. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } out: pr_info("%s\n", taa_strings[taa_mitigation]); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fffe21945374..baa2fed8deb6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -53,10 +53,7 @@ #include <asm/microcode_intel.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> - -#ifdef CONFIG_X86_LOCAL_APIC #include <asm/uv/uv.h> -#endif #include "cpu.h" @@ -565,8 +562,9 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; -__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; +/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */ +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); +__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); void load_percpu_segment(int cpu) { @@ -1780,7 +1778,7 @@ static void wait_for_master_cpu(int cpu) } #ifdef CONFIG_X86_64 -static void setup_getcpu(int cpu) +static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); struct desc_struct d = { }; @@ -1800,7 +1798,59 @@ static void setup_getcpu(int cpu) write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S); } + +static inline void ucode_cpu_init(int cpu) +{ + if (cpu) + load_ucode_ap(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) +{ + /* Set up the per-CPU TSS IST stacks */ + tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); + tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); + tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); + tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); +} + +static inline void gdt_setup_doublefault_tss(int cpu) { } + +#else /* CONFIG_X86_64 */ + +static inline void setup_getcpu(int cpu) { } + +static inline void ucode_cpu_init(int cpu) +{ + show_ucode_info_early(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) { } + +static inline void gdt_setup_doublefault_tss(int cpu) +{ +#ifdef CONFIG_DOUBLEFAULT + /* Set up the doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif +} +#endif /* !CONFIG_X86_64 */ + +static inline void tss_setup_io_bitmap(struct tss_struct *tss) +{ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + +#ifdef CONFIG_X86_IOPL_IOPERM + tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; #endif +} /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1808,21 +1858,15 @@ static void setup_getcpu(int cpu) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -#ifdef CONFIG_X86_64 - void cpu_init(void) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - struct task_struct *me; - struct tss_struct *t; - int i; wait_for_master_cpu(cpu); - if (cpu) - load_ucode_ap(); - - t = &per_cpu(cpu_tss_rw, cpu); + ucode_cpu_init(cpu); #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && @@ -1831,63 +1875,47 @@ void cpu_init(void) #endif setup_getcpu(cpu); - me = current; - pr_debug("Initializing CPU#%d\n", cpu); - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) + cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - switch_to_new_gdt(cpu); - loadsegment(fs, 0); - load_current_idt(); - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); + if (IS_ENABLED(CONFIG_X86_64)) { + loadsegment(fs, 0); + memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); - x86_configure_nx(); - x2apic_setup(); + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); - /* - * set up and load the per-CPU TSS - */ - if (!t->x86_tss.ist[0]) { - t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); - t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); - t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); - t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); + x2apic_setup(); } - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - mmgrab(&init_mm); - me->active_mm = &init_mm; - BUG_ON(me->mm); + cur->active_mm = &init_mm; + BUG_ON(cur->mm); initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, me); + enter_lazy_tlb(&init_mm, cur); - /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. - */ + /* Initialize the TSS. */ + tss_setup_ist(tss); + tss_setup_io_bitmap(tss); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + load_TR_desc(); + /* + * sp0 points to the entry trampoline stack regardless of what task + * is running. + */ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); @@ -1895,6 +1923,8 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); + gdt_setup_doublefault_tss(cpu); + fpu__init_cpu(); if (is_uv_system()) @@ -1903,63 +1933,6 @@ void cpu_init(void) load_fixmap_gdt(cpu); } -#else - -void cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); - - wait_for_master_cpu(cpu); - - show_ucode_info_early(); - - pr_info("Initializing CPU#%d\n", cpu); - - if (cpu_feature_enabled(X86_FEATURE_VME) || - boot_cpu_has(X86_FEATURE_TSC) || - boot_cpu_has(X86_FEATURE_DE)) - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_current_idt(); - switch_to_new_gdt(cpu); - - /* - * Set up and load the per-CPU TSS and LDT - */ - mmgrab(&init_mm); - curr->active_mm = &init_mm; - BUG_ON(curr->mm); - initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, curr); - - /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. - */ - set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); - load_TR_desc(); - load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); - - load_mm_ldt(&init_mm); - - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - clear_all_debug_regs(); - dbg_restore_debug_regs(); - - fpu__init_cpu(); - - load_fixmap_gdt(cpu); -} -#endif - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 11d5c5950e2d..4a900804a023 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -819,7 +819,7 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, - { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" }, + { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, @@ -847,7 +847,7 @@ static const struct _tlb_table intel_tlb_table[] = { { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, - { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" }, + { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, { 0x00, 0, 0 } }; @@ -859,8 +859,8 @@ static void intel_tlb_lookup(const unsigned char desc) return; /* look up this descriptor in the table */ - for (k = 0; intel_tlb_table[k].descriptor != desc && \ - intel_tlb_table[k].descriptor != 0; k++) + for (k = 0; intel_tlb_table[k].descriptor != desc && + intel_tlb_table[k].descriptor != 0; k++) ; if (intel_tlb_table[k].tlb_type == 0) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c656d92cd708..caa032ce3fe3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void) machine_ops.shutdown = hv_machine_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif - mark_tsc_unstable("running on Hyper-V"); + if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) { + wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1); + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); + } else { + mark_tsc_unstable("running on Hyper-V"); + } /* * Generation 2 instances don't support reading the NMI status from diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 5c900f9527ff..c4be62058dd9 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -29,7 +29,8 @@ __setup("nordrand", x86_rdrand_setup); #ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { - unsigned long tmp; + unsigned int changed = 0; + unsigned long tmp, prev; int i; if (!cpu_has(c, X86_FEATURE_RDRAND)) @@ -42,5 +43,24 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } + + /* + * Stupid sanity-check whether RDRAND does *actually* generate + * some at least random-looking data. + */ + prev = tmp; + for (i = 0; i < SANITY_CHECK_LOOPS; i++) { + if (rdrand_long(&tmp)) { + if (prev != tmp) + changed++; + + prev = tmp; + } + } + + if (WARN_ON_ONCE(!changed)) + pr_emerg( +"RDRAND gives funky smelling output, might consider not using it by booting with \"nordrand\""); + } #endif diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index eb651fbde92a..00fc55ac7ffa 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/memblock.h> #include <asm/processor.h> #include <asm/hardirq.h> @@ -39,6 +40,7 @@ #include <asm/virtext.h> #include <asm/intel_pt.h> #include <asm/crash.h> +#include <asm/cmdline.h> /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { @@ -68,6 +70,19 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) rcu_read_unlock(); } +/* + * When the crashkernel option is specified, only use the low + * 1M for the real mode trampoline. + */ +void __init crash_reserve_low_1M(void) +{ + if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0) + return; + + memblock_reserve(0, 1<<20); + pr_info("Reserving the low 1M of memory for crashkernel\n"); +} + #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) @@ -173,8 +188,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_KEXEC_FILE -static unsigned long crash_zero_bytes; - static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -189,8 +202,7 @@ static struct crash_mem *fill_up_crash_elf_data(void) unsigned int nr_ranges = 0; struct crash_mem *cmem; - walk_system_ram_res(0, -1, &nr_ranges, - get_nr_ram_ranges_callback); + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); if (!nr_ranges) return NULL; @@ -217,15 +229,19 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) { int ret = 0; + /* Exclude the low 1M because it is always reserved */ + ret = crash_exclude_mem_range(cmem, 0, 1<<20); + if (ret) + return ret; + /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (ret) return ret; - if (crashk_low_res.end) { + if (crashk_low_res.end) ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - } + crashk_low_res.end); return ret; } @@ -246,16 +262,13 @@ static int prepare_elf_headers(struct kimage *image, void **addr, unsigned long *sz) { struct crash_mem *cmem; - Elf64_Ehdr *ehdr; - Elf64_Phdr *phdr; - int ret, i; + int ret; cmem = fill_up_crash_elf_data(); if (!cmem) return -ENOMEM; - ret = walk_system_ram_res(0, -1, cmem, - prepare_elf64_ram_headers_callback); + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); if (ret) goto out; @@ -265,24 +278,8 @@ static int prepare_elf_headers(struct kimage *image, void **addr, goto out; /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, - IS_ENABLED(CONFIG_X86_64), addr, sz); - if (ret) - goto out; + ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); - /* - * If a range matches backup region, adjust offset to backup - * segment. - */ - ehdr = (Elf64_Ehdr *)*addr; - phdr = (Elf64_Phdr *)(ehdr + 1); - for (i = 0; i < ehdr->e_phnum; phdr++, i++) - if (phdr->p_type == PT_LOAD && - phdr->p_paddr == image->arch.backup_src_start && - phdr->p_memsz == image->arch.backup_src_sz) { - phdr->p_offset = image->arch.backup_load_addr; - break; - } out: vfree(cmem); return ret; @@ -296,8 +293,7 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE) return 1; - memcpy(¶ms->e820_table[nr_e820_entries], entry, - sizeof(struct e820_entry)); + memcpy(¶ms->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry)); params->e820_entries++; return 0; } @@ -321,19 +317,11 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, unsigned long long mend) { unsigned long start, end; - int ret = 0; cmem->ranges[0].start = mstart; cmem->ranges[0].end = mend; cmem->nr_ranges = 1; - /* Exclude Backup region */ - start = image->arch.backup_load_addr; - end = start + image->arch.backup_src_sz - 1; - ret = crash_exclude_mem_range(cmem, start, end); - if (ret) - return ret; - /* Exclude elf header region */ start = image->arch.elf_load_addr; end = start + image->arch.elf_headers_sz - 1; @@ -356,28 +344,28 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) memset(&cmd, 0, sizeof(struct crash_memmap_data)); cmd.params = params; - /* Add first 640K segment */ - ei.addr = image->arch.backup_src_start; - ei.size = image->arch.backup_src_sz; - ei.type = E820_TYPE_RAM; - add_e820_entry(params, &ei); + /* Add the low 1M */ + cmd.type = E820_TYPE_RAM; + flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd, + memmap_entry_callback); /* Add ACPI tables */ cmd.type = E820_TYPE_ACPI; flags = IORESOURCE_MEM | IORESOURCE_BUSY; walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, - memmap_entry_callback); + memmap_entry_callback); /* Add ACPI Non-volatile Storage */ cmd.type = E820_TYPE_NVS; walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, - memmap_entry_callback); + memmap_entry_callback); /* Add e820 reserved ranges */ cmd.type = E820_TYPE_RESERVED; flags = IORESOURCE_MEM; walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, - memmap_entry_callback); + memmap_entry_callback); /* Add crashk_low_res region */ if (crashk_low_res.end) { @@ -388,8 +376,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) } /* Exclude some ranges from crashk_res and add rest to memmap */ - ret = memmap_exclude_ranges(image, cmem, crashk_res.start, - crashk_res.end); + ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end); if (ret) goto out; @@ -409,55 +396,12 @@ out: return ret; } -static int determine_backup_region(struct resource *res, void *arg) -{ - struct kimage *image = arg; - - image->arch.backup_src_start = res->start; - image->arch.backup_src_sz = resource_size(res); - - /* Expecting only one range for backup region */ - return 1; -} - int crash_load_segments(struct kimage *image) { int ret; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ULONG_MAX, .top_down = false }; - /* - * Determine and load a segment for backup area. First 640K RAM - * region is backup source - */ - - ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END, - image, determine_backup_region); - - /* Zero or postive return values are ok */ - if (ret < 0) - return ret; - - /* Add backup segment. */ - if (image->arch.backup_src_sz) { - kbuf.buffer = &crash_zero_bytes; - kbuf.bufsz = sizeof(crash_zero_bytes); - kbuf.memsz = image->arch.backup_src_sz; - kbuf.buf_align = PAGE_SIZE; - /* - * Ideally there is no source for backup segment. This is - * copied in purgatory after crash. Just add a zero filled - * segment for now to make sure checksum logic works fine. - */ - ret = kexec_add_buffer(&kbuf); - if (ret) - return ret; - image->arch.backup_load_addr = kbuf.mem; - pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n", - image->arch.backup_load_addr, - image->arch.backup_src_start, kbuf.memsz); - } - /* Prepare elf headers and add a segment */ ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz); if (ret) diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0b8cedb20d6d..0d6c657593f8 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = { .sp0 = STACK_START, .ss0 = __KERNEL_DS, .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, .ip = (unsigned long) doublefault_fn, /* 0x2 bit is always set */ @@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = { .ss = __KERNEL_DS, .ds = __USER_DS, .fs = __KERNEL_PERCPU, +#ifndef CONFIG_X86_32_LAZY_GS + .gs = __KERNEL_STACK_CANARY, +#endif .__cr3 = __pa_nodebug(swapper_pg_dir), }; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7da2bcd2b8eb..c5399e80c59c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type) case E820_TYPE_RAM: /* Fall through: */ case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; case E820_TYPE_RESERVED: pr_cont("reserved"); break; + case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break; case E820_TYPE_ACPI: pr_cont("ACPI data"); break; case E820_TYPE_NVS: pr_cont("ACPI NVS"); break; case E820_TYPE_UNUSABLE: pr_cont("unusable"); break; @@ -999,6 +1000,17 @@ void __init e820__reserve_setup_data(void) data = early_memremap(pa_data, sizeof(*data)); e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { + e820__range_update(((struct setup_indirect *)data->data)->addr, + ((struct setup_indirect *)data->data)->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(((struct setup_indirect *)data->data)->addr, + ((struct setup_indirect *)data->data)->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + } + pa_data = data->next; early_memunmap(data, sizeof(*data)); } @@ -1037,6 +1049,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry) case E820_TYPE_PRAM: return "Persistent Memory (legacy)"; case E820_TYPE_PMEM: return "Persistent Memory"; case E820_TYPE_RESERVED: return "Reserved"; + case E820_TYPE_SOFT_RESERVED: return "Soft Reserved"; default: return "Unknown E820 type"; } } @@ -1052,6 +1065,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry) case E820_TYPE_PRAM: /* Fall-through: */ case E820_TYPE_PMEM: /* Fall-through: */ case E820_TYPE_RESERVED: /* Fall-through: */ + case E820_TYPE_SOFT_RESERVED: /* Fall-through: */ default: return IORESOURCE_MEM; } } @@ -1064,6 +1078,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; + case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED; case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */ @@ -1078,11 +1093,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res) return true; /* - * Treat persistent memory like device memory, i.e. reserve it - * for exclusive use of a driver + * Treat persistent memory and other special memory ranges like + * device memory, i.e. reserve it for exclusive use of a driver */ switch (type) { case E820_TYPE_RESERVED: + case E820_TYPE_SOFT_RESERVED: case E820_TYPE_PRAM: case E820_TYPE_PMEM: return false; @@ -1285,6 +1301,9 @@ void __init e820__memblock_setup(void) if (end != (resource_size_t)end) continue; + if (entry->type == E820_TYPE_SOFT_RESERVED) + memblock_reserve(entry->addr, entry->size); + if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) continue; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e5cb67d67c03..319be936c348 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -60,7 +60,7 @@ u64 xfeatures_mask __read_mostly; static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; -static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; +static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; /* * The XSAVE area of kernel can be in standard or compacted format; @@ -254,10 +254,13 @@ static void __init setup_xstate_features(void) * in the fixed offsets in the xsave area in either compacted form * or standard form. */ - xstate_offsets[0] = 0; - xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); - xstate_offsets[1] = xstate_sizes[0]; - xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); + xstate_offsets[XFEATURE_FP] = 0; + xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state, + xmm_space); + + xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; + xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state, + xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) @@ -342,7 +345,7 @@ static int xfeature_is_aligned(int xfeature_nr) */ static void __init setup_xstate_comp(void) { - unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; + unsigned int xstate_comp_sizes[XFEATURE_MAX]; int i; /* @@ -350,8 +353,9 @@ static void __init setup_xstate_comp(void) * in the fixed offsets in the xsave area in either compacted form * or standard form. */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); + xstate_comp_offsets[XFEATURE_FP] = 0; + xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state, + xmm_space); if (!boot_cpu_has(X86_FEATURE_XSAVES)) { for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { @@ -840,7 +844,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) /* * We should not ever be requesting features that we - * have not enabled. Remember that pcntxt_mask is + * have not enabled. Remember that xfeatures_mask is * what we write to the XCR0 register. */ WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 073aab525d80..e8a9f8370112 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -12,20 +12,18 @@ #include <asm/frame.h> #include <asm/asm-offsets.h> -# define function_hook __fentry__ -EXPORT_SYMBOL(__fentry__) - #ifdef CONFIG_FRAME_POINTER # define MCOUNT_FRAME 1 /* using frame = true */ #else # define MCOUNT_FRAME 0 /* using frame = false */ #endif -ENTRY(function_hook) +SYM_FUNC_START(__fentry__) ret -END(function_hook) +SYM_FUNC_END(__fentry__) +EXPORT_SYMBOL(__fentry__) -ENTRY(ftrace_caller) +SYM_CODE_START(ftrace_caller) #ifdef CONFIG_FRAME_POINTER /* @@ -85,11 +83,11 @@ ftrace_graph_call: #endif /* This is weak to keep gas from relaxing the jumps */ -WEAK(ftrace_stub) +SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) ret -END(ftrace_caller) +SYM_CODE_END(ftrace_caller) -ENTRY(ftrace_regs_caller) +SYM_CODE_START(ftrace_regs_caller) /* * We're here from an mcount/fentry CALL, and the stack frame looks like: * @@ -138,7 +136,7 @@ ENTRY(ftrace_regs_caller) movl function_trace_op, %ecx # 3rd argument: ftrace_pos pushl %esp # 4th argument: pt_regs -GLOBAL(ftrace_regs_call) +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub addl $4, %esp # skip 4th argument @@ -163,9 +161,10 @@ GLOBAL(ftrace_regs_call) popl %eax jmp .Lftrace_ret +SYM_CODE_END(ftrace_regs_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) +SYM_CODE_START(ftrace_graph_caller) pushl %eax pushl %ecx pushl %edx @@ -179,7 +178,7 @@ ENTRY(ftrace_graph_caller) popl %ecx popl %eax ret -END(ftrace_graph_caller) +SYM_CODE_END(ftrace_graph_caller) .globl return_to_handler return_to_handler: diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 809d54397dba..6e8961ca3605 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -14,9 +14,6 @@ .code64 .section .entry.text, "ax" -# define function_hook __fentry__ -EXPORT_SYMBOL(__fentry__) - #ifdef CONFIG_FRAME_POINTER /* Save parent and function stack frames (rip and rbp) */ # define MCOUNT_FRAME_SIZE (8+16*2) @@ -132,22 +129,23 @@ EXPORT_SYMBOL(__fentry__) #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(function_hook) +SYM_FUNC_START(__fentry__) retq -ENDPROC(function_hook) +SYM_FUNC_END(__fentry__) +EXPORT_SYMBOL(__fentry__) -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) /* save_mcount_regs fills in first two parameters */ save_mcount_regs -GLOBAL(ftrace_caller_op_ptr) +SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx /* regs go into 4th parameter (but make it NULL) */ movq $0, %rcx -GLOBAL(ftrace_call) +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub restore_mcount_regs @@ -157,10 +155,10 @@ GLOBAL(ftrace_call) * think twice before adding any new code or changing the * layout here. */ -GLOBAL(ftrace_epilogue) +SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) jmp ftrace_stub #endif @@ -168,11 +166,11 @@ GLOBAL(ftrace_graph_call) * This is weak to keep gas from relaxing the jumps. * It is also used to copy the retq for trampolines. */ -WEAK(ftrace_stub) +SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) retq -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ pushfq @@ -180,7 +178,7 @@ ENTRY(ftrace_regs_caller) save_mcount_regs 8 /* save_mcount_regs fills in first two parameters */ -GLOBAL(ftrace_regs_caller_op_ptr) +SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -209,7 +207,7 @@ GLOBAL(ftrace_regs_caller_op_ptr) /* regs go into 4th parameter */ leaq (%rsp), %rcx -GLOBAL(ftrace_regs_call) +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub /* Copy flags back to SS, to restore them */ @@ -239,16 +237,16 @@ GLOBAL(ftrace_regs_call) * The trampoline will add the code to jump * to the return. */ -GLOBAL(ftrace_regs_caller_end) +SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) jmp ftrace_epilogue -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(function_hook) +SYM_FUNC_START(__fentry__) cmpq $ftrace_stub, ftrace_trace_function jnz trace @@ -261,7 +259,7 @@ fgraph_trace: jnz ftrace_graph_caller #endif -GLOBAL(ftrace_stub) +SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) retq trace: @@ -279,11 +277,12 @@ trace: restore_mcount_regs jmp fgraph_trace -ENDPROC(function_hook) +SYM_FUNC_END(__fentry__) +EXPORT_SYMBOL(__fentry__) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) +SYM_FUNC_START(ftrace_graph_caller) /* Saves rbp into %rdx and fills first parameter */ save_mcount_regs @@ -294,9 +293,9 @@ ENTRY(ftrace_graph_caller) restore_mcount_regs retq -ENDPROC(ftrace_graph_caller) +SYM_FUNC_END(ftrace_graph_caller) -ENTRY(return_to_handler) +SYM_CODE_START(return_to_handler) UNWIND_HINT_EMPTY subq $24, %rsp @@ -312,5 +311,5 @@ ENTRY(return_to_handler) movq (%rsp), %rax addq $24, %rsp JMP_NOSPEC %rdi -END(return_to_handler) +SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30f9cb2c0b55..3923ab4630d7 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -64,7 +64,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * can. */ __HEAD -ENTRY(startup_32) +SYM_CODE_START(startup_32) movl pa(initial_stack),%ecx /* test KEEP_SEGMENTS flag to see if the bootloader is asking @@ -156,7 +156,7 @@ ENTRY(startup_32) jmp *%eax .Lbad_subarch: -WEAK(xen_entry) +SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK) /* Unknown implementation; there's really nothing we can do at this point. */ ud2a @@ -172,6 +172,7 @@ num_subarch_entries = (. - subarch_entries) / 4 #else jmp .Ldefault_entry #endif /* CONFIG_PARAVIRT */ +SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU /* @@ -179,12 +180,12 @@ num_subarch_entries = (. - subarch_entries) / 4 * up already except stack. We just set up stack here. Then call * start_secondary(). */ -ENTRY(start_cpu0) +SYM_FUNC_START(start_cpu0) movl initial_stack, %ecx movl %ecx, %esp call *(initial_code) 1: jmp 1b -ENDPROC(start_cpu0) +SYM_FUNC_END(start_cpu0) #endif /* @@ -195,7 +196,7 @@ ENDPROC(start_cpu0) * If cpu hotplug is not supported then this code can go in init section * which will be freed later */ -ENTRY(startup_32_smp) +SYM_FUNC_START(startup_32_smp) cld movl $(__BOOT_DS),%eax movl %eax,%ds @@ -362,7 +363,7 @@ ENTRY(startup_32_smp) call *(initial_code) 1: jmp 1b -ENDPROC(startup_32_smp) +SYM_FUNC_END(startup_32_smp) #include "verify_cpu.S" @@ -392,7 +393,7 @@ setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ ret -ENTRY(early_idt_handler_array) +SYM_FUNC_START(early_idt_handler_array) # 36(%esp) %eflags # 32(%esp) %cs # 28(%esp) %eip @@ -407,9 +408,9 @@ ENTRY(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handler_array) +SYM_FUNC_END(early_idt_handler_array) -early_idt_handler_common: +SYM_CODE_START_LOCAL(early_idt_handler_common) /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -460,10 +461,10 @@ early_idt_handler_common: decl %ss:early_recursion_flag addl $4, %esp /* pop pt_regs->orig_ax */ iret -ENDPROC(early_idt_handler_common) +SYM_CODE_END(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ -ENTRY(early_ignore_irq) +SYM_FUNC_START(early_ignore_irq) cld #ifdef CONFIG_PRINTK pushl %eax @@ -498,19 +499,16 @@ ENTRY(early_ignore_irq) hlt_loop: hlt jmp hlt_loop -ENDPROC(early_ignore_irq) +SYM_FUNC_END(early_ignore_irq) __INITDATA .align 4 -GLOBAL(early_recursion_flag) - .long 0 +SYM_DATA(early_recursion_flag, .long 0) __REFDATA .align 4 -ENTRY(initial_code) - .long i386_start_kernel -ENTRY(setup_once_ref) - .long setup_once +SYM_DATA(initial_code, .long i386_start_kernel) +SYM_DATA(setup_once_ref, .long setup_once) #ifdef CONFIG_PAGE_TABLE_ISOLATION #define PGD_ALIGN (2 * PAGE_SIZE) @@ -553,7 +551,7 @@ EXPORT_SYMBOL(empty_zero_page) __PAGE_ALIGNED_DATA /* Page-aligned for the benefit of paravirt? */ .align PGD_ALIGN -ENTRY(initial_page_table) +SYM_DATA_START(initial_page_table) .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 @@ -571,17 +569,28 @@ ENTRY(initial_page_table) # error "Kernel PMDs should be 1, 2 or 3" # endif .align PAGE_SIZE /* needs to be page-sized too */ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * PTI needs another page so sync_initial_pagetable() works correctly + * and does not scribble over the data which is placed behind the + * actual initial_page_table. See clone_pgd_range(). + */ + .fill 1024, 4, 0 +#endif + +SYM_DATA_END(initial_page_table) #endif .data .balign 4 -ENTRY(initial_stack) - /* - * The SIZEOF_PTREGS gap is a convention which helps the in-kernel - * unwinder reliably detect the end of the stack. - */ - .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \ - TOP_OF_KERNEL_STACK_PADDING; +/* + * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder + * reliably detect the end of the stack. + */ +SYM_DATA(initial_stack, + .long init_thread_union + THREAD_SIZE - + SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING) __INITRODATA int_msg: @@ -597,27 +606,28 @@ int_msg: */ .data -.globl boot_gdt_descr - ALIGN # early boot GDT descriptor (must use 1:1 address mapping) .word 0 # 32 bit align gdt_desc.address -boot_gdt_descr: +SYM_DATA_START_LOCAL(boot_gdt_descr) .word __BOOT_DS+7 .long boot_gdt - __PAGE_OFFSET +SYM_DATA_END(boot_gdt_descr) # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -ENTRY(early_gdt_descr) +SYM_DATA_START(early_gdt_descr) .word GDT_ENTRIES*8-1 .long gdt_page /* Overwritten for secondary CPUs */ +SYM_DATA_END(early_gdt_descr) /* * The boot_gdt must mirror the equivalent in setup.S and is * used only for booting. */ .align L1_CACHE_BYTES -ENTRY(boot_gdt) +SYM_DATA_START(boot_gdt) .fill GDT_ENTRY_BOOT_CS,8,0 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ +SYM_DATA_END(boot_gdt) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index f3d3e9646a99..4bbc770af632 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -49,8 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .text __HEAD .code64 - .globl startup_64 -startup_64: +SYM_CODE_START_NOALIGN(startup_64) UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, @@ -90,7 +89,9 @@ startup_64: /* Form the CR3 value being sure to include the CR3 modifier */ addq $(early_top_pgt - __START_KERNEL_map), %rax jmp 1f -ENTRY(secondary_startup_64) +SYM_CODE_END(startup_64) + +SYM_CODE_START(secondary_startup_64) UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, @@ -240,7 +241,7 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq .Lafter_lret: -END(secondary_startup_64) +SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -250,30 +251,28 @@ END(secondary_startup_64) * up already except stack. We just set up stack here. Then call * start_secondary() via .Ljump_to_C_code. */ -ENTRY(start_cpu0) +SYM_CODE_START(start_cpu0) UNWIND_HINT_EMPTY movq initial_stack(%rip), %rsp jmp .Ljump_to_C_code -END(start_cpu0) +SYM_CODE_END(start_cpu0) #endif /* Both SMP bootup and ACPI suspend change these variables */ __REFDATA .balign 8 - GLOBAL(initial_code) - .quad x86_64_start_kernel - GLOBAL(initial_gs) - .quad INIT_PER_CPU_VAR(fixed_percpu_data) - GLOBAL(initial_stack) - /* - * The SIZEOF_PTREGS gap is a convention which helps the in-kernel - * unwinder reliably detect the end of the stack. - */ - .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS +SYM_DATA(initial_code, .quad x86_64_start_kernel) +SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data)) + +/* + * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder + * reliably detect the end of the stack. + */ +SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS) __FINITDATA __INIT -ENTRY(early_idt_handler_array) +SYM_CODE_START(early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 @@ -289,9 +288,9 @@ ENTRY(early_idt_handler_array) .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr UNWIND_HINT_IRET_REGS offset=16 -END(early_idt_handler_array) +SYM_CODE_END(early_idt_handler_array) -early_idt_handler_common: +SYM_CODE_START_LOCAL(early_idt_handler_common) /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -333,17 +332,11 @@ early_idt_handler_common: 20: decl early_recursion_flag(%rip) jmp restore_regs_and_return_to_kernel -END(early_idt_handler_common) +SYM_CODE_END(early_idt_handler_common) - __INITDATA - .balign 4 -GLOBAL(early_recursion_flag) - .long 0 - -#define NEXT_PAGE(name) \ - .balign PAGE_SIZE; \ -GLOBAL(name) +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) #ifdef CONFIG_PAGE_TABLE_ISOLATION /* @@ -358,11 +351,11 @@ GLOBAL(name) */ #define PTI_USER_PGD_FILL 512 /* This ensures they are 8k-aligned: */ -#define NEXT_PGD_PAGE(name) \ - .balign 2 * PAGE_SIZE; \ -GLOBAL(name) +#define SYM_DATA_START_PTI_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign 2 * PAGE_SIZE) #else -#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define SYM_DATA_START_PTI_ALIGNED(name) \ + SYM_DATA_START_PAGE_ALIGNED(name) #define PTI_USER_PGD_FILL 0 #endif @@ -375,17 +368,23 @@ GLOBAL(name) .endr __INITDATA -NEXT_PGD_PAGE(early_top_pgt) + .balign 4 + +SYM_DATA_START_PTI_ALIGNED(early_top_pgt) .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 +SYM_DATA_END(early_top_pgt) -NEXT_PAGE(early_dynamic_pgts) +SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 +SYM_DATA_END(early_dynamic_pgts) + +SYM_DATA(early_recursion_flag, .long 0) .data #if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH) -NEXT_PGD_PAGE(init_top_pgt) +SYM_DATA_START_PTI_ALIGNED(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC @@ -393,11 +392,13 @@ NEXT_PGD_PAGE(init_top_pgt) /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .fill PTI_USER_PGD_FILL,8,0 +SYM_DATA_END(init_top_pgt) -NEXT_PAGE(level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .fill 511, 8, 0 -NEXT_PAGE(level2_ident_pgt) +SYM_DATA_END(level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt) /* * Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. @@ -407,25 +408,29 @@ NEXT_PAGE(level2_ident_pgt) * the CPU should ignore the bit. */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(level2_ident_pgt) #else -NEXT_PGD_PAGE(init_top_pgt) +SYM_DATA_START_PTI_ALIGNED(init_top_pgt) .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 +SYM_DATA_END(init_top_pgt) #endif #ifdef CONFIG_X86_5LEVEL -NEXT_PAGE(level4_kernel_pgt) +SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt) .fill 511,8,0 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(level4_kernel_pgt) #endif -NEXT_PAGE(level3_kernel_pgt) +SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(level3_kernel_pgt) -NEXT_PAGE(level2_kernel_pgt) +SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt) /* * 512 MB kernel mapping. We spend a full page on this pagetable * anyway. @@ -442,8 +447,9 @@ NEXT_PAGE(level2_kernel_pgt) */ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) +SYM_DATA_END(level2_kernel_pgt) -NEXT_PAGE(level2_fixmap_pgt) +SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt) .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 pgtno = 0 .rept (FIXMAP_PMD_NUM) @@ -453,31 +459,32 @@ NEXT_PAGE(level2_fixmap_pgt) .endr /* 6 MB reserved space + a 2MB hole */ .fill 4,8,0 +SYM_DATA_END(level2_fixmap_pgt) -NEXT_PAGE(level1_fixmap_pgt) +SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt) .rept (FIXMAP_PMD_NUM) .fill 512,8,0 .endr +SYM_DATA_END(level1_fixmap_pgt) #undef PMDS .data .align 16 - .globl early_gdt_descr -early_gdt_descr: - .word GDT_ENTRIES*8-1 -early_gdt_descr_base: - .quad INIT_PER_CPU_VAR(gdt_page) - -ENTRY(phys_base) - /* This must match the first entry in level2_kernel_pgt */ - .quad 0x0000000000000000 + +SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1) +SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page)) + + .align 16 +/* This must match the first entry in level2_kernel_pgt */ +SYM_DATA(phys_base, .quad 0x0) EXPORT_SYMBOL(phys_base) #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS -NEXT_PAGE(empty_zero_page) +SYM_DATA_START_PAGE_ALIGNED(empty_zero_page) .skip PAGE_SIZE +SYM_DATA_END(empty_zero_page) EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 61a89d3c0382..8abeee0dd7bf 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -3,32 +3,69 @@ * This contains the io-permission bitmap code - written by obz, with changes * by Linus. 32/64 bits code unification by Miguel Botón. */ - -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/kernel.h> #include <linux/capability.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/ioport.h> #include <linux/security.h> -#include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/slab.h> -#include <linux/thread_info.h> #include <linux/syscalls.h> #include <linux/bitmap.h> -#include <asm/syscalls.h> +#include <linux/ioport.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/io_bitmap.h> #include <asm/desc.h> +#ifdef CONFIG_X86_IOPL_IOPERM + +static atomic64_t io_bitmap_sequence; + +void io_bitmap_share(struct task_struct *tsk) +{ + /* Can be NULL when current->thread.iopl_emul == 3 */ + if (current->thread.io_bitmap) { + /* + * Take a refcount on current's bitmap. It can be used by + * both tasks as long as none of them changes the bitmap. + */ + refcount_inc(¤t->thread.io_bitmap->refcnt); + tsk->thread.io_bitmap = current->thread.io_bitmap; + } + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); +} + +static void task_update_io_bitmap(void) +{ + struct thread_struct *t = ¤t->thread; + + if (t->iopl_emul == 3 || t->io_bitmap) { + /* TSS update is handled on exit to user space */ + set_thread_flag(TIF_IO_BITMAP); + } else { + clear_thread_flag(TIF_IO_BITMAP); + /* Invalidate TSS */ + preempt_disable(); + tss_update_io_bitmap(); + preempt_enable(); + } +} + +void io_bitmap_exit(void) +{ + struct io_bitmap *iobm = current->thread.io_bitmap; + + current->thread.io_bitmap = NULL; + task_update_io_bitmap(); + if (iobm && refcount_dec_and_test(&iobm->refcnt)) + kfree(iobm); +} + /* - * this changes the io permissions bitmap in the current task. + * This changes the io permissions bitmap in the current task. */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct *t = ¤t->thread; - struct tss_struct *tss; - unsigned int i, max_long, bytes, bytes_updated; + unsigned int i, max_long; + struct io_bitmap *iobm; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -41,59 +78,72 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ - if (!t->io_bitmap_ptr) { - unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - - if (!bitmap) + iobm = t->io_bitmap; + if (!iobm) { + /* No point to allocate a bitmap just to clear permissions */ + if (!turn_on) + return 0; + iobm = kmalloc(sizeof(*iobm), GFP_KERNEL); + if (!iobm) return -ENOMEM; - memset(bitmap, 0xff, IO_BITMAP_BYTES); - t->io_bitmap_ptr = bitmap; - set_thread_flag(TIF_IO_BITMAP); + memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap)); + refcount_set(&iobm->refcnt, 1); + } - /* - * Now that we have an IO bitmap, we need our TSS limit to be - * correct. It's fine if we are preempted after doing this: - * with TIF_IO_BITMAP set, context switches will keep our TSS - * limit correct. - */ - preempt_disable(); - refresh_tss_limit(); - preempt_enable(); + /* + * If the bitmap is not shared, then nothing can take a refcount as + * current can obviously not fork at the same time. If it's shared + * duplicate it and drop the refcount on the original one. + */ + if (refcount_read(&iobm->refcnt) > 1) { + iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); + if (!iobm) + return -ENOMEM; + refcount_set(&iobm->refcnt, 1); + io_bitmap_exit(); } /* - * do it in the per-thread copy and in the TSS ... - * - * Disable preemption via get_cpu() - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: + * Store the bitmap pointer (might be the same if the task already + * head one). Must be done here so freeing the bitmap when all + * permissions are dropped has the pointer set up. */ - tss = &per_cpu(cpu_tss_rw, get_cpu()); + t->io_bitmap = iobm; + /* Mark it active for context switching and exit to user mode */ + set_thread_flag(TIF_IO_BITMAP); + /* + * Update the tasks bitmap. The update of the TSS bitmap happens on + * exit to user mode. So this needs no protection. + */ if (turn_on) - bitmap_clear(t->io_bitmap_ptr, from, num); + bitmap_clear(iobm->bitmap, from, num); else - bitmap_set(t->io_bitmap_ptr, from, num); + bitmap_set(iobm->bitmap, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, * to keep it obviously correct: */ - max_long = 0; - for (i = 0; i < IO_BITMAP_LONGS; i++) - if (t->io_bitmap_ptr[i] != ~0UL) + max_long = UINT_MAX; + for (i = 0; i < IO_BITMAP_LONGS; i++) { + if (iobm->bitmap[i] != ~0UL) max_long = i; + } + /* All permissions dropped? */ + if (max_long == UINT_MAX) { + io_bitmap_exit(); + return 0; + } - bytes = (max_long + 1) * sizeof(unsigned long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; - - /* Update the TSS: */ - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + iobm->max = (max_long + 1) * sizeof(unsigned long); - put_cpu(); + /* + * Update the sequence number to force a TSS update on return to + * user mode. + */ + iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); return 0; } @@ -104,38 +154,61 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) } /* - * sys_iopl has to be used when you want to access the IO ports - * beyond the 0x3ff range: to get the full 65536 ports bitmapped - * you'd need 8kB of bitmaps/process, which is a bit excessive. + * The sys_iopl functionality depends on the level argument, which if + * granted for the task is used to enable access to all 65536 I/O ports. + * + * This does not use the IOPL mechanism provided by the CPU as that would + * also allow the user space task to use the CLI/STI instructions. * - * Here we just change the flags value on the stack: we allow - * only the super-user to do it. This depends on the stack-layout - * on system-call entry - see also fork() and the signal handling - * code. + * Disabling interrupts in a user space task is dangerous as it might lock + * up the machine and the semantics vs. syscalls and exceptions is + * undefined. + * + * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3 + * 3 enables them. + * + * IOPL is strictly per thread and inherited on fork. */ SYSCALL_DEFINE1(iopl, unsigned int, level) { - struct pt_regs *regs = current_pt_regs(); struct thread_struct *t = ¤t->thread; - - /* - * Careful: the IOPL bits in regs->flags are undefined under Xen PV - * and changing them has no effect. - */ - unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; + unsigned int old; if (level > 3) return -EINVAL; + + old = t->iopl_emul; + + /* No point in going further if nothing changes */ + if (level == old) + return 0; + /* Trying to gain more privileges? */ if (level > old) { if (!capable(CAP_SYS_RAWIO) || security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | - (level << X86_EFLAGS_IOPL_BIT); - t->iopl = level << X86_EFLAGS_IOPL_BIT; - set_iopl_mask(t->iopl); + + t->iopl_emul = level; + task_update_io_bitmap(); return 0; } + +#else /* CONFIG_X86_IOPL_IOPERM */ + +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + return -ENOSYS; +} +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +{ + return -ENOSYS; +} + +SYSCALL_DEFINE1(iopl, unsigned int, level) +{ + return -ENOSYS; +} +#endif diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index ddeeaac8adda..0db0375235b4 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -7,20 +7,20 @@ /* * unsigned long native_save_fl(void) */ -ENTRY(native_save_fl) +SYM_FUNC_START(native_save_fl) pushf pop %_ASM_AX ret -ENDPROC(native_save_fl) +SYM_FUNC_END(native_save_fl) EXPORT_SYMBOL(native_save_fl) /* * void native_restore_fl(unsigned long flags) * %eax/%rdi: flags */ -ENTRY(native_restore_fl) +SYM_FUNC_START(native_restore_fl) push %_ASM_ARG1 popf ret -ENDPROC(native_restore_fl) +SYM_FUNC_END(native_restore_fl) EXPORT_SYMBOL(native_restore_fl) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 3ad34f01de2a..6eb8b50ea07e 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -11,6 +11,7 @@ #include <linux/acpi_pmtmr.h> #include <linux/kernel.h> #include <linux/reboot.h> +#include <linux/serial_8250.h> #include <asm/apic.h> #include <asm/cpu.h> #include <asm/hypervisor.h> @@ -21,9 +22,24 @@ #include <asm/setup.h> #include <asm/jailhouse_para.h> -static __initdata struct jailhouse_setup_data setup_data; +static struct jailhouse_setup_data setup_data; +#define SETUP_DATA_V1_LEN (sizeof(setup_data.hdr) + sizeof(setup_data.v1)) +#define SETUP_DATA_V2_LEN (SETUP_DATA_V1_LEN + sizeof(setup_data.v2)) + static unsigned int precalibrated_tsc_khz; +static void jailhouse_setup_irq(unsigned int irq) +{ + struct mpc_intsrc mp_irq = { + .type = MP_INTSRC, + .irqtype = mp_INT, + .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE, + .srcbusirq = irq, + .dstirq = irq, + }; + mp_save_irq(&mp_irq); +} + static uint32_t jailhouse_cpuid_base(void) { if (boot_cpu_data.cpuid_level < 0 || @@ -45,7 +61,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now) static void __init jailhouse_timer_init(void) { - lapic_timer_period = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.v1.apic_khz * (1000 / HZ); } static unsigned long jailhouse_get_tsc(void) @@ -77,33 +93,28 @@ static void __init jailhouse_get_smp_config(unsigned int early) .type = IOAPIC_DOMAIN_STRICT, .ops = &mp_ioapic_irqdomain_ops, }; - struct mpc_intsrc mp_irq = { - .type = MP_INTSRC, - .irqtype = mp_INT, - .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE, - }; unsigned int cpu; jailhouse_x2apic_init(); register_lapic_address(0xfee00000); - for (cpu = 0; cpu < setup_data.num_cpus; cpu++) { - generic_processor_info(setup_data.cpu_ids[cpu], + for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++) { + generic_processor_info(setup_data.v1.cpu_ids[cpu], boot_cpu_apic_version); } smp_found_config = 1; - if (setup_data.standard_ioapic) { + if (setup_data.v1.standard_ioapic) { mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg); - /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */ - mp_irq.srcbusirq = mp_irq.dstirq = 3; - mp_save_irq(&mp_irq); - - mp_irq.srcbusirq = mp_irq.dstirq = 4; - mp_save_irq(&mp_irq); + if (IS_ENABLED(CONFIG_SERIAL_8250) && + setup_data.hdr.version < 2) { + /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */ + jailhouse_setup_irq(3); + jailhouse_setup_irq(4); + } } } @@ -126,9 +137,9 @@ static int __init jailhouse_pci_arch_init(void) pcibios_last_bus = 0xff; #ifdef CONFIG_PCI_MMCONFIG - if (setup_data.pci_mmconfig_base) { + if (setup_data.v1.pci_mmconfig_base) { pci_mmconfig_add(0, 0, pcibios_last_bus, - setup_data.pci_mmconfig_base); + setup_data.v1.pci_mmconfig_base); pci_mmcfg_arch_init(); } #endif @@ -136,9 +147,57 @@ static int __init jailhouse_pci_arch_init(void) return 0; } +#ifdef CONFIG_SERIAL_8250 +static inline bool jailhouse_uart_enabled(unsigned int uart_nr) +{ + return setup_data.v2.flags & BIT(uart_nr); +} + +static void jailhouse_serial_fixup(int port, struct uart_port *up, + u32 *capabilities) +{ + static const u16 pcuart_base[] = {0x3f8, 0x2f8, 0x3e8, 0x2e8}; + unsigned int n; + + for (n = 0; n < ARRAY_SIZE(pcuart_base); n++) { + if (pcuart_base[n] != up->iobase) + continue; + + if (jailhouse_uart_enabled(n)) { + pr_info("Enabling UART%u (port 0x%lx)\n", n, + up->iobase); + jailhouse_setup_irq(up->irq); + } else { + /* Deactivate UART if access isn't allowed */ + up->iobase = 0; + } + break; + } +} + +static void __init jailhouse_serial_workaround(void) +{ + /* + * There are flags inside setup_data that indicate availability of + * platform UARTs since setup data version 2. + * + * In case of version 1, we don't know which UARTs belong Linux. In + * this case, unconditionally register 1:1 mapping for legacy UART IRQs + * 3 and 4. + */ + if (setup_data.hdr.version > 1) + serial8250_set_isa_configurator(jailhouse_serial_fixup); +} +#else /* !CONFIG_SERIAL_8250 */ +static inline void jailhouse_serial_workaround(void) +{ +} +#endif /* CONFIG_SERIAL_8250 */ + static void __init jailhouse_init_platform(void) { u64 pa_data = boot_params.hdr.setup_data; + unsigned long setup_data_len; struct setup_data header; void *mapping; @@ -163,16 +222,8 @@ static void __init jailhouse_init_platform(void) memcpy(&header, mapping, sizeof(header)); early_memunmap(mapping, sizeof(header)); - if (header.type == SETUP_JAILHOUSE && - header.len >= sizeof(setup_data)) { - pa_data += offsetof(struct setup_data, data); - - mapping = early_memremap(pa_data, sizeof(setup_data)); - memcpy(&setup_data, mapping, sizeof(setup_data)); - early_memunmap(mapping, sizeof(setup_data)); - + if (header.type == SETUP_JAILHOUSE) break; - } pa_data = header.next; } @@ -180,13 +231,28 @@ static void __init jailhouse_init_platform(void) if (!pa_data) panic("Jailhouse: No valid setup data found"); - if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION) - panic("Jailhouse: Unsupported setup data structure"); - - pmtmr_ioport = setup_data.pm_timer_address; + /* setup data must at least contain the header */ + if (header.len < sizeof(setup_data.hdr)) + goto unsupported; + + pa_data += offsetof(struct setup_data, data); + setup_data_len = min_t(unsigned long, sizeof(setup_data), + (unsigned long)header.len); + mapping = early_memremap(pa_data, setup_data_len); + memcpy(&setup_data, mapping, setup_data_len); + early_memunmap(mapping, setup_data_len); + + if (setup_data.hdr.version == 0 || + setup_data.hdr.compatible_version != + JAILHOUSE_SETUP_REQUIRED_VERSION || + (setup_data.hdr.version == 1 && header.len < SETUP_DATA_V1_LEN) || + (setup_data.hdr.version >= 2 && header.len < SETUP_DATA_V2_LEN)) + goto unsupported; + + pmtmr_ioport = setup_data.v1.pm_timer_address; pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); - precalibrated_tsc_khz = setup_data.tsc_khz; + precalibrated_tsc_khz = setup_data.v1.tsc_khz; setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); pci_probe = 0; @@ -196,6 +262,12 @@ static void __init jailhouse_init_platform(void) * are none in a non-root cell. */ disable_acpi(); + + jailhouse_serial_workaround(); + return; + +unsupported: + panic("Jailhouse: Unsupported setup data structure"); } bool jailhouse_paravirt(void) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index edaa30b20841..64b6da95af98 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -44,7 +44,12 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, if (count > node->len - pos) count = node->len - pos; - pa = node->paddr + sizeof(struct setup_data) + pos; + pa = node->paddr + pos; + + /* Is it direct data or invalid indirect one? */ + if (!(node->type & SETUP_INDIRECT) || node->type == SETUP_INDIRECT) + pa += sizeof(struct setup_data); + p = memremap(pa, count, MEMREMAP_WB); if (!p) return -ENOMEM; @@ -108,9 +113,17 @@ static int __init create_setup_data_nodes(struct dentry *parent) goto err_dir; } - node->paddr = pa_data; - node->type = data->type; - node->len = data->len; + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { + node->paddr = ((struct setup_indirect *)data->data)->addr; + node->type = ((struct setup_indirect *)data->data)->type; + node->len = ((struct setup_indirect *)data->data)->len; + } else { + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + } + create_setup_data_node(d, no, node); pa_data = data->next; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index 7969da939213..d0a19121c6a4 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -100,7 +100,12 @@ static int __init get_setup_data_size(int nr, size_t *size) if (!data) return -ENOMEM; if (nr == i) { - *size = data->len; + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) + *size = ((struct setup_indirect *)data->data)->len; + else + *size = data->len; + memunmap(data); return 0; } @@ -130,7 +135,10 @@ static ssize_t type_show(struct kobject *kobj, if (!data) return -ENOMEM; - ret = sprintf(buf, "0x%x\n", data->type); + if (data->type == SETUP_INDIRECT) + ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type); + else + ret = sprintf(buf, "0x%x\n", data->type); memunmap(data); return ret; } @@ -142,7 +150,7 @@ static ssize_t setup_data_data_read(struct file *fp, loff_t off, size_t count) { int nr, ret = 0; - u64 paddr; + u64 paddr, len; struct setup_data *data; void *p; @@ -157,19 +165,28 @@ static ssize_t setup_data_data_read(struct file *fp, if (!data) return -ENOMEM; - if (off > data->len) { + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { + paddr = ((struct setup_indirect *)data->data)->addr; + len = ((struct setup_indirect *)data->data)->len; + } else { + paddr += sizeof(*data); + len = data->len; + } + + if (off > len) { ret = -EINVAL; goto out; } - if (count > data->len - off) - count = data->len - off; + if (count > len - off) + count = len - off; if (!count) goto out; ret = count; - p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB); + p = memremap(paddr, len, MEMREMAP_WB); if (!p) { ret = -ENOMEM; goto out; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 5dcd438ad8f2..16e125a50b33 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -298,48 +298,6 @@ static void load_segments(void) ); } -#ifdef CONFIG_KEXEC_FILE -/* Update purgatory as needed after various image segments have been prepared */ -static int arch_update_purgatory(struct kimage *image) -{ - int ret = 0; - - if (!image->file_mode) - return 0; - - /* Setup copying of backup region */ - if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, - "purgatory_backup_dest", - &image->arch.backup_load_addr, - sizeof(image->arch.backup_load_addr), 0); - if (ret) - return ret; - - ret = kexec_purgatory_get_set_symbol(image, - "purgatory_backup_src", - &image->arch.backup_src_start, - sizeof(image->arch.backup_src_start), 0); - if (ret) - return ret; - - ret = kexec_purgatory_get_set_symbol(image, - "purgatory_backup_sz", - &image->arch.backup_src_sz, - sizeof(image->arch.backup_src_sz), 0); - if (ret) - return ret; - } - - return ret; -} -#else /* !CONFIG_KEXEC_FILE */ -static inline int arch_update_purgatory(struct kimage *image) -{ - return 0; -} -#endif /* CONFIG_KEXEC_FILE */ - int machine_kexec_prepare(struct kimage *image) { unsigned long start_pgtable; @@ -353,11 +311,6 @@ int machine_kexec_prepare(struct kimage *image) if (result) return result; - /* update purgatory as needed */ - result = arch_update_purgatory(image); - if (result) - return result; - return 0; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 59d3d2763a9e..789f5e4f89de 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = { .cpu.iret = native_iret, .cpu.swapgs = native_swapgs, - .cpu.set_iopl_mask = native_set_iopl_mask, - .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c deleted file mode 100644 index 23fdec030c37..000000000000 --- a/arch/x86/kernel/pci-calgary_64.c +++ /dev/null @@ -1,1586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Derived from arch/powerpc/kernel/iommu.c - * - * Copyright IBM Corporation, 2006-2007 - * Copyright (C) 2006 Jon Mason <jdmason@kudzu.us> - * - * Author: Jon Mason <jdmason@kudzu.us> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - - */ - -#define pr_fmt(fmt) "Calgary: " fmt - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/crash_dump.h> -#include <linux/dma-mapping.h> -#include <linux/dma-direct.h> -#include <linux/bitmap.h> -#include <linux/pci_ids.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/scatterlist.h> -#include <linux/iommu-helper.h> - -#include <asm/iommu.h> -#include <asm/calgary.h> -#include <asm/tce.h> -#include <asm/pci-direct.h> -#include <asm/dma.h> -#include <asm/rio.h> -#include <asm/bios_ebda.h> -#include <asm/x86_init.h> -#include <asm/iommu_table.h> - -#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT -int use_calgary __read_mostly = 1; -#else -int use_calgary __read_mostly = 0; -#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ - -#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 -#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308 - -/* register offsets inside the host bridge space */ -#define CALGARY_CONFIG_REG 0x0108 -#define PHB_CSR_OFFSET 0x0110 /* Channel Status */ -#define PHB_PLSSR_OFFSET 0x0120 -#define PHB_CONFIG_RW_OFFSET 0x0160 -#define PHB_IOBASE_BAR_LOW 0x0170 -#define PHB_IOBASE_BAR_HIGH 0x0180 -#define PHB_MEM_1_LOW 0x0190 -#define PHB_MEM_1_HIGH 0x01A0 -#define PHB_IO_ADDR_SIZE 0x01B0 -#define PHB_MEM_1_SIZE 0x01C0 -#define PHB_MEM_ST_OFFSET 0x01D0 -#define PHB_AER_OFFSET 0x0200 -#define PHB_CONFIG_0_HIGH 0x0220 -#define PHB_CONFIG_0_LOW 0x0230 -#define PHB_CONFIG_0_END 0x0240 -#define PHB_MEM_2_LOW 0x02B0 -#define PHB_MEM_2_HIGH 0x02C0 -#define PHB_MEM_2_SIZE_HIGH 0x02D0 -#define PHB_MEM_2_SIZE_LOW 0x02E0 -#define PHB_DOSHOLE_OFFSET 0x08E0 - -/* CalIOC2 specific */ -#define PHB_SAVIOR_L2 0x0DB0 -#define PHB_PAGE_MIG_CTRL 0x0DA8 -#define PHB_PAGE_MIG_DEBUG 0x0DA0 -#define PHB_ROOT_COMPLEX_STATUS 0x0CB0 - -/* PHB_CONFIG_RW */ -#define PHB_TCE_ENABLE 0x20000000 -#define PHB_SLOT_DISABLE 0x1C000000 -#define PHB_DAC_DISABLE 0x01000000 -#define PHB_MEM2_ENABLE 0x00400000 -#define PHB_MCSR_ENABLE 0x00100000 -/* TAR (Table Address Register) */ -#define TAR_SW_BITS 0x0000ffffffff800fUL -#define TAR_VALID 0x0000000000000008UL -/* CSR (Channel/DMA Status Register) */ -#define CSR_AGENT_MASK 0xffe0ffff -/* CCR (Calgary Configuration Register) */ -#define CCR_2SEC_TIMEOUT 0x000000000000000EUL -/* PMCR/PMDR (Page Migration Control/Debug Registers */ -#define PMR_SOFTSTOP 0x80000000 -#define PMR_SOFTSTOPFAULT 0x40000000 -#define PMR_HARDSTOP 0x20000000 - -/* - * The maximum PHB bus number. - * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384 - * x3950M2: 4 chassis, 48 PHBs per chassis = 192 - * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 - * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 - */ -#define MAX_PHB_BUS_NUM 256 - -#define PHBS_PER_CALGARY 4 - -/* register offsets in Calgary's internal register space */ -static const unsigned long tar_offsets[] = { - 0x0580 /* TAR0 */, - 0x0588 /* TAR1 */, - 0x0590 /* TAR2 */, - 0x0598 /* TAR3 */ -}; - -static const unsigned long split_queue_offsets[] = { - 0x4870 /* SPLIT QUEUE 0 */, - 0x5870 /* SPLIT QUEUE 1 */, - 0x6870 /* SPLIT QUEUE 2 */, - 0x7870 /* SPLIT QUEUE 3 */ -}; - -static const unsigned long phb_offsets[] = { - 0x8000 /* PHB0 */, - 0x9000 /* PHB1 */, - 0xA000 /* PHB2 */, - 0xB000 /* PHB3 */ -}; - -/* PHB debug registers */ - -static const unsigned long phb_debug_offsets[] = { - 0x4000 /* PHB 0 DEBUG */, - 0x5000 /* PHB 1 DEBUG */, - 0x6000 /* PHB 2 DEBUG */, - 0x7000 /* PHB 3 DEBUG */ -}; - -/* - * STUFF register for each debug PHB, - * byte 1 = start bus number, byte 2 = end bus number - */ - -#define PHB_DEBUG_STUFF_OFFSET 0x0020 - -unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; -static int translate_empty_slots __read_mostly = 0; -static int calgary_detected __read_mostly = 0; - -static struct rio_table_hdr *rio_table_hdr __initdata; -static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; - -struct calgary_bus_info { - void *tce_space; - unsigned char translation_disabled; - signed char phbid; - void __iomem *bbar; -}; - -static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); -static void calgary_tce_cache_blast(struct iommu_table *tbl); -static void calgary_dump_error_regs(struct iommu_table *tbl); -static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); -static void calioc2_tce_cache_blast(struct iommu_table *tbl); -static void calioc2_dump_error_regs(struct iommu_table *tbl); -static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); -static void get_tce_space_from_tar(void); - -static const struct cal_chipset_ops calgary_chip_ops = { - .handle_quirks = calgary_handle_quirks, - .tce_cache_blast = calgary_tce_cache_blast, - .dump_error_regs = calgary_dump_error_regs -}; - -static const struct cal_chipset_ops calioc2_chip_ops = { - .handle_quirks = calioc2_handle_quirks, - .tce_cache_blast = calioc2_tce_cache_blast, - .dump_error_regs = calioc2_dump_error_regs -}; - -static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; - -static inline int translation_enabled(struct iommu_table *tbl) -{ - /* only PHBs with translation enabled have an IOMMU table */ - return (tbl != NULL); -} - -static void iommu_range_reserve(struct iommu_table *tbl, - unsigned long start_addr, unsigned int npages) -{ - unsigned long index; - unsigned long end; - unsigned long flags; - - index = start_addr >> PAGE_SHIFT; - - /* bail out if we're asked to reserve a region we don't cover */ - if (index >= tbl->it_size) - return; - - end = index + npages; - if (end > tbl->it_size) /* don't go off the table */ - end = tbl->it_size; - - spin_lock_irqsave(&tbl->it_lock, flags); - - bitmap_set(tbl->it_map, index, npages); - - spin_unlock_irqrestore(&tbl->it_lock, flags); -} - -static unsigned long iommu_range_alloc(struct device *dev, - struct iommu_table *tbl, - unsigned int npages) -{ - unsigned long flags; - unsigned long offset; - unsigned long boundary_size; - - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - - BUG_ON(npages == 0); - - spin_lock_irqsave(&tbl->it_lock, flags); - - offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint, - npages, 0, boundary_size, 0); - if (offset == ~0UL) { - tbl->chip_ops->tce_cache_blast(tbl); - - offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, - npages, 0, boundary_size, 0); - if (offset == ~0UL) { - pr_warn("IOMMU full\n"); - spin_unlock_irqrestore(&tbl->it_lock, flags); - if (panic_on_overflow) - panic("Calgary: fix the allocator.\n"); - else - return DMA_MAPPING_ERROR; - } - } - - tbl->it_hint = offset + npages; - BUG_ON(tbl->it_hint > tbl->it_size); - - spin_unlock_irqrestore(&tbl->it_lock, flags); - - return offset; -} - -static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - void *vaddr, unsigned int npages, int direction) -{ - unsigned long entry; - dma_addr_t ret; - - entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == DMA_MAPPING_ERROR)) { - pr_warn("failed to allocate %u pages in iommu %p\n", - npages, tbl); - return DMA_MAPPING_ERROR; - } - - /* set the return dma address */ - ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); - - /* put the TCEs in the HW table */ - tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, - direction); - return ret; -} - -static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned int npages) -{ - unsigned long entry; - unsigned long flags; - - /* were we called with bad_dma_address? */ - if (unlikely(dma_addr == DMA_MAPPING_ERROR)) { - WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " - "address 0x%Lx\n", dma_addr); - return; - } - - entry = dma_addr >> PAGE_SHIFT; - - BUG_ON(entry + npages > tbl->it_size); - - tce_free(tbl, entry, npages); - - spin_lock_irqsave(&tbl->it_lock, flags); - - bitmap_clear(tbl->it_map, entry, npages); - - spin_unlock_irqrestore(&tbl->it_lock, flags); -} - -static inline struct iommu_table *find_iommu_table(struct device *dev) -{ - struct pci_dev *pdev; - struct pci_bus *pbus; - struct iommu_table *tbl; - - pdev = to_pci_dev(dev); - - /* search up the device tree for an iommu */ - pbus = pdev->bus; - do { - tbl = pci_iommu(pbus); - if (tbl && tbl->it_busno == pbus->number) - break; - tbl = NULL; - pbus = pbus->parent; - } while (pbus); - - BUG_ON(tbl && (tbl->it_busno != pbus->number)); - - return tbl; -} - -static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems,enum dma_data_direction dir, - unsigned long attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - struct scatterlist *s; - int i; - - if (!translation_enabled(tbl)) - return; - - for_each_sg(sglist, s, nelems, i) { - unsigned int npages; - dma_addr_t dma = s->dma_address; - unsigned int dmalen = s->dma_length; - - if (dmalen == 0) - break; - - npages = iommu_num_pages(dma, dmalen, PAGE_SIZE); - iommu_free(tbl, dma, npages); - } -} - -static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - struct scatterlist *s; - unsigned long vaddr; - unsigned int npages; - unsigned long entry; - int i; - - for_each_sg(sg, s, nelems, i) { - BUG_ON(!sg_page(s)); - - vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); - - entry = iommu_range_alloc(dev, tbl, npages); - if (entry == DMA_MAPPING_ERROR) { - /* makes sure unmap knows to stop */ - s->dma_length = 0; - goto error; - } - - s->dma_address = (entry << PAGE_SHIFT) | s->offset; - - /* insert into HW table */ - tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir); - - s->dma_length = s->length; - } - - return nelems; -error: - calgary_unmap_sg(dev, sg, nelems, dir, 0); - for_each_sg(sg, s, nelems, i) { - sg->dma_address = DMA_MAPPING_ERROR; - sg->dma_length = 0; - } - return 0; -} - -static dma_addr_t calgary_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - void *vaddr = page_address(page) + offset; - unsigned long uaddr; - unsigned int npages; - struct iommu_table *tbl = find_iommu_table(dev); - - uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, PAGE_SIZE); - - return iommu_alloc(dev, tbl, vaddr, npages, dir); -} - -static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - unsigned int npages; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - iommu_free(tbl, dma_addr, npages); -} - -static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) -{ - void *ret = NULL; - dma_addr_t mapping; - unsigned int npages, order; - struct iommu_table *tbl = find_iommu_table(dev); - - size = PAGE_ALIGN(size); /* size rounded up to full pages */ - npages = size >> PAGE_SHIFT; - order = get_order(size); - - /* alloc enough pages (and possibly more) */ - ret = (void *)__get_free_pages(flag, order); - if (!ret) - goto error; - memset(ret, 0, size); - - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == DMA_MAPPING_ERROR) - goto free; - *dma_handle = mapping; - return ret; -free: - free_pages((unsigned long)ret, get_order(size)); - ret = NULL; -error: - return ret; -} - -static void calgary_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - unsigned int npages; - struct iommu_table *tbl = find_iommu_table(dev); - - size = PAGE_ALIGN(size); - npages = size >> PAGE_SHIFT; - - iommu_free(tbl, dma_handle, npages); - free_pages((unsigned long)vaddr, get_order(size)); -} - -static const struct dma_map_ops calgary_dma_ops = { - .alloc = calgary_alloc_coherent, - .free = calgary_free_coherent, - .map_sg = calgary_map_sg, - .unmap_sg = calgary_unmap_sg, - .map_page = calgary_map_page, - .unmap_page = calgary_unmap_page, - .dma_supported = dma_direct_supported, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, -}; - -static inline void __iomem * busno_to_bbar(unsigned char num) -{ - return bus_info[num].bbar; -} - -static inline int busno_to_phbid(unsigned char num) -{ - return bus_info[num].phbid; -} - -static inline unsigned long split_queue_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return split_queue_offsets[idx]; -} - -static inline unsigned long tar_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return tar_offsets[idx]; -} - -static inline unsigned long phb_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return phb_offsets[idx]; -} - -static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset) -{ - unsigned long target = ((unsigned long)bar) | offset; - return (void __iomem*)target; -} - -static inline int is_calioc2(unsigned short device) -{ - return (device == PCI_DEVICE_ID_IBM_CALIOC2); -} - -static inline int is_calgary(unsigned short device) -{ - return (device == PCI_DEVICE_ID_IBM_CALGARY); -} - -static inline int is_cal_pci_dev(unsigned short device) -{ - return (is_calgary(device) || is_calioc2(device)); -} - -static void calgary_tce_cache_blast(struct iommu_table *tbl) -{ - u64 val; - u32 aer; - int i = 0; - void __iomem *bbar = tbl->bbar; - void __iomem *target; - - /* disable arbitration on the bus */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); - aer = readl(target); - writel(0, target); - - /* read plssr to ensure it got there */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET); - val = readl(target); - - /* poll split queues until all DMA activity is done */ - target = calgary_reg(bbar, split_queue_offset(tbl->it_busno)); - do { - val = readq(target); - i++; - } while ((val & 0xff) != 0xff && i < 100); - if (i == 100) - pr_warn("PCI bus not quiesced, continuing anyway\n"); - - /* invalidate TCE cache */ - target = calgary_reg(bbar, tar_offset(tbl->it_busno)); - writeq(tbl->tar_val, target); - - /* enable arbitration */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); - writel(aer, target); - (void)readl(target); /* flush */ -} - -static void calioc2_tce_cache_blast(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u64 val64; - u32 val; - int i = 0; - int count = 1; - unsigned char bus = tbl->it_busno; - -begin: - printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast " - "sequence - count %d\n", bus, count); - - /* 1. using the Page Migration Control reg set SoftStop */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target); - val |= PMR_SOFTSTOP; - printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target); - writel(cpu_to_be32(val), target); - - /* 2. poll split queues until all DMA activity is done */ - printk(KERN_DEBUG "2a. starting to poll split queues\n"); - target = calgary_reg(bbar, split_queue_offset(bus)); - do { - val64 = readq(target); - i++; - } while ((val64 & 0xff) != 0xff && i < 100); - if (i == 100) - pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n"); - - /* 3. poll Page Migration DEBUG for SoftStopFault */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target); - - /* 4. if SoftStopFault - goto (1) */ - if (val & PMR_SOFTSTOPFAULT) { - if (++count < 100) - goto begin; - else { - pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n"); - return; /* pray for the best */ - } - } - - /* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target); - - /* 6. invalidate TCE cache */ - printk(KERN_DEBUG "6. invalidating TCE cache\n"); - target = calgary_reg(bbar, tar_offset(bus)); - writeq(tbl->tar_val, target); - - /* 7. Re-read PMCR */ - printk(KERN_DEBUG "7a. Re-reading PMCR\n"); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target); - - /* 8. Remove HardStop */ - printk(KERN_DEBUG "8a. removing HardStop from PMCR\n"); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = 0; - printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target); - writel(cpu_to_be32(val), target); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target); -} - -static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start, - u64 limit) -{ - unsigned int numpages; - - limit = limit | 0xfffff; - limit++; - - numpages = ((limit - start) >> PAGE_SHIFT); - iommu_range_reserve(pci_iommu(dev->bus), start, numpages); -} - -static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev) -{ - void __iomem *target; - u64 low, high, sizelow; - u64 start, limit; - struct iommu_table *tbl = pci_iommu(dev->bus); - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - - /* peripheral MEM_1 region */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW); - low = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH); - high = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE); - sizelow = be32_to_cpu(readl(target)); - - start = (high << 32) | low; - limit = sizelow; - - calgary_reserve_mem_region(dev, start, limit); -} - -static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) -{ - void __iomem *target; - u32 val32; - u64 low, high, sizelow, sizehigh; - u64 start, limit; - struct iommu_table *tbl = pci_iommu(dev->bus); - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - - /* is it enabled? */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - if (!(val32 & PHB_MEM2_ENABLE)) - return; - - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW); - low = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH); - high = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW); - sizelow = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH); - sizehigh = be32_to_cpu(readl(target)); - - start = (high << 32) | low; - limit = (sizehigh << 32) | sizelow; - - calgary_reserve_mem_region(dev, start, limit); -} - -/* - * some regions of the IO address space do not get translated, so we - * must not give devices IO addresses in those regions. The regions - * are the 640KB-1MB region and the two PCI peripheral memory holes. - * Reserve all of them in the IOMMU bitmap to avoid giving them out - * later. - */ -static void __init calgary_reserve_regions(struct pci_dev *dev) -{ - unsigned int npages; - u64 start; - struct iommu_table *tbl = pci_iommu(dev->bus); - - /* avoid the BIOS/VGA first 640KB-1MB region */ - /* for CalIOC2 - avoid the entire first MB */ - if (is_calgary(dev->device)) { - start = (640 * 1024); - npages = ((1024 - 640) * 1024) >> PAGE_SHIFT; - } else { /* calioc2 */ - start = 0; - npages = (1 * 1024 * 1024) >> PAGE_SHIFT; - } - iommu_range_reserve(tbl, start, npages); - - /* reserve the two PCI peripheral memory regions in IO space */ - calgary_reserve_peripheral_mem_1(dev); - calgary_reserve_peripheral_mem_2(dev); -} - -static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) -{ - u64 val64; - u64 table_phys; - void __iomem *target; - int ret; - struct iommu_table *tbl; - - /* build TCE tables for each PHB */ - ret = build_tce_table(dev, bbar); - if (ret) - return ret; - - tbl = pci_iommu(dev->bus); - tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; - - if (is_kdump_kernel()) - calgary_init_bitmap_from_tce_table(tbl); - else - tce_free(tbl, 0, tbl->it_size); - - if (is_calgary(dev->device)) - tbl->chip_ops = &calgary_chip_ops; - else if (is_calioc2(dev->device)) - tbl->chip_ops = &calioc2_chip_ops; - else - BUG(); - - calgary_reserve_regions(dev); - - /* set TARs for each PHB */ - target = calgary_reg(bbar, tar_offset(dev->bus->number)); - val64 = be64_to_cpu(readq(target)); - - /* zero out all TAR bits under sw control */ - val64 &= ~TAR_SW_BITS; - table_phys = (u64)__pa(tbl->it_base); - - val64 |= table_phys; - - BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M); - val64 |= (u64) specified_table_size; - - tbl->tar_val = cpu_to_be64(val64); - - writeq(tbl->tar_val, target); - readq(target); /* flush */ - - return 0; -} - -static void __init calgary_free_bus(struct pci_dev *dev) -{ - u64 val64; - struct iommu_table *tbl = pci_iommu(dev->bus); - void __iomem *target; - unsigned int bitmapsz; - - target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); - val64 = be64_to_cpu(readq(target)); - val64 &= ~TAR_SW_BITS; - writeq(cpu_to_be64(val64), target); - readq(target); /* flush */ - - bitmapsz = tbl->it_size / BITS_PER_BYTE; - free_pages((unsigned long)tbl->it_map, get_order(bitmapsz)); - tbl->it_map = NULL; - - kfree(tbl); - - set_pci_iommu(dev->bus, NULL); - - /* Can't free bootmem allocated memory after system is up :-( */ - bus_info[dev->bus->number].tce_space = NULL; -} - -static void calgary_dump_error_regs(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u32 csr, plssr; - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET); - csr = be32_to_cpu(readl(target)); - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET); - plssr = be32_to_cpu(readl(target)); - - /* If no error, the agent ID in the CSR is not valid */ - pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", - tbl->it_busno, csr, plssr); -} - -static void calioc2_dump_error_regs(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - u32 csr, csmr, plssr, mck, rcstat; - void __iomem *target; - unsigned long phboff = phb_offset(tbl->it_busno); - unsigned long erroff; - u32 errregs[7]; - int i; - - /* dump CSR */ - target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET); - csr = be32_to_cpu(readl(target)); - /* dump PLSSR */ - target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET); - plssr = be32_to_cpu(readl(target)); - /* dump CSMR */ - target = calgary_reg(bbar, phboff | 0x290); - csmr = be32_to_cpu(readl(target)); - /* dump mck */ - target = calgary_reg(bbar, phboff | 0x800); - mck = be32_to_cpu(readl(target)); - - pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); - - pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", - csr, plssr, csmr, mck); - - /* dump rest of error regs */ - pr_emerg(""); - for (i = 0; i < ARRAY_SIZE(errregs); i++) { - /* err regs are at 0x810 - 0x870 */ - erroff = (0x810 + (i * 0x10)); - target = calgary_reg(bbar, phboff | erroff); - errregs[i] = be32_to_cpu(readl(target)); - pr_cont("0x%08x@0x%lx ", errregs[i], erroff); - } - pr_cont("\n"); - - /* root complex status */ - target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); - rcstat = be32_to_cpu(readl(target)); - printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat, - PHB_ROOT_COMPLEX_STATUS); -} - -static void calgary_watchdog(struct timer_list *t) -{ - struct iommu_table *tbl = from_timer(tbl, t, watchdog_timer); - void __iomem *bbar = tbl->bbar; - u32 val32; - void __iomem *target; - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET); - val32 = be32_to_cpu(readl(target)); - - /* If no error, the agent ID in the CSR is not valid */ - if (val32 & CSR_AGENT_MASK) { - tbl->chip_ops->dump_error_regs(tbl); - - /* reset error */ - writel(0, target); - - /* Disable bus that caused the error */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | - PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 |= PHB_SLOT_DISABLE; - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - } else { - /* Reset the timer */ - mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ); - } -} - -static void __init calgary_set_split_completion_timeout(void __iomem *bbar, - unsigned char busnum, unsigned long timeout) -{ - u64 val64; - void __iomem *target; - unsigned int phb_shift = ~0; /* silence gcc */ - u64 mask; - - switch (busno_to_phbid(busnum)) { - case 0: phb_shift = (63 - 19); - break; - case 1: phb_shift = (63 - 23); - break; - case 2: phb_shift = (63 - 27); - break; - case 3: phb_shift = (63 - 35); - break; - default: - BUG_ON(busno_to_phbid(busnum)); - } - - target = calgary_reg(bbar, CALGARY_CONFIG_REG); - val64 = be64_to_cpu(readq(target)); - - /* zero out this PHB's timer bits */ - mask = ~(0xFUL << phb_shift); - val64 &= mask; - val64 |= (timeout << phb_shift); - writeq(cpu_to_be64(val64), target); - readq(target); /* flush */ -} - -static void __init calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) -{ - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u32 val; - - /* - * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1 - */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2); - val = cpu_to_be32(readl(target)); - val |= 0x00800000; - writel(cpu_to_be32(val), target); -} - -static void __init calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) -{ - unsigned char busnum = dev->bus->number; - - /* - * Give split completion a longer timeout on bus 1 for aic94xx - * http://bugzilla.kernel.org/show_bug.cgi?id=7180 - */ - if (is_calgary(dev->device) && (busnum == 1)) - calgary_set_split_completion_timeout(tbl->bbar, busnum, - CCR_2SEC_TIMEOUT); -} - -static void __init calgary_enable_translation(struct pci_dev *dev) -{ - u32 val32; - unsigned char busnum; - void __iomem *target; - void __iomem *bbar; - struct iommu_table *tbl; - - busnum = dev->bus->number; - tbl = pci_iommu(dev->bus); - bbar = tbl->bbar; - - /* enable TCE in PHB Config Register */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE; - - printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n", - (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ? - "Calgary" : "CalIOC2", busnum); - printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this " - "bus.\n"); - - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - - timer_setup(&tbl->watchdog_timer, calgary_watchdog, 0); - mod_timer(&tbl->watchdog_timer, jiffies); -} - -static void __init calgary_disable_translation(struct pci_dev *dev) -{ - u32 val32; - unsigned char busnum; - void __iomem *target; - void __iomem *bbar; - struct iommu_table *tbl; - - busnum = dev->bus->number; - tbl = pci_iommu(dev->bus); - bbar = tbl->bbar; - - /* disable TCE in PHB Config Register */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE); - - printk(KERN_INFO "Calgary: disabling translation on PHB %#x!\n", busnum); - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - - del_timer_sync(&tbl->watchdog_timer); -} - -static void __init calgary_init_one_nontraslated(struct pci_dev *dev) -{ - pci_dev_get(dev); - set_pci_iommu(dev->bus, NULL); - - /* is the device behind a bridge? */ - if (dev->bus->parent) - dev->bus->parent->self = dev; - else - dev->bus->self = dev; -} - -static int __init calgary_init_one(struct pci_dev *dev) -{ - void __iomem *bbar; - struct iommu_table *tbl; - int ret; - - bbar = busno_to_bbar(dev->bus->number); - ret = calgary_setup_tar(dev, bbar); - if (ret) - goto done; - - pci_dev_get(dev); - - if (dev->bus->parent) { - if (dev->bus->parent->self) - printk(KERN_WARNING "Calgary: IEEEE, dev %p has " - "bus->parent->self!\n", dev); - dev->bus->parent->self = dev; - } else - dev->bus->self = dev; - - tbl = pci_iommu(dev->bus); - tbl->chip_ops->handle_quirks(tbl, dev); - - calgary_enable_translation(dev); - - return 0; - -done: - return ret; -} - -static int __init calgary_locate_bbars(void) -{ - int ret; - int rioidx, phb, bus; - void __iomem *bbar; - void __iomem *target; - unsigned long offset; - u8 start_bus, end_bus; - u32 val; - - ret = -ENODATA; - for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { - struct rio_detail *rio = rio_devs[rioidx]; - - if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) - continue; - - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); - if (!bbar) - goto error; - - for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { - offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; - target = calgary_reg(bbar, offset); - - val = be32_to_cpu(readl(target)); - - start_bus = (u8)((val & 0x00FF0000) >> 16); - end_bus = (u8)((val & 0x0000FF00) >> 8); - - if (end_bus) { - for (bus = start_bus; bus <= end_bus; bus++) { - bus_info[bus].bbar = bbar; - bus_info[bus].phbid = phb; - } - } else { - bus_info[start_bus].bbar = bbar; - bus_info[start_bus].phbid = phb; - } - } - } - - return 0; - -error: - /* scan bus_info and iounmap any bbars we previously ioremap'd */ - for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) - if (bus_info[bus].bbar) - iounmap(bus_info[bus].bbar); - - return ret; -} - -static int __init calgary_init(void) -{ - int ret; - struct pci_dev *dev = NULL; - struct calgary_bus_info *info; - - ret = calgary_locate_bbars(); - if (ret) - return ret; - - /* Purely for kdump kernel case */ - if (is_kdump_kernel()) - get_tce_space_from_tar(); - - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) { - calgary_init_one_nontraslated(dev); - continue; - } - - if (!info->tce_space && !translate_empty_slots) - continue; - - ret = calgary_init_one(dev); - if (ret) - goto error; - } while (1); - - dev = NULL; - for_each_pci_dev(dev) { - struct iommu_table *tbl; - - tbl = find_iommu_table(&dev->dev); - - if (translation_enabled(tbl)) - dev->dev.dma_ops = &calgary_dma_ops; - } - - return ret; - -error: - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) { - pci_dev_put(dev); - continue; - } - if (!info->tce_space && !translate_empty_slots) - continue; - - calgary_disable_translation(dev); - calgary_free_bus(dev); - pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ - dev->dev.dma_ops = NULL; - } while (1); - - return ret; -} - -static inline int __init determine_tce_table_size(void) -{ - int ret; - - if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) - return specified_table_size; - - if (is_kdump_kernel() && saved_max_pfn) { - /* - * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to - * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each - * larger table size has twice as many entries, so shift the - * max ram address by 13 to divide by 8K and then look at the - * order of the result to choose between 0-7. - */ - ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13); - if (ret > TCE_TABLE_SIZE_8M) - ret = TCE_TABLE_SIZE_8M; - } else { - /* - * Use 8M by default (suggested by Muli) if it's not - * kdump kernel and saved_max_pfn isn't set. - */ - ret = TCE_TABLE_SIZE_8M; - } - - return ret; -} - -static int __init build_detail_arrays(void) -{ - unsigned long ptr; - unsigned numnodes, i; - int scal_detail_size, rio_detail_size; - - numnodes = rio_table_hdr->num_scal_dev; - if (numnodes > MAX_NUMNODES){ - printk(KERN_WARNING - "Calgary: MAX_NUMNODES too low! Defined as %d, " - "but system has %d nodes.\n", - MAX_NUMNODES, numnodes); - return -ENODEV; - } - - switch (rio_table_hdr->version){ - case 2: - scal_detail_size = 11; - rio_detail_size = 13; - break; - case 3: - scal_detail_size = 12; - rio_detail_size = 15; - break; - default: - printk(KERN_WARNING - "Calgary: Invalid Rio Grande Table Version: %d\n", - rio_table_hdr->version); - return -EPROTO; - } - - ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < numnodes; i++, ptr += scal_detail_size) - scal_devs[i] = (struct scal_detail *)ptr; - - for (i = 0; i < rio_table_hdr->num_rio_dev; - i++, ptr += rio_detail_size) - rio_devs[i] = (struct rio_detail *)ptr; - - return 0; -} - -static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) -{ - int dev; - u32 val; - - if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { - /* - * FIXME: properly scan for devices across the - * PCI-to-PCI bridge on every CalIOC2 port. - */ - return 1; - } - - for (dev = 1; dev < 8; dev++) { - val = read_pci_config(bus, dev, 0, 0); - if (val != 0xffffffff) - break; - } - return (val != 0xffffffff); -} - -/* - * calgary_init_bitmap_from_tce_table(): - * Function for kdump case. In the second/kdump kernel initialize - * the bitmap based on the tce table entries obtained from first kernel - */ -static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) -{ - u64 *tp; - unsigned int index; - tp = ((u64 *)tbl->it_base); - for (index = 0 ; index < tbl->it_size; index++) { - if (*tp != 0x0) - set_bit(index, tbl->it_map); - tp++; - } -} - -/* - * get_tce_space_from_tar(): - * Function for kdump case. Get the tce tables from first kernel - * by reading the contents of the base address register of calgary iommu - */ -static void __init get_tce_space_from_tar(void) -{ - int bus; - void __iomem *target; - unsigned long tce_space; - - for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { - struct calgary_bus_info *info = &bus_info[bus]; - unsigned short pci_device; - u32 val; - - val = read_pci_config(bus, 0, 0, 0); - pci_device = (val & 0xFFFF0000) >> 16; - - if (!is_cal_pci_dev(pci_device)) - continue; - if (info->translation_disabled) - continue; - - if (calgary_bus_has_devices(bus, pci_device) || - translate_empty_slots) { - target = calgary_reg(bus_info[bus].bbar, - tar_offset(bus)); - tce_space = be64_to_cpu(readq(target)); - tce_space = tce_space & TAR_SW_BITS; - - tce_space = tce_space & (~specified_table_size); - info->tce_space = (u64 *)__va(tce_space); - } - } - return; -} - -static int __init calgary_iommu_init(void) -{ - int ret; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - return 0; -} - -int __init detect_calgary(void) -{ - int bus; - void *tbl; - int calgary_found = 0; - unsigned long ptr; - unsigned int offset, prev_offset; - int ret; - - /* - * if the user specified iommu=off or iommu=soft or we found - * another HW IOMMU already, bail out. - */ - if (no_iommu || iommu_detected) - return -ENODEV; - - if (!use_calgary) - return -ENODEV; - - if (!early_pci_allowed()) - return -ENODEV; - - printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); - - ptr = (unsigned long)phys_to_virt(get_bios_ebda()); - - rio_table_hdr = NULL; - prev_offset = 0; - offset = 0x180; - /* - * The next offset is stored in the 1st word. - * Only parse up until the offset increases: - */ - while (offset > prev_offset) { - /* The block id is stored in the 2nd word */ - if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ - /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); - break; - } - prev_offset = offset; - offset = *((unsigned short *)(ptr + offset)); - } - if (!rio_table_hdr) { - printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " - "in EBDA - bailing!\n"); - return -ENODEV; - } - - ret = build_detail_arrays(); - if (ret) { - printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return -ENOMEM; - } - - specified_table_size = determine_tce_table_size(); - - for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { - struct calgary_bus_info *info = &bus_info[bus]; - unsigned short pci_device; - u32 val; - - val = read_pci_config(bus, 0, 0, 0); - pci_device = (val & 0xFFFF0000) >> 16; - - if (!is_cal_pci_dev(pci_device)) - continue; - - if (info->translation_disabled) - continue; - - if (calgary_bus_has_devices(bus, pci_device) || - translate_empty_slots) { - /* - * If it is kdump kernel, find and use tce tables - * from first kernel, else allocate tce tables here - */ - if (!is_kdump_kernel()) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - info->tce_space = tbl; - } - calgary_found = 1; - } - } - - printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n", - calgary_found ? "found" : "not found"); - - if (calgary_found) { - iommu_detected = 1; - calgary_detected = 1; - printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", - specified_table_size); - - x86_init.iommu.iommu_init = calgary_iommu_init; - } - return calgary_found; - -cleanup: - for (--bus; bus >= 0; --bus) { - struct calgary_bus_info *info = &bus_info[bus]; - - if (info->tce_space) - free_tce_table(info->tce_space); - } - return -ENOMEM; -} - -static int __init calgary_parse_options(char *p) -{ - unsigned int bridge; - unsigned long val; - size_t len; - ssize_t ret; - - while (*p) { - if (!strncmp(p, "64k", 3)) - specified_table_size = TCE_TABLE_SIZE_64K; - else if (!strncmp(p, "128k", 4)) - specified_table_size = TCE_TABLE_SIZE_128K; - else if (!strncmp(p, "256k", 4)) - specified_table_size = TCE_TABLE_SIZE_256K; - else if (!strncmp(p, "512k", 4)) - specified_table_size = TCE_TABLE_SIZE_512K; - else if (!strncmp(p, "1M", 2)) - specified_table_size = TCE_TABLE_SIZE_1M; - else if (!strncmp(p, "2M", 2)) - specified_table_size = TCE_TABLE_SIZE_2M; - else if (!strncmp(p, "4M", 2)) - specified_table_size = TCE_TABLE_SIZE_4M; - else if (!strncmp(p, "8M", 2)) - specified_table_size = TCE_TABLE_SIZE_8M; - - len = strlen("translate_empty_slots"); - if (!strncmp(p, "translate_empty_slots", len)) - translate_empty_slots = 1; - - len = strlen("disable"); - if (!strncmp(p, "disable", len)) { - p += len; - if (*p == '=') - ++p; - if (*p == '\0') - break; - ret = kstrtoul(p, 0, &val); - if (ret) - break; - - bridge = val; - if (bridge < MAX_PHB_BUS_NUM) { - printk(KERN_INFO "Calgary: disabling " - "translation for PHB %#x\n", bridge); - bus_info[bridge].translation_disabled = 1; - } - } - - p = strpbrk(p, ","); - if (!p) - break; - - p++; /* skip ',' */ - } - return 1; -} -__setup("calgary=", calgary_parse_options); - -static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) -{ - struct iommu_table *tbl; - unsigned int npages; - int i; - - tbl = pci_iommu(dev->bus); - - for (i = 0; i < 4; i++) { - struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i]; - - /* Don't give out TCEs that map MEM resources */ - if (!(r->flags & IORESOURCE_MEM)) - continue; - - /* 0-based? we reserve the whole 1st MB anyway */ - if (!r->start) - continue; - - /* cover the whole region */ - npages = resource_size(r) >> PAGE_SHIFT; - npages++; - - iommu_range_reserve(tbl, r->start, npages); - } -} - -static int __init calgary_fixup_tce_spaces(void) -{ - struct pci_dev *dev = NULL; - struct calgary_bus_info *info; - - if (no_iommu || swiotlb || !calgary_detected) - return -ENODEV; - - printk(KERN_DEBUG "Calgary: fixing up tce spaces\n"); - - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) - continue; - - if (!info->tce_space) - continue; - - calgary_fixup_one_tce_space(dev); - - } while (1); - - return 0; -} - -/* - * We need to be call after pcibios_assign_resources (fs_initcall level) - * and before device_initcall. - */ -rootfs_initcall(calgary_fixup_tce_spaces); - -IOMMU_INIT_POST(detect_calgary); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index fa4352dce491..57de2ebff7e2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -12,7 +12,6 @@ #include <asm/dma.h> #include <asm/iommu.h> #include <asm/gart.h> -#include <asm/calgary.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> @@ -112,11 +111,6 @@ static __init int iommu_setup(char *p) gart_parse_options(p); -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary = 1; -#endif /* CONFIG_CALGARY_IOMMU */ - p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e94c4354d4e..bd2a11ca5dd6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,6 +41,7 @@ #include <asm/desc.h> #include <asm/prctl.h> #include <asm/spec-ctrl.h> +#include <asm/io_bitmap.h> #include <asm/proto.h> #include "process.h" @@ -72,18 +73,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, #endif + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, }, -#ifdef CONFIG_X86_32 - /* - * Note that the .io_bitmap member must be extra-big. This is because - * the CPU will access an additional byte beyond the end of the IO - * permission bitmap. The extra byte must be all 1 bits, and must - * be within the limit. - */ - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, -#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); @@ -110,28 +102,89 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) void exit_thread(struct task_struct *tsk) { struct thread_struct *t = &tsk->thread; - unsigned long *bp = t->io_bitmap_ptr; struct fpu *fpu = &t->fpu; - if (bp) { - struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); - - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); - t->io_bitmap_max = 0; - put_cpu(); - kfree(bp); - } + if (test_thread_flag(TIF_IO_BITMAP)) + io_bitmap_exit(); free_vm86(t); fpu__drop(fpu); } +static int set_new_tls(struct task_struct *p, unsigned long tls) +{ + struct user_desc __user *utls = (struct user_desc __user *)tls; + + if (in_ia32_syscall()) + return do_set_thread_area(p, -1, utls, 0); + else + return do_set_thread_area_64(p, ARCH_SET_FS, tls); +} + +int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct *p, unsigned long tls) +{ + struct inactive_task_frame *frame; + struct fork_frame *fork_frame; + struct pt_regs *childregs; + int ret = 0; + + childregs = task_pt_regs(p); + fork_frame = container_of(childregs, struct fork_frame, regs); + frame = &fork_frame->frame; + + frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) fork_frame; + p->thread.io_bitmap = NULL; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + +#ifdef CONFIG_X86_64 + savesegment(gs, p->thread.gsindex); + p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase; + savesegment(fs, p->thread.fsindex); + p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase; + savesegment(es, p->thread.es); + savesegment(ds, p->thread.ds); +#else + p->thread.sp0 = (unsigned long) (childregs + 1); + /* + * Clear all status flags including IF and set fixed bit. 64bit + * does not have this initialization as the frame does not contain + * flags. The flags consistency (especially vs. AC) is there + * ensured via objtool, which lacks 32bit support. + */ + frame->flags = X86_EFLAGS_FIXED; +#endif + + /* Kernel thread ? */ + if (unlikely(p->flags & PF_KTHREAD)) { + memset(childregs, 0, sizeof(struct pt_regs)); + kthread_frame_init(frame, sp, arg); + return 0; + } + + frame->bx = 0; + *childregs = *current_pt_regs(); + childregs->ax = 0; + if (sp) + childregs->sp = sp; + +#ifdef CONFIG_X86_32 + task_user_gs(p) = get_user_gs(current_pt_regs()); +#endif + + /* Set a new TLS for the child thread? */ + if (clone_flags & CLONE_SETTLS) + ret = set_new_tls(p, tls); + + if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) + io_bitmap_share(p); + + return ret; +} + void flush_thread(void) { struct task_struct *tsk = current; @@ -269,31 +322,96 @@ void arch_setup_new_exec(void) } } -static inline void switch_to_bitmap(struct thread_struct *prev, - struct thread_struct *next, - unsigned long tifp, unsigned long tifn) +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) { - struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; +} - if (tifn & _TIF_IO_BITMAP) { - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); +static inline void switch_to_bitmap(unsigned long tifp) +{ + /* + * Invalidate I/O bitmap if the previous task used it. This prevents + * any possible leakage of an active I/O bitmap. + * + * If the next task has an I/O bitmap it will handle it on exit to + * user mode. + */ + if (tifp & _TIF_IO_BITMAP) + tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw)); +} + +static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) +{ + /* + * Copy at least the byte range of the incoming tasks bitmap which + * covers the permitted I/O ports. + * + * If the previous task which used an I/O bitmap had more bits + * permitted, then the copy needs to cover those as well so they + * get turned off. + */ + memcpy(tss->io_bitmap.bitmap, iobm->bitmap, + max(tss->io_bitmap.prev_max, iobm->max)); + + /* + * Store the new max and the sequence number of this bitmap + * and a pointer to the bitmap itself. + */ + tss->io_bitmap.prev_max = iobm->max; + tss->io_bitmap.prev_sequence = iobm->sequence; +} + +/** + * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode + */ +void tss_update_io_bitmap(void) +{ + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + u16 *base = &tss->x86_tss.io_bitmap_base; + + if (test_thread_flag(TIF_IO_BITMAP)) { + struct thread_struct *t = ¤t->thread; + + if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) { + *base = IO_BITMAP_OFFSET_VALID_ALL; + } else { + struct io_bitmap *iobm = t->io_bitmap; + /* + * Only copy bitmap data when the sequence number + * differs. The update time is accounted to the + * incoming task. + */ + if (tss->io_bitmap.prev_sequence != iobm->sequence) + tss_copy_io_bitmap(tss, iobm); + + /* Enable the bitmap */ + *base = IO_BITMAP_OFFSET_VALID_MAP; + } /* - * Make sure that the TSS limit is correct for the CPU - * to notice the IO bitmap. + * Make sure that the TSS limit is covering the io bitmap. + * It might have been cut down by a VMEXIT to 0x67 which + * would cause a subsequent I/O access from user space to + * trigger a #GP because tbe bitmap is outside the TSS + * limit. */ refresh_tss_limit(); - } else if (tifp & _TIF_IO_BITMAP) { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); + } else { + tss_invalidate_io_bitmap(tss); } } +#else /* CONFIG_X86_IOPL_IOPERM */ +static inline void switch_to_bitmap(unsigned long tifp) { } +#endif #ifdef CONFIG_SMP @@ -505,7 +623,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(prev, next, tifp, tifn); + + switch_to_bitmap(tifp); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b8ceec4974fe..323499f48858 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -112,74 +112,6 @@ void release_thread(struct task_struct *dead_task) release_vm86_irqs(dead_task); } -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct *p, unsigned long tls) -{ - struct pt_regs *childregs = task_pt_regs(p); - struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); - struct inactive_task_frame *frame = &fork_frame->frame; - struct task_struct *tsk; - int err; - - /* - * For a new task use the RESET flags value since there is no before. - * All the status flags are zero; DF and all the system flags must also - * be 0, specifically IF must be 0 because we context switch to the new - * task with interrupts disabled. - */ - frame->flags = X86_EFLAGS_FIXED; - frame->bp = 0; - frame->ret_addr = (unsigned long) ret_from_fork; - p->thread.sp = (unsigned long) fork_frame; - p->thread.sp0 = (unsigned long) (childregs+1); - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(p->flags & PF_KTHREAD)) { - /* kernel thread */ - memset(childregs, 0, sizeof(struct pt_regs)); - frame->bx = sp; /* function */ - frame->di = arg; - p->thread.io_bitmap_ptr = NULL; - return 0; - } - frame->bx = 0; - *childregs = *current_pt_regs(); - childregs->ax = 0; - if (sp) - childregs->sp = sp; - - task_user_gs(p) = get_user_gs(current_pt_regs()); - - p->thread.io_bitmap_ptr = NULL; - tsk = current; - err = -ENOMEM; - - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - err = 0; - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)tls, 0); - - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - return err; -} - void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { @@ -255,15 +187,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_TLS(next, cpu); - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - switch_to_extra(prev_p, next_p); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index af64519b2695..506d66830d4d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -371,81 +371,6 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) task->thread.gsbase = gsbase; } -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct *p, unsigned long tls) -{ - int err; - struct pt_regs *childregs; - struct fork_frame *fork_frame; - struct inactive_task_frame *frame; - struct task_struct *me = current; - - childregs = task_pt_regs(p); - fork_frame = container_of(childregs, struct fork_frame, regs); - frame = &fork_frame->frame; - - frame->bp = 0; - frame->ret_addr = (unsigned long) ret_from_fork; - p->thread.sp = (unsigned long) fork_frame; - p->thread.io_bitmap_ptr = NULL; - - savesegment(gs, p->thread.gsindex); - p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; - savesegment(fs, p->thread.fsindex); - p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; - savesegment(es, p->thread.es); - savesegment(ds, p->thread.ds); - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(p->flags & PF_KTHREAD)) { - /* kernel thread */ - memset(childregs, 0, sizeof(struct pt_regs)); - frame->bx = sp; /* function */ - frame->r12 = arg; - return 0; - } - frame->bx = 0; - *childregs = *current_pt_regs(); - - childregs->ax = 0; - if (sp) - childregs->sp = sp; - - err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) { -#ifdef CONFIG_IA32_EMULATION - if (in_ia32_syscall()) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)tls, 0); - else -#endif - err = do_arch_prctl_64(p, ARCH_SET_FS, tls); - if (err) - goto out; - } - err = 0; -out: - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - - return err; -} - static void start_thread_common(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp, @@ -572,17 +497,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) switch_to_extra(prev_p, next_p); -#ifdef CONFIG_XEN_PV - /* - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and - * current_pt_regs()->flags may not match the current task's - * intended IOPL. We need to switch it manually. - */ - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && - prev->iopl != next->iopl)) - xen_set_iopl_mask(next->iopl); -#endif - if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3c5bbe8e4120..066e5b01a7e0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -42,6 +42,7 @@ #include <asm/traps.h> #include <asm/syscall.h> #include <asm/fsgsbase.h> +#include <asm/io_bitmap.h> #include "tls.h" @@ -697,7 +698,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, static int ioperm_active(struct task_struct *target, const struct user_regset *regset) { - return target->thread.io_bitmap_max / regset->size; + struct io_bitmap *iobm = target->thread.io_bitmap; + + return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0; } static int ioperm_get(struct task_struct *target, @@ -705,12 +708,13 @@ static int ioperm_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - if (!target->thread.io_bitmap_ptr) + struct io_bitmap *iobm = target->thread.io_bitmap; + + if (!iobm) return -ENXIO; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.io_bitmap_ptr, - 0, IO_BITMAP_BYTES); + iobm->bitmap, 0, IO_BITMAP_BYTES); } /* diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index ee26df08002e..94b33885f8d2 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -35,8 +35,7 @@ #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) .text - .globl relocate_kernel -relocate_kernel: +SYM_CODE_START_NOALIGN(relocate_kernel) /* Save the CPU context, used for jumping back */ pushl %ebx @@ -93,8 +92,9 @@ relocate_kernel: addl $(identity_mapped - relocate_kernel), %eax pushl %eax ret +SYM_CODE_END(relocate_kernel) -identity_mapped: +SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* set return address to 0 if not preserving context */ pushl $0 /* store the start address on the stack */ @@ -191,8 +191,9 @@ identity_mapped: addl $(virtual_mapped - relocate_kernel), %eax pushl %eax ret +SYM_CODE_END(identity_mapped) -virtual_mapped: +SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) movl CR4(%edi), %eax movl %eax, %cr4 movl CR3(%edi), %eax @@ -208,9 +209,10 @@ virtual_mapped: popl %esi popl %ebx ret +SYM_CODE_END(virtual_mapped) /* Do the copies */ -swap_pages: +SYM_CODE_START_LOCAL_NOALIGN(swap_pages) movl 8(%esp), %edx movl 4(%esp), %ecx pushl %ebp @@ -270,6 +272,7 @@ swap_pages: popl %ebx popl %ebp ret +SYM_CODE_END(swap_pages) .globl kexec_control_code_size .set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c51ccff5cd01..ef3ba99068d3 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -38,8 +38,7 @@ .text .align PAGE_SIZE .code64 - .globl relocate_kernel -relocate_kernel: +SYM_CODE_START_NOALIGN(relocate_kernel) /* * %rdi indirection_page * %rsi page_list @@ -103,8 +102,9 @@ relocate_kernel: addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 ret +SYM_CODE_END(relocate_kernel) -identity_mapped: +SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* set return address to 0 if not preserving context */ pushq $0 /* store the start address on the stack */ @@ -209,8 +209,9 @@ identity_mapped: movq $virtual_mapped, %rax pushq %rax ret +SYM_CODE_END(identity_mapped) -virtual_mapped: +SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) movq RSP(%r8), %rsp movq CR4(%r8), %rax movq %rax, %cr4 @@ -228,9 +229,10 @@ virtual_mapped: popq %rbp popq %rbx ret +SYM_CODE_END(virtual_mapped) /* Do the copies */ -swap_pages: +SYM_CODE_START_LOCAL_NOALIGN(swap_pages) movq %rdi, %rcx /* Put the page_list in %rcx */ xorl %edi, %edi xorl %esi, %esi @@ -283,6 +285,7 @@ swap_pages: jmp 0b 3: ret +SYM_CODE_END(swap_pages) .globl kexec_control_code_size .set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 77ea96b794bd..cedfe2077a69 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -143,6 +143,13 @@ struct boot_params boot_params; /* * Machine setup.. */ +static struct resource rodata_resource = { + .name = "Kernel rodata", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + static struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -438,6 +445,12 @@ static void __init memblock_x86_reserve_range_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); memblock_reserve(pa_data, sizeof(*data) + data->len); + + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) + memblock_reserve(((struct setup_indirect *)data->data)->addr, + ((struct setup_indirect *)data->data)->len); + pa_data = data->next; early_memunmap(data, sizeof(*data)); } @@ -459,7 +472,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) * due to mapping restrictions. * * On 64bit, kdump kernel need be restricted to be under 64TB, which is - * the upper limit of system RAM in 4-level paing mode. Since the kdump + * the upper limit of system RAM in 4-level paging mode. Since the kdump * jumping could be from 5-level to 4-level, the jumping will fail if * kernel is put above 64TB, and there's no way to detect the paging mode * of the kernel which will be loaded for dumping during the 1st kernel @@ -743,8 +756,8 @@ static void __init trim_bios_range(void) e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); /* - * special case: Some BIOSen report the PC BIOS - * area (640->1Mb) as ram even though it is not. + * special case: Some BIOSes report the PC BIOS + * area (640Kb -> 1Mb) as RAM even though it is not. * take them out. */ e820__range_remove(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_TYPE_RAM, 1); @@ -951,7 +964,9 @@ void __init setup_arch(char **cmdline_p) code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; - data_resource.start = __pa_symbol(_etext); + rodata_resource.start = __pa_symbol(__start_rodata); + rodata_resource.end = __pa_symbol(__end_rodata)-1; + data_resource.start = __pa_symbol(_sdata); data_resource.end = __pa_symbol(_edata)-1; bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; @@ -1040,6 +1055,7 @@ void __init setup_arch(char **cmdline_p) /* after parse_early_param, so could debug it */ insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &rodata_resource); insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); @@ -1122,17 +1138,15 @@ void __init setup_arch(char **cmdline_p) reserve_bios_regions(); - if (efi_enabled(EFI_MEMMAP)) { - efi_fake_memmap(); - efi_find_mirror(); - efi_esrt_init(); + efi_fake_memmap(); + efi_find_mirror(); + efi_esrt_init(); - /* - * The EFI specification says that boot service code won't be - * called after ExitBootServices(). This is, in fact, a lie. - */ - efi_reserve_boot_services(); - } + /* + * The EFI specification says that boot service code won't be + * called after ExitBootServices(). This is, in fact, a lie. + */ + efi_reserve_boot_services(); /* preallocate 4k for mptable mpc */ e820__memblock_alloc_reserved_mpc_new(); diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c deleted file mode 100644 index 6384be751eff..000000000000 --- a/arch/x86/kernel/tce_64.c +++ /dev/null @@ -1,177 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * This file manages the translation entries for the IBM Calgary IOMMU. - * - * Derived from arch/powerpc/platforms/pseries/iommu.c - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/memblock.h> -#include <asm/tce.h> -#include <asm/calgary.h> -#include <asm/proto.h> -#include <asm/cacheflush.h> - -/* flush a tce at 'tceaddr' to main memory */ -static inline void flush_tce(void* tceaddr) -{ - /* a single tce can't cross a cache line */ - if (boot_cpu_has(X86_FEATURE_CLFLUSH)) - clflush(tceaddr); - else - wbinvd(); -} - -void tce_build(struct iommu_table *tbl, unsigned long index, - unsigned int npages, unsigned long uaddr, int direction) -{ - u64* tp; - u64 t; - u64 rpn; - - t = (1 << TCE_READ_SHIFT); - if (direction != DMA_TO_DEVICE) - t |= (1 << TCE_WRITE_SHIFT); - - tp = ((u64*)tbl->it_base) + index; - - while (npages--) { - rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT; - t &= ~TCE_RPN_MASK; - t |= (rpn << TCE_RPN_SHIFT); - - *tp = cpu_to_be64(t); - flush_tce(tp); - - uaddr += PAGE_SIZE; - tp++; - } -} - -void tce_free(struct iommu_table *tbl, long index, unsigned int npages) -{ - u64* tp; - - tp = ((u64*)tbl->it_base) + index; - - while (npages--) { - *tp = cpu_to_be64(0); - flush_tce(tp); - tp++; - } -} - -static inline unsigned int table_size_to_number_of_entries(unsigned char size) -{ - /* - * size is the order of the table, 0-7 - * smallest table is 8K entries, so shift result by 13 to - * multiply by 8K - */ - return (1 << size) << 13; -} - -static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) -{ - unsigned int bitmapsz; - unsigned long bmppages; - int ret; - - tbl->it_busno = dev->bus->number; - - /* set the tce table size - measured in entries */ - tbl->it_size = table_size_to_number_of_entries(specified_table_size); - - /* - * number of bytes needed for the bitmap size in number of - * entries; we need one bit per entry - */ - bitmapsz = tbl->it_size / BITS_PER_BYTE; - bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz)); - if (!bmppages) { - printk(KERN_ERR "Calgary: cannot allocate bitmap\n"); - ret = -ENOMEM; - goto done; - } - - tbl->it_map = (unsigned long*)bmppages; - - memset(tbl->it_map, 0, bitmapsz); - - tbl->it_hint = 0; - - spin_lock_init(&tbl->it_lock); - - return 0; - -done: - return ret; -} - -int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar) -{ - struct iommu_table *tbl; - int ret; - - if (pci_iommu(dev->bus)) { - printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n", - dev, pci_iommu(dev->bus)); - BUG(); - } - - tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL); - if (!tbl) { - printk(KERN_ERR "Calgary: error allocating iommu_table\n"); - ret = -ENOMEM; - goto done; - } - - ret = tce_table_setparms(dev, tbl); - if (ret) - goto free_tbl; - - tbl->bbar = bbar; - - set_pci_iommu(dev->bus, tbl); - - return 0; - -free_tbl: - kfree(tbl); -done: - return ret; -} - -void * __init alloc_tce_table(void) -{ - unsigned int size; - - size = table_size_to_number_of_entries(specified_table_size); - size *= TCE_ENTRY_SIZE; - - return memblock_alloc_low(size, size); -} - -void __init free_tce_table(void *tbl) -{ - unsigned int size; - - if (!tbl) - return; - - size = table_size_to_number_of_entries(specified_table_size); - size *= TCE_ENTRY_SIZE; - - memblock_free(__pa(tbl), size); -} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4bb0f8447112..c90312146da0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -37,11 +37,6 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/io.h> - -#if defined(CONFIG_EDAC) -#include <linux/edac.h> -#endif - #include <asm/stacktrace.h> #include <asm/processor.h> #include <asm/debugreg.h> diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index b4a304893189..4d732a444711 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -1,6 +1,6 @@ /* - * umip.c Emulation for instruction protected by the Intel User-Mode - * Instruction Prevention feature + * umip.c Emulation for instruction protected by the User-Mode Instruction + * Prevention feature * * Copyright (c) 2017, Intel Corporation. * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> @@ -18,10 +18,10 @@ /** DOC: Emulation for User-Mode Instruction Prevention (UMIP) * - * The feature User-Mode Instruction Prevention present in recent Intel - * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR) - * from being executed with CPL > 0. Otherwise, a general protection fault is - * issued. + * User-Mode Instruction Prevention is a security feature present in recent + * x86 processors that, when enabled, prevents a group of instructions (SGDT, + * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general + * protection fault if the instruction is executed with CPL > 0. * * Rather than relaying to the user space the general protection fault caused by * the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 8cd745ef8c7b..15e5aad8ac2c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -842,8 +842,8 @@ static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) /** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @auprobe: the probepoint information. * @mm: the probed address space. - * @arch_uprobe: the probepoint information. * @addr: virtual address at which to install the probepoint * Return 0 on success or a -ve number on error. */ diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index a024c4f7ba56..641f0fe1e5b4 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -31,7 +31,7 @@ #include <asm/cpufeatures.h> #include <asm/msr-index.h> -ENTRY(verify_cpu) +SYM_FUNC_START_LOCAL(verify_cpu) pushf # Save caller passed flags push $0 # Kill any dangerous flags popf @@ -137,4 +137,4 @@ ENTRY(verify_cpu) popf # Restore caller passed flags xorl %eax, %eax ret -ENDPROC(verify_cpu) +SYM_FUNC_END(verify_cpu) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e2feacf921a0..3a1a819da137 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -21,6 +21,9 @@ #define LOAD_OFFSET __START_KERNEL_map #endif +#define EMITS_PT_NOTE +#define RO_EXCEPTION_TABLE_ALIGN 16 + #include <asm-generic/vmlinux.lds.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> @@ -141,17 +144,12 @@ SECTIONS *(.text.__x86.indirect_thunk) __indirect_thunk_end = .; #endif + } :text =0xcccc - /* End of text section */ - _etext = .; - } :text = 0x9090 - - NOTES :text :note - - EXCEPTION_TABLE(16) :text = 0x9090 - - /* .text should occupy whole number of pages */ + /* End of text section, which should occupy whole number of pages */ + _etext = .; . = ALIGN(PAGE_SIZE); + X86_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) X86_ALIGN_RODATA_END diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 18a799c8fa28..ce89430a7f80 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -31,6 +31,28 @@ static int __init iommu_init_noop(void) { return 0; } static void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } +static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } +static __init void get_rtc_noop(struct timespec64 *now) { } + +static __initconst const struct of_device_id of_cmos_match[] = { + { .compatible = "motorola,mc146818" }, + {} +}; + +/* + * Allow devicetree configured systems to disable the RTC by setting the + * corresponding DT node's status property to disabled. Code is optimized + * out for CONFIG_OF=n builds. + */ +static __init void x86_wallclock_init(void) +{ + struct device_node *node = of_find_matching_node(NULL, of_cmos_match); + + if (node && !of_device_is_available(node)) { + x86_platform.get_wallclock = get_rtc_noop; + x86_platform.set_wallclock = set_rtc_noop; + } +} /* * The platform setup functions are preset with the default functions @@ -73,7 +95,7 @@ struct x86_init_ops x86_init __initdata = { .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, .timer_init = hpet_time_init, - .wallclock_init = x86_init_noop, + .wallclock_init = x86_wallclock_init, }, .iommu = { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index d5e6d5b3f06f..bcc6a73d6628 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -508,8 +508,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) *filter = tmp; mutex_lock(&kvm->lock); - rcu_swap_protected(kvm->arch.pmu_event_filter, filter, - mutex_is_locked(&kvm->lock)); + filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, + mutex_is_locked(&kvm->lock)); mutex_unlock(&kvm->lock); synchronize_srcu_expedited(&kvm->srcu); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 751a384c2eb0..81ada2ce99e7 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -43,7 +43,7 @@ * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump * to vmx_vmexit. */ -ENTRY(vmx_vmenter) +SYM_FUNC_START(vmx_vmenter) /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ je 2f @@ -65,7 +65,7 @@ ENTRY(vmx_vmenter) _ASM_EXTABLE(1b, 5b) _ASM_EXTABLE(2b, 5b) -ENDPROC(vmx_vmenter) +SYM_FUNC_END(vmx_vmenter) /** * vmx_vmexit - Handle a VMX VM-Exit @@ -77,7 +77,7 @@ ENDPROC(vmx_vmenter) * here after hardware loads the host's state, i.e. this is the destination * referred to by VMCS.HOST_RIP. */ -ENTRY(vmx_vmexit) +SYM_FUNC_START(vmx_vmexit) #ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE /* Preserve guest's RAX, it's used to stuff the RSB. */ @@ -90,7 +90,7 @@ ENTRY(vmx_vmexit) .Lvmexit_skip_rsb: #endif ret -ENDPROC(vmx_vmexit) +SYM_FUNC_END(vmx_vmexit) /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode @@ -101,7 +101,7 @@ ENDPROC(vmx_vmexit) * Returns: * 0 on VM-Exit, 1 on VM-Fail */ -ENTRY(__vmx_vcpu_run) +SYM_FUNC_START(__vmx_vcpu_run) push %_ASM_BP mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 @@ -233,4 +233,4 @@ ENTRY(__vmx_vcpu_run) /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ 2: mov $1, %eax jmp 1b -ENDPROC(__vmx_vcpu_run) +SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d175429c91b0..1b9ab4166397 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1368,14 +1368,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ - /* - * VM exits change the host TR limit to 0x67 after a VM - * exit. This is okay, since 0x67 covers everything except - * the IO bitmap and have have code to handle the IO bitmap - * being lost after a VM exit. - */ - BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index e0788bade5ab..3b6544111ac9 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -20,10 +20,10 @@ #define BEGIN(op) \ .macro endp; \ -ENDPROC(atomic64_##op##_386); \ +SYM_FUNC_END(atomic64_##op##_386); \ .purgem endp; \ .endm; \ -ENTRY(atomic64_##op##_386); \ +SYM_FUNC_START(atomic64_##op##_386); \ LOCK v; #define ENDP endp diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 843d978ee341..1c5c81c16b06 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -16,12 +16,12 @@ cmpxchg8b (\reg) .endm -ENTRY(atomic64_read_cx8) +SYM_FUNC_START(atomic64_read_cx8) read64 %ecx ret -ENDPROC(atomic64_read_cx8) +SYM_FUNC_END(atomic64_read_cx8) -ENTRY(atomic64_set_cx8) +SYM_FUNC_START(atomic64_set_cx8) 1: /* we don't need LOCK_PREFIX since aligned 64-bit writes * are atomic on 586 and newer */ @@ -29,19 +29,19 @@ ENTRY(atomic64_set_cx8) jne 1b ret -ENDPROC(atomic64_set_cx8) +SYM_FUNC_END(atomic64_set_cx8) -ENTRY(atomic64_xchg_cx8) +SYM_FUNC_START(atomic64_xchg_cx8) 1: LOCK_PREFIX cmpxchg8b (%esi) jne 1b ret -ENDPROC(atomic64_xchg_cx8) +SYM_FUNC_END(atomic64_xchg_cx8) .macro addsub_return func ins insc -ENTRY(atomic64_\func\()_return_cx8) +SYM_FUNC_START(atomic64_\func\()_return_cx8) pushl %ebp pushl %ebx pushl %esi @@ -69,14 +69,14 @@ ENTRY(atomic64_\func\()_return_cx8) popl %ebx popl %ebp ret -ENDPROC(atomic64_\func\()_return_cx8) +SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm addsub_return add add adc addsub_return sub sub sbb .macro incdec_return func ins insc -ENTRY(atomic64_\func\()_return_cx8) +SYM_FUNC_START(atomic64_\func\()_return_cx8) pushl %ebx read64 %esi @@ -94,13 +94,13 @@ ENTRY(atomic64_\func\()_return_cx8) movl %ecx, %edx popl %ebx ret -ENDPROC(atomic64_\func\()_return_cx8) +SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm incdec_return inc add adc incdec_return dec sub sbb -ENTRY(atomic64_dec_if_positive_cx8) +SYM_FUNC_START(atomic64_dec_if_positive_cx8) pushl %ebx read64 %esi @@ -119,9 +119,9 @@ ENTRY(atomic64_dec_if_positive_cx8) movl %ecx, %edx popl %ebx ret -ENDPROC(atomic64_dec_if_positive_cx8) +SYM_FUNC_END(atomic64_dec_if_positive_cx8) -ENTRY(atomic64_add_unless_cx8) +SYM_FUNC_START(atomic64_add_unless_cx8) pushl %ebp pushl %ebx /* these just push these two parameters on the stack */ @@ -155,9 +155,9 @@ ENTRY(atomic64_add_unless_cx8) jne 2b xorl %eax, %eax jmp 3b -ENDPROC(atomic64_add_unless_cx8) +SYM_FUNC_END(atomic64_add_unless_cx8) -ENTRY(atomic64_inc_not_zero_cx8) +SYM_FUNC_START(atomic64_inc_not_zero_cx8) pushl %ebx read64 %esi @@ -177,4 +177,4 @@ ENTRY(atomic64_inc_not_zero_cx8) 3: popl %ebx ret -ENDPROC(atomic64_inc_not_zero_cx8) +SYM_FUNC_END(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4df90c9ea383..4742e8fa7ee7 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -46,7 +46,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ -ENTRY(csum_partial) +SYM_FUNC_START(csum_partial) pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum @@ -128,13 +128,13 @@ ENTRY(csum_partial) popl %ebx popl %esi ret -ENDPROC(csum_partial) +SYM_FUNC_END(csum_partial) #else /* Version for PentiumII/PPro */ -ENTRY(csum_partial) +SYM_FUNC_START(csum_partial) pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum @@ -246,7 +246,7 @@ ENTRY(csum_partial) popl %ebx popl %esi ret -ENDPROC(csum_partial) +SYM_FUNC_END(csum_partial) #endif EXPORT_SYMBOL(csum_partial) @@ -280,7 +280,7 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define ARGBASE 16 #define FP 12 -ENTRY(csum_partial_copy_generic) +SYM_FUNC_START(csum_partial_copy_generic) subl $4,%esp pushl %edi pushl %esi @@ -398,7 +398,7 @@ DST( movb %cl, (%edi) ) popl %edi popl %ecx # equivalent to addl $4,%esp ret -ENDPROC(csum_partial_copy_generic) +SYM_FUNC_END(csum_partial_copy_generic) #else @@ -416,7 +416,7 @@ ENDPROC(csum_partial_copy_generic) #define ARGBASE 12 -ENTRY(csum_partial_copy_generic) +SYM_FUNC_START(csum_partial_copy_generic) pushl %ebx pushl %edi pushl %esi @@ -483,7 +483,7 @@ DST( movb %dl, (%edi) ) popl %edi popl %ebx ret -ENDPROC(csum_partial_copy_generic) +SYM_FUNC_END(csum_partial_copy_generic) #undef ROUND #undef ROUND1 diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 75a5a4515fa7..c4c7dd115953 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -13,15 +13,15 @@ * Zero a page. * %rdi - page */ -ENTRY(clear_page_rep) +SYM_FUNC_START(clear_page_rep) movl $4096/8,%ecx xorl %eax,%eax rep stosq ret -ENDPROC(clear_page_rep) +SYM_FUNC_END(clear_page_rep) EXPORT_SYMBOL_GPL(clear_page_rep) -ENTRY(clear_page_orig) +SYM_FUNC_START(clear_page_orig) xorl %eax,%eax movl $4096/64,%ecx .p2align 4 @@ -40,13 +40,13 @@ ENTRY(clear_page_orig) jnz .Lloop nop ret -ENDPROC(clear_page_orig) +SYM_FUNC_END(clear_page_orig) EXPORT_SYMBOL_GPL(clear_page_orig) -ENTRY(clear_page_erms) +SYM_FUNC_START(clear_page_erms) movl $4096,%ecx xorl %eax,%eax rep stosb ret -ENDPROC(clear_page_erms) +SYM_FUNC_END(clear_page_erms) EXPORT_SYMBOL_GPL(clear_page_erms) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index d63185698a23..3542502faa3b 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -13,7 +13,7 @@ * %rcx : high 64 bits of new value * %al : Operation successful */ -ENTRY(this_cpu_cmpxchg16b_emu) +SYM_FUNC_START(this_cpu_cmpxchg16b_emu) # # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not @@ -44,4 +44,4 @@ ENTRY(this_cpu_cmpxchg16b_emu) xor %al,%al ret -ENDPROC(this_cpu_cmpxchg16b_emu) +SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 691d80e97488..ca01ed6029f4 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -13,7 +13,7 @@ * %ebx : low 32 bits of new value * %ecx : high 32 bits of new value */ -ENTRY(cmpxchg8b_emu) +SYM_FUNC_START(cmpxchg8b_emu) # # Emulate 'cmpxchg8b (%esi)' on UP except we don't @@ -42,5 +42,5 @@ ENTRY(cmpxchg8b_emu) popfl ret -ENDPROC(cmpxchg8b_emu) +SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index fd2d09afa097..2402d4c489d2 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -13,15 +13,15 @@ * prefetch distance based on SMP/UP. */ ALIGN -ENTRY(copy_page) +SYM_FUNC_START(copy_page) ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq ret -ENDPROC(copy_page) +SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) -ENTRY(copy_page_regs) +SYM_FUNC_START_LOCAL(copy_page_regs) subq $2*8, %rsp movq %rbx, (%rsp) movq %r12, 1*8(%rsp) @@ -86,4 +86,4 @@ ENTRY(copy_page_regs) movq 1*8(%rsp), %r12 addq $2*8, %rsp ret -ENDPROC(copy_page_regs) +SYM_FUNC_END(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 86976b55ae74..816f128a6d52 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -53,7 +53,7 @@ * Output: * eax uncopied bytes or 0 if successful. */ -ENTRY(copy_user_generic_unrolled) +SYM_FUNC_START(copy_user_generic_unrolled) ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ @@ -136,7 +136,7 @@ ENTRY(copy_user_generic_unrolled) _ASM_EXTABLE_UA(19b, 40b) _ASM_EXTABLE_UA(21b, 50b) _ASM_EXTABLE_UA(22b, 50b) -ENDPROC(copy_user_generic_unrolled) +SYM_FUNC_END(copy_user_generic_unrolled) EXPORT_SYMBOL(copy_user_generic_unrolled) /* Some CPUs run faster using the string copy instructions. @@ -157,7 +157,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled) * Output: * eax uncopied bytes or 0 if successful. */ -ENTRY(copy_user_generic_string) +SYM_FUNC_START(copy_user_generic_string) ASM_STAC cmpl $8,%edx jb 2f /* less than 8 bytes, go to byte copy loop */ @@ -182,7 +182,7 @@ ENTRY(copy_user_generic_string) _ASM_EXTABLE_UA(1b, 11b) _ASM_EXTABLE_UA(3b, 12b) -ENDPROC(copy_user_generic_string) +SYM_FUNC_END(copy_user_generic_string) EXPORT_SYMBOL(copy_user_generic_string) /* @@ -197,7 +197,7 @@ EXPORT_SYMBOL(copy_user_generic_string) * Output: * eax uncopied bytes or 0 if successful. */ -ENTRY(copy_user_enhanced_fast_string) +SYM_FUNC_START(copy_user_enhanced_fast_string) ASM_STAC cmpl $64,%edx jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ @@ -214,7 +214,7 @@ ENTRY(copy_user_enhanced_fast_string) .previous _ASM_EXTABLE_UA(1b, 12b) -ENDPROC(copy_user_enhanced_fast_string) +SYM_FUNC_END(copy_user_enhanced_fast_string) EXPORT_SYMBOL(copy_user_enhanced_fast_string) /* @@ -230,8 +230,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * Output: * eax uncopied bytes or 0 if successful. */ -ALIGN; -.Lcopy_user_handle_tail: +SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) movl %edx,%ecx 1: rep movsb 2: mov %ecx,%eax @@ -239,7 +238,7 @@ ALIGN; ret _ASM_EXTABLE_UA(1b, 2b) -END(.Lcopy_user_handle_tail) +SYM_CODE_END(.Lcopy_user_handle_tail) /* * copy_user_nocache - Uncached memory copy with exception handling @@ -250,7 +249,7 @@ END(.Lcopy_user_handle_tail) * - Require 8-byte alignment when size is 8 bytes or larger. * - Require 4-byte alignment when size is 4 bytes. */ -ENTRY(__copy_user_nocache) +SYM_FUNC_START(__copy_user_nocache) ASM_STAC /* If size is less than 8 bytes, go to 4-byte copy */ @@ -389,5 +388,5 @@ ENTRY(__copy_user_nocache) _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy) _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy) _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy) -ENDPROC(__copy_user_nocache) +SYM_FUNC_END(__copy_user_nocache) EXPORT_SYMBOL(__copy_user_nocache) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index a4a379e79259..3394a8ff7fd0 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -49,7 +49,7 @@ .endm -ENTRY(csum_partial_copy_generic) +SYM_FUNC_START(csum_partial_copy_generic) cmpl $3*64, %edx jle .Lignore @@ -225,4 +225,4 @@ ENTRY(csum_partial_copy_generic) jz .Lende movl $-EFAULT, (%rax) jmp .Lende -ENDPROC(csum_partial_copy_generic) +SYM_FUNC_END(csum_partial_copy_generic) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 9578eb88fc87..c8a85b512796 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -36,7 +36,7 @@ #include <asm/export.h> .text -ENTRY(__get_user_1) +SYM_FUNC_START(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user @@ -47,10 +47,10 @@ ENTRY(__get_user_1) xor %eax,%eax ASM_CLAC ret -ENDPROC(__get_user_1) +SYM_FUNC_END(__get_user_1) EXPORT_SYMBOL(__get_user_1) -ENTRY(__get_user_2) +SYM_FUNC_START(__get_user_2) add $1,%_ASM_AX jc bad_get_user mov PER_CPU_VAR(current_task), %_ASM_DX @@ -63,10 +63,10 @@ ENTRY(__get_user_2) xor %eax,%eax ASM_CLAC ret -ENDPROC(__get_user_2) +SYM_FUNC_END(__get_user_2) EXPORT_SYMBOL(__get_user_2) -ENTRY(__get_user_4) +SYM_FUNC_START(__get_user_4) add $3,%_ASM_AX jc bad_get_user mov PER_CPU_VAR(current_task), %_ASM_DX @@ -79,10 +79,10 @@ ENTRY(__get_user_4) xor %eax,%eax ASM_CLAC ret -ENDPROC(__get_user_4) +SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) -ENTRY(__get_user_8) +SYM_FUNC_START(__get_user_8) #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user @@ -111,25 +111,27 @@ ENTRY(__get_user_8) ASM_CLAC ret #endif -ENDPROC(__get_user_8) +SYM_FUNC_END(__get_user_8) EXPORT_SYMBOL(__get_user_8) -.Lbad_get_user_clac: +SYM_CODE_START_LOCAL(.Lbad_get_user_clac) ASM_CLAC bad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX ret +SYM_CODE_END(.Lbad_get_user_clac) #ifdef CONFIG_X86_32 -.Lbad_get_user_8_clac: +SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac) ASM_CLAC bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX ret +SYM_CODE_END(.Lbad_get_user_8_clac) #endif _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac) diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index a14f9939c365..dbf8cc97b7f5 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -8,7 +8,7 @@ * unsigned int __sw_hweight32(unsigned int w) * %rdi: w */ -ENTRY(__sw_hweight32) +SYM_FUNC_START(__sw_hweight32) #ifdef CONFIG_X86_64 movl %edi, %eax # w @@ -33,10 +33,10 @@ ENTRY(__sw_hweight32) shrl $24, %eax # w = w_tmp >> 24 __ASM_SIZE(pop,) %__ASM_REG(dx) ret -ENDPROC(__sw_hweight32) +SYM_FUNC_END(__sw_hweight32) EXPORT_SYMBOL(__sw_hweight32) -ENTRY(__sw_hweight64) +SYM_FUNC_START(__sw_hweight64) #ifdef CONFIG_X86_64 pushq %rdi pushq %rdx @@ -79,5 +79,5 @@ ENTRY(__sw_hweight64) popl %ecx ret #endif -ENDPROC(__sw_hweight64) +SYM_FUNC_END(__sw_hweight64) EXPORT_SYMBOL(__sw_hweight64) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index a9bdf0805be0..cb5a1964506b 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -8,8 +8,8 @@ /* * override generic version in lib/iomap_copy.c */ -ENTRY(__iowrite32_copy) +SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx rep movsd ret -ENDPROC(__iowrite32_copy) +SYM_FUNC_END(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 92748660ba51..56b243b14c3a 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -28,8 +28,8 @@ * Output: * rax original destination */ -ENTRY(__memcpy) -ENTRY(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) +SYM_FUNC_START_LOCAL(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -41,8 +41,8 @@ ENTRY(memcpy) movl %edx, %ecx rep movsb ret -ENDPROC(memcpy) -ENDPROC(__memcpy) +SYM_FUNC_END(memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) EXPORT_SYMBOL(__memcpy) @@ -50,14 +50,14 @@ EXPORT_SYMBOL(__memcpy) * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. */ -ENTRY(memcpy_erms) +SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb ret -ENDPROC(memcpy_erms) +SYM_FUNC_END(memcpy_erms) -ENTRY(memcpy_orig) +SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax cmpq $0x20, %rdx @@ -182,7 +182,7 @@ ENTRY(memcpy_orig) .Lend: retq -ENDPROC(memcpy_orig) +SYM_FUNC_END(memcpy_orig) #ifndef CONFIG_UML @@ -193,7 +193,7 @@ MCSAFE_TEST_CTL * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(__memcpy_mcsafe) +SYM_FUNC_START(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe) xorl %eax, %eax .L_done: ret -ENDPROC(__memcpy_mcsafe) +SYM_FUNC_END(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index bbec69d8223b..337830d7a59c 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -26,8 +26,8 @@ */ .weak memmove -ENTRY(memmove) -ENTRY(__memmove) +SYM_FUNC_START_ALIAS(memmove) +SYM_FUNC_START(__memmove) /* Handle more 32 bytes in loop */ mov %rdi, %rax @@ -207,7 +207,7 @@ ENTRY(__memmove) movb %r11b, (%rdi) 13: retq -ENDPROC(__memmove) -ENDPROC(memmove) +SYM_FUNC_END(__memmove) +SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9bc861c71e75..9ff15ee404a4 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -19,8 +19,8 @@ * * rax original destination */ -ENTRY(memset) -ENTRY(__memset) +SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended * to use it when possible. If not available, use fast string instructions. @@ -43,8 +43,8 @@ ENTRY(__memset) rep stosb movq %r9,%rax ret -ENDPROC(memset) -ENDPROC(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) @@ -59,16 +59,16 @@ EXPORT_SYMBOL(__memset) * * rax original destination */ -ENTRY(memset_erms) +SYM_FUNC_START_LOCAL(memset_erms) movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx rep stosb movq %r9,%rax ret -ENDPROC(memset_erms) +SYM_FUNC_END(memset_erms) -ENTRY(memset_orig) +SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 /* expand byte value */ @@ -139,4 +139,4 @@ ENTRY(memset_orig) subq %r8,%rdx jmp .Lafter_bad_alignment .Lfinal: -ENDPROC(memset_orig) +SYM_FUNC_END(memset_orig) diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index ed33cbab3958..a2b9caa5274c 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -12,7 +12,7 @@ * */ .macro op_safe_regs op -ENTRY(\op\()_safe_regs) +SYM_FUNC_START(\op\()_safe_regs) pushq %rbx pushq %r12 movq %rdi, %r10 /* Save pointer */ @@ -41,13 +41,13 @@ ENTRY(\op\()_safe_regs) jmp 2b _ASM_EXTABLE(1b, 3b) -ENDPROC(\op\()_safe_regs) +SYM_FUNC_END(\op\()_safe_regs) .endm #else /* X86_32 */ .macro op_safe_regs op -ENTRY(\op\()_safe_regs) +SYM_FUNC_START(\op\()_safe_regs) pushl %ebx pushl %ebp pushl %esi @@ -83,7 +83,7 @@ ENTRY(\op\()_safe_regs) jmp 2b _ASM_EXTABLE(1b, 3b) -ENDPROC(\op\()_safe_regs) +SYM_FUNC_END(\op\()_safe_regs) .endm #endif diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 126dd6a9ec9b..7c7c92db8497 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -34,7 +34,7 @@ #define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX .text -ENTRY(__put_user_1) +SYM_FUNC_START(__put_user_1) ENTER cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX jae .Lbad_put_user @@ -43,10 +43,10 @@ ENTRY(__put_user_1) xor %eax,%eax ASM_CLAC ret -ENDPROC(__put_user_1) +SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) -ENTRY(__put_user_2) +SYM_FUNC_START(__put_user_2) ENTER mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX @@ -57,10 +57,10 @@ ENTRY(__put_user_2) xor %eax,%eax ASM_CLAC ret -ENDPROC(__put_user_2) +SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) -ENTRY(__put_user_4) +SYM_FUNC_START(__put_user_4) ENTER mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX @@ -71,10 +71,10 @@ ENTRY(__put_user_4) xor %eax,%eax ASM_CLAC ret -ENDPROC(__put_user_4) +SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) -ENTRY(__put_user_8) +SYM_FUNC_START(__put_user_8) ENTER mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX @@ -88,14 +88,15 @@ ENTRY(__put_user_8) xor %eax,%eax ASM_CLAC RET -ENDPROC(__put_user_8) +SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) -.Lbad_put_user_clac: +SYM_CODE_START_LOCAL(.Lbad_put_user_clac) ASM_CLAC .Lbad_put_user: movl $-EFAULT,%eax RET +SYM_CODE_END(.Lbad_put_user_clac) _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac) _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961e678a..363ec132df7e 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -11,11 +11,11 @@ .macro THUNK reg .section .text.__x86.indirect_thunk -ENTRY(__x86_indirect_thunk_\reg) +SYM_FUNC_START(__x86_indirect_thunk_\reg) CFI_STARTPROC JMP_NOSPEC %\reg CFI_ENDPROC -ENDPROC(__x86_indirect_thunk_\reg) +SYM_FUNC_END(__x86_indirect_thunk_\reg) .endm /* diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S index ee08449d20fd..951da2ad54bb 100644 --- a/arch/x86/math-emu/div_Xsig.S +++ b/arch/x86/math-emu/div_Xsig.S @@ -75,7 +75,7 @@ FPU_result_1: .text -ENTRY(div_Xsig) +SYM_FUNC_START(div_Xsig) pushl %ebp movl %esp,%ebp #ifndef NON_REENTRANT_FPU @@ -364,4 +364,4 @@ L_bugged_2: pop %ebx jmp L_exit #endif /* PARANOID */ -ENDPROC(div_Xsig) +SYM_FUNC_END(div_Xsig) diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S index 8f5025c80ee0..d047d1816abe 100644 --- a/arch/x86/math-emu/div_small.S +++ b/arch/x86/math-emu/div_small.S @@ -19,7 +19,7 @@ #include "fpu_emu.h" .text -ENTRY(FPU_div_small) +SYM_FUNC_START(FPU_div_small) pushl %ebp movl %esp,%ebp @@ -45,4 +45,4 @@ ENTRY(FPU_div_small) leave ret -ENDPROC(FPU_div_small) +SYM_FUNC_END(FPU_div_small) diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index f98a0c956764..9b41391867dc 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -107,6 +107,8 @@ static inline bool seg_writable(struct desc_struct *d) #define FPU_access_ok(y,z) if ( !access_ok(y,z) ) \ math_abort(FPU_info,SIGSEGV) #define FPU_abort math_abort(FPU_info, SIGSEGV) +#define FPU_copy_from_user(to, from, n) \ + do { if (copy_from_user(to, from, n)) FPU_abort; } while (0) #undef FPU_IGNORE_CODE_SEGV #ifdef FPU_IGNORE_CODE_SEGV @@ -122,7 +124,7 @@ static inline bool seg_writable(struct desc_struct *d) #define FPU_code_access_ok(z) FPU_access_ok((void __user *)FPU_EIP,z) #endif -#define FPU_get_user(x,y) get_user((x),(y)) -#define FPU_put_user(x,y) put_user((x),(y)) +#define FPU_get_user(x,y) do { if (get_user((x),(y))) FPU_abort; } while (0) +#define FPU_put_user(x,y) do { if (put_user((x),(y))) FPU_abort; } while (0) #endif diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S index 3e489122a2b0..4afc7b1fa6e9 100644 --- a/arch/x86/math-emu/mul_Xsig.S +++ b/arch/x86/math-emu/mul_Xsig.S @@ -25,7 +25,7 @@ #include "fpu_emu.h" .text -ENTRY(mul32_Xsig) +SYM_FUNC_START(mul32_Xsig) pushl %ebp movl %esp,%ebp subl $16,%esp @@ -63,10 +63,10 @@ ENTRY(mul32_Xsig) popl %esi leave ret -ENDPROC(mul32_Xsig) +SYM_FUNC_END(mul32_Xsig) -ENTRY(mul64_Xsig) +SYM_FUNC_START(mul64_Xsig) pushl %ebp movl %esp,%ebp subl $16,%esp @@ -116,11 +116,11 @@ ENTRY(mul64_Xsig) popl %esi leave ret -ENDPROC(mul64_Xsig) +SYM_FUNC_END(mul64_Xsig) -ENTRY(mul_Xsig_Xsig) +SYM_FUNC_START(mul_Xsig_Xsig) pushl %ebp movl %esp,%ebp subl $16,%esp @@ -176,4 +176,4 @@ ENTRY(mul_Xsig_Xsig) popl %esi leave ret -ENDPROC(mul_Xsig_Xsig) +SYM_FUNC_END(mul_Xsig_Xsig) diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S index 604f0b2d17e8..702315eecb86 100644 --- a/arch/x86/math-emu/polynom_Xsig.S +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -37,7 +37,7 @@ #define OVERFLOWED -16(%ebp) /* addition overflow flag */ .text -ENTRY(polynomial_Xsig) +SYM_FUNC_START(polynomial_Xsig) pushl %ebp movl %esp,%ebp subl $32,%esp @@ -134,4 +134,4 @@ L_accum_done: popl %esi leave ret -ENDPROC(polynomial_Xsig) +SYM_FUNC_END(polynomial_Xsig) diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index f3779743d15e..fe6246ff9887 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -85,7 +85,7 @@ int FPU_load_extended(long double __user *s, int stnr) RE_ENTRANT_CHECK_OFF; FPU_access_ok(s, 10); - __copy_from_user(sti_ptr, s, 10); + FPU_copy_from_user(sti_ptr, s, 10); RE_ENTRANT_CHECK_ON; return FPU_tagof(sti_ptr); @@ -1126,9 +1126,9 @@ void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) /* Copy all registers in stack order. */ RE_ENTRANT_CHECK_OFF; FPU_access_ok(s, 80); - __copy_from_user(register_base + offset, s, other); + FPU_copy_from_user(register_base + offset, s, other); if (offset) - __copy_from_user(register_base, s + other, offset); + FPU_copy_from_user(register_base, s + other, offset); RE_ENTRANT_CHECK_ON; for (i = 0; i < 8; i++) { diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S index 7f6b4392a15d..cad1d60b1e84 100644 --- a/arch/x86/math-emu/reg_norm.S +++ b/arch/x86/math-emu/reg_norm.S @@ -22,7 +22,7 @@ .text -ENTRY(FPU_normalize) +SYM_FUNC_START(FPU_normalize) pushl %ebp movl %esp,%ebp pushl %ebx @@ -95,12 +95,12 @@ L_overflow: call arith_overflow pop %ebx jmp L_exit -ENDPROC(FPU_normalize) +SYM_FUNC_END(FPU_normalize) /* Normalise without reporting underflow or overflow */ -ENTRY(FPU_normalize_nuo) +SYM_FUNC_START(FPU_normalize_nuo) pushl %ebp movl %esp,%ebp pushl %ebx @@ -147,4 +147,4 @@ L_exit_nuo_zero: popl %ebx leave ret -ENDPROC(FPU_normalize_nuo) +SYM_FUNC_END(FPU_normalize_nuo) diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S index 04563421ee7d..11a1f798451b 100644 --- a/arch/x86/math-emu/reg_round.S +++ b/arch/x86/math-emu/reg_round.S @@ -109,7 +109,7 @@ FPU_denormal: .globl fpu_Arith_exit /* Entry point when called from C */ -ENTRY(FPU_round) +SYM_FUNC_START(FPU_round) pushl %ebp movl %esp,%ebp pushl %esi @@ -708,4 +708,4 @@ L_exception_exit: jmp fpu_reg_round_special_exit #endif /* PARANOID */ -ENDPROC(FPU_round) +SYM_FUNC_END(FPU_round) diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S index 50fe9f8c893c..9c9e2c810afe 100644 --- a/arch/x86/math-emu/reg_u_add.S +++ b/arch/x86/math-emu/reg_u_add.S @@ -32,7 +32,7 @@ #include "control_w.h" .text -ENTRY(FPU_u_add) +SYM_FUNC_START(FPU_u_add) pushl %ebp movl %esp,%ebp pushl %esi @@ -166,4 +166,4 @@ L_exit: leave ret #endif /* PARANOID */ -ENDPROC(FPU_u_add) +SYM_FUNC_END(FPU_u_add) diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S index 94d545e118e4..e2fb5c2644c5 100644 --- a/arch/x86/math-emu/reg_u_div.S +++ b/arch/x86/math-emu/reg_u_div.S @@ -75,7 +75,7 @@ FPU_ovfl_flag: #define DEST PARAM3 .text -ENTRY(FPU_u_div) +SYM_FUNC_START(FPU_u_div) pushl %ebp movl %esp,%ebp #ifndef NON_REENTRANT_FPU @@ -471,4 +471,4 @@ L_exit: ret #endif /* PARANOID */ -ENDPROC(FPU_u_div) +SYM_FUNC_END(FPU_u_div) diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S index 21cde47fb3e5..0c779c87ac5b 100644 --- a/arch/x86/math-emu/reg_u_mul.S +++ b/arch/x86/math-emu/reg_u_mul.S @@ -45,7 +45,7 @@ FPU_accum_1: .text -ENTRY(FPU_u_mul) +SYM_FUNC_START(FPU_u_mul) pushl %ebp movl %esp,%ebp #ifndef NON_REENTRANT_FPU @@ -147,4 +147,4 @@ L_exit: ret #endif /* PARANOID */ -ENDPROC(FPU_u_mul) +SYM_FUNC_END(FPU_u_mul) diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S index f05dea7dec38..e9bb7c248649 100644 --- a/arch/x86/math-emu/reg_u_sub.S +++ b/arch/x86/math-emu/reg_u_sub.S @@ -33,7 +33,7 @@ #include "control_w.h" .text -ENTRY(FPU_u_sub) +SYM_FUNC_START(FPU_u_sub) pushl %ebp movl %esp,%ebp pushl %esi @@ -271,4 +271,4 @@ L_exit: popl %esi leave ret -ENDPROC(FPU_u_sub) +SYM_FUNC_END(FPU_u_sub) diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S index 226a51e991f1..d9d7de8dbd7b 100644 --- a/arch/x86/math-emu/round_Xsig.S +++ b/arch/x86/math-emu/round_Xsig.S @@ -23,7 +23,7 @@ .text -ENTRY(round_Xsig) +SYM_FUNC_START(round_Xsig) pushl %ebp movl %esp,%ebp pushl %ebx /* Reserve some space */ @@ -79,11 +79,11 @@ L_exit: popl %ebx leave ret -ENDPROC(round_Xsig) +SYM_FUNC_END(round_Xsig) -ENTRY(norm_Xsig) +SYM_FUNC_START(norm_Xsig) pushl %ebp movl %esp,%ebp pushl %ebx /* Reserve some space */ @@ -139,4 +139,4 @@ L_n_exit: popl %ebx leave ret -ENDPROC(norm_Xsig) +SYM_FUNC_END(norm_Xsig) diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S index 96f4779aa9c1..726af985f758 100644 --- a/arch/x86/math-emu/shr_Xsig.S +++ b/arch/x86/math-emu/shr_Xsig.S @@ -22,7 +22,7 @@ #include "fpu_emu.h" .text -ENTRY(shr_Xsig) +SYM_FUNC_START(shr_Xsig) push %ebp movl %esp,%ebp pushl %esi @@ -86,4 +86,4 @@ L_more_than_95: popl %esi leave ret -ENDPROC(shr_Xsig) +SYM_FUNC_END(shr_Xsig) diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S index d588874eb6fb..4fc89174caf0 100644 --- a/arch/x86/math-emu/wm_shrx.S +++ b/arch/x86/math-emu/wm_shrx.S @@ -33,7 +33,7 @@ | Results returned in the 64 bit arg and eax. | +---------------------------------------------------------------------------*/ -ENTRY(FPU_shrx) +SYM_FUNC_START(FPU_shrx) push %ebp movl %esp,%ebp pushl %esi @@ -93,7 +93,7 @@ L_more_than_95: popl %esi leave ret -ENDPROC(FPU_shrx) +SYM_FUNC_END(FPU_shrx) /*---------------------------------------------------------------------------+ @@ -112,7 +112,7 @@ ENDPROC(FPU_shrx) | part which has been shifted out of the arg. | | Results returned in the 64 bit arg and eax. | +---------------------------------------------------------------------------*/ -ENTRY(FPU_shrxs) +SYM_FUNC_START(FPU_shrxs) push %ebp movl %esp,%ebp pushl %esi @@ -204,4 +204,4 @@ Ls_more_than_95: popl %esi leave ret -ENDPROC(FPU_shrxs) +SYM_FUNC_END(FPU_shrxs) diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S index f031c0e19356..3b2b58164ec1 100644 --- a/arch/x86/math-emu/wm_sqrt.S +++ b/arch/x86/math-emu/wm_sqrt.S @@ -75,7 +75,7 @@ FPU_fsqrt_arg_0: .text -ENTRY(wm_sqrt) +SYM_FUNC_START(wm_sqrt) pushl %ebp movl %esp,%ebp #ifndef NON_REENTRANT_FPU @@ -469,4 +469,4 @@ sqrt_more_prec_large: /* Our estimate is too large */ movl $0x7fffff00,%eax jmp sqrt_round_result -ENDPROC(wm_sqrt) +SYM_FUNC_END(wm_sqrt) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index bbc68a54795e..3b89c201ac26 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_rbtree.o +obj-$(CONFIG_X86_PAT) += pat_interval.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 752ad11d6868..82ead8e27888 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -161,6 +161,14 @@ static void __init setup_cpu_entry_area(unsigned int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + /* + * VMX changes the host TR limit to 0x67 after a VM exit. This is + * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure + * that this is correct. + */ + BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0); + BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68); + cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); @@ -178,7 +186,9 @@ static __init void setup_cpu_entry_area_ptes(void) #ifdef CONFIG_X86_32 unsigned long start, end; - BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + /* The +1 is for the readonly IDT: */ + BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); + BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4d75bc656f97..30bb0bd3b1b8 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -45,55 +45,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, EXPORT_SYMBOL_GPL(ex_handler_fault); /* - * Handler for UD0 exception following a failed test against the - * result of a refcount inc/dec/add/sub. - */ -__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - /* First unconditionally saturate the refcount. */ - *(int *)regs->cx = INT_MIN / 2; - - /* - * Strictly speaking, this reports the fixup destination, not - * the fault location, and not the actually overflowing - * instruction, which is the instruction before the "js", but - * since that instruction could be a variety of lengths, just - * report the location after the overflow, which should be close - * enough for finding the overflow, as it's at least back in - * the function, having returned from .text.unlikely. - */ - regs->ip = ex_fixup_addr(fixup); - - /* - * This function has been called because either a negative refcount - * value was seen by any of the refcount functions, or a zero - * refcount value was seen by refcount_dec(). - * - * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result - * wrapped around) will be set. Additionally, seeing the refcount - * reach 0 will set ZF (Zero Flag: result was zero). In each of - * these cases we want a report, since it's a boundary condition. - * The SF case is not reported since it indicates post-boundary - * manipulations below zero or above INT_MAX. And if none of the - * flags are set, something has gone very wrong, so report it. - */ - if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { - bool zero = regs->flags & X86_EFLAGS_ZF; - - refcount_error_report(regs, zero ? "hit zero" : "overflow"); - } else if ((regs->flags & X86_EFLAGS_SF) == 0) { - /* Report if none of OF, ZF, nor SF are set. */ - refcount_error_report(regs, "unexpected saturation"); - } - - return true; -} -EXPORT_SYMBOL(ex_handler_refcount); - -/* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) * should always be valid. However, past bugs have allowed userspace to set diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd10d91a6115..e7bb483557c9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -829,14 +829,13 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) * used for the kernel image only. free_init_pages() will do the * right thing for either kind of address. */ -void free_kernel_image_pages(void *begin, void *end) +void free_kernel_image_pages(const char *what, void *begin, void *end) { unsigned long begin_ul = (unsigned long)begin; unsigned long end_ul = (unsigned long)end; unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; - - free_init_pages("unused kernel image", begin_ul, end_ul); + free_init_pages(what, begin_ul, end_ul); /* * PTI maps some of the kernel into userspace. For performance, @@ -865,7 +864,8 @@ void __ref free_initmem(void) mem_encrypt_free_decrypted_mem(); - free_kernel_image_pages(&__init_begin, &__init_end); + free_kernel_image_pages("unused kernel image (initmem)", + &__init_begin, &__init_end); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a6b5c653727b..dcb9bc961b39 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1263,7 +1263,7 @@ int kernel_set_to_readonly; void set_kernel_text_rw(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1282,7 +1282,7 @@ void set_kernel_text_rw(void) void set_kernel_text_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1300,9 +1300,9 @@ void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); unsigned long rodata_start = PFN_ALIGN(__start_rodata); - unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PFN_ALIGN(&__stop___ex_table); - unsigned long rodata_end = PFN_ALIGN(&__end_rodata); + unsigned long end = (unsigned long)__end_rodata_hpage_align; + unsigned long text_end = PFN_ALIGN(_etext); + unsigned long rodata_end = PFN_ALIGN(__end_rodata); unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", @@ -1334,8 +1334,10 @@ void mark_rodata_ro(void) set_memory_ro(start, (end-start) >> PAGE_SHIFT); #endif - free_kernel_image_pages((void *)text_end, (void *)rodata_start); - free_kernel_image_pages((void *)rodata_end, (void *)_sdata); + free_kernel_image_pages("unused kernel image (text/rodata gap)", + (void *)text_end, (void *)rodata_start); + free_kernel_image_pages("unused kernel image (rodata/data gap)", + (void *)rodata_end, (void *)_sdata); debug_checkwx(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a39dcdb5ae34..1ff9c2030b4f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -626,6 +626,17 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, paddr_next = data->next; len = data->len; + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + memunmap(data); + return true; + } + + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { + paddr = ((struct setup_indirect *)data->data)->addr; + len = ((struct setup_indirect *)data->data)->len; + } + memunmap(data); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 6d71481a1e70..106ead05bbe3 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -16,7 +16,7 @@ .text .code64 -ENTRY(sme_encrypt_execute) +SYM_FUNC_START(sme_encrypt_execute) /* * Entry parameters: @@ -66,9 +66,9 @@ ENTRY(sme_encrypt_execute) pop %rbp ret -ENDPROC(sme_encrypt_execute) +SYM_FUNC_END(sme_encrypt_execute) -ENTRY(__enc_copy) +SYM_FUNC_START(__enc_copy) /* * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since @@ -153,4 +153,4 @@ ENTRY(__enc_copy) ret .L__enc_copy_end: -ENDPROC(__enc_copy) +SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 4123100e0eaf..99f7a68738f0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -699,7 +699,7 @@ static int __init dummy_numa_init(void) * x86_numa_init - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The - * last fallback is dummy single node config encomapssing whole memory and + * last fallback is dummy single node config encompassing whole memory and * never fails. */ void __init x86_numa_init(void) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d9fbd4f69920..2d758e19ef22 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_lock(&memtype_lock); - err = rbt_memtype_check_insert(new, new_type); + err = memtype_check_insert(new, new_type); if (err) { pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, @@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end) } spin_lock(&memtype_lock); - entry = rbt_memtype_erase(start, end); + entry = memtype_erase(start, end); spin_unlock(&memtype_lock); if (IS_ERR(entry)) { @@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) spin_lock(&memtype_lock); - entry = rbt_memtype_lookup(paddr); + entry = memtype_lookup(paddr); if (entry != NULL) rettype = entry->type; else @@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos) return NULL; spin_lock(&memtype_lock); - ret = rbt_memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); if (!ret) { diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index eeb5caeb089b..79a06684349e 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type); -extern struct memtype *rbt_memtype_erase(u64 start, u64 end); -extern struct memtype *rbt_memtype_lookup(u64 addr); -extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type); +extern struct memtype *memtype_erase(u64 start, u64 end); +extern struct memtype *memtype_lookup(u64 addr); +extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); #else -static inline int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type) +static inline int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type) { return 0; } -static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) +static inline struct memtype *memtype_erase(u64 start, u64 end) { return NULL; } -static inline struct memtype *rbt_memtype_lookup(u64 addr) +static inline struct memtype *memtype_lookup(u64 addr) { return NULL; } -static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) { return 0; } #endif diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c new file mode 100644 index 000000000000..47a1bf30748f --- /dev/null +++ b/arch/x86/mm/pat_interval.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * Suresh B Siddha <suresh.b.siddha@intel.com> + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/interval_tree_generic.h> +#include <linux/sched.h> +#include <linux/gfp.h> + +#include <asm/pgtable.h> +#include <asm/pat.h> + +#include "pat_internal.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) with tree ordered + * on starting address. Tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course. + * + * memtype_lock protects the rbtree. + */ +static inline u64 memtype_interval_start(struct memtype *memtype) +{ + return memtype->start; +} + +static inline u64 memtype_interval_end(struct memtype *memtype) +{ + return memtype->end - 1; +} +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + memtype_interval_start, memtype_interval_end, + static, memtype_interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *match; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end); + while (match != NULL && match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (match->start == start) && (match->end == end)) + return match; + + if ((match_type == MEMTYPE_END_MATCH) && + (match->start < start) && (match->end == end)) + return match; + + match = memtype_interval_iter_next(match, start, end); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *match; + enum page_cache_mode found_type = reqtype; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end); + if (match == NULL) + goto success; + + if (match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); + found_type = match->type; + + match = memtype_interval_iter_next(match, start, end); + while (match) { + if (match->type != found_type) + goto failure; + + match = memtype_interval_iter_next(match, start, end); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(match->type)); + return -EBUSY; +} + +int memtype_check_insert(struct memtype *new, + enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(new->start, new->end, new->type, ret_type); + if (err) + return err; + + if (ret_type) + new->type = *ret_type; + + memtype_interval_insert(new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *data; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!data) { + data = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!data) + return ERR_PTR(-EINVAL); + } + + if (data->start == start) { + /* munmap: erase this node */ + memtype_interval_remove(data, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + memtype_interval_remove(data, &memtype_rbroot); + data->end = start; + memtype_interval_insert(data, &memtype_rbroot); + return NULL; + } + + return data; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return memtype_interval_iter_first(&memtype_rbroot, addr, + addr + PAGE_SIZE); +} + +#if defined(CONFIG_DEBUG_FS) +int memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ + struct memtype *match; + int i = 1; + + match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + while (match && pos != i) { + match = memtype_interval_iter_next(match, 0, ULONG_MAX); + i++; + } + + if (match) { /* pos == i */ + *out = *match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c deleted file mode 100644 index 65ebe4b88f7c..000000000000 --- a/arch/x86/mm/pat_rbtree.c +++ /dev/null @@ -1,268 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Interval tree (augmented rbtree) used to store the PAT memory type - * reservations. - */ - -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/rbtree_augmented.h> -#include <linux/sched.h> -#include <linux/gfp.h> - -#include <asm/pgtable.h> -#include <asm/pat.h> - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. - */ - -static struct rb_root memtype_rbroot = RB_ROOT; - -static int is_node_overlap(struct memtype *node, u64 start, u64 end) -{ - if (node->start >= end || node->end <= start) - return 0; - - return 1; -} - -static u64 get_subtree_max_end(struct rb_node *node) -{ - u64 ret = 0; - if (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - ret = data->subtree_max_end; - } - return ret; -} - -#define NODE_END(node) ((node)->end) - -RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb, - struct memtype, rb, u64, subtree_max_end, NODE_END) - -/* Find the first (lowest start addr) overlapping range from rb tree */ -static struct memtype *memtype_rb_lowest_match(struct rb_root *root, - u64 start, u64 end) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - - if (get_subtree_max_end(node->rb_left) > start) { - /* Lowest overlap if any must be on left side */ - node = node->rb_left; - } else if (is_node_overlap(data, start, end)) { - last_lower = data; - break; - } else if (start >= data->start) { - /* Lowest overlap if any must be on right side */ - node = node->rb_right; - } else { - break; - } - } - return last_lower; /* Returns NULL if there is no overlap */ -} - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_rb_match(struct rb_root *root, - u64 start, u64 end, int match_type) -{ - struct memtype *match; - - match = memtype_rb_lowest_match(root, start, end); - while (match != NULL && match->start < end) { - struct rb_node *node; - - if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; - - if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; - - node = rb_next(&match->rb); - if (node) - match = rb_entry(node, struct memtype, rb); - else - match = NULL; - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_rb_check_conflict(struct rb_root *root, - u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct rb_node *node; - struct memtype *match; - enum page_cache_mode found_type = reqtype; - - match = memtype_rb_lowest_match(&memtype_rbroot, start, end); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - node = rb_next(&match->rb); - while (node) { - match = rb_entry(node, struct memtype, rb); - - if (match->start >= end) /* Checked all possible matches */ - goto success; - - if (is_node_overlap(match, start, end) && - match->type != found_type) { - goto failure; - } - - node = rb_next(&match->rb); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) -{ - struct rb_node **node = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*node) { - struct memtype *data = rb_entry(*node, struct memtype, rb); - - parent = *node; - if (data->subtree_max_end < newdata->end) - data->subtree_max_end = newdata->end; - if (newdata->start <= data->start) - node = &((*node)->rb_left); - else if (newdata->start > data->start) - node = &((*node)->rb_right); - } - - newdata->subtree_max_end = newdata->end; - rb_link_node(&newdata->rb, parent, node); - rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb); -} - -int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, - new->type, ret_type); - - if (!err) { - if (ret_type) - new->type = *ret_type; - - new->subtree_max_end = new->end; - memtype_rb_insert(&memtype_rbroot, new); - } - return err; -} - -struct memtype *rbt_memtype_erase(u64 start, u64 end) -{ - struct memtype *data; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. - */ - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_END_MATCH); - if (!data) - return ERR_PTR(-EINVAL); - } - - if (data->start == start) { - /* munmap: erase this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); - } else { - /* mremap: update the end value of this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); - data->end = start; - data->subtree_max_end = data->end; - memtype_rb_insert(&memtype_rbroot, data); - return NULL; - } - - return data; -} - -struct memtype *rbt_memtype_lookup(u64 addr) -{ - return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); -} - -#if defined(CONFIG_DEBUG_FS) -int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct rb_node *node; - int i = 1; - - node = rb_first(&memtype_rbroot); - while (node && pos != i) { - node = rb_next(node); - i++; - } - - if (node) { /* pos == i */ - struct memtype *this = rb_entry(node, struct memtype, rb); - *out = *this; - return 0; - } else { - return 1; - } -} -#endif diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3e4b9035bb9a..7bd2c3a52297 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -643,8 +643,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 7f2140414440..44a9f068eee0 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -574,7 +574,7 @@ static void pti_clone_kernel_text(void) */ unsigned long start = PFN_ALIGN(_text); unsigned long end_clone = (unsigned long)__end_rodata_aligned; - unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table); + unsigned long end_global = PFN_ALIGN((unsigned long)_etext); if (!pti_kernel_image_global_ok()) return; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 425e025341db..38d44f36d5ed 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -128,6 +128,9 @@ void __init efi_find_mirror(void) efi_memory_desc_t *md; u64 mirror_size = 0, total_size = 0; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -145,14 +148,18 @@ void __init efi_find_mirror(void) /* * Tell the kernel about the EFI memory map. This might include - * more than the max 128 entries that can fit in the e820 legacy - * (zeropage) memory map. + * more than the max 128 entries that can fit in the passed in e820 + * legacy (zeropage) memory map, but the kernel's e820 table can hold + * E820_MAX_ENTRIES. */ static void __init do_add_efi_memmap(void) { efi_memory_desc_t *md; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -164,7 +171,10 @@ static void __init do_add_efi_memmap(void) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->attribute & EFI_MEMORY_WB) + if (efi_soft_reserve_enabled() + && (md->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else if (md->attribute & EFI_MEMORY_WB) e820_type = E820_TYPE_RAM; else e820_type = E820_TYPE_RESERVED; @@ -190,11 +200,36 @@ static void __init do_add_efi_memmap(void) e820_type = E820_TYPE_RESERVED; break; } + e820__range_add(start, size, e820_type); } e820__update_table(e820_table); } +/* + * Given add_efi_memmap defaults to 0 and there there is no alternative + * e820 mechanism for soft-reserved memory, import the full EFI memory + * map if soft reservations are present and enabled. Otherwise, the + * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is + * the efi=nosoftreserve option. + */ +static bool do_efi_soft_reserve(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return false; + + if (!efi_soft_reserve_enabled()) + return false; + + for_each_efi_memory_desc(md) + if (md->type == EFI_CONVENTIONAL_MEMORY && + (md->attribute & EFI_MEMORY_SP)) + return true; + return false; +} + int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; @@ -224,9 +259,11 @@ int __init efi_memblock_x86_reserve_range(void) if (rv) return rv; - if (add_efi_memmap) + if (add_efi_memmap || do_efi_soft_reserve()) do_add_efi_memmap(); + efi_fake_memmap_early(); + WARN(efi.memmap.desc_version != 1, "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", efi.memmap.desc_version); @@ -779,6 +816,15 @@ static bool should_map_region(efi_memory_desc_t *md) return false; /* + * EFI specific purpose memory may be reserved by default + * depending on kernel config and boot options. + */ + if (md->type == EFI_CONVENTIONAL_MEMORY && + efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return false; + + /* * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index ab2e91e76894..eed8b5b441f8 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -22,7 +22,7 @@ */ .text -ENTRY(efi_call_phys) +SYM_FUNC_START(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found @@ -114,7 +114,7 @@ ENTRY(efi_call_phys) movl (%edx), %ecx pushl %ecx ret -ENDPROC(efi_call_phys) +SYM_FUNC_END(efi_call_phys) .previous .data diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 74628ec78f29..b1d2313fe3bf 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -39,7 +39,7 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp -ENTRY(efi_call) +SYM_FUNC_START(efi_call) pushq %rbp movq %rsp, %rbp SAVE_XMM @@ -55,4 +55,4 @@ ENTRY(efi_call) RESTORE_XMM popq %rbp ret -ENDPROC(efi_call) +SYM_FUNC_END(efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 46c58b08739c..3189f1394701 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,7 +25,7 @@ .text .code64 -ENTRY(efi64_thunk) +SYM_FUNC_START(efi64_thunk) push %rbp push %rbx @@ -60,14 +60,14 @@ ENTRY(efi64_thunk) pop %rbx pop %rbp retq -ENDPROC(efi64_thunk) +SYM_FUNC_END(efi64_thunk) /* * We run this function from the 1:1 mapping. * * This function must be invoked with a 1:1 mapped stack. */ -ENTRY(__efi64_thunk) +SYM_FUNC_START_LOCAL(__efi64_thunk) movl %ds, %eax push %rax movl %es, %eax @@ -114,14 +114,14 @@ ENTRY(__efi64_thunk) or %rcx, %rax 1: ret -ENDPROC(__efi64_thunk) +SYM_FUNC_END(__efi64_thunk) -ENTRY(efi_exit32) +SYM_FUNC_START_LOCAL(efi_exit32) movq func_rt_ptr(%rip), %rax push %rax mov %rdi, %rax ret -ENDPROC(efi_exit32) +SYM_FUNC_END(efi_exit32) .code32 /* @@ -129,7 +129,7 @@ ENDPROC(efi_exit32) * * The stack should represent the 32-bit calling convention. */ -ENTRY(efi_enter32) +SYM_FUNC_START_LOCAL(efi_enter32) movl $__KERNEL_DS, %eax movl %eax, %ds movl %eax, %es @@ -145,7 +145,7 @@ ENTRY(efi_enter32) pushl %eax lret -ENDPROC(efi_enter32) +SYM_FUNC_END(efi_enter32) .data .balign 8 diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 3b9fd679cea9..7675cf754d90 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -320,6 +320,9 @@ void __init efi_reserve_boot_services(void) { efi_memory_desc_t *md; + if (!efi_enabled(EFI_MEMMAP)) + return; + for_each_efi_memory_desc(md) { u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S index 5fee3a2c2fd4..75f4faff8468 100644 --- a/arch/x86/platform/olpc/xo1-wakeup.S +++ b/arch/x86/platform/olpc/xo1-wakeup.S @@ -90,7 +90,7 @@ restore_registers: ret -ENTRY(do_olpc_suspend_lowlevel) +SYM_CODE_START(do_olpc_suspend_lowlevel) call save_processor_state call save_registers @@ -110,6 +110,7 @@ ret_point: call restore_registers call restore_processor_state ret +SYM_CODE_END(do_olpc_suspend_lowlevel) .data saved_gdt: .long 0,0 diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 1f8825bbaffb..43b4d864817e 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -50,7 +50,7 @@ #define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) #define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8) -ENTRY(pvh_start_xen) +SYM_CODE_START_LOCAL(pvh_start_xen) cld lgdt (_pa(gdt)) @@ -146,15 +146,16 @@ ENTRY(pvh_start_xen) ljmp $PVH_CS_SEL, $_pa(startup_32) #endif -END(pvh_start_xen) +SYM_CODE_END(pvh_start_xen) .section ".init.data","aw" .balign 8 -gdt: +SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start .long _pa(gdt_start) .word 0 -gdt_start: +SYM_DATA_END(gdt) +SYM_DATA_START_LOCAL(gdt_start) .quad 0x0000000000000000 /* NULL descriptor */ #ifdef CONFIG_X86_64 .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */ @@ -163,15 +164,14 @@ gdt_start: #endif .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */ .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */ -gdt_end: +SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end) .balign 16 -canary: - .fill 48, 1, 0 +SYM_DATA_LOCAL(canary, .fill 48, 1, 0) -early_stack: +SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 -early_stack_end: +SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index c2ee31953372..ece9cb9c1189 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -184,20 +184,20 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) } EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); -void uv_bios_init(void) +int uv_bios_init(void) { uv_systab = NULL; if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) || !uv_systab_phys || efi_runtime_disabled()) { pr_crit("UV: UVsystab: missing\n"); - return; + return -EEXIST; } uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab)); if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) { pr_err("UV: UVsystab: bad signature!\n"); iounmap(uv_systab); - return; + return -EINVAL; } /* Starting with UV4 the UV systab size is variable */ @@ -208,8 +208,9 @@ void uv_bios_init(void) uv_systab = ioremap(uv_systab_phys, size); if (!uv_systab) { pr_err("UV: UVsystab: ioremap(%d) failed!\n", size); - return; + return -EFAULT; } } pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); + return 0; } diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 6fe383002125..8786653ad3c0 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -16,7 +16,7 @@ .text -ENTRY(swsusp_arch_suspend) +SYM_FUNC_START(swsusp_arch_suspend) movl %esp, saved_context_esp movl %ebx, saved_context_ebx movl %ebp, saved_context_ebp @@ -33,9 +33,9 @@ ENTRY(swsusp_arch_suspend) call swsusp_save FRAME_END ret -ENDPROC(swsusp_arch_suspend) +SYM_FUNC_END(swsusp_arch_suspend) -ENTRY(restore_image) +SYM_CODE_START(restore_image) /* prepare to jump to the image kernel */ movl restore_jump_address, %ebx movl restore_cr3, %ebp @@ -45,9 +45,10 @@ ENTRY(restore_image) /* jump to relocated restore code */ movl relocated_restore_code, %eax jmpl *%eax +SYM_CODE_END(restore_image) /* code below has been relocated to a safe page */ -ENTRY(core_restore_code) +SYM_CODE_START(core_restore_code) movl temp_pgt, %eax movl %eax, %cr3 @@ -77,10 +78,11 @@ copy_loop: done: jmpl *%ebx +SYM_CODE_END(core_restore_code) /* code below belongs to the image kernel */ .align PAGE_SIZE -ENTRY(restore_registers) +SYM_FUNC_START(restore_registers) /* go back to the original page tables */ movl %ebp, %cr3 movl mmu_cr4_features, %ecx @@ -107,4 +109,4 @@ ENTRY(restore_registers) movl %eax, in_suspend ret -ENDPROC(restore_registers) +SYM_FUNC_END(restore_registers) diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index a4d5eb0a7ece..7918b8415f13 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -22,7 +22,7 @@ #include <asm/processor-flags.h> #include <asm/frame.h> -ENTRY(swsusp_arch_suspend) +SYM_FUNC_START(swsusp_arch_suspend) movq $saved_context, %rax movq %rsp, pt_regs_sp(%rax) movq %rbp, pt_regs_bp(%rax) @@ -50,9 +50,9 @@ ENTRY(swsusp_arch_suspend) call swsusp_save FRAME_END ret -ENDPROC(swsusp_arch_suspend) +SYM_FUNC_END(swsusp_arch_suspend) -ENTRY(restore_image) +SYM_CODE_START(restore_image) /* prepare to jump to the image kernel */ movq restore_jump_address(%rip), %r8 movq restore_cr3(%rip), %r9 @@ -67,9 +67,10 @@ ENTRY(restore_image) /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx +SYM_CODE_END(restore_image) /* code below has been relocated to a safe page */ -ENTRY(core_restore_code) +SYM_CODE_START(core_restore_code) /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ @@ -97,10 +98,11 @@ ENTRY(core_restore_code) .Ldone: /* jump to the restore_registers address from the image header */ jmpq *%r8 +SYM_CODE_END(core_restore_code) /* code below belongs to the image kernel */ .align PAGE_SIZE -ENTRY(restore_registers) +SYM_FUNC_START(restore_registers) /* go back to the original page tables */ movq %r9, %cr3 @@ -142,4 +144,4 @@ ENTRY(restore_registers) movq %rax, in_suspend(%rip) ret -ENDPROC(restore_registers) +SYM_FUNC_END(restore_registers) diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S index 275a646d1048..0b4390ce586b 100644 --- a/arch/x86/purgatory/entry64.S +++ b/arch/x86/purgatory/entry64.S @@ -8,13 +8,13 @@ * This code has been taken from kexec-tools. */ +#include <linux/linkage.h> + .text .balign 16 .code64 - .globl entry64, entry64_regs - -entry64: +SYM_CODE_START(entry64) /* Setup a gdt that should be preserved */ lgdt gdt(%rip) @@ -54,10 +54,11 @@ new_cs_exit: /* Jump to the new code... */ jmpq *rip(%rip) +SYM_CODE_END(entry64) .section ".rodata" .balign 4 -entry64_regs: +SYM_DATA_START(entry64_regs) rax: .quad 0x0 rcx: .quad 0x0 rdx: .quad 0x0 @@ -75,13 +76,14 @@ r13: .quad 0x0 r14: .quad 0x0 r15: .quad 0x0 rip: .quad 0x0 - .size entry64_regs, . - entry64_regs +SYM_DATA_END(entry64_regs) /* GDT */ .section ".rodata" .balign 16 -gdt: - /* 0x00 unusable segment +SYM_DATA_START_LOCAL(gdt) + /* + * 0x00 unusable segment * 0x08 unused * so use them as gdt ptr */ @@ -94,6 +96,8 @@ gdt: /* 0x18 4GB flat data segment */ .word 0xFFFF, 0x0000, 0x9200, 0x00CF -gdt_end: -stack: .quad 0, 0 -stack_init: +SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) + +SYM_DATA_START_LOCAL(stack) + .quad 0, 0 +SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, stack_init) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 3b95410ff0f8..2961234d0795 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -14,28 +14,10 @@ #include "../boot/string.h" -unsigned long purgatory_backup_dest __section(.kexec-purgatory); -unsigned long purgatory_backup_src __section(.kexec-purgatory); -unsigned long purgatory_backup_sz __section(.kexec-purgatory); - u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); -/* - * On x86, second kernel requries first 640K of memory to boot. Copy - * first 640K to a backup region in reserved memory range so that second - * kernel can use first 640K. - */ -static int copy_backup_region(void) -{ - if (purgatory_backup_dest) { - memcpy((void *)purgatory_backup_dest, - (void *)purgatory_backup_src, purgatory_backup_sz); - } - return 0; -} - static int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; @@ -66,7 +48,6 @@ void purgatory(void) for (;;) ; } - copy_backup_region(); } /* diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index 321146be741d..89d9e9e53fcd 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -7,14 +7,14 @@ * * This code has been taken from kexec-tools. */ +#include <linux/linkage.h> #include <asm/purgatory.h> .text - .globl purgatory_start .balign 16 -purgatory_start: .code64 +SYM_CODE_START(purgatory_start) /* Load a gdt so I know what the segment registers are */ lgdt gdt(%rip) @@ -32,10 +32,12 @@ purgatory_start: /* Call the C code */ call purgatory jmp entry64 +SYM_CODE_END(purgatory_start) .section ".rodata" .balign 16 -gdt: /* 0x00 unusable segment +SYM_DATA_START_LOCAL(gdt) + /* 0x00 unusable segment * 0x08 unused * so use them as the gdt ptr */ @@ -48,10 +50,10 @@ gdt: /* 0x00 unusable segment /* 0x18 4GB flat data segment */ .word 0xFFFF, 0x0000, 0x9200, 0x00CF -gdt_end: +SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) .bss .balign 4096 -lstack: +SYM_DATA_START_LOCAL(lstack) .skip 4096 -lstack_end: +SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end) diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S index 8b1427422dfc..1ef507ca50a5 100644 --- a/arch/x86/purgatory/stack.S +++ b/arch/x86/purgatory/stack.S @@ -5,13 +5,14 @@ * Copyright (C) 2014 Red Hat Inc. */ +#include <linux/linkage.h> + /* A stack for the loaded kernel. * Separate and in the data section so it can be prepopulated. */ .data .balign 4096 - .globl stack, stack_end -stack: +SYM_DATA_START(stack) .skip 4096 -stack_end: +SYM_DATA_END_LABEL(stack, SYM_L_GLOBAL, stack_end) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 7dce39c8c034..262f83cad355 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -8,6 +8,7 @@ #include <asm/pgtable.h> #include <asm/realmode.h> #include <asm/tlbflush.h> +#include <asm/crash.h> struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; @@ -34,6 +35,7 @@ void __init reserve_real_mode(void) memblock_reserve(mem, size); set_real_mode_mem(mem); + crash_reserve_low_1M(); } static void __init setup_real_mode(void) diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index 6363761cc74c..af04512c02d9 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -14,7 +14,7 @@ .section ".header", "a" .balign 16 -GLOBAL(real_mode_header) +SYM_DATA_START(real_mode_header) .long pa_text_start .long pa_ro_end /* SMP trampoline */ @@ -33,11 +33,9 @@ GLOBAL(real_mode_header) #ifdef CONFIG_X86_64 .long __KERNEL32_CS #endif -END(real_mode_header) +SYM_DATA_END(real_mode_header) /* End signature, used to verify integrity */ .section ".signature","a" .balign 4 -GLOBAL(end_signature) - .long REALMODE_END_SIGNATURE -END(end_signature) +SYM_DATA(end_signature, .long REALMODE_END_SIGNATURE) diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S index 3bb980800c58..64d135d1ee63 100644 --- a/arch/x86/realmode/rm/realmode.lds.S +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -11,6 +11,7 @@ OUTPUT_FORMAT("elf32-i386") OUTPUT_ARCH(i386) +ENTRY(pa_text_start) SECTIONS { diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S index cd2f97b9623b..f10515b10e0a 100644 --- a/arch/x86/realmode/rm/reboot.S +++ b/arch/x86/realmode/rm/reboot.S @@ -19,7 +19,7 @@ */ .section ".text32", "ax" .code32 -ENTRY(machine_real_restart_asm) +SYM_CODE_START(machine_real_restart_asm) #ifdef CONFIG_X86_64 /* Switch to trampoline GDT as it is guaranteed < 4 GiB */ @@ -33,7 +33,7 @@ ENTRY(machine_real_restart_asm) movl %eax, %cr0 ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off -GLOBAL(machine_real_restart_paging_off) +SYM_INNER_LABEL(machine_real_restart_paging_off, SYM_L_GLOBAL) xorl %eax, %eax xorl %edx, %edx movl $MSR_EFER, %ecx @@ -63,6 +63,7 @@ GLOBAL(machine_real_restart_paging_off) movl %ecx, %gs movl %ecx, %ss ljmpw $8, $1f +SYM_CODE_END(machine_real_restart_asm) /* * This is 16-bit protected mode code to disable paging and the cache, @@ -127,13 +128,13 @@ bios: .section ".rodata", "a" .balign 16 -GLOBAL(machine_real_restart_idt) +SYM_DATA_START(machine_real_restart_idt) .word 0xffff /* Length - real mode default value */ .long 0 /* Base - real mode default value */ -END(machine_real_restart_idt) +SYM_DATA_END(machine_real_restart_idt) .balign 16 -GLOBAL(machine_real_restart_gdt) +SYM_DATA_START(machine_real_restart_gdt) /* Self-pointer */ .word 0xffff /* Length - real mode default value */ .long pa_machine_real_restart_gdt @@ -153,4 +154,4 @@ GLOBAL(machine_real_restart_gdt) * semantics we don't have to reload the segments once CR0.PE = 0. */ .quad GDT_ENTRY(0x0093, 0x100, 0xffff) -END(machine_real_restart_gdt) +SYM_DATA_END(machine_real_restart_gdt) diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S index 8d4cb64799ea..0fca64061ad2 100644 --- a/arch/x86/realmode/rm/stack.S +++ b/arch/x86/realmode/rm/stack.S @@ -6,15 +6,13 @@ #include <linux/linkage.h> .data -GLOBAL(HEAP) - .long rm_heap -GLOBAL(heap_end) - .long rm_stack +SYM_DATA(HEAP, .long rm_heap) +SYM_DATA(heap_end, .long rm_stack) .bss .balign 16 -GLOBAL(rm_heap) - .space 2048 -GLOBAL(rm_stack) +SYM_DATA(rm_heap, .space 2048) + +SYM_DATA_START(rm_stack) .space 2048 -GLOBAL(rm_stack_end) +SYM_DATA_END_LABEL(rm_stack, SYM_L_GLOBAL, rm_stack_end) diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 1868b158480d..3fad907a179f 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -29,7 +29,7 @@ .code16 .balign PAGE_SIZE -ENTRY(trampoline_start) +SYM_CODE_START(trampoline_start) wbinvd # Needed for NUMA-Q should be harmless for others LJMPW_RM(1f) @@ -54,18 +54,20 @@ ENTRY(trampoline_start) lmsw %dx # into protected mode ljmpl $__BOOT_CS, $pa_startup_32 +SYM_CODE_END(trampoline_start) .section ".text32","ax" .code32 -ENTRY(startup_32) # note: also used from wakeup_asm.S +SYM_CODE_START(startup_32) # note: also used from wakeup_asm.S jmp *%eax +SYM_CODE_END(startup_32) .bss .balign 8 -GLOBAL(trampoline_header) - tr_start: .space 4 - tr_gdt_pad: .space 2 - tr_gdt: .space 6 -END(trampoline_header) +SYM_DATA_START(trampoline_header) + SYM_DATA_LOCAL(tr_start, .space 4) + SYM_DATA_LOCAL(tr_gdt_pad, .space 2) + SYM_DATA_LOCAL(tr_gdt, .space 6) +SYM_DATA_END(trampoline_header) #include "trampoline_common.S" diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index aee2b45d83b8..251758ed7443 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -38,7 +38,7 @@ .code16 .balign PAGE_SIZE -ENTRY(trampoline_start) +SYM_CODE_START(trampoline_start) cli # We should be safe anyway wbinvd @@ -78,12 +78,14 @@ ENTRY(trampoline_start) no_longmode: hlt jmp no_longmode +SYM_CODE_END(trampoline_start) + #include "../kernel/verify_cpu.S" .section ".text32","ax" .code32 .balign 4 -ENTRY(startup_32) +SYM_CODE_START(startup_32) movl %edx, %ss addl $pa_real_mode_base, %esp movl %edx, %ds @@ -137,38 +139,39 @@ ENTRY(startup_32) * the new gdt/idt that has __KERNEL_CS with CS.L = 1. */ ljmpl $__KERNEL_CS, $pa_startup_64 +SYM_CODE_END(startup_32) .section ".text64","ax" .code64 .balign 4 -ENTRY(startup_64) +SYM_CODE_START(startup_64) # Now jump into the kernel using virtual addresses jmpq *tr_start(%rip) +SYM_CODE_END(startup_64) .section ".rodata","a" # Duplicate the global descriptor table # so the kernel can live anywhere .balign 16 - .globl tr_gdt -tr_gdt: +SYM_DATA_START(tr_gdt) .short tr_gdt_end - tr_gdt - 1 # gdt limit .long pa_tr_gdt .short 0 .quad 0x00cf9b000000ffff # __KERNEL32_CS .quad 0x00af9b000000ffff # __KERNEL_CS .quad 0x00cf93000000ffff # __KERNEL_DS -tr_gdt_end: +SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end) .bss .balign PAGE_SIZE -GLOBAL(trampoline_pgd) .space PAGE_SIZE +SYM_DATA(trampoline_pgd, .space PAGE_SIZE) .balign 8 -GLOBAL(trampoline_header) - tr_start: .space 8 - GLOBAL(tr_efer) .space 8 - GLOBAL(tr_cr4) .space 4 - GLOBAL(tr_flags) .space 4 -END(trampoline_header) +SYM_DATA_START(trampoline_header) + SYM_DATA_LOCAL(tr_start, .space 8) + SYM_DATA(tr_efer, .space 8) + SYM_DATA(tr_cr4, .space 4) + SYM_DATA(tr_flags, .space 4) +SYM_DATA_END(trampoline_header) #include "trampoline_common.S" diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S index 8d8208dcca24..5033e640f957 100644 --- a/arch/x86/realmode/rm/trampoline_common.S +++ b/arch/x86/realmode/rm/trampoline_common.S @@ -1,4 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ .section ".rodata","a" .balign 16 -tr_idt: .fill 1, 6, 0 +SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0) diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S index 05ac9c17c811..02d0ba16ae33 100644 --- a/arch/x86/realmode/rm/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -17,7 +17,7 @@ .section ".data", "aw" .balign 16 -GLOBAL(wakeup_header) +SYM_DATA_START(wakeup_header) video_mode: .short 0 /* Video mode number */ pmode_entry: .long 0 pmode_cs: .short __KERNEL_CS @@ -31,13 +31,13 @@ GLOBAL(wakeup_header) realmode_flags: .long 0 real_magic: .long 0 signature: .long WAKEUP_HEADER_SIGNATURE -END(wakeup_header) +SYM_DATA_END(wakeup_header) .text .code16 .balign 16 -ENTRY(wakeup_start) +SYM_CODE_START(wakeup_start) cli cld @@ -73,7 +73,7 @@ ENTRY(wakeup_start) movw %ax, %fs movw %ax, %gs - lidtl wakeup_idt + lidtl .Lwakeup_idt /* Clear the EFLAGS */ pushl $0 @@ -135,6 +135,7 @@ ENTRY(wakeup_start) #else jmp trampoline_start #endif +SYM_CODE_END(wakeup_start) bogus_real_magic: 1: @@ -152,7 +153,7 @@ bogus_real_magic: */ .balign 16 -GLOBAL(wakeup_gdt) +SYM_DATA_START(wakeup_gdt) .word 3*8-1 /* Self-descriptor */ .long pa_wakeup_gdt .word 0 @@ -164,15 +165,15 @@ GLOBAL(wakeup_gdt) .word 0xffff /* 16-bit data segment @ real_mode_base */ .long 0x93000000 + pa_real_mode_base .word 0x008f /* big real mode */ -END(wakeup_gdt) +SYM_DATA_END(wakeup_gdt) .section ".rodata","a" .balign 8 /* This is the standard real-mode IDT */ .balign 16 -GLOBAL(wakeup_idt) +SYM_DATA_START_LOCAL(.Lwakeup_idt) .word 0xffff /* limit */ .long 0 /* address */ .word 0 -END(wakeup_idt) +SYM_DATA_END(.Lwakeup_idt) diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S index c078dba40cef..c8fef76743f6 100644 --- a/arch/x86/realmode/rmpiggy.S +++ b/arch/x86/realmode/rmpiggy.S @@ -10,12 +10,10 @@ .balign PAGE_SIZE -GLOBAL(real_mode_blob) +SYM_DATA_START(real_mode_blob) .incbin "arch/x86/realmode/rm/realmode.bin" -END(real_mode_blob) +SYM_DATA_END_LABEL(real_mode_blob, SYM_L_GLOBAL, real_mode_blob_end) -GLOBAL(real_mode_blob_end); - -GLOBAL(real_mode_relocs) +SYM_DATA_START(real_mode_relocs) .incbin "arch/x86/realmode/rm/realmode.relocs" -END(real_mode_relocs) +SYM_DATA_END(real_mode_relocs) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S index a4a3870dc059..a6eaf293a73b 100644 --- a/arch/x86/um/vdso/vdso.S +++ b/arch/x86/um/vdso/vdso.S @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/init.h> +#include <linux/linkage.h> __INITDATA - .globl vdso_start, vdso_end -vdso_start: +SYM_DATA_START(vdso_start) .incbin "arch/x86/um/vdso/vdso.so" -vdso_end: +SYM_DATA_END_LABEL(vdso_start, SYM_L_GLOBAL, vdso_end) __FINIT diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5bfea374a160..ae4a41ca19f6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -837,15 +837,6 @@ static void xen_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } -void xen_set_iopl_mask(unsigned mask) -{ - struct physdev_set_iopl set_iopl; - - /* Force the change at ring 0. */ - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); -} - static void xen_io_delay(void) { } @@ -1055,7 +1046,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_idt_entry = xen_write_idt_entry, .load_sp0 = xen_load_sp0, - .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index be104eef80be..508fe204520b 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -19,7 +19,7 @@ * event status with one and operation. If there are pending events, * then enter the hypervisor to get them handled. */ -ENTRY(xen_irq_enable_direct) +SYM_FUNC_START(xen_irq_enable_direct) FRAME_BEGIN /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask @@ -38,17 +38,17 @@ ENTRY(xen_irq_enable_direct) 1: FRAME_END ret - ENDPROC(xen_irq_enable_direct) +SYM_FUNC_END(xen_irq_enable_direct) /* * Disabling events is simply a matter of making the event mask * non-zero. */ -ENTRY(xen_irq_disable_direct) +SYM_FUNC_START(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ret -ENDPROC(xen_irq_disable_direct) +SYM_FUNC_END(xen_irq_disable_direct) /* * (xen_)save_fl is used to get the current interrupt enable status. @@ -59,12 +59,12 @@ ENDPROC(xen_irq_disable_direct) * undefined. We need to toggle the state of the bit, because Xen and * x86 use opposite senses (mask vs enable). */ -ENTRY(xen_save_fl_direct) +SYM_FUNC_START(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah, %ah ret - ENDPROC(xen_save_fl_direct) +SYM_FUNC_END(xen_save_fl_direct) /* @@ -74,7 +74,7 @@ ENTRY(xen_save_fl_direct) * interrupt mask state, it checks for unmasked pending events and * enters the hypervisor to get them delivered if so. */ -ENTRY(xen_restore_fl_direct) +SYM_FUNC_START(xen_restore_fl_direct) FRAME_BEGIN #ifdef CONFIG_X86_64 testw $X86_EFLAGS_IF, %di @@ -95,14 +95,14 @@ ENTRY(xen_restore_fl_direct) 1: FRAME_END ret - ENDPROC(xen_restore_fl_direct) +SYM_FUNC_END(xen_restore_fl_direct) /* * Force an event check by making a hypercall, but preserve regs * before making the call. */ -ENTRY(check_events) +SYM_FUNC_START(check_events) FRAME_BEGIN #ifdef CONFIG_X86_32 push %eax @@ -135,19 +135,19 @@ ENTRY(check_events) #endif FRAME_END ret -ENDPROC(check_events) +SYM_FUNC_END(check_events) -ENTRY(xen_read_cr2) +SYM_FUNC_START(xen_read_cr2) FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX FRAME_END ret - ENDPROC(xen_read_cr2); +SYM_FUNC_END(xen_read_cr2); -ENTRY(xen_read_cr2_direct) +SYM_FUNC_START(xen_read_cr2_direct) FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX FRAME_END ret - ENDPROC(xen_read_cr2_direct); +SYM_FUNC_END(xen_read_cr2_direct); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index c15db060a242..2712e9155306 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -56,7 +56,7 @@ _ASM_EXTABLE(1b,2b) .endm -ENTRY(xen_iret) +SYM_CODE_START(xen_iret) /* test eflags for special cases */ testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) jnz hyper_iret @@ -122,14 +122,14 @@ xen_iret_end_crit: hyper_iret: /* put this out of line since its very rarely used */ jmp hypercall_page + __HYPERVISOR_iret * 32 +SYM_CODE_END(xen_iret) .globl xen_iret_start_crit, xen_iret_end_crit /* - * This is called by xen_hypervisor_callback in entry.S when it sees + * This is called by xen_hypervisor_callback in entry_32.S when it sees * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in - * %eax so we can do a more refined determination of what to do. + * xen_iret_start_crit and xen_iret_end_crit. * * The stack format at this point is: * ---------------- @@ -138,70 +138,46 @@ hyper_iret: * eflags } outer exception info * cs } * eip } - * ---------------- <- edi (copy dest) - * eax : outer eax if it hasn't been restored * ---------------- - * eflags } nested exception info - * cs } (no ss/esp because we're nested - * eip } from the same ring) - * orig_eax }<- esi (copy src) - * - - - - - - - - - * fs } - * es } - * ds } SAVE_ALL state - * eax } - * : : - * ebx }<- esp + * eax : outer eax if it hasn't been restored * ---------------- + * eflags } + * cs } nested exception info + * eip } + * return address : (into xen_hypervisor_callback) * - * In order to deliver the nested exception properly, we need to shift - * everything from the return addr up to the error code so it sits - * just under the outer exception info. This means that when we - * handle the exception, we do it in the context of the outer - * exception rather than starting a new one. + * In order to deliver the nested exception properly, we need to discard the + * nested exception frame such that when we handle the exception, we do it + * in the context of the outer exception rather than starting a new one. * - * The only caveat is that if the outer eax hasn't been restored yet - * (ie, it's still on stack), we need to insert its value into the - * SAVE_ALL state before going on, since it's usermode state which we - * eventually need to restore. + * The only caveat is that if the outer eax hasn't been restored yet (i.e. + * it's still on stack), we need to restore its value here. */ -ENTRY(xen_iret_crit_fixup) +SYM_CODE_START(xen_iret_crit_fixup) /* * Paranoia: Make sure we're really coming from kernel space. * One could imagine a case where userspace jumps into the * critical range address, but just before the CPU delivers a - * GP, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. The Intel documents - * explicitly say that the reported EIP for a bad jump is the - * jump instruction itself, not the destination, but some - * virtual environments get this wrong. + * PF, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. */ - movl PT_CS(%esp), %ecx - andl $SEGMENT_RPL_MASK, %ecx - cmpl $USER_RPL, %ecx - je 2f - - lea PT_ORIG_EAX(%esp), %esi - lea PT_EFLAGS(%esp), %edi + testb $2, 2*4(%esp) /* nested CS */ + jnz 2f /* * If eip is before iret_restore_end then stack * hasn't been restored yet. */ - cmp $iret_restore_end, %eax + cmpl $iret_restore_end, 1*4(%esp) jae 1f - movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ - movl %eax, PT_EAX(%esp) + movl 4*4(%esp), %eax /* load outer EAX */ + ret $4*4 /* discard nested EIP, CS, and EFLAGS as + * well as the just restored EAX */ - lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ - - /* set up the copy */ -1: std - mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ - rep movsl - cld - - lea 4(%edi), %esp /* point esp to new frame */ -2: jmp xen_do_upcall +1: + ret $3*4 /* discard nested EIP, CS, and EFLAGS */ +2: + ret +SYM_CODE_END(xen_iret_crit_fixup) diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index ebf610b49c06..0a0fd168683a 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -20,11 +20,11 @@ #include <linux/linkage.h> .macro xen_pv_trap name -ENTRY(xen_\name) +SYM_CODE_START(xen_\name) pop %rcx pop %r11 jmp \name -END(xen_\name) +SYM_CODE_END(xen_\name) _ASM_NOKPROBE(xen_\name) .endm @@ -57,7 +57,7 @@ xen_pv_trap entry_INT80_compat xen_pv_trap hypervisor_callback __INIT -ENTRY(xen_early_idt_handler_array) +SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS pop %rcx @@ -66,7 +66,7 @@ ENTRY(xen_early_idt_handler_array) i = i + 1 .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -END(xen_early_idt_handler_array) +SYM_CODE_END(xen_early_idt_handler_array) __FINIT hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 @@ -85,11 +85,12 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 * r11 }<-- pushed by hypercall page * rsp->rax } */ -ENTRY(xen_iret) +SYM_CODE_START(xen_iret) pushq $0 jmp hypercall_iret +SYM_CODE_END(xen_iret) -ENTRY(xen_sysret64) +SYM_CODE_START(xen_sysret64) /* * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back. @@ -107,6 +108,7 @@ ENTRY(xen_sysret64) pushq $VGCF_in_syscall jmp hypercall_iret +SYM_CODE_END(xen_sysret64) /* * Xen handles syscall callbacks much like ordinary exceptions, which @@ -124,7 +126,7 @@ ENTRY(xen_sysret64) */ /* Normal 64-bit system call target */ -ENTRY(xen_syscall_target) +SYM_FUNC_START(xen_syscall_target) popq %rcx popq %r11 @@ -137,12 +139,12 @@ ENTRY(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -ENDPROC(xen_syscall_target) +SYM_FUNC_END(xen_syscall_target) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -ENTRY(xen_syscall32_target) +SYM_FUNC_START(xen_syscall32_target) popq %rcx popq %r11 @@ -155,25 +157,25 @@ ENTRY(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -ENDPROC(xen_syscall32_target) +SYM_FUNC_END(xen_syscall32_target) /* 32-bit compat sysenter target */ -ENTRY(xen_sysenter_target) +SYM_FUNC_START(xen_sysenter_target) mov 0*8(%rsp), %rcx mov 1*8(%rsp), %r11 mov 5*8(%rsp), %rsp jmp entry_SYSENTER_compat -ENDPROC(xen_sysenter_target) +SYM_FUNC_END(xen_sysenter_target) #else /* !CONFIG_IA32_EMULATION */ -ENTRY(xen_syscall32_target) -ENTRY(xen_sysenter_target) +SYM_FUNC_START_ALIAS(xen_syscall32_target) +SYM_FUNC_START(xen_sysenter_target) lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -ENDPROC(xen_syscall32_target) -ENDPROC(xen_sysenter_target) +SYM_FUNC_END(xen_sysenter_target) +SYM_FUNC_END_ALIAS(xen_syscall32_target) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index c1d8b90aa4e2..1d0cee3163e4 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -22,7 +22,7 @@ #ifdef CONFIG_XEN_PV __INIT -ENTRY(startup_xen) +SYM_CODE_START(startup_xen) UNWIND_HINT_EMPTY cld @@ -52,13 +52,13 @@ ENTRY(startup_xen) #endif jmp xen_start_kernel -END(startup_xen) +SYM_CODE_END(startup_xen) __FINIT #endif .pushsection .text .balign PAGE_SIZE -ENTRY(hypercall_page) +SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_EMPTY .skip 32 @@ -69,7 +69,7 @@ ENTRY(hypercall_page) .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 #include <asm/xen-hypercalls.h> #undef HYPERCALL -END(hypercall_page) +SYM_CODE_END(hypercall_page) .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |