From 07d620212d51d113fad997357a75f5e1f2ffd5a7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 7 Feb 2012 21:09:03 -0800 Subject: x86: Use generic posix_types.h Change the x86 architecture to use . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1328677745-20121-20-git-send-email-hpa@zytor.com Cc: Ingo Molnar Cc: Thomas Gleixner --- arch/x86/include/asm/posix_types_32.h | 75 +++--------------------- arch/x86/include/asm/posix_types_64.h | 106 +--------------------------------- 2 files changed, 12 insertions(+), 169 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index f7d9adf82e53..99f262e04b91 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -7,79 +7,22 @@ * assume GCC is being used. */ -typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; +#define __kernel_mode_t __kernel_mode_t + typedef unsigned short __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; +#define __kernel_ipc_pid_t __kernel_ipc_pid_t + typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef int __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; -typedef unsigned int __kernel_uid32_t; -typedef unsigned int __kernel_gid32_t; +#define __kernel_uid_t __kernel_uid_t -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - -#if defined(__KERNEL__) - -#undef __FD_SET -#define __FD_SET(fd,fdsetp) \ - asm volatile("btsl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int)(fd))) - -#undef __FD_CLR -#define __FD_CLR(fd,fdsetp) \ - asm volatile("btrl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int) (fd))) - -#undef __FD_ISSET -#define __FD_ISSET(fd,fdsetp) \ - (__extension__ \ - ({ \ - unsigned char __result; \ - asm volatile("btl %1,%2 ; setb %0" \ - : "=q" (__result) \ - : "r" ((int)(fd)), \ - "m" (*(__kernel_fd_set *)(fdsetp))); \ - __result; \ -})) - -#undef __FD_ZERO -#define __FD_ZERO(fdsetp) \ -do { \ - int __d0, __d1; \ - asm volatile("cld ; rep ; stosl" \ - : "=m" (*(__kernel_fd_set *)(fdsetp)), \ - "=&c" (__d0), "=&D" (__d1) \ - : "a" (0), "1" (__FDSET_LONGS), \ - "2" ((__kernel_fd_set *)(fdsetp)) \ - : "memory"); \ -} while (0) - -#endif /* defined(__KERNEL__) */ +#include #endif /* _ASM_X86_POSIX_TYPES_32_H */ diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/asm/posix_types_64.h index eb8d2d92b63e..cba0c1ead162 100644 --- a/arch/x86/include/asm/posix_types_64.h +++ b/arch/x86/include/asm/posix_types_64.h @@ -7,113 +7,13 @@ * assume GCC is being used. */ -typedef unsigned long __kernel_ino_t; -typedef unsigned int __kernel_mode_t; -typedef unsigned long __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; -typedef int __kernel_ipc_pid_t; -typedef unsigned int __kernel_uid_t; -typedef unsigned int __kernel_gid_t; -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; -typedef __kernel_uid_t __kernel_uid32_t; -typedef __kernel_gid_t __kernel_gid32_t; +#define __kernel_old_uid_t __kernel_old_uid_t typedef unsigned long __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __KERNEL__ - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -/* - * This will unroll the loop for the normal constant cases (8 or 32 longs, - * for 256 and 1024-bit fd_sets respectively) - */ -#undef __FD_ZERO -static inline void __FD_ZERO(__kernel_fd_set *p) -{ - unsigned long *tmp = p->fds_bits; - int i; - - if (__builtin_constant_p(__FDSET_LONGS)) { - switch (__FDSET_LONGS) { - case 32: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; - tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; - tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; - tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; - return; - case 16: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - return; - case 8: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - return; - case 4: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - return; - } - } - i = __FDSET_LONGS; - while (i) { - i--; - *tmp = 0; - tmp++; - } -} - -#endif /* defined(__KERNEL__) */ +#include #endif /* _ASM_X86_POSIX_TYPES_64_H */ -- cgit From 6bd330083e0e97b7ddc053459190bf3d5768ca83 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 6 Feb 2012 13:03:09 -0800 Subject: x86: Factor out TIF_IA32 from 32-bit address space Factor out IA32 (compatibility instruction set) from 32-bit address space in the thread_info flags; this is a precondition patch for x32 support. Originally-by: H. J. Lu Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-4pr1xnnksprt7t0h3w5fw4rv@git.kernel.org --- arch/x86/include/asm/elf.h | 4 ++-- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/include/asm/thread_info.h | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 5f962df30d0f..410fa6a219f6 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -287,7 +287,7 @@ do { \ #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ /* 1GB for 64bit, 8MB for 32bit */ -#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) +#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff) #define ARCH_DLINFO \ do { \ @@ -330,7 +330,7 @@ static inline int mmap_is_ia32(void) return 1; #endif #ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) + if (test_thread_flag(TIF_ADDR32)) return 1; #endif return 0; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index aa9088c26931..9f748b5fb701 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -924,9 +924,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ 0xc0000000 : 0xFFFFe000) -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index bc817cd8b443..d1803a495b35 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -86,7 +86,7 @@ struct thread_info { #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* 32bit process */ +#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ @@ -95,6 +95,7 @@ struct thread_info { #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ +#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -116,6 +117,7 @@ struct thread_info { #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_ADDR32 (1 << TIF_ADDR32) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ -- cgit From 4f72e331c20ac1c656f300cee246330c1786652b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 6 Feb 2012 13:50:20 -0800 Subject: x86-64: Use explicit sizes in sigcontext.h, prepare for x32 Use explicit sizes (__u64) instead of implicit sizes (unsigned long) in the definition for sigcontext.h; this will allow this structure to be shared between the x86-64 native ABI and the x32 ABI. Originally-by: H. J. Lu Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-4pr1xnnksprt7t0h3w5fw4rv@git.kernel.org --- arch/x86/include/asm/sigcontext.h | 57 ++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 04459d25e66e..4a085383af27 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -230,34 +230,37 @@ struct sigcontext { * User-space might still rely on the old definition: */ struct sigcontext { - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long rdi; - unsigned long rsi; - unsigned long rbp; - unsigned long rbx; - unsigned long rdx; - unsigned long rax; - unsigned long rcx; - unsigned long rsp; - unsigned long rip; - unsigned long eflags; /* RFLAGS */ - unsigned short cs; - unsigned short gs; - unsigned short fs; - unsigned short __pad0; - unsigned long err; - unsigned long trapno; - unsigned long oldmask; - unsigned long cr2; + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; + __u64 rdi; + __u64 rsi; + __u64 rbp; + __u64 rbx; + __u64 rdx; + __u64 rax; + __u64 rcx; + __u64 rsp; + __u64 rip; + __u64 eflags; /* RFLAGS */ + __u16 cs; + __u16 gs; + __u16 fs; + __u16 __pad0; + __u64 err; + __u64 trapno; + __u64 oldmask; + __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ - unsigned long reserved1[8]; +#ifndef __LP64__ + __u32 __fpstate_pad; +#endif + __u64 reserved1[8]; }; #endif /* !__KERNEL__ */ -- cgit From 1f5e27a90add2fe2a1c11508f68d377e3ddcf9ab Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 14 Feb 2012 13:13:21 -0800 Subject: x32: Create posix_types_x32.h This is the same as the 64-bit posix_types.h, except that __kernel_[u]long_t is defined to be [unsigned] long long and therefore 64 bits. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/posix_types.h | 4 +++- arch/x86/include/asm/posix_types_x32.h | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/posix_types_x32.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index b57e6a43a37a..986954fb9513 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -14,6 +14,7 @@ header-y += msr.h header-y += mtrr.h header-y += posix_types_32.h header-y += posix_types_64.h +header-y += posix_types_x32.h header-y += prctl.h header-y += processor-flags.h header-y += ptrace-abi.h diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index bb7133dc155d..3427b7798dbc 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,7 +7,9 @@ #else # ifdef __i386__ # include "posix_types_32.h" -# else +# elif defined(__LP64__) # include "posix_types_64.h" +# else +# include "posix_types_x32.h" # endif #endif diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/asm/posix_types_x32.h new file mode 100644 index 000000000000..85f9bdafa93c --- /dev/null +++ b/arch/x86/include/asm/posix_types_x32.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_POSIX_TYPES_X32_H +#define _ASM_X86_POSIX_TYPES_X32_H + +/* + * This file is only used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + * + * These types should generally match the ones used by the 64-bit kernel, + * + */ + +typedef long long __kernel_long_t; +typedef unsigned long long __kernel_ulong_t; +#define __kernel_long_t __kernel_long_t + +#include + +#endif /* _ASM_X86_POSIX_TYPES_X32_H */ -- cgit From d046ff8b30319d9aa38d877a0ba4206771e54346 Mon Sep 17 00:00:00 2001 From: "H. J. Lu" Date: Tue, 14 Feb 2012 13:49:48 -0800 Subject: x86-64: Add prototype for old_rsp to a header file So far this has only been used in process_64.c, but the x32 code will need it in additional code. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9f748b5fb701..e34f95129f16 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -948,6 +948,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); + +/* + * User space RSP while inside the SYSCALL fast path + */ +DECLARE_PER_CPU(unsigned long, old_rsp); + #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, -- cgit From bb2127240c5595ae4ef7115494f51e973692f64e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 14 Feb 2012 13:56:49 -0800 Subject: x32: Add a thread flag for x32 processes An x32 process is *almost* the same thing as a 64-bit process with a 32-bit address limit, but there are a few minor differences -- in particular core dumps are 32 bits and signal handling is different. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d1803a495b35..912e93511466 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -96,6 +96,7 @@ struct thread_info { #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ +#define TIF_X32 30 /* 32-bit native x86-64 binary */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -118,6 +119,7 @@ struct thread_info { #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_ADDR32 (1 << TIF_ADDR32) +#define _TIF_X32 (1 << TIF_X32) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ -- cgit From 2c73ce734653f96542a070f3c3b3e3d1cd0fba02 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 09:48:01 -0800 Subject: x86-64, ia32: Drop sys32_rt_sigprocmask On x86, the only difference between sys_rt_sigprocmask and sys32_rt_sigprocmask is the alignment of the data structures. However, x86 allows data accesses with arbitrary alignment, and therefore there is no reason for this code to be different. Reported-by: Gregory M. Lueck Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sys_ia32.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index cb238526a9f1..68da87bfb5a3 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -36,8 +36,6 @@ asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, struct sigaction32 __user *, unsigned int); asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); -asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, - compat_sigset_t __user *, unsigned int); asmlinkage long sys32_alarm(unsigned int); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); -- cgit From 6cbb369f578378cf5b1876766d860ae7c2a94d60 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 14 Feb 2012 14:38:31 -0800 Subject: x32: Generate Generate ; this exports x32 system call numbers to user space. [ v2: Enclose all arguments to syshdr in '' so empty arguments aren't dropped on the floor. ] Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/unistd.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 986954fb9513..f9c0d3ba9e84 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -25,3 +25,4 @@ header-y += vsyscall.h genhdr-y += unistd_32.h genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 21f77b89e47a..dab5349f14fc 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -1,6 +1,9 @@ #ifndef _ASM_X86_UNISTD_H #define _ASM_X86_UNISTD_H 1 +/* x32 syscall flag bit */ +#define __X32_SYSCALL_BIT 0x40000000 + #ifdef __KERNEL__ # ifdef CONFIG_X86_32 @@ -52,8 +55,10 @@ #else # ifdef __i386__ # include -# else +# elif defined(__LP64__) # include +# else +# include # endif #endif -- cgit From ea499fec48dd771bd92984337fcb57ed4c787e69 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 14 Feb 2012 14:46:23 -0800 Subject: x32: Generate Generate macros for the *kernel* code to use to refer to x32 system calls. These have an __NR_x32_ prefix and do not include __X32_SYSCALL_BIT. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index dab5349f14fc..7a48a5557470 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -17,6 +17,7 @@ # else # include +# include # define __ARCH_WANT_COMPAT_SYS_TIME # endif -- cgit From f28f0c23576662fb293defe9b1884d5a6e1bd85c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 07:38:43 -0800 Subject: x86: Move some signal-handling definitions to a common header There are some definitions which are duplicated between kernel/signal.c and ia32/ia32_signal.c; move them to a common header file. Rather than adding stuff to existing header files which contain data structures, create a new header file; hence the slightly odd name ("all the good ones were taken.") Note: nothing relied on signal_fault() being defined in . Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/ptrace.h | 1 - arch/x86/include/asm/sighandling.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/sighandling.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 35664547125b..dcfde52979c3 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -145,7 +145,6 @@ extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); -void signal_fault(struct pt_regs *regs, void __user *frame, char *where); extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h new file mode 100644 index 000000000000..843e299e120e --- /dev/null +++ b/arch/x86/include/asm/sighandling.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_SIGHANDLING_H +#define _ASM_X86_SIGHANDLING_H + +#include +#include +#include + +#include + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +#endif /* _ASM_X86_SIGHANDLING_H */ -- cgit From 851394229e79c11b0b5b74c509817848e9a80564 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 07:43:09 -0800 Subject: x32: Export setup/restore_sigcontext from signal.c Export setup_sigcontext() and restore_sigcontext() from signal.c, so we can use the 64-bit versions verbatim for x32. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sighandling.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 843e299e120e..ada93b3b8c66 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -16,4 +16,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax); +int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, + struct pt_regs *regs, unsigned long mask); + #endif /* _ASM_X86_SIGHANDLING_H */ -- cgit From 4048e2a8d4b491a69bf47ceda12cc0c0b924f6b8 Mon Sep 17 00:00:00 2001 From: "H. J. Lu" Date: Sun, 19 Feb 2012 07:46:08 -0800 Subject: x32: Add struct ucontext_x32 Add a definition for struct ucontext_x32; this is inherently a mix of the 32- and 64-bit versions. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/ia32.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 1f7e62517284..c6435ab1cc13 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -43,6 +43,15 @@ struct ucontext_ia32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's. */ -- cgit From 9d3897630e14b3d33bcb24a3c0fa9d60a01d3058 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 07:50:12 -0800 Subject: x32: Add rt_sigframe_x32 Add rt_sigframe_x32 to . Unfortunately we can't just define all the data structures unconditionally, due to the #ifdef CONFIG_COMPAT in and its trickle-down effects, hence the #ifdef mess. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sigframe.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 4e0fe26d27d3..7c7c27c97daa 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -59,12 +59,25 @@ struct rt_sigframe_ia32 { #endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ #ifdef CONFIG_X86_64 + struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; /* fp state follows here */ }; + +#ifdef CONFIG_X86_X32_ABI + +struct rt_sigframe_x32 { + u64 pretcode; + struct ucontext_x32 uc; + compat_siginfo_t info; + /* fp state follows here */ +}; + +#endif /* CONFIG_X86_X32_ABI */ + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_SIGFRAME_H */ -- cgit From fca460f95e928bae373daa8295877b6905bc62b8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 07:56:26 -0800 Subject: x32: Handle the x32 system call flag x32 shares most system calls with x86-64, but unfortunately some subsystem (the input subsystem is the chief offender) which require is_compat() when operating with a 32-bit userspace. The input system actually has text files in sysfs whose meaning is dependent on sizeof(long) in userspace! We could solve this by having two completely disjoint system call tables; requiring that each system call be duplicated. This patch takes a different approach: we add a flag to the system call number; this flag doesn't affect the system call dispatch but requests compat treatment from affected subsystems for the duration of the system call. The change of cmpq to cmpl is safe since it immediately follows the and. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/compat.h | 13 +++++++++++-- arch/x86/include/asm/syscall.h | 5 +++-- arch/x86/include/asm/unistd.h | 7 +++++++ 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 30d737ef2a42..7938b84e4506 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -7,6 +7,7 @@ #include #include #include +#include #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" @@ -212,9 +213,17 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)regs->sp - len; } -static inline int is_compat_task(void) +static inline bool is_compat_task(void) { - return current_thread_info()->status & TS_COMPAT; +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & TS_COMPAT) + return true; +#endif +#ifdef CONFIG_X86_X32_ABI + if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) + return true; +#endif + return false; } #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d962e5652a73..386b78686c4d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,6 +16,7 @@ #include #include #include /* For NR_syscalls */ +#include extern const unsigned long sys_call_table[]; @@ -26,13 +27,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->orig_ax; + return regs->orig_ax & __SYSCALL_MASK; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax; + regs->ax = regs->orig_ax & __SYSCALL_MASK; } static inline long syscall_get_error(struct task_struct *task, diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 7a48a5557470..37cdc9d99bb1 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,6 +5,13 @@ #define __X32_SYSCALL_BIT 0x40000000 #ifdef __KERNEL__ + +# ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) +# else +# define __SYSCALL_MASK (~0) +# endif + # ifdef CONFIG_X86_32 # include -- cgit From a96d692e9a559980f269f81c9b0b094220382186 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 14:02:46 -0800 Subject: x86: Add #ifdef CONFIG_COMPAT to Unfortunately a lot of the compat types are guarded with CONFIG_COMPAT or the equivalent, so add a similar guard to to avoid compilation failures when CONFIG_COMPAT=n. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sys_ia32.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 68da87bfb5a3..3fda9db48819 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_SYS_IA32_H #define _ASM_X86_SYS_IA32_H +#ifdef CONFIG_COMPAT + #include #include #include @@ -81,4 +83,7 @@ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, const char __user *); + +#endif /* CONFIG_COMPAT */ + #endif /* _ASM_X86_SYS_IA32_H */ -- cgit From d1a797f388d6d30fa502915d1b9937ed758b7137 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 19 Feb 2012 10:06:34 -0800 Subject: x32: Handle process creation Allow an x32 process to be started. Originally-by: H. J. Lu Signed-off-by: H. Peter Anvin Cc: Peter Zijlstra --- arch/x86/include/asm/compat.h | 26 ++++++++++++++++++++++++-- arch/x86/include/asm/elf.h | 25 +++++++++++++++++++++---- 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 7938b84e4506..e7f68b49c01a 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -6,6 +6,7 @@ */ #include #include +#include #include #include @@ -187,7 +188,20 @@ struct compat_shmid64_ds { /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ +#ifdef CONFIG_X86_X32_ABI +typedef struct user_regs_struct compat_elf_gregset_t; + +#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216) +#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296) +#define SET_PR_FPVALID(S,V) \ + do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \ + while (0) + +#define COMPAT_USE_64BIT_TIME \ + (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) +#else typedef struct user_regs_struct32 compat_elf_gregset_t; +#endif /* * A pointer passed in from user mode. This should not @@ -209,8 +223,16 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline void __user *arch_compat_alloc_user_space(long len) { - struct pt_regs *regs = task_pt_regs(current); - return (void __user *)regs->sp - len; + compat_uptr_t sp; + + if (test_thread_flag(TIF_IA32)) { + sp = task_pt_regs(current)->sp; + } else { + /* -128 for the x32 ABI redzone */ + sp = percpu_read(old_rsp) - 128; + } + + return (void __user *)round_down(sp - len, 16); } static inline bool is_compat_task(void) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 410fa6a219f6..83aabea95dd7 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -156,7 +156,12 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) elf_check_arch_ia32(x) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) + +#if __USER32_DS != __USER_DS +# error "The following code assumes __USER32_DS == __USER_DS" +#endif static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) @@ -179,8 +184,9 @@ static inline void elf_common_init(struct thread_struct *t, void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -void set_personality_ia32(void); -#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() +void set_personality_ia32(bool); +#define COMPAT_SET_PERSONALITY(ex) \ + set_personality_ia32((ex).e_machine == EM_X86_64) #define COMPAT_ELF_PLATFORM ("i686") @@ -296,9 +302,20 @@ do { \ (unsigned long)current->mm->context.vdso); \ } while (0) +#define ARCH_DLINFO_X32 \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + #define AT_SYSINFO 32 -#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) +#define COMPAT_ARCH_DLINFO \ +if (test_thread_flag(TIF_X32)) \ + ARCH_DLINFO_X32; \ +else \ + ARCH_DLINFO_IA32(sysctl_vsyscall32) #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) -- cgit From 22e842d4d90ffec9677cc114487a5cefd39b5643 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 21 Feb 2012 14:32:19 -0800 Subject: x32: Fix coding style violations in the x32 VDSO code Move the prototype for x32_setup_additional_pages() to a header file, and adjust the coding style to match standard. Signed-off-by: H. Peter Anvin Cc: H. J. Lu --- arch/x86/include/asm/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 83aabea95dd7..1e40634591a4 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -331,6 +331,8 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); +extern int x32_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages -- cgit From b263b31e8ad65cdbfa5a7f739460f350554a2dc1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 27 Feb 2012 15:15:25 -0800 Subject: x86, mtrr: Use explicit sizing and padding for the 64-bit ioctls Specify the data structures for the 64-bit ioctls with explicit sizing and padding so that the x32 kernel will correctly use the 64-bit forms of these ioctls. Note that these ioctls are bogus in both forms on both 32 and 64 bits; even on 64 bits the maximum MTRR size is only 44 bits long. Note that nothing really is supposed to use these ioctls and that the preferred interface is text strings on /proc/mtrr, or better yet, nothing at all (use /sys/bus/pci/devices/*/resource*_wc for write combining; that uses PAT not MTRRs.) Signed-off-by: H. Peter Anvin Cc: H. J. Lu Tested-by: Nitin A. Kamble Link: http://lkml.kernel.org/n/tip-vwvnlu3hjmtkwvij4qxtm90l@git.kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mtrr.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 4365ffdb461f..7e3f17f92c66 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -29,18 +29,18 @@ #define MTRR_IOCTL_BASE 'M' -struct mtrr_sentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int type; /* Type of region */ -}; - /* Warning: this structure has a different order from i386 on x86-64. The 32bit emulation code takes care of that. But you need to use this for 64bit, otherwise your X server will break. */ #ifdef __i386__ +struct mtrr_sentry { + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + struct mtrr_gentry { unsigned int regnum; /* Register number */ unsigned long base; /* Base address */ @@ -50,12 +50,20 @@ struct mtrr_gentry { #else /* __i386__ */ +struct mtrr_sentry { + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 type; /* Type of region */ +}; + struct mtrr_gentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int regnum; /* Register number */ - unsigned int type; /* Type of region */ + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 regnum; /* Register number */ + __u32 type; /* Type of region */ + __u32 _pad; /* Unused */ }; + #endif /* !__i386__ */ struct mtrr_var_range { -- cgit From 2b036c6b861dc5da295c6fe19a3edcff7093fdeb Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 9 Jan 2012 14:00:35 -0500 Subject: KVM: SVM: Add support for AMD's OSVW feature in guests In some cases guests should not provide workarounds for errata even when the physical processor is affected. For example, because of erratum 400 on family 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before going to idle in order to avoid getting stuck in a non-C0 state. This is not necessary: HLT and IO instructions are intercepted and therefore there is no reason for erratum 400 workaround in the guest. This patch allows us to present a guest with certain errata as fixed, regardless of the state of actual hardware. Signed-off-by: Boris Ostrovsky Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52d6640a5ca1..bd69c93da8fa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -478,6 +478,12 @@ struct kvm_vcpu_arch { u32 id; bool send_user_only; } apf; + + /* OSVW MSRs (AMD only) */ + struct { + u64 length; + u64 status; + } osvw; }; struct kvm_arch { -- cgit From b9e5dc8d4511e6a00862a795319569e7fe7f60f4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 11 Jan 2012 11:20:30 +0100 Subject: KVM: provide synchronous registers in kvm_run On some cpus the overhead for virtualization instructions is in the same range as a system call. Having to call multiple ioctls to get set registers will make certain userspace handled exits more expensive than necessary. Lets provide a section in kvm_run that works as a shared save area for guest registers. We also provide two 64bit flags fields (architecture specific), that will specify 1. which parts of these fields are valid. 2. which registers were modified by userspace Each bit for these flag fields will define a group of registers (like general purpose) or a single register. Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4d8dcbdfc120..e7d1c194d272 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -321,4 +321,8 @@ struct kvm_xcrs { __u64 padding[16]; }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + #endif /* _ASM_X86_KVM_H */ -- cgit From 3ea8b75e47ac70bdd0a2c0492102682d43bfa3c4 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 17 Jan 2012 19:50:08 +0900 Subject: KVM: MMU: Remove unused kvm_pte_chain Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd69c93da8fa..461016614324 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache { void *objects[KVM_NR_MEM_OBJS]; }; -#define NR_PTE_CHAIN_ENTRIES 5 - -struct kvm_pte_chain { - u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; - struct hlist_node link; -}; - /* * kvm_mmu_page_role, below, is defined as: * -- cgit From df156f90a0f90649dd38b7667901ef85478f3d2b Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 7 Feb 2012 15:52:44 +0100 Subject: x86: Introduce x86_cpuinit.early_percpu_clock_init hook When kvm guest uses kvmclock, it may hang on vcpu hot-plug. This is caused by an overflow in pvclock_get_nsec_offset, u64 delta = tsc - shadow->tsc_timestamp; which in turn is caused by an undefined values from percpu hv_clock that hasn't been initialized yet. Uninitialized clock on being booted cpu is accessed from start_secondary -> smp_callin -> smp_store_cpu_info -> identify_secondary_cpu -> mtrr_ap_init -> mtrr_restore -> stop_machine_from_inactive_cpu -> queue_stop_cpus_work ... -> sched_clock -> kvm_clock_read which is well before x86_cpuinit.setup_percpu_clockev call in start_secondary, where percpu clock is initialized. This patch introduces a hook that allows to setup/initialize per_cpu clock early and avoid overflow due to reading - undefined values - old values if cpu was offlined and then onlined again Another possible early user of this clock source is ftrace that accesses it to get timestamps for ring buffer entries. So if mtrr_ap_init is moved from identify_secondary_cpu to past x86_cpuinit.setup_percpu_clockev in start_secondary, ftrace may cause the same overflow/hang on cpu hot-plug anyway. More complete description of the problem: https://lkml.org/lkml/2012/2/2/101 Credits to Marcelo Tosatti for hook idea. Acked-by: Thomas Gleixner Signed-off-by: Igor Mammedov Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/x86_init.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 517d4767ffdd..5d0afac2962c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -145,9 +145,11 @@ struct x86_init_ops { /** * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device + * @early_percpu_clock_init: early init of the per cpu clock event device */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); + void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; -- cgit From a59cb29e4d81e025192550c2703f305637f016f6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Feb 2012 12:28:31 -0200 Subject: KVM: x86: increase recommended max vcpus to 160 Increase recommended max vcpus from 64 to 160 (tested internally at Red Hat). Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 461016614324..782d973b0719 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -29,7 +29,7 @@ #include #define KVM_MAX_VCPUS 254 -#define KVM_SOFT_MAX_VCPUS 64 +#define KVM_SOFT_MAX_VCPUS 160 #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 -- cgit From a628b684d27d22631d1819890f13047ae9075241 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 5 Mar 2012 13:39:29 -0800 Subject: x32: Provide separate is_ia32_task() and is_x32_task() predicates The is_compat_task() test is composed of two predicates already, so make each of them available separately. Signed-off-by: H. Peter Anvin Cc: H. J. Lu Link: http://lkml.kernel.org/r/1329696488-16970-1-git-send-email-hpa@zytor.com --- arch/x86/include/asm/compat.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index e7f68b49c01a..355edc091604 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -235,12 +235,17 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)round_down(sp - len, 16); } -static inline bool is_compat_task(void) +static inline bool is_ia32_task(void) { #ifdef CONFIG_IA32_EMULATION if (current_thread_info()->status & TS_COMPAT) return true; #endif + return false; +} + +static inline bool is_x32_task(void) +{ #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) return true; @@ -248,4 +253,9 @@ static inline bool is_compat_task(void) return false; } +static inline bool is_compat_task(void) +{ + return is_ia32_task() || is_x32_task(); +} + #endif /* _ASM_X86_COMPAT_H */ -- cgit From e7084fd52ed71249ab2ce7a7d89d601c9d1f904c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 5 Mar 2012 13:40:24 -0800 Subject: x32: Switch to a 64-bit clock_t clock_t is used mainly to give the number of jiffies a certain process has burned. It is entirely feasible for a long-running process to consume more than 2^32 jiffies especially in a multiprocess system. As such, switch to a 64-bit clock_t for x32, just as we already switched to a 64-bit time_t. clock_t is only used in a handful of places, and as such it is really not a very significant change. The one that has the biggest impact is in struct siginfo, but since the *size* of struct siginfo doesn't change (it is padded to the hilt) it is fairly easy to make this a localized change. This also gets rid of sys_x32_times, however since this is a pretty late change don't compactify the system call numbers; we can reuse system call slot 521 next time we need an x32 system call. Reported-by: Gregory M. Lueck Signed-off-by: H. Peter Anvin Cc: H. J. Lu Link: http://lkml.kernel.org/r/1329696488-16970-1-git-send-email-hpa@zytor.com --- arch/x86/include/asm/ia32.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index c6435ab1cc13..7d0c18587709 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -125,6 +125,15 @@ typedef struct compat_siginfo { compat_clock_t _stime; } _sigchld; + /* SIGCHLD (x32 version) */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + s64 _utime; + s64 _stime; + } _sigchld_x32; + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ -- cgit From cc578287e3224d0da196cc1d226bdae6b068faa7 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 3 Feb 2012 15:43:50 -0200 Subject: KVM: Infrastructure for software and hardware based TSC rate scaling This requires some restructuring; rather than use 'virtual_tsc_khz' to indicate whether hardware rate scaling is in effect, we consider each VCPU to always have a virtual TSC rate. Instead, there is new logic above the vendor-specific hardware scaling that decides whether it is even necessary to use and updates all rate variables used by common code. This means we can simply query the virtual rate at any point, which is needed for software rate scaling. There is also now a threshold added to the TSC rate scaling; minor differences and variations of measured TSC rate can accidentally provoke rate scaling to be used when it is not needed. Instead, we have a tolerance variable called tsc_tolerance_ppm, which is the maximum variation from user requested rate at which scaling will be used. The default is 250ppm, which is the half the threshold for NTP adjustment, allowing for some hardware variation. In the event that hardware rate scaling is not available, we can kludge a bit by forcing TSC catchup to turn on when a faster than hardware speed has been requested, but there is nothing available yet for the reverse case; this requires a trap and emulate software implementation for RDTSC, which is still forthcoming. [avi: fix 64-bit division on i386] Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 782d973b0719..ddebbe01fff9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -422,10 +422,11 @@ struct kvm_vcpu_arch { u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; - u32 virtual_tsc_khz; bool tsc_catchup; - u32 tsc_catchup_mult; - s8 tsc_catchup_shift; + bool tsc_always_catchup; + s8 virtual_tsc_shift; + u32 virtual_tsc_mult; + u32 virtual_tsc_khz; atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ @@ -651,7 +652,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); -- cgit From 5d3cb0f6a8e3af018a522ae8d36f8f7d2511b5d8 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 3 Feb 2012 15:43:51 -0200 Subject: KVM: Improve TSC offset matching There are a few improvements that can be made to the TSC offset matching code. First, we don't need to call the 128-bit multiply (especially on a constant number), the code works much nicer to do computation in nanosecond units. Second, the way everything is setup with software TSC rate scaling, we currently have per-cpu rates. Obviously this isn't too desirable to use in practice, but if for some reason we do change the rate of all VCPUs at runtime, then reset the TSCs, we will only want to match offsets for VCPUs running at the same rate. Finally, for the case where we have an unstable host TSC, but rate scaling is being done in hardware, we should call the platform code to compute the TSC offset, so the math is reorganized to recompute the base instead, then transform the base into an offset using the existing API. [avi: fix 64-bit division on i386] Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti KVM: Fix 64-bit division in kvm_write_tsc() Breaks i386 build. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ddebbe01fff9..8a34fca6c572 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; + u32 last_tsc_khz; struct kvm_xen_hvm_config xen_hvm_config; -- cgit From 6f526ec5383dcd5fa5ffc7b3ac1d62099a0b46ad Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 3 Feb 2012 15:43:54 -0200 Subject: KVM: Add last_host_tsc tracking back to KVM The variable last_host_tsc was removed from upstream code. I am adding it back for two reasons. First, it is unnecessary to use guest TSC computation to conclude information about the host TSC. The guest may set the TSC backwards (this case handled by the previous patch), but the computation of guest TSC (and fetching an MSR) is significanlty more work and complexity than simply reading the hardware counter. In addition, we don't actually need the guest TSC for any part of the computation, by always recomputing the offset, we can eliminate the need to deal with the current offset and any scaling factors that may apply. The second reason is that later on, we are going to be using the host TSC value to restore TSC offsets after a host S4 suspend, so we need to be reading the host values, not the guest values here. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8a34fca6c572..b23682900f41 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_vcpu_arch { u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u64 last_host_tsc; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; -- cgit From f1e2b26003c41e581243c09ceed7567677449468 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Feb 2012 15:43:55 -0200 Subject: KVM: Allow adjust_tsc_offset to be in host or guest cycles Redefine the API to take a parameter indicating whether an adjustment is in host or guest cycles. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b23682900f41..dd439f13df84 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -646,7 +646,7 @@ struct kvm_x86_ops { u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); - void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); + void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -676,6 +676,17 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); -- cgit From 0dd6a6edb0124e6c71931ff575b18e15ed6e8603 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 3 Feb 2012 15:43:56 -0200 Subject: KVM: Dont mark TSC unstable due to S4 suspend During a host suspend, TSC may go backwards, which KVM interprets as an unstable TSC. Technically, KVM should not be marking the TSC unstable, which causes the TSC clocksource to go bad, but we need to be adjusting the TSC offsets in such a case. Dealing with this issue is a little tricky as the only place we can reliably do it is before much of the timekeeping infrastructure is up and running. On top of this, we are not in a KVM thread context, so we may not be able to safely access VCPU fields. Instead, we compute our best known hardware offset at power-up and stash it to be applied to all VCPUs when they actually start running. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd439f13df84..4fbeb84b1818 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -423,6 +423,7 @@ struct kvm_vcpu_arch { u64 last_tsc_nsec; u64 last_tsc_write; u64 last_host_tsc; + u64 tsc_offset_adjustment; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; -- cgit From e26101b116a6235bcd80b3a4c38c9fe91286cd79 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 3 Feb 2012 15:43:57 -0200 Subject: KVM: Track TSC synchronization in generations This allows us to track the original nanosecond and counter values at each phase of TSC writing by the guest. This gets us perfect offset matching for stable TSC systems, and perfect software computed TSC matching for machines with unstable TSC. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4fbeb84b1818..c24125cd0c63 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -420,10 +420,11 @@ struct kvm_vcpu_arch { u64 last_guest_tsc; u64 last_kernel_ns; - u64 last_tsc_nsec; - u64 last_tsc_write; u64 last_host_tsc; u64 tsc_offset_adjustment; + u64 this_tsc_nsec; + u64 this_tsc_write; + u8 this_tsc_generation; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; @@ -513,9 +514,12 @@ struct kvm_arch { s64 kvmclock_offset; raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; - u64 last_tsc_offset; u64 last_tsc_write; u32 last_tsc_khz; + u64 cur_tsc_nsec; + u64 cur_tsc_write; + u64 cur_tsc_offset; + u8 cur_tsc_generation; struct kvm_xen_hvm_config xen_hvm_config; -- cgit From db3fe4eb45f3555d91a7124e18cf3a2f2a30eb90 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 8 Feb 2012 13:02:18 +0900 Subject: KVM: Introduce kvm_memory_slot::arch and move lpage_info into it Some members of kvm_memory_slot are not used by every architecture. This patch is the first step to make this difference clear by introducing kvm_memory_slot::arch; lpage_info is moved into it. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c24125cd0c63..74c9edf2bb18 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -483,6 +483,15 @@ struct kvm_vcpu_arch { } osvw; }; +struct kvm_lpage_info { + unsigned long rmap_pde; + int write_count; +}; + +struct kvm_arch_memory_slot { + struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; -- cgit From 7f3d35fddd173e52886d03bc34b5b5d6f5bea343 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Feb 2012 14:34:38 +0100 Subject: KVM: x86 emulator: Fix task switch privilege checks Currently, all task switches check privileges against the DPL of the TSS. This is only correct for jmp/call to a TSS. If a task gate is used, the DPL of this take gate is used for the check instead. Exceptions, external interrupts and iret shouldn't perform any check. [avi: kill kvm-kmod remnants] Signed-off-by: Kevin Wolf Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7b9cfc4878af..df437b68f42b 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -388,7 +388,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int reason, + u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74c9edf2bb18..e216ba066e79 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -768,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, - bool has_error_code, u32 error_code); +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, + int reason, bool has_error_code, u32 error_code); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); -- cgit From 4cee4798a304ee1ea579423ca048f16ceaccdfb5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Feb 2012 14:34:41 +0100 Subject: KVM: x86 emulator: Allow PM/VM86 switch during task switch Task switches can switch between Protected Mode and VM86. The current mode must be updated during the task switch emulation so that the new segment selectors are interpreted correctly. In order to let privilege checks succeed, rflags needs to be updated in the vcpu struct as this causes a CPL update. Signed-off-by: Kevin Wolf Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index df437b68f42b..c222e1a1b12a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,6 +176,7 @@ struct x86_emulate_ops { void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val); int (*cpl)(struct x86_emulate_ctxt *ctxt); int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); -- cgit From a7b9d2ccc3d86303ee9314612d301966e04011c7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 26 Feb 2012 16:55:40 +0200 Subject: KVM: PMU: warn when pin control is set in eventsel msr Print warning once if pin control bit is set in eventsel msr since emulation does not support it yet. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 096c975e099f..f1f71823f682 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -23,6 +23,7 @@ #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19) #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) -- cgit From c94082656dac74257f63e91f78d5d458ac781fa5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 9 Mar 2012 16:07:10 -0800 Subject: x86: Use enum instead of literals for trap values The traps are referred to by their numbers and it can be difficult to understand them while reading the code without context. This patch adds enumeration of the trap numbers and replaces the numbers with the correct enum for x86. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20120310000710.GA32667@www.outflux.net Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/traps.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0012d0902c5f..88eae2aec619 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -89,4 +89,29 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +/* Interrupts/Exceptions */ +enum { + X86_TRAP_DE = 0, /* 0, Divide-by-zero */ + X86_TRAP_DB, /* 1, Debug */ + X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ + X86_TRAP_BP, /* 3, Breakpoint */ + X86_TRAP_OF, /* 4, Overflow */ + X86_TRAP_BR, /* 5, Bound Range Exceeded */ + X86_TRAP_UD, /* 6, Invalid Opcode */ + X86_TRAP_NM, /* 7, Device Not Available */ + X86_TRAP_DF, /* 8, Double Fault */ + X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ + X86_TRAP_TS, /* 10, Invalid TSS */ + X86_TRAP_NP, /* 11, Segment Not Present */ + X86_TRAP_SS, /* 12, Stack Segment Fault */ + X86_TRAP_GP, /* 13, General Protection Fault */ + X86_TRAP_PF, /* 14, Page Fault */ + X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ + X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ + X86_TRAP_AC, /* 17, Alignment Check */ + X86_TRAP_MC, /* 18, Machine Check */ + X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ + X86_TRAP_IRET = 32, /* 32, IRET Exception */ +}; + #endif /* _ASM_X86_TRAPS_H */ -- cgit From 51e7dc7011c99e1e5294658c7b551b92ca069985 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 12 Mar 2012 14:55:55 +0530 Subject: x86: Rename trap_no to trap_nr in thread_struct There are precedences of trap number being referred to as trap_nr. However thread struct refers trap number as trap_no. Change it to trap_nr. Also use enum instead of left-over literals for trap values. This is pure cleanup, no functional change intended. Suggested-by: Ingo Molnar Signed-off-by: Srikar Dronamraju Cc: Linus Torvalds Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Linux-mm Cc: Oleg Nesterov Cc: Andi Kleen Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120312092555.5379.942.sendpatchset@srdronam.in.ibm.com [ Fixed the math-emu build ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 02ce0b379647..f6d0d2eb0832 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -453,7 +453,7 @@ struct thread_struct { unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; - unsigned long trap_no; + unsigned long trap_nr; unsigned long error_code; /* floating point and extended processor state */ struct fpu fpu; -- cgit From ef334a20d84f52407a8a2afd02ddeaecbef0ad3d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 13 Mar 2012 19:33:03 +0530 Subject: x86: Move is_ia32_task to asm/thread_info.h from asm/compat.h is_ia32_task() is useful even in !CONFIG_COMPAT cases - utrace will use it for example. Hence move it to a more generic file: asm/thread_info.h Also now is_ia32_task() returns true if CONFIG_X86_32 is defined. Signed-off-by: Srikar Dronamraju Acked-by: H. Peter Anvin Cc: Linus Torvalds Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Linux-mm Cc: Oleg Nesterov Cc: Andi Kleen Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120313140303.17134.1401.sendpatchset@srdronam.in.ibm.com [ Performed minor cleanup ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/compat.h | 9 --------- arch/x86/include/asm/thread_info.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 355edc091604..d6805798d6fc 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -235,15 +235,6 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)round_down(sp - len, 16); } -static inline bool is_ia32_task(void) -{ -#ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & TS_COMPAT) - return true; -#endif - return false; -} - static inline bool is_x32_task(void) { #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index af1db7e722f4..ad6df8ccd715 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -266,6 +266,18 @@ static inline void set_restore_sigmask(void) ti->status |= TS_RESTORE_SIGMASK; set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); } + +static inline bool is_ia32_task(void) +{ +#ifdef CONFIG_X86_32 + return true; +#endif +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & TS_COMPAT) + return true; +#endif + return false; +} #endif /* !__ASSEMBLY__ */ #ifndef __ASSEMBLY__ -- cgit From 31796ac4e8f0e88f5c10f1ad6dab8f19bebe44a4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 14 Mar 2012 14:27:52 -0700 Subject: x32: Fix alignment fail in struct compat_siginfo Adding struct _sigchld_x32 caused a misalignment cascade in struct siginfo, because union _sifields is located on an 4-byte boundary (8-byte misaligned.) Adding new fields that are 8-byte aligned caused the intermediate structures to also be aligned to 8 bytes, thereby adding padding in unexpected places. Thus, change s64 to compat_s64 here, which makes it "misaligned on paper". In reality these fields *are* actually aligned (there are 3 preceeding ints outside the union and 3 inside struct _sigchld_x32), but because of the intervening union and struct it is not possible for gcc to avoid padding without breaking the ABI. Reported-and-tested-by: H. J. Lu Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1329696488-16970-1-git-send-email-hpa@zytor.com --- arch/x86/include/asm/ia32.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 7d0c18587709..ee52760549f0 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -130,8 +130,8 @@ typedef struct compat_siginfo { unsigned int _pid; /* which child */ unsigned int _uid; /* sender's uid */ int _status; /* exit code */ - s64 _utime; - s64 _stime; + compat_s64 _utime; + compat_s64 _stime; } _sigchld_x32; /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ -- cgit From 2ab516575f2f273b19d95140d02c54612201e80a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Feb 2012 19:46:04 +0000 Subject: x86: vdso: Use seqcount instead of seqlock The update of the vdso data happens under xtime_lock, so adding a nested lock is pointless. Just use a seqcount to sync the readers. Reviewed-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/include/asm/vgtod.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 815285bcaceb..1f007178c813 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,7 +5,7 @@ #include struct vsyscall_gtod_data { - seqlock_t lock; + seqcount_t seq; /* open coded 'struct timespec' */ time_t wall_time_sec; -- cgit From b74f05d61b73af584d0c39121980171389ecfaaa Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 13 Feb 2012 11:07:27 -0200 Subject: x86: kvmclock: abstract save/restore sched_clock_state Upon resume from hibernation, CPU 0's hvclock area contains the old values for system_time and tsc_timestamp. It is necessary for the hypervisor to update these values with uptodate ones before the CPU uses them. Abstract TSC's save/restore sched_clock_state functions and use restore_state to write to KVM_SYSTEM_TIME MSR, forcing an update. Also move restore_sched_clock_state before __restore_processor_state, since the later calls CONFIG_LOCK_STAT's lockstat_clock (also for TSC). Thanks to Igor Mammedov for tracking it down. Fixes suspend-to-disk with kvmclock. Reviewed-by: Thomas Gleixner Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/tsc.h | 4 ++-- arch/x86/include/asm/x86_init.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 15d99153a96d..c91e8b9d588b 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); extern int notsc_setup(char *); -extern void save_sched_clock_state(void); -extern void restore_sched_clock_state(void); +extern void tsc_save_sched_clock_state(void); +extern void tsc_restore_sched_clock_state(void); #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 5d0afac2962c..baaca8defec8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -162,6 +162,8 @@ struct x86_cpuinit_ops { * @is_untracked_pat_range exclude from PAT logic * @nmi_init enable NMI on cpus * @i8042_detect pre-detect if i8042 controller exists + * @save_sched_clock_state: save state for sched_clock() on suspend + * @restore_sched_clock_state: restore state for sched_clock() on resume */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -173,6 +175,8 @@ struct x86_platform_ops { void (*nmi_init)(void); unsigned char (*get_nmi_reason)(void); int (*i8042_detect)(void); + void (*save_sched_clock_state)(void); + void (*restore_sched_clock_state)(void); }; struct pci_dev; -- cgit From b7157acf429e6aef690646ba964b9ebd25049ec2 Mon Sep 17 00:00:00 2001 From: Steffen Persvold Date: Fri, 16 Mar 2012 20:25:35 +0100 Subject: x86/apic: Add separate apic_id_valid() functions for selected apic drivers As suggested by Suresh Siddha and Yinghai Lu: For x2apic pre-enabled systems, apic driver is set already early through early_acpi_boot_init()/early_acpi_process_madt()/ acpi_parse_madt()/default_acpi_madt_oem_check() path so that apic_id_valid() checking will be sufficient during MADT and SRAT parsing. For non-x2apic pre-enabled systems, all apic ids should be less than 255. This allows us to substitute the checks in arch/x86/kernel/acpi/boot.c::acpi_parse_x2apic() and arch/x86/mm/srat.c::acpi_numa_x2apic_affinity_init() with apic->apic_id_valid(). In addition we can avoid feigning the x2apic cpu feature in the NumaChip apic code. The following apic drivers have separate apic_id_valid() functions which will accept x2apic type IDs : x2apic_phys x2apic_cluster x2apic_uv_x apic_numachip Signed-off-by: Steffen Persvold Cc: Suresh Siddha Cc: Daniel J Blueman Cc: Yinghai Lu Cc: Jack Steiner Link: http://lkml.kernel.org/r/1331925935-13372-1-git-send-email-sp@numascale.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/x2apic.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9371c91718c..d3eaac44860a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -535,7 +535,7 @@ static inline unsigned int read_apic_id(void) static inline int default_apic_id_valid(int apicid) { - return x2apic_mode || (apicid < 255); + return (apicid < 255); } extern void default_setup_apic_routing(void); diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 6bf5b8e478c0..92e54abf89e0 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void) return cpu_online_mask; } +static int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + static int x2apic_apic_id_registered(void) { return 1; -- cgit From 91ec87d57fc38c529034e853687dfb7756de5406 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Mar 2012 21:15:51 -0700 Subject: x86-64: Simplify and optimize vdso clock_gettime monotonic variants We used to store the wall-to-monotonic offset and the realtime base. It's faster to precompute the monotonic base. This is about a 3% speedup on Sandy Bridge for CLOCK_MONOTONIC. It's much more impressive for CLOCK_MONOTONIC_COARSE. Signed-off-by: Andy Lutomirski Signed-off-by: John Stultz --- arch/x86/include/asm/vgtod.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 1f007178c813..8b38be2de9e1 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -7,11 +7,6 @@ struct vsyscall_gtod_data { seqcount_t seq; - /* open coded 'struct timespec' */ - time_t wall_time_sec; - u32 wall_time_nsec; - - struct timezone sys_tz; struct { /* extract of a clocksource struct */ int vclock_mode; cycle_t cycle_last; @@ -19,8 +14,16 @@ struct vsyscall_gtod_data { u32 mult; u32 shift; } clock; - struct timespec wall_to_monotonic; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; + u32 wall_time_nsec; + u32 monotonic_time_nsec; + time_t monotonic_time_sec; + + struct timezone sys_tz; struct timespec wall_time_coarse; + struct timespec monotonic_time_coarse; }; extern struct vsyscall_gtod_data vsyscall_gtod_data; -- cgit From 90e240142bd31ff10aeda5a280a53153f4eff004 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 25 Mar 2012 23:00:04 +0200 Subject: x86: Merge the x86_32 and x86_64 cpu_idle() functions Both functions are mostly identical. The differences are: - x86_32's cpu_idle() makes use of check_pgt_cache(), which is a nop on both x86_32 and x86_64. - x86_64's cpu_idle() uses enter/__exit_idle/(), on x86_32 these function are a nop. - In contrast to x86_32, x86_64 calls rcu_idle_enter/exit() in the innermost loop because idle notifications need RCU. Calling these function on x86_32 also in the innermost loop does not hurt. So we can merge both functions. Signed-off-by: Richard Weinberger Acked-by: Frederic Weisbecker Cc: paulmck@linux.vnet.ibm.com Cc: josh@joshtriplett.org Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1332709204-22496-1-git-send-email-richard@nod.at Signed-off-by: Ingo Molnar --- arch/x86/include/asm/idle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index f49253d75710..c5d1785373ed 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -14,6 +14,7 @@ void exit_idle(void); #else /* !CONFIG_X86_64 */ static inline void enter_idle(void) { } static inline void exit_idle(void) { } +static inline void __exit_idle(void) { } #endif /* CONFIG_X86_64 */ void amd_e400_remove_cpu(int cpu); -- cgit From 136d249ef7dbf0fefa292082cc40be1ea864cbd6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 21 Mar 2012 22:58:08 -0400 Subject: x86/ioapic: Add io_apic_ops driver layer to allow interception Xen dom0 needs to paravirtualize IO operations to the IO APIC, so add a io_apic_ops for it to intercept. Do this as ops structure because there's at least some chance that another paravirtualized environment may want to intercept these. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Suresh Siddha Cc: jwboyer@redhat.com Cc: yinghai@kernel.org Link: http://lkml.kernel.org/r/1332385090-18056-2-git-send-email-konrad.wilk@oracle.com [ Made all the affected code easier on the eyes ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 690d1cc9a877..2c4943de5150 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -21,6 +21,15 @@ #define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) #define IO_APIC_REDIR_MASKED (1 << 16) +struct io_apic_ops { + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); +}; + +void __init set_io_apic_ops(const struct io_apic_ops *); + /* * The structure of the IO-APIC: */ -- cgit From baa676fcf8d555269bd0a5a2496782beee55824d Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 27 Mar 2012 14:28:18 +0200 Subject: X86 & IA64: adapt for dma_map_ops changes Adapt core x86 and IA64 architecture code for dma_map_ops changes: replace alloc/free_coherent with generic alloc/free methods. Signed-off-by: Andrzej Pietrasiewicz Acked-by: Kyungmin Park [removed swiotlb related changes and replaced it with wrappers, merged with IA64 patch to avoid inter-patch dependences in intel-iommu code] Signed-off-by: Marek Szyprowski Reviewed-by: Arnd Bergmann Acked-by: Tony Luck --- arch/x86/include/asm/dma-mapping.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ed3065fd6314..4b4331d71935 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag); + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs); static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { @@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) return gfp; } +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + static inline void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *memory; @@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!is_device_dma_capable(dev)) return NULL; - if (!ops->alloc_coherent) + if (!ops->alloc) return NULL; - memory = ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp), attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, memory); return memory; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); + if (ops->free) + ops->free(dev, size, vaddr, bus, attrs); } #endif -- cgit From f05e798ad4c09255f590f5b2c00a7ca6c172f983 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Mar 2012 18:11:12 +0100 Subject: Disintegrate asm/system.h for X86 Disintegrate asm/system.h for X86. Signed-off-by: David Howells Acked-by: H. Peter Anvin cc: x86@kernel.org --- arch/x86/include/asm/apic.h | 1 - arch/x86/include/asm/auxvec.h | 7 + arch/x86/include/asm/barrier.h | 116 ++++++++ arch/x86/include/asm/bug.h | 4 + arch/x86/include/asm/cacheflush.h | 1 + arch/x86/include/asm/elf.h | 1 - arch/x86/include/asm/exec.h | 1 + arch/x86/include/asm/futex.h | 1 - arch/x86/include/asm/i387.h | 1 - arch/x86/include/asm/local.h | 1 - arch/x86/include/asm/mc146818rtc.h | 1 - arch/x86/include/asm/processor.h | 31 +- arch/x86/include/asm/segment.h | 58 +++- arch/x86/include/asm/special_insns.h | 199 +++++++++++++ arch/x86/include/asm/stackprotector.h | 1 - arch/x86/include/asm/switch_to.h | 129 +++++++++ arch/x86/include/asm/system.h | 527 +--------------------------------- arch/x86/include/asm/tlbflush.h | 2 +- arch/x86/include/asm/virtext.h | 1 - 19 files changed, 549 insertions(+), 534 deletions(-) create mode 100644 arch/x86/include/asm/barrier.h create mode 100644 arch/x86/include/asm/exec.h create mode 100644 arch/x86/include/asm/special_insns.h create mode 100644 arch/x86/include/asm/switch_to.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9371c91718c..4b2caeefe1a2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #define ARCH_APICTIMER_STOPS_ON_C3 1 diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h index 1316b4c35425..77203ac352de 100644 --- a/arch/x86/include/asm/auxvec.h +++ b/arch/x86/include/asm/auxvec.h @@ -9,4 +9,11 @@ #endif #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO: */ +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) +# define AT_VECTOR_SIZE_ARCH 2 +#else /* else it's non-compat x86-64 */ +# define AT_VECTOR_SIZE_ARCH 1 +#endif + #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h new file mode 100644 index 000000000000..c6cd358a1eec --- /dev/null +++ b/arch/x86/include/asm/barrier.h @@ -0,0 +1,116 @@ +#ifndef _ASM_X86_BARRIER_H +#define _ASM_X86_BARRIER_H + +#include +#include + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ + +#ifdef CONFIG_X86_32 +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static __always_inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index f654d1bb17fb..11e1152222d0 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -36,4 +36,8 @@ do { \ #endif /* !CONFIG_BUG */ #include + + +extern void show_regs_common(void); + #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 4e12668711e5..9863ee3747da 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -3,6 +3,7 @@ /* Caches aren't brain-dead on the intel. */ #include +#include #ifdef CONFIG_X86_PAT /* diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 5f962df30d0f..f27f79abe021 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -84,7 +84,6 @@ extern unsigned int vdso_enabled; (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) #include -#include #ifdef CONFIG_X86_32 #include diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h new file mode 100644 index 000000000000..54c2e1db274a --- /dev/null +++ b/arch/x86/include/asm/exec.h @@ -0,0 +1 @@ +/* define arch_align_stack() here */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index d09bb03653f0..71ecbcba1a4e 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -9,7 +9,6 @@ #include #include #include -#include #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ asm volatile("1:\t" insn "\n" \ diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 7ce0798b1b26..257d9cca214f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -14,7 +14,6 @@ #include #include -#include struct pt_regs; struct user_i387_struct; diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 9cdae5d47e8f..c8bed0da434a 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -3,7 +3,6 @@ #include -#include #include #include diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 0e8e85bb7c51..d354fb781c57 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -5,7 +5,6 @@ #define _ASM_X86_MC146818RTC_H #include -#include #include #include diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 95da14f7ee85..78e30ea492b2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -14,13 +14,13 @@ struct mm_struct; #include #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -29,6 +29,15 @@ struct mm_struct; #include #include #include +#include + +/* + * We handle most unaligned accesses in hardware. On the other hand + * unaligned DMA can be quite expensive on some Nehalem processors. + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN 0 #define HBP_NUM 4 /* @@ -1022,4 +1031,24 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ +#ifdef CONFIG_X86_32 +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +#endif + +void disable_hlt(void); +void enable_hlt(void); + +void cpu_idle_wait(void); + +extern unsigned long arch_align_stack(unsigned long sp); +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + +void default_idle(void); +bool set_pm_idle_to_default(void); + +void stop_this_cpu(void *dummy); + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5e641715c3fe..165466233ab0 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -212,7 +212,61 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; -#endif -#endif + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ +} while (0) + +/* + * Save a segment register away + */ +#define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + +/* + * x86_32 user gs accessors. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS +#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) +#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +#define task_user_gs(tsk) ((tsk)->thread.gs) +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); + return __limit + 1; +} + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h new file mode 100644 index 000000000000..41fc93a2e225 --- /dev/null +++ b/arch/x86/include/asm/special_insns.h @@ -0,0 +1,199 @@ +#ifndef _ASM_X86_SPECIAL_INSNS_H +#define _ASM_X86_SPECIAL_INSNS_H + + +#ifdef __KERNEL__ + +static inline void native_clts(void) +{ + asm volatile("clts"); +} + +/* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ +static unsigned long __force_order; + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. */ +#ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val), "=m" (__force_order) : "0" (0)); +#else + val = native_read_cr4(); +#endif + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long native_read_cr8(void) +{ + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; +} + +static inline void native_write_cr8(unsigned long val) +{ + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); +} +#endif + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +extern void native_load_gs_index(unsigned); + +#ifdef CONFIG_PARAVIRT +#include +#else + +static inline unsigned long read_cr0(void) +{ + return native_read_cr0(); +} + +static inline void write_cr0(unsigned long x) +{ + native_write_cr0(x); +} + +static inline unsigned long read_cr2(void) +{ + return native_read_cr2(); +} + +static inline void write_cr2(unsigned long x) +{ + native_write_cr2(x); +} + +static inline unsigned long read_cr3(void) +{ + return native_read_cr3(); +} + +static inline void write_cr3(unsigned long x) +{ + native_write_cr3(x); +} + +static inline unsigned long read_cr4(void) +{ + return native_read_cr4(); +} + +static inline unsigned long read_cr4_safe(void) +{ + return native_read_cr4_safe(); +} + +static inline void write_cr4(unsigned long x) +{ + native_write_cr4(x); +} + +static inline void wbinvd(void) +{ + native_wbinvd(); +} + +#ifdef CONFIG_X86_64 + +static inline unsigned long read_cr8(void) +{ + return native_read_cr8(); +} + +static inline void write_cr8(unsigned long x) +{ + native_write_cr8(x); +} + +static inline void load_gs_index(unsigned selector) +{ + native_load_gs_index(selector); +} + +#endif + +/* Clear the 'TS' bit */ +static inline void clts(void) +{ + native_clts(); +} + +#endif/* CONFIG_PARAVIRT */ + +#define stts() write_cr0(read_cr0() | X86_CR0_TS) + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); +} + +#define nop() asm volatile ("nop") + + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_SPECIAL_INSNS_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 157517763565..b5d9533d2c38 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h new file mode 100644 index 000000000000..4ec45b3abba1 --- /dev/null +++ b/arch/x86/include/asm/switch_to.h @@ -0,0 +1,129 @@ +#ifndef _ASM_X86_SWITCH_TO_H +#define _ASM_X86_SWITCH_TO_H + +struct task_struct; /* one of the stranger aspects of C forward declarations */ +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); +struct tss_struct; +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss); + +#ifdef CONFIG_X86_32 + +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (stack_canary.canary) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + +/* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. + */ +#define switch_to(prev, next, last) \ +do { \ + /* \ + * Context-switching clobbers all registers, so we clobber \ + * them explicitly, via unused output variables. \ + * (EAX and EBP is not listed because EBP is saved/restored \ + * explicitly for wchan access and EAX is the return value of \ + * __switch_to()) \ + */ \ + unsigned long ebx, ecx, edx, esi, edi; \ + \ + asm volatile("pushfl\n\t" /* save flags */ \ + "pushl %%ebp\n\t" /* save EBP */ \ + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ + __switch_canary \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ + "popfl\n" /* restore flags */ \ + \ + /* output parameters */ \ + : [prev_sp] "=m" (prev->thread.sp), \ + [prev_ip] "=m" (prev->thread.ip), \ + "=a" (last), \ + \ + /* clobbered output registers: */ \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ + __switch_canary_oparam \ + \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ + \ + /* regparm parameters for __switch_to(): */ \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ + __switch_canary_iparam \ + \ + : /* reloaded segment registers */ \ + "memory"); \ +} while (0) + +#else /* CONFIG_X86_32 */ + +/* frame pointer must be last for get_wchan */ +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + +#define __EXTRA_CLOBBER \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (irq_stack_union.stack_canary) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + +/* Save restore flags to clear handle leaking NT */ +#define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ + __switch_canary \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "jnz ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ + __switch_canary_oparam \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [_tif_fork] "i" (_TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [current_task] "m" (current_task) \ + __switch_canary_iparam \ + : "memory", "cc" __EXTRA_CLOBBER) + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2d2f01ce6dcb..0d84f9e42fde 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -1,523 +1,6 @@ -#ifndef _ASM_X86_SYSTEM_H -#define _ASM_X86_SYSTEM_H - -#include -#include -#include +/* FILE TO BE DELETED. DO NOT ADD STUFF HERE! */ +#include #include -#include - -#include -#include - -/* entries in ARCH_DLINFO: */ -#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 -#else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -struct task_struct; /* one of the stranger aspects of C forward declarations */ -struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); -extern void show_regs_common(void); - -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movl %P[task_canary](%[next]), %%ebx\n\t" \ - "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" -#define __switch_canary_oparam \ - , [stack_canary] "=m" (stack_canary.canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* - * Saving eflags is important. It switches not only IOPL between tasks, - * it also protects other tasks from NT leaking through sysenter etc. - */ -#define switch_to(prev, next, last) \ -do { \ - /* \ - * Context-switching clobbers all registers, so we clobber \ - * them explicitly, via unused output variables. \ - * (EAX and EBP is not listed because EBP is saved/restored \ - * explicitly for wchan access and EAX is the return value of \ - * __switch_to()) \ - */ \ - unsigned long ebx, ecx, edx, esi, edi; \ - \ - asm volatile("pushfl\n\t" /* save flags */ \ - "pushl %%ebp\n\t" /* save EBP */ \ - "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ - "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ - "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ - "pushl %[next_ip]\n\t" /* restore EIP */ \ - __switch_canary \ - "jmp __switch_to\n" /* regparm call */ \ - "1:\t" \ - "popl %%ebp\n\t" /* restore EBP */ \ - "popfl\n" /* restore flags */ \ - \ - /* output parameters */ \ - : [prev_sp] "=m" (prev->thread.sp), \ - [prev_ip] "=m" (prev->thread.ip), \ - "=a" (last), \ - \ - /* clobbered output registers: */ \ - "=b" (ebx), "=c" (ecx), "=d" (edx), \ - "=S" (esi), "=D" (edi) \ - \ - __switch_canary_oparam \ - \ - /* input parameters: */ \ - : [next_sp] "m" (next->thread.sp), \ - [next_ip] "m" (next->thread.ip), \ - \ - /* regparm parameters for __switch_to(): */ \ - [prev] "a" (prev), \ - [next] "d" (next) \ - \ - __switch_canary_iparam \ - \ - : /* reloaded segment registers */ \ - "memory"); \ -} while (0) - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -#else - -/* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" - -#define __EXTRA_CLOBBER \ - , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15" - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,"__percpu_arg([gs_canary])"\n\t" -#define __switch_canary_oparam \ - , [gs_canary] "=m" (irq_stack_union.stack_canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ - "call __switch_to\n\t" \ - "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ - "jnz ret_from_fork\n\t" \ - RESTORE_CONTEXT \ - : "=a" (last) \ - __switch_canary_oparam \ - : [next] "S" (next), [prev] "D" (prev), \ - [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (current_task) \ - __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) -#endif - -#ifdef __KERNEL__ - -extern void native_load_gs_index(unsigned); - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg, value) \ -do { \ - unsigned short __val = (value); \ - \ - asm volatile(" \n" \ - "1: movl %k0,%%" #seg " \n" \ - \ - ".section .fixup,\"ax\" \n" \ - "2: xorl %k0,%k0 \n" \ - " jmp 1b \n" \ - ".previous \n" \ - \ - _ASM_EXTABLE(1b, 2b) \ - \ - : "+r" (__val) : : "memory"); \ -} while (0) - -/* - * Save a segment register away - */ -#define savesegment(seg, value) \ - asm("mov %%" #seg ",%0":"=r" (value) : : "memory") - -/* - * x86_32 user gs accessors. - */ -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_32_LAZY_GS -#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) -#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -#define task_user_gs(tsk) ((tsk)->thread.gs) -#define lazy_save_gs(v) savesegment(gs, (v)) -#define lazy_load_gs(v) loadsegment(gs, (v)) -#else /* X86_32_LAZY_GS */ -#define get_user_gs(regs) (u16)((regs)->gs) -#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) -#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) -#define lazy_save_gs(v) do { } while (0) -#define lazy_load_gs(v) do { } while (0) -#endif /* X86_32_LAZY_GS */ -#endif /* X86_32 */ - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); - return __limit + 1; -} - -static inline void native_clts(void) -{ - asm volatile("clts"); -} - -/* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization - */ -static unsigned long __force_order; - -static inline unsigned long native_read_cr0(void) -{ - unsigned long val; - asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr2(void) -{ - unsigned long val; - asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr2(unsigned long val) -{ - asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr3(void) -{ - unsigned long val; - asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr3(unsigned long val) -{ - asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr4(void) -{ - unsigned long val; - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist. In x86_64, a cr4 always - * exists, so it will never fail. */ -#ifdef CONFIG_X86_32 - asm volatile("1: mov %%cr4, %0\n" - "2:\n" - _ASM_EXTABLE(1b, 2b) - : "=r" (val), "=m" (__force_order) : "0" (0)); -#else - val = native_read_cr4(); -#endif - return val; -} - -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} - -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif - -static inline void native_wbinvd(void) -{ - asm volatile("wbinvd": : :"memory"); -} - -#ifdef CONFIG_PARAVIRT -#include -#else - -static inline unsigned long read_cr0(void) -{ - return native_read_cr0(); -} - -static inline void write_cr0(unsigned long x) -{ - native_write_cr0(x); -} - -static inline unsigned long read_cr2(void) -{ - return native_read_cr2(); -} - -static inline void write_cr2(unsigned long x) -{ - native_write_cr2(x); -} - -static inline unsigned long read_cr3(void) -{ - return native_read_cr3(); -} - -static inline void write_cr3(unsigned long x) -{ - native_write_cr3(x); -} - -static inline unsigned long read_cr4(void) -{ - return native_read_cr4(); -} - -static inline unsigned long read_cr4_safe(void) -{ - return native_read_cr4_safe(); -} - -static inline void write_cr4(unsigned long x) -{ - native_write_cr4(x); -} - -static inline void wbinvd(void) -{ - native_wbinvd(); -} - -#ifdef CONFIG_X86_64 - -static inline unsigned long read_cr8(void) -{ - return native_read_cr8(); -} - -static inline void write_cr8(unsigned long x) -{ - native_write_cr8(x); -} - -static inline void load_gs_index(unsigned selector) -{ - native_load_gs_index(selector); -} - -#endif - -/* Clear the 'TS' bit */ -static inline void clts(void) -{ - native_clts(); -} - -#endif/* CONFIG_PARAVIRT */ - -#define stts() write_cr0(read_cr0() | X86_CR0_TS) - -#endif /* __KERNEL__ */ - -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); -} - -#define nop() asm volatile ("nop") - -void disable_hlt(void); -void enable_hlt(void); - -void cpu_idle_wait(void); - -extern unsigned long arch_align_stack(unsigned long sp); -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); - -void default_idle(void); -bool set_pm_idle_to_default(void); - -void stop_this_cpu(void *dummy); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) - */ -static __always_inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -/* - * We handle most unaligned accesses in hardware. On the other hand - * unaligned DMA can be quite expensive on some Nehalem processors. - * - * Based on this we disable the IP header alignment in network drivers. - */ -#define NET_IP_ALIGN 0 -#endif /* _ASM_X86_SYSTEM_H */ +#include +#include +#include diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 169be8938b96..c0e108e08079 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -5,7 +5,7 @@ #include #include -#include +#include #ifdef CONFIG_PARAVIRT #include diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index e0f9aa16358b..5da71c27cc59 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -16,7 +16,6 @@ #define _ASM_X86_VIRTEX_H #include -#include #include #include -- cgit From 49a7f04a4b9d45cd794741ce3d5d66524b37bdd0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Mar 2012 18:30:03 +0100 Subject: Move all declarations of free_initmem() to linux/mm.h Move all declarations of free_initmem() to linux/mm.h so that there's only one and it's used by everything. Signed-off-by: David Howells cc: linux-c6x-dev@linux-c6x.org cc: microblaze-uclinux@itee.uq.edu.au cc: linux-sh@vger.kernel.org cc: sparclinux@vger.kernel.org cc: x86@kernel.org cc: linux-mm@kvack.org --- arch/x86/include/asm/page_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index bce688d54c12..e21fdd10479f 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -55,7 +55,6 @@ extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(void); -extern void free_initmem(void); #endif /* !__ASSEMBLY__ */ -- cgit From 141124c02059eee9dbc5c86ea797b1ca888e77f7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Mar 2012 18:30:03 +0100 Subject: Delete all instances of asm/system.h Delete all instances of asm/system.h as they should be redundant by this point. Signed-off-by: David Howells --- arch/x86/include/asm/system.h | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 arch/x86/include/asm/system.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h deleted file mode 100644 index 0d84f9e42fde..000000000000 --- a/arch/x86/include/asm/system.h +++ /dev/null @@ -1,6 +0,0 @@ -/* FILE TO BE DELETED. DO NOT ADD STUFF HERE! */ -#include -#include -#include -#include -#include -- cgit From f6365201d8a21fb347260f89d6e9b3e718d63c70 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 14:49:17 -0700 Subject: x86: Remove the ancient and deprecated disable_hlt() and enable_hlt() facility The X86_32-only disable_hlt/enable_hlt mechanism was used by the 32-bit floppy driver. Its effect was to replace the use of the HLT instruction inside default_idle() with cpu_relax() - essentially it turned off the use of HLT. This workaround was commented in the code as: "disable hlt during certain critical i/o operations" "This halt magic was a workaround for ancient floppy DMA wreckage. It should be safe to remove." H. Peter Anvin additionally adds: "To the best of my knowledge, no-hlt only existed because of flaky power distributions on 386/486 systems which were sold to run DOS. Since DOS did no power management of any kind, including HLT, the power draw was fairly uniform; when exposed to the much hhigher noise levels you got when Linux used HLT caused some of these systems to fail. They were by far in the minority even back then." Alan Cox further says: "Also for the Cyrix 5510 which tended to go castors up if a HLT occurred during a DMA cycle and on a few other boxes HLT during DMA tended to go astray. Do we care ? I doubt it. The 5510 was pretty obscure, the 5520 fixed it, the 5530 is probably the oldest still in any kind of use." So, let's finally drop this. Signed-off-by: Len Brown Signed-off-by: Josh Boyer Signed-off-by: Andrew Morton Acked-by: "H. Peter Anvin" Acked-by: Alan Cox Cc: Stephen Hemminger Cc: Link: http://lkml.kernel.org/n/tip-3rhk9bzf0x9rljkv488tloib@git.kernel.org [ If anyone cares then alternative instruction patching could be used to replace HLT with a one-byte NOP instruction. Much simpler. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7284c9a6a0b5..4fa7dcceb6c0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -974,16 +974,6 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ -#ifdef CONFIG_X86_32 -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -#endif - -void disable_hlt(void); -void enable_hlt(void); - void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); -- cgit From f68e556e23d1a4176b563bcb25d8baf2c5313f91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 Apr 2012 13:54:56 -0700 Subject: Make the "word-at-a-time" helper functions more commonly usable I have a new optimized x86 "strncpy_from_user()" that will use these same helper functions for all the same reasons the name lookup code uses them. This is preparation for that. This moves them into an architecture-specific header file. It's architecture-specific for two reasons: - some of the functions are likely to want architecture-specific implementations. Even if the current code happens to be "generic" in the sense that it should work on any little-endian machine, it's likely that the "multiply by a big constant and shift" implementation is less than optimal for an architecture that has a guaranteed fast bit count instruction, for example. - I expect that if architectures like sparc want to start playing around with this, we'll need to abstract out a few more details (in particular the actual unaligned accesses). So we're likely to have more architecture-specific stuff if non-x86 architectures start using this. (and if it turns out that non-x86 architectures don't start using this, then having it in an architecture-specific header is still the right thing to do, of course) Signed-off-by: Linus Torvalds --- arch/x86/include/asm/word-at-a-time.h | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 arch/x86/include/asm/word-at-a-time.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..6fe6767b7124 --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h @@ -0,0 +1,46 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This is largely generic for little-endian machines, but the + * optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit