diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/boot/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/unistd.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/vgtod.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd_ibs.c | 61 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_knc.c | 248 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perfctr-watchdog.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 48 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/kgdb.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 49 | ||||
-rw-r--r-- | arch/x86/um/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/vdso/vclock_gettime.c | 22 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 18 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 41 |
20 files changed, 449 insertions, 96 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 42d2c35a5bbd..46c3bff3ced2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,9 @@ config X86 select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE + select MODULES_USE_ELF_REL if X86_32 + select MODULES_USE_ELF_RELA if X86_64 config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index ce03476d8c8f..ccce0ed67dde 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -37,7 +37,8 @@ setup-y += video-bios.o targets += $(setup-y) hostprogs-y := mkcpustr tools/build -HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \ +HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ + -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ $(obj)/cpu.o: $(obj)/cpustr.h diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fbee9714d9ab..7f0edceb7563 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,6 +121,11 @@ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 55d155560fdf..16f3fc6ebf2e 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -51,7 +51,6 @@ # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID # define __ARCH_WANT_SYS_EXECVE -# define __ARCH_WANT_KERNEL_EXECVE /* * "Conditional" syscalls diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 8b38be2de9e1..46e24d36b7da 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -17,8 +17,8 @@ struct vsyscall_gtod_data { /* open coded 'struct timespec' */ time_t wall_time_sec; - u32 wall_time_nsec; - u32 monotonic_time_nsec; + u64 wall_time_snsec; + u64 monotonic_time_snsec; time_t monotonic_time_sec; struct timezone sys_tz; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d30a6a9a0121..a0e067d3d96c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8b6defe7eefc..271d25700297 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -626,6 +626,8 @@ int p4_pmu_init(void); int p6_pmu_init(void); +int knc_pmu_init(void); + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index eebd5ffe1bba..6336bcbd0618 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -41,17 +41,22 @@ struct cpu_perf_ibs { }; struct perf_ibs { - struct pmu pmu; - unsigned int msr; - u64 config_mask; - u64 cnt_mask; - u64 enable_mask; - u64 valid_mask; - u64 max_period; - unsigned long offset_mask[1]; - int offset_max; - struct cpu_perf_ibs __percpu *pcpu; - u64 (*get_count)(u64 config); + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + struct cpu_perf_ibs __percpu *pcpu; + + struct attribute **format_attrs; + struct attribute_group format_group; + const struct attribute_group *attr_groups[2]; + + u64 (*get_count)(u64 config); }; struct perf_ibs_data { @@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +PMU_FORMAT_ATTR(rand_en, "config:57"); +PMU_FORMAT_ATTR(cnt_ctl, "config:19"); + +static struct attribute *ibs_fetch_format_attrs[] = { + &format_attr_rand_en.attr, + NULL, +}; + +static struct attribute *ibs_op_format_attrs[] = { + NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ + NULL, +}; + static struct perf_ibs perf_ibs_fetch = { .pmu = { .task_ctx_nr = perf_invalid_context, @@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = { .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, + .format_attrs = ibs_fetch_format_attrs, .get_count = get_ibs_fetch_count, }; @@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = { .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, + .format_attrs = ibs_op_format_attrs, .get_count = get_ibs_op_count, }; @@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) perf_ibs->pcpu = pcpu; + /* register attributes */ + if (perf_ibs->format_attrs[0]) { + memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); + perf_ibs->format_group.name = "format"; + perf_ibs->format_group.attrs = perf_ibs->format_attrs; + + memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); + perf_ibs->attr_groups[0] = &perf_ibs->format_group; + perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; + } + ret = perf_pmu_register(&perf_ibs->pmu, name, -1); if (ret) { perf_ibs->pcpu = NULL; @@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) static __init int perf_event_ibs_init(void) { + struct attribute **attr = ibs_op_format_attrs; + if (!ibs_caps) return -ENODEV; /* ibs not supported by the cpu */ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); - if (ibs_caps & IBS_CAPS_OPCNT) + + if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; + *attr++ = &format_attr_cnt_ctl.attr; + } perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6bca492b8547..324bb523d9d9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void) switch (boot_cpu_data.x86) { case 0x6: return p6_pmu_init(); + case 0xb: + return knc_pmu_init(); case 0xf: return p4_pmu_init(); } diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c new file mode 100644 index 000000000000..7c46bfdbc373 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -0,0 +1,248 @@ +/* Driver for Intel Xeon Phi "Knights Corner" PMU */ + +#include <linux/perf_event.h> +#include <linux/types.h> + +#include "perf_event.h" + +static const u64 knc_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, +}; + +static __initconst u64 knc_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + /* On Xeon Phi event "0" is a valid DATA_READ */ + /* (L1 Data Cache Reads) Instruction. */ + /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ + /* bit will always be set in x86_pmu_hw_config(). */ + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ + [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ + [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + /* see note on L1 OP_READ */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ + [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + + +static u64 knc_pmu_event_map(int hw_event) +{ + return knc_perfmon_event_map[hw_event]; +} + +static struct event_constraint knc_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ + INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ + INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ + INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ + INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ + INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ + INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ + INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ + INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ + INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ + INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ + INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ + INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ + INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ + INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ + EVENT_CONSTRAINT_END +}; + +#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d +#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e +#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f + +#define KNC_ENABLE_COUNTER0 0x00000001 +#define KNC_ENABLE_COUNTER1 0x00000002 + +static void knc_pmu_disable_all(void) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static void knc_pmu_enable_all(int added) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static inline void +knc_pmu_disable_event(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static void knc_pmu_enable_event(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_knc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static __initconst struct x86_pmu knc_pmu = { + .name = "knc", + .handle_irq = x86_pmu_handle_irq, + .disable_all = knc_pmu_disable_all, + .enable_all = knc_pmu_enable_all, + .enable = knc_pmu_enable_event, + .disable = knc_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_KNC_EVNTSEL0, + .perfctr = MSR_KNC_PERFCTR0, + .event_map = knc_pmu_event_map, + .max_events = ARRAY_SIZE(knc_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_counters = 2, + /* in theory 40 bits, early silicon is buggy though */ + .cntval_bits = 32, + .cntval_mask = (1ULL << 32) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = knc_event_constraints, + .format_attrs = intel_knc_formats_attr, +}; + +__init int knc_pmu_init(void) +{ + x86_pmu = knc_pmu; + + memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 966512b2cacf..2e8caf03f593 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_PERFCTR0; + case 11: + return msr - MSR_KNC_PERFCTR0; case 15: return msr - MSR_P4_BPU_PERFCTR0; } @@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_EVNTSEL0; + case 11: + return msr - MSR_KNC_EVNTSEL0; case 15: return msr - MSR_P4_BSU_ESCR0; } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8f9ed1afde8f..a1193aef6d7d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -299,12 +299,20 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) -ENTRY(ret_from_kernel_execve) - movl %eax, %esp - movl $0,PT_EAX(%esp) +ENTRY(ret_from_kernel_thread) + CFI_STARTPROC + pushl_cfi %eax + call schedule_tail GET_THREAD_INFO(%ebp) + popl_cfi %eax + pushl_cfi $0x0202 # Reset kernel eflags + popfl_cfi + movl PT_EBP(%esp),%eax + call *PT_EBX(%esp) + movl $0,PT_EAX(%esp) jmp syscall_exit -END(ret_from_kernel_execve) + CFI_ENDPROC +ENDPROC(ret_from_kernel_thread) /* * Interrupt exit functions should be protected against kprobes @@ -622,6 +630,10 @@ work_notifysig: # deal with pending signals and movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space +1: +#else + movl %esp, %eax +#endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) movb PT_CS(%esp), %bl @@ -632,24 +644,15 @@ work_notifysig: # deal with pending signals and call do_notify_resume jmp resume_userspace +#ifdef CONFIG_VM86 ALIGN work_notifysig_v86: pushl_cfi %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl_cfi %ecx movl %eax, %esp -#else - movl %esp, %eax + jmp 1b #endif - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace END(work_pending) # perform syscall exit tracing @@ -1020,21 +1023,6 @@ END(spurious_interrupt_bug) */ .popsection -ENTRY(ret_from_kernel_thread) - CFI_STARTPROC - pushl_cfi %eax - call schedule_tail - GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi - movl PT_EBP(%esp),%eax - call *PT_EBX(%esp) - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(ret_from_kernel_thread) - #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cdc790c78f32..0c58952d64e8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -563,15 +563,13 @@ ENTRY(ret_from_fork) jmp ret_from_sys_call # go to the SYSRET fastpath 1: - subq $REST_SKIP, %rsp # move the stack pointer back + subq $REST_SKIP, %rsp # leave space for volatiles CFI_ADJUST_CFA_OFFSET REST_SKIP movq %rbp, %rdi call *%rbx - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - + movl $0, RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -1326,20 +1324,6 @@ bad_gs: jmp 2b .previous -ENTRY(ret_from_kernel_execve) - movq %rdi, %rsp - movl $0, RAX(%rsp) - // RESTORE_REST - movq 0*8(%rsp), %r15 - movq 1*8(%rsp), %r14 - movq 2*8(%rsp), %r13 - movq 3*8(%rsp), %r12 - movq 4*8(%rsp), %rbp - movq 5*8(%rsp), %rbx - addq $(6*8), %rsp - jmp int_ret_from_sys_call -END(ret_from_kernel_execve) - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 3f61904365cf..836f8322960e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; +#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; +#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d609be046b57..a2bb18e02839 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -68,6 +68,7 @@ #include <linux/percpu.h> #include <linux/crash_dump.h> #include <linux/tboot.h> +#include <linux/jiffies.h> #include <video/edid.h> @@ -1032,6 +1033,8 @@ void __init setup_arch(char **cmdline_p) mcheck_init(); arch_init_ideal_nops(); + + register_refined_jiffies(CLOCK_TICK_RATE); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8d141b309046..3a3e8c9e280d 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -28,7 +28,7 @@ #include <linux/jiffies.h> #include <linux/sysctl.h> #include <linux/topology.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> #include <linux/getcpu.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -82,32 +82,41 @@ void update_vsyscall_tz(void) vsyscall_gtod_data.sys_tz = sys_tz; } -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) +void update_vsyscall(struct timekeeper *tk) { - struct timespec monotonic; + struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - write_seqcount_begin(&vsyscall_gtod_data.seq); + write_seqcount_begin(&vdata->seq); /* copy vsyscall data */ - vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.mask = tk->clock->mask; + vdata->clock.mult = tk->mult; + vdata->clock.shift = tk->shift; + + vdata->wall_time_sec = tk->xtime_sec; + vdata->wall_time_snsec = tk->xtime_nsec; + + vdata->monotonic_time_sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_snsec = tk->xtime_nsec + + (tk->wall_to_monotonic.tv_nsec + << tk->shift); + while (vdata->monotonic_time_snsec >= + (((u64)NSEC_PER_SEC) << tk->shift)) { + vdata->monotonic_time_snsec -= + ((u64)NSEC_PER_SEC) << tk->shift; + vdata->monotonic_time_sec++; + } - monotonic = timespec_add(*wall_time, *wtm); - vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; - vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; + vdata->wall_time_coarse.tv_sec = tk->xtime_sec; + vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - vsyscall_gtod_data.monotonic_time_coarse = - timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); + vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, + tk->wall_to_monotonic); - write_seqcount_end(&vsyscall_gtod_data.seq); + write_seqcount_end(&vdata->seq); } static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 30c4eec033af..07611759ce35 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -14,6 +14,7 @@ config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config 64BIT bool "64-bit kernel" if SUBARCH = "x86" @@ -23,9 +24,11 @@ config X86_32 def_bool !64BIT select HAVE_AOUT select ARCH_WANT_IPC_PARSE_VERSION + select MODULES_USE_ELF_REL config X86_64 def_bool 64BIT + select MODULES_USE_ELF_RELA config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD && 64BIT diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 885eff49d6ab..4df6c373421a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) } -notrace static inline long vgetns(void) +notrace static inline u64 vgetsns(void) { long v; cycles_t cycles; @@ -91,21 +91,24 @@ notrace static inline long vgetns(void) else return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; - return (v * gtod->clock.mult) >> gtod->clock.shift; + return v * gtod->clock.mult; } /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ notrace static int __always_inline do_realtime(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; - ts->tv_nsec = gtod->wall_time_nsec; - ns = vgetns(); + ns = gtod->wall_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); timespec_add_ns(ts, ns); @@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts) notrace static int do_monotonic(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; - ts->tv_nsec = gtod->monotonic_time_nsec; - ns = vgetns(); + ns = gtod->monotonic_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); timespec_add_ns(ts, ns); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bf788d34530d..e3497f240eab 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -987,7 +987,16 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } - +#ifdef CONFIG_X86_64 +static inline unsigned long xen_read_cr8(void) +{ + return 0; +} +static inline void xen_write_cr8(unsigned long val) +{ + BUG_ON(val); +} +#endif static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -1156,6 +1165,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, +#ifdef CONFIG_X86_64 + .read_cr8 = xen_read_cr8, + .write_cr8 = xen_write_cr8, +#endif + .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -1164,6 +1178,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .read_tscp = native_read_tscp, + .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fd28d86fe3d2..6226c99729b9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/seq_file.h> +#include <linux/crash_dump.h> #include <trace/events/xen.h> @@ -2381,6 +2382,43 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); #ifdef CONFIG_XEN_PVHVM +#ifdef CONFIG_PROC_VMCORE +/* + * This function is used in two contexts: + * - the kdump kernel has to check whether a pfn of the crashed kernel + * was a ballooned page. vmcore is using this function to decide + * whether to access a pfn of the crashed kernel. + * - the kexec kernel has to check whether a pfn was ballooned by the + * previous kernel. If the pfn is ballooned, handle it properly. + * Returns 0 if the pfn is not backed by a RAM page, the caller may + * handle the pfn special in this case. + */ +static int xen_oldmem_pfn_is_ram(unsigned long pfn) +{ + struct xen_hvm_get_mem_type a = { + .domid = DOMID_SELF, + .pfn = pfn, + }; + int ram; + + if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) + return -ENXIO; + + switch (a.mem_type) { + case HVMMEM_mmio_dm: + ram = 0; + break; + case HVMMEM_ram_rw: + case HVMMEM_ram_ro: + default: + ram = 1; + break; + } + + return ram; +} +#endif + static void xen_hvm_exit_mmap(struct mm_struct *mm) { struct xen_hvm_pagetable_dying a; @@ -2411,6 +2449,9 @@ void __init xen_hvm_init_mmu_ops(void) { if (is_pagetable_dying_supported()) pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; +#ifdef CONFIG_PROC_VMCORE + register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); +#endif } #endif |