summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c25
-rw-r--r--arch/x86/kernel/alternative.c155
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/apb_timer.c10
-rw-r--r--arch/x86/kernel/apic/apic.c12
-rw-r--r--arch/x86/kernel/apic/io_apic.c14
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c10
-rw-r--r--arch/x86/kernel/cpu/centaur.c8
-rw-r--r--arch/x86/kernel/cpu/common.c21
-rw-r--r--arch/x86/kernel/cpu/cpu.h20
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c15
-rw-r--r--arch/x86/kernel/cpu/intel.c12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c28
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c42
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c40
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c27
-rw-r--r--arch/x86/kernel/cpu/perf_event.h8
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c260
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c236
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c31
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c402
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h10
-rw-r--r--arch/x86/kernel/cpu/proc.c15
-rw-r--r--arch/x86/kernel/cpu/umc.c4
-rw-r--r--arch/x86/kernel/cpu/vmware.c8
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/devicetree.c3
-rw-r--r--arch/x86/kernel/e820.c5
-rw-r--r--arch/x86/kernel/early-quirks.c154
-rw-r--r--arch/x86/kernel/early_printk.c9
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/entry_64.S23
-rw-r--r--arch/x86/kernel/head32.c6
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c7
-rw-r--r--arch/x86/kernel/i8259.c3
-rw-r--r--arch/x86/kernel/irq.c8
-rw-r--r--arch/x86/kernel/irq_32.c34
-rw-r--r--arch/x86/kernel/irq_64.c21
-rw-r--r--arch/x86/kernel/irq_work.c4
-rw-r--r--arch/x86/kernel/jump_label.c84
-rw-r--r--arch/x86/kernel/kprobes/common.h5
-rw-r--r--arch/x86/kernel/kprobes/core.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c115
-rw-r--r--arch/x86/kernel/kvm.c279
-rw-r--r--arch/x86/kernel/microcode_amd.c1
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c4
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c18
-rw-r--r--arch/x86/kernel/paravirt.c9
-rw-r--r--arch/x86/kernel/preempt.S25
-rw-r--r--arch/x86/kernel/process.c8
-rw-r--r--arch/x86/kernel/process_32.c10
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/pvclock.c44
-rw-r--r--arch/x86/kernel/reboot.c285
-rw-r--r--arch/x86/kernel/rtc.c12
-rw-r--r--arch/x86/kernel/setup.c28
-rw-r--r--arch/x86/kernel/signal.c12
-rw-r--r--arch/x86/kernel/smp.c12
-rw-r--r--arch/x86/kernel/smpboot.c36
-rw-r--r--arch/x86/kernel/syscall_32.c2
-rw-r--r--arch/x86/kernel/syscall_64.c5
-rw-r--r--arch/x86/kernel/sysfb.c74
-rw-r--r--arch/x86/kernel/sysfb_efi.c214
-rw-r--r--arch/x86/kernel/sysfb_simplefb.c95
-rw-r--r--arch/x86/kernel/tboot.c10
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/tsc.c6
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c7
-rw-r--r--arch/x86/kernel/x86_init.c24
81 files changed, 2366 insertions, 837 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88d99ea77723..9b0a34e2cd79 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-$(CONFIG_PREEMPT) += preempt.o
+
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
@@ -103,6 +105,9 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
+obj-y += sysfb.o
+obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o
+obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2627a81253ee..40c76604199f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
+int acpi_disable_cmcff;
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
@@ -141,16 +142,8 @@ static u32 irq_to_gsi(int irq)
}
/*
- * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
- * to map the target physical address. The problem is that set_fixmap()
- * provides a single page, and it is possible that the page is not
- * sufficient.
- * By using this area, we can map up to MAX_IO_APICS pages temporarily,
- * i.e. until the next __va_range() call.
- *
- * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
- * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
- * count idx down while incrementing the phys address.
+ * This is just a simple wrapper around early_ioremap(),
+ * with sanity checks for phys == 0 and size == 0.
*/
char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
@@ -160,6 +153,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
return early_ioremap(phys, size);
}
+
void __init __acpi_unmap_table(char *map, unsigned long size)
{
if (!map || !size)
@@ -199,7 +193,7 @@ static void acpi_register_lapic(int id, u8 enabled)
{
unsigned int ver = 0;
- if (id >= (MAX_LOCAL_APIC-1)) {
+ if (id >= MAX_LOCAL_APIC) {
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
return;
}
@@ -1120,6 +1114,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
int ioapic;
int ioapic_pin;
struct io_apic_irq_attr irq_attr;
+ int ret;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
@@ -1149,7 +1144,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
+ ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
+ if (ret < 0)
+ gsi = INT_MIN;
return gsi;
}
@@ -1626,6 +1623,10 @@ static int __init parse_acpi(char *arg)
/* "acpi=copy_dsdt" copys DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1;
+ }
+ /* "acpi=nocmcff" disables FF mode for corrected errors */
+ else if (strcmp(arg, "nocmcff") == 0) {
+ acpi_disable_cmcff = 1;
} else {
/* Core will printk when we return error. */
return -EINVAL;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c15cf9a25e27..15e8563e5c24 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
+#include <linux/kdebug.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -596,97 +597,93 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
return addr;
}
-/*
- * Cross-modifying kernel text with stop_machine().
- * This code originally comes from immediate value.
- */
-static atomic_t stop_machine_first;
-static int wrote_text;
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
-struct text_poke_params {
- struct text_poke_param *params;
- int nparams;
-};
+static bool bp_patching_in_progress;
+static void *bp_int3_handler, *bp_int3_addr;
-static int __kprobes stop_machine_text_poke(void *data)
+int poke_int3_handler(struct pt_regs *regs)
{
- struct text_poke_params *tpp = data;
- struct text_poke_param *p;
- int i;
+ /* bp_patching_in_progress */
+ smp_rmb();
- if (atomic_xchg(&stop_machine_first, 0)) {
- for (i = 0; i < tpp->nparams; i++) {
- p = &tpp->params[i];
- text_poke(p->addr, p->opcode, p->len);
- }
- smp_wmb(); /* Make sure other cpus see that this has run */
- wrote_text = 1;
- } else {
- while (!wrote_text)
- cpu_relax();
- smp_mb(); /* Load wrote_text before following execution */
- }
+ if (likely(!bp_patching_in_progress))
+ return 0;
- for (i = 0; i < tpp->nparams; i++) {
- p = &tpp->params[i];
- flush_icache_range((unsigned long)p->addr,
- (unsigned long)p->addr + p->len);
- }
- /*
- * Intel Archiecture Software Developer's Manual section 7.1.3 specifies
- * that a core serializing instruction such as "cpuid" should be
- * executed on _each_ core before the new instruction is made visible.
- */
- sync_core();
- return 0;
-}
+ if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ return 0;
+
+ /* set up the specified breakpoint handler */
+ regs->ip = (unsigned long) bp_int3_handler;
+
+ return 1;
-/**
- * text_poke_smp - Update instructions on a live kernel on SMP
- * @addr: address to modify
- * @opcode: source of the copy
- * @len: length to copy
- *
- * Modify multi-byte instruction by using stop_machine() on SMP. This allows
- * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
- * should be allowed, since stop_machine() does _not_ protect code against
- * NMI and MCE.
- *
- * Note: Must be called under get_online_cpus() and text_mutex.
- */
-void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
-{
- struct text_poke_params tpp;
- struct text_poke_param p;
-
- p.addr = addr;
- p.opcode = opcode;
- p.len = len;
- tpp.params = &p;
- tpp.nparams = 1;
- atomic_set(&stop_machine_first, 1);
- wrote_text = 0;
- /* Use __stop_machine() because the caller already got online_cpus. */
- __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
- return addr;
}
/**
- * text_poke_smp_batch - Update instructions on a live kernel on SMP
- * @params: an array of text_poke parameters
- * @n: the number of elements in params.
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @handler: address to jump to when the temporary breakpoint is hit
*
- * Modify multi-byte instruction by using stop_machine() on SMP. Since the
- * stop_machine() is heavy task, it is better to aggregate text_poke requests
- * and do it once if possible.
+ * Modify multi-byte instruction by using int3 breakpoint on SMP.
+ * We completely avoid stop_machine() here, and achieve the
+ * synchronization using int3 breakpoint.
*
- * Note: Must be called under get_online_cpus() and text_mutex.
+ * The way it is done:
+ * - add a int3 trap to the address that will be patched
+ * - sync cores
+ * - update all but the first byte of the patched range
+ * - sync cores
+ * - replace the first byte (int3) by the first byte of
+ * replacing opcode
+ * - sync cores
+ *
+ * Note: must be called under text_mutex.
*/
-void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
- struct text_poke_params tpp = {.params = params, .nparams = n};
+ unsigned char int3 = 0xcc;
+
+ bp_int3_handler = handler;
+ bp_int3_addr = (u8 *)addr + sizeof(int3);
+ bp_patching_in_progress = true;
+ /*
+ * Corresponding read barrier in int3 notifier for
+ * making sure the in_progress flags is correctly ordered wrt.
+ * patching
+ */
+ smp_wmb();
+
+ text_poke(addr, &int3, sizeof(int3));
- atomic_set(&stop_machine_first, 1);
- wrote_text = 0;
- __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
+ on_each_cpu(do_sync_core, NULL, 1);
+
+ if (len - sizeof(int3) > 0) {
+ /* patch all but the first byte */
+ text_poke((char *)addr + sizeof(int3),
+ (const char *) opcode + sizeof(int3),
+ len - sizeof(int3));
+ /*
+ * According to Intel, this core syncing is very likely
+ * not necessary and we'd be safe even without it. But
+ * better safe than sorry (plus there's not only Intel).
+ */
+ on_each_cpu(do_sync_core, NULL, 1);
+ }
+
+ /* patch the first byte */
+ text_poke(addr, opcode, sizeof(int3));
+
+ on_each_cpu(do_sync_core, NULL, 1);
+
+ bp_patching_in_progress = false;
+ smp_wmb();
+
+ return addr;
}
+
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3048ded1b598..59554dca96ec 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{}
};
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{}
};
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void)
next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link =
next_northbridge(link, amd_nb_link_ids);
- }
+ }
+ /* GART present only on Fam15h upto model 0fh */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- boot_cpu_data.x86 == 0x15)
+ (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
amd_northbridges.flags |= AMD_NB_GART;
/*
+ * Check for L3 cache presence.
+ */
+ if (!cpuid_edx(0x80000006))
+ return 0;
+
+ /*
* Some CPU families support L3 Cache Index Disable. There are some
* limitations because of E382 and E388 on family 0x10.
*/
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index c9876efecafb..af5b08ab3b71 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -40,7 +40,7 @@
#include <asm/fixmap.h>
#include <asm/apb_timer.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/time.h>
#define APBT_CLOCKEVENT_RATING 110
@@ -157,13 +157,13 @@ static int __init apbt_clockevent_register(void)
adev->num = smp_processor_id();
adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0",
- mrst_timer_options == MRST_TIMER_LAPIC_APBT ?
+ intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ?
APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING,
adev_virt_addr(adev), 0, apbt_freq);
/* Firmware does EOI handling for us. */
adev->timer->eoi = NULL;
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
global_clock_event = &adev->timer->ced;
printk(KERN_DEBUG "%s clockevent registered as global\n",
global_clock_event->name);
@@ -253,7 +253,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
static __init int apbt_late_init(void)
{
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT ||
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
!apb_timer_block_enabled)
return 0;
/* This notifier should be called after workqueue is ready */
@@ -340,7 +340,7 @@ void __init apbt_time_init(void)
}
#ifdef CONFIG_SMP
/* kernel cmdline disable apb timer, so we will use lapic timers */
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
printk(KERN_INFO "apbt: disabled per cpu timer\n");
return;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index eca89c53a7f5..a7eb82d9b012 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -913,7 +913,7 @@ static void local_apic_timer_interrupt(void)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -932,7 +932,7 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
-void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1946,14 +1946,14 @@ static inline void __smp_spurious_interrupt(void)
"should never happen.\n", smp_processor_id());
}
-void smp_spurious_interrupt(struct pt_regs *regs)
+__visible void smp_spurious_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_spurious_interrupt();
exiting_irq();
}
-void smp_trace_spurious_interrupt(struct pt_regs *regs)
+__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
@@ -2002,14 +2002,14 @@ static inline void __smp_error_interrupt(struct pt_regs *regs)
}
-void smp_error_interrupt(struct pt_regs *regs)
+__visible void smp_error_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_error_interrupt(regs);
exiting_irq();
}
-void smp_trace_error_interrupt(struct pt_regs *regs)
+__visible void smp_trace_error_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_error_apic_entry(ERROR_APIC_VECTOR);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9ed796ccc32c..e63a5bd2a78f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1534,6 +1534,11 @@ void intel_ir_io_apic_print_entries(unsigned int apic,
}
}
+void ioapic_zap_locks(void)
+{
+ raw_spin_lock_init(&ioapic_lock);
+}
+
__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
{
union IO_APIC_reg_00 reg_00;
@@ -3375,12 +3380,15 @@ int io_apic_setup_irq_pin_once(unsigned int irq, int node,
{
unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
int ret;
+ struct IO_APIC_route_entry orig_entry;
/* Avoid redundant programming */
if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return 0;
+ pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin);
+ orig_entry = ioapic_read_entry(attr->ioapic, pin);
+ if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity)
+ return 0;
+ return -EBUSY;
}
ret = io_apic_setup_irq_pin(irq, node, attr);
if (!ret)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1191ac1c9d25..a419814cea57 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -113,7 +113,7 @@ static int __init early_get_pnodeid(void)
break;
case UV3_HUB_PART_NUMBER:
case UV3_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+ uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
break;
}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 53a4e2744846..3ab03430211d 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -392,7 +392,7 @@ static struct cpuidle_device apm_cpuidle_device;
/*
* Local variables
*/
-static struct {
+__visible struct {
unsigned long offset;
unsigned short segment;
} apm_bios_entry;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28610822fb3c..9f6b9341950f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,6 @@ void common(void) {
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 08a089043ccf..3daece79a142 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -66,8 +66,8 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
* performance at the same time..
*/
-extern void vide(void);
-__asm__(".align 4\nvide: ret");
+extern __visible void vide(void);
+__asm__(".globl vide\n\t.align 4\nvide: ret");
static void init_amd_k5(struct cpuinfo_x86 *c)
{
@@ -823,8 +823,8 @@ static const struct cpu_dev amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
#ifdef CONFIG_X86_32
- .c_models = {
- { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[3] = "486 DX/2",
[7] = "486 DX/2-WB",
@@ -835,7 +835,7 @@ static const struct cpu_dev amd_cpu_dev = {
}
},
},
- .c_size_cache = amd_size_cache,
+ .legacy_cache_size = amd_size_cache,
#endif
.c_early_init = early_init_amd,
.c_detect_tlb = cpu_detect_tlb_amd,
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index fbf6c3bc2400..8d5652dc99dd 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -468,10 +468,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
#endif
}
+#ifdef CONFIG_X86_32
static unsigned int
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
-#ifdef CONFIG_X86_32
/* VIA C3 CPUs (670-68F) need further shifting. */
if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
size >>= 8;
@@ -484,16 +484,18 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
if ((c->x86 == 6) && (c->x86_model == 9) &&
(c->x86_mask == 1) && (size == 65))
size -= 1;
-#endif
return size;
}
+#endif
static const struct cpu_dev centaur_cpu_dev = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_early_init = early_init_centaur,
.c_init = init_centaur,
- .c_size_cache = centaur_size_cache,
+#ifdef CONFIG_X86_32
+ .legacy_cache_size = centaur_size_cache,
+#endif
.c_x86_vendor = X86_VENDOR_CENTAUR,
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 25eb2747b063..6abc172b8258 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -346,7 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
/* Look up CPU names by table lookup. */
static const char *table_lookup_model(struct cpuinfo_x86 *c)
{
- const struct cpu_model_info *info;
+#ifdef CONFIG_X86_32
+ const struct legacy_cpu_model_info *info;
if (c->x86_model >= 16)
return NULL; /* Range check */
@@ -354,13 +355,14 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
if (!this_cpu)
return NULL;
- info = this_cpu->c_models;
+ info = this_cpu->legacy_models;
- while (info && info->family) {
+ while (info->family) {
if (info->family == c->x86)
return info->model_names[c->x86_model];
info++;
}
+#endif
return NULL; /* Not found */
}
@@ -450,8 +452,8 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
#else
/* do processor-specific cache resizing */
- if (this_cpu->c_size_cache)
- l2size = this_cpu->c_size_cache(c, l2size);
+ if (this_cpu->legacy_cache_size)
+ l2size = this_cpu->legacy_cache_size(c, l2size);
/* Allow user to override all this if necessary. */
if (cachesize_override != -1)
@@ -1076,7 +1078,7 @@ struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) debug_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
- irq_stack_union) __aligned(PAGE_SIZE);
+ irq_stack_union) __aligned(PAGE_SIZE) __visible;
/*
* The following four percpu variables are hot. Align current_task to
@@ -1093,7 +1095,10 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
@@ -1169,6 +1174,8 @@ void debug_stack_reset(void)
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4041c24ae7db..c37dc37e8317 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,12 +1,6 @@
#ifndef ARCH_X86_CPU_H
#define ARCH_X86_CPU_H
-struct cpu_model_info {
- int vendor;
- int family;
- const char *model_names[16];
-};
-
/* attempt to consolidate cpu attributes */
struct cpu_dev {
const char *c_vendor;
@@ -14,15 +8,23 @@ struct cpu_dev {
/* some have two possibilities for cpuid string */
const char *c_ident[2];
- struct cpu_model_info c_models[4];
-
void (*c_early_init)(struct cpuinfo_x86 *);
void (*c_bsp_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
void (*c_detect_tlb)(struct cpuinfo_x86 *);
- unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
int c_x86_vendor;
+#ifdef CONFIG_X86_32
+ /* Optional vendor specific routine to obtain the cache size. */
+ unsigned int (*legacy_cache_size)(struct cpuinfo_x86 *,
+ unsigned int);
+
+ /* Family/stepping-based lookup table for model names. */
+ struct legacy_cpu_model_info {
+ int family;
+ const char *model_names[16];
+ } legacy_models[5];
+#endif
};
struct _tlb_table {
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 87279212d318..36ce402a3fa5 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -25,11 +25,6 @@
#include <asm/processor.h>
#include <asm/hypervisor.h>
-/*
- * Hypervisor detect order. This is specified explicitly here because
- * some hypervisors might implement compatibility modes for other
- * hypervisors and therefore need to be detected in specific sequence.
- */
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PVHVM
@@ -49,15 +44,19 @@ static inline void __init
detect_hypervisor_vendor(void)
{
const struct hypervisor_x86 *h, * const *p;
+ uint32_t pri, max_pri = 0;
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
h = *p;
- if (h->detect()) {
+ pri = h->detect();
+ if (pri != 0 && pri > max_pri) {
+ max_pri = pri;
x86_hyper = h;
- printk(KERN_INFO "Hypervisor detected: %s\n", h->name);
- break;
}
}
+
+ if (max_pri)
+ printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
}
void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ec7299566f79..dc1ec0dff939 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -665,8 +665,8 @@ static const struct cpu_dev intel_cpu_dev = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
#ifdef CONFIG_X86_32
- .c_models = {
- { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[0] = "486 DX-25/33",
[1] = "486 DX-50",
@@ -679,7 +679,7 @@ static const struct cpu_dev intel_cpu_dev = {
[9] = "486 DX/4-WB"
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
+ { .family = 5, .model_names =
{
[0] = "Pentium 60/66 A-step",
[1] = "Pentium 60/66",
@@ -690,7 +690,7 @@ static const struct cpu_dev intel_cpu_dev = {
[8] = "Mobile Pentium MMX"
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
+ { .family = 6, .model_names =
{
[0] = "Pentium Pro A-step",
[1] = "Pentium Pro",
@@ -704,7 +704,7 @@ static const struct cpu_dev intel_cpu_dev = {
[11] = "Pentium III (Tualatin)",
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
+ { .family = 15, .model_names =
{
[0] = "Pentium 4 (Unknown)",
[1] = "Pentium 4 (Willamette)",
@@ -714,7 +714,7 @@ static const struct cpu_dev intel_cpu_dev = {
}
},
},
- .c_size_cache = intel_size_cache,
+ .legacy_cache_size = intel_size_cache,
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index cd8b166a1735..de8b60a53f69 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m;
/* Only corrected MC is reported */
- if (!corrected || !(mem_err->validation_bits &
- CPER_MEM_VALID_PHYSICAL_ADDRESS))
+ if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
mce_setup(&m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 5b7d4fa5d3b7..09edd0b65fef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
+extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
+void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+static inline void cmci_disable_bank(int bank) { }
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 87a65c939bcd..b3218cdee95f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
+/*
+ * MCA banks controlled through firmware first for corrected errors.
+ * This is a global list of banks for which we won't enable CMCI and we
+ * won't poll. Firmware controls these banks and is responsible for
+ * reporting corrected errors through GHES. Uncorrected/recoverable
+ * errors are still notified through a machine check.
+ */
+mce_banks_t mce_banks_ce_disabled;
+
static DEFINE_PER_CPU(struct work_struct, mce_work);
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
&mce_chrdev_ops,
};
+static void __mce_disable_bank(void *arg)
+{
+ int bank = *((int *)arg);
+ __clear_bit(bank, __get_cpu_var(mce_poll_banks));
+ cmci_disable_bank(bank);
+}
+
+void mce_disable_bank(int bank)
+{
+ if (bank >= mca_cfg.banks) {
+ pr_warn(FW_BUG
+ "Ignoring request to disable invalid MCA bank %d.\n",
+ bank);
+ return;
+ }
+ set_bit(bank, mce_banks_ce_disabled);
+ on_each_cpu(__mce_disable_bank, &bank, 1);
+}
+
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index d56405309dc1..4cfe0458ca66 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -203,6 +203,10 @@ static void cmci_discover(int banks)
if (test_bit(i, owned))
continue;
+ /* Skip banks in firmware first mode */
+ if (test_bit(i, mce_banks_ce_disabled))
+ continue;
+
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
@@ -271,6 +275,19 @@ void cmci_recheck(void)
local_irq_restore(flags);
}
+/* Caller must hold the lock on cmci_discover_lock */
+static void __cmci_disable_bank(int bank)
+{
+ u64 val;
+
+ if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
+ return;
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ val &= ~MCI_CTL2_CMCI_EN;
+ wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ __clear_bit(bank, __get_cpu_var(mce_banks_owned));
+}
+
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
@@ -280,20 +297,12 @@ void cmci_clear(void)
unsigned long flags;
int i;
int banks;
- u64 val;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
- for (i = 0; i < banks; i++) {
- if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
- continue;
- /* Disable CMCI */
- rdmsrl(MSR_IA32_MCx_CTL2(i), val);
- val &= ~MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(i), val);
- __clear_bit(i, __get_cpu_var(mce_banks_owned));
- }
+ for (i = 0; i < banks; i++)
+ __cmci_disable_bank(i);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
@@ -327,6 +336,19 @@ void cmci_reenable(void)
cmci_discover(banks);
}
+void cmci_disable_bank(int bank)
+{
+ int banks;
+ unsigned long flags;
+
+ if (!cmci_supported(&banks))
+ return;
+
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ __cmci_disable_bank(bank);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
static void intel_init_cmci(void)
{
int banks;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 8f4be53ea04b..9f7ca266864a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -15,6 +15,7 @@
#include <linux/clocksource.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/efi.h>
#include <linux/interrupt.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
@@ -23,24 +24,29 @@
#include <asm/desc.h>
#include <asm/idle.h>
#include <asm/irq_regs.h>
+#include <asm/i8259.h>
+#include <asm/apic.h>
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
-static bool __init ms_hyperv_platform(void)
+static uint32_t __init ms_hyperv_platform(void)
{
u32 eax;
u32 hyp_signature[3];
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return false;
+ return 0;
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
- return eax >= HYPERV_CPUID_MIN &&
- eax <= HYPERV_CPUID_MAX &&
- !memcmp("Microsoft Hv", hyp_signature, 12);
+ if (eax >= HYPERV_CPUID_MIN &&
+ eax <= HYPERV_CPUID_MAX &&
+ !memcmp("Microsoft Hv", hyp_signature, 12))
+ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+
+ return 0;
}
static cycle_t read_hv_clock(struct clocksource *arg)
@@ -73,6 +79,30 @@ static void __init ms_hyperv_init_platform(void)
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
+ /*
+ * Get the APIC frequency.
+ */
+ u64 hv_lapic_frequency;
+
+ rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
+ hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
+ lapic_timer_frequency = hv_lapic_frequency;
+ printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
+ lapic_timer_frequency);
+
+ /*
+ * On Hyper-V, when we are booting off an EFI firmware stack,
+ * we do not have many legacy devices including PIC, PIT etc.
+ */
+ if (efi_enabled(EFI_BOOT)) {
+ printk(KERN_INFO "HyperV: Using null_legacy_pic\n");
+ legacy_pic = &null_legacy_pic;
+ }
+ }
+#endif
+
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d4cdfa67509e..ce2d0a2c3e4f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -683,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+ count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Save MTRR state */
@@ -696,6 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
+ count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Intel (P6) standard MTRRs */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a7c7305030cc..8e132931614d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
- int ret;
u64 start_clock;
u64 finish_clock;
+ int ret;
if (!atomic_read(&active_events))
return NMI_DONE;
- start_clock = local_clock();
+ start_clock = sched_clock();
ret = x86_pmu.handle_irq(regs);
- finish_clock = local_clock();
+ finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
@@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return 0;
+ err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
@@ -1883,20 +1883,21 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ if (!sched_clock_stable)
return;
- if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
- return;
-
- userpg->cap_usr_time = 1;
+ userpg->cap_user_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = this_cpu_read(cyc2ns_offset);
}
/*
@@ -1988,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
frame.return_address = 0;
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))
@@ -2040,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
frame.return_address = 0;
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 97e557bc4c91..fd00bb29425d 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -164,6 +164,11 @@ struct cpu_hw_events {
struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
/*
+ * Intel checkpoint mask
+ */
+ u64 intel_cp_status;
+
+ /*
* manage shared (per-core, per-cpu) registers
* used on Intel NHM/WSM/SNB
*/
@@ -440,6 +445,7 @@ struct x86_pmu {
int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
+ bool lbr_double_abort; /* duplicated lbr aborts */
/*
* Extra registers for events
@@ -641,6 +647,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4cbe03287b08..beeb7cc07044 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -347,8 +347,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
struct amd_nb *nb;
int i;
- nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
- cpu_to_node(cpu));
+ nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
if (!nb)
return NULL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a45d8d4ace10..0fa4f242f050 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -123,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
@@ -143,8 +145,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -162,23 +165,34 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ EVENT_CONSTRAINT_END
+};
+
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
@@ -882,6 +896,140 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE SNB_DMND_RFO
+#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS SNB_RESP_ANY
+#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+ },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
@@ -1036,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
wrmsrl(hwc->config_base, ctrl_val);
}
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+ return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -1049,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ cpuc->intel_cp_status &= ~(1ull << hwc->idx);
/*
* must disable before any actual event
@@ -1123,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (event->attr.exclude_guest)
cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+ if (unlikely(event_is_checkpointed(event)))
+ cpuc->intel_cp_status |= (1ull << hwc->idx);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc);
return;
@@ -1141,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
int intel_pmu_save_and_restart(struct perf_event *event)
{
x86_perf_event_update(event);
+ /*
+ * For a checkpointed counter always reset back to 0. This
+ * avoids a situation where the counter overflows, aborts the
+ * transaction and is then set back to shortly before the
+ * overflow, and overflows and aborts again.
+ */
+ if (unlikely(event_is_checkpointed(event))) {
+ /* No race with NMIs because the counter should not be armed */
+ wrmsrl(event->hw.event_base, 0);
+ local64_set(&event->hw.prev_count, 0);
+ }
return x86_perf_event_set_period(event);
}
@@ -1224,6 +1392,13 @@ again:
x86_pmu.drain_pebs(regs);
}
+ /*
+ * Checkpointed counters can lead to 'spurious' PMIs because the
+ * rollback caused by the PMI will have cleared the overflow status
+ * bit. Therefore always force probe these counters.
+ */
+ status |= cpuc->intel_cp_status;
+
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
@@ -1301,11 +1476,11 @@ static void intel_fixup_er(struct perf_event *event, int idx)
if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01b7;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}
@@ -1689,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
event->attr.precise_ip > 0))
return -EOPNOTSUPP;
+ if (event_is_checkpointed(event)) {
+ /*
+ * Sampling of checkpointed events can cause situations where
+ * the CPU constantly aborts because of a overflow, which is
+ * then checkpointed back and ignored. Forbid checkpointing
+ * for sampling.
+ *
+ * But still allow a long sampling period, so that perf stat
+ * from KVM works.
+ */
+ if (event->attr.sample_period > 0 &&
+ event->attr.sample_period < 0x7fffffff)
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -2034,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
}
}
-EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_capacity),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_capacity),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL
@@ -2176,6 +2391,22 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
+ case 55: /* Atom 22nm "Silvermont" */
+ case 77: /* Avoton "Silvermont" */
+ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_atom();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_slm_extra_regs;
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ pr_cont("Silvermont events, ");
+ break;
+
case 37: /* 32 nm nehalem, "Clarkdale" */
case 44: /* 32 nm nehalem, "Gulftown" */
case 47: /* 32 nm Xeon E7 */
@@ -2288,6 +2519,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.lbr_double_abort = true;
pr_cont("Haswell events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3065c57a63c1..ae96cfa5eddd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE PAGE_SIZE
+#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
* pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
* Same as pebs_record_nhm, with two additional fields.
*/
struct pebs_record_hsw {
- struct pebs_record_nhm nhm;
- /*
- * Real IP of the event. In the Intel documentation this
- * is called eventingrip.
- */
- u64 real_ip;
- /*
- * TSX tuning information field: abort cycles and abort flags.
- */
- u64 tsx_tuning;
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 status, dla, dse, lat;
+ u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+ struct {
+ u32 cycles_last_block : 32,
+ hle_abort : 1,
+ rtm_abort : 1,
+ instruction_abort : 1,
+ non_instruction_abort : 1,
+ retry : 1,
+ data_conflict : 1,
+ capacity_writes : 1,
+ capacity_reads : 1;
+ };
+ u64 value;
};
+#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
+
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -214,20 +229,35 @@ void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
+static DEFINE_PER_CPU(void *, insn_buffer);
+
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh = 1; /* always use a single PEBS record */
- void *buffer;
+ void *buffer, *ibuffer;
if (!x86_pmu.pebs)
return 0;
- buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
+ /*
+ * HSW+ already provides us the eventing ip; no need to allocate this
+ * buffer then.
+ */
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+ kfree(buffer);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+ }
+
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
if (!ds || !x86_pmu.pebs)
return;
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
kfree((void *)(unsigned long)ds->pebs_buffer_base);
ds->pebs_buffer_base = 0;
}
@@ -262,7 +295,7 @@ static int alloc_bts_buffer(int cpu)
if (!x86_pmu.bts)
return 0;
- buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -295,7 +328,7 @@ static int alloc_ds_buffer(int cpu)
int node = cpu_to_node(cpu);
struct debug_store *ds;
- ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+ ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
if (unlikely(!ds))
return -ENOMEM;
@@ -517,6 +550,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -558,6 +617,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
@@ -688,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
unsigned long old_to, to = cpuc->lbr_entries[0].to;
unsigned long ip = regs->ip;
int is_64bit = 0;
+ void *kaddr;
/*
* We don't need to fixup if the PEBS assist is fault like
@@ -711,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
* unsigned math, either ip is before the start (impossible) or
* the basic block is larger than 1 page (sanity)
*/
- if ((ip - to) > PAGE_SIZE)
+ if ((ip - to) > PEBS_FIXUP_SIZE)
return 0;
/*
@@ -722,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 1;
}
+ if (!kernel_ip(ip)) {
+ int size, bytes;
+ u8 *buf = this_cpu_read(insn_buffer);
+
+ size = ip - to; /* Must fit our buffer, see above */
+ bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+ if (bytes != 0)
+ return 0;
+
+ kaddr = buf;
+ } else {
+ kaddr = (void *)to;
+ }
+
do {
struct insn insn;
- u8 buf[MAX_INSN_SIZE];
- void *kaddr;
old_to = to;
- if (!kernel_ip(ip)) {
- int bytes, size = MAX_INSN_SIZE;
-
- bytes = copy_from_user_nmi(buf, (void __user *)to, size);
- if (bytes != size)
- return 0;
-
- kaddr = buf;
- } else
- kaddr = (void *)to;
#ifdef CONFIG_X86_64
is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
insn_init(&insn, kaddr, is_64bit);
insn_get_length(&insn);
+
to += insn.length;
+ kaddr += insn.length;
} while (to < ip);
if (to == ip) {
@@ -759,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
}
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+ if (pebs->tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ return tsx.cycles_last_block;
+ }
+ return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+ u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+ /* For RTM XABORTs also log the abort code from AX */
+ if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+ txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ return txn;
+}
+
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs, void *__pebs)
{
/*
- * We cast to pebs_record_nhm to get the load latency data
- * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+ * We cast to the biggest pebs_record but are careful not to
+ * unconditionally access the 'extra' entries.
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct pebs_record_nhm *pebs = __pebs;
- struct pebs_record_hsw *pebs_hsw = __pebs;
+ struct pebs_record_hsw *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -827,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.sp = pebs->sp;
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs.ip = pebs_hsw->real_ip;
+ regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
regs.flags |= PERF_EFLAGS_EXACT;
@@ -835,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.flags &= ~PERF_EFLAGS_EXACT;
if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1)
data.addr = pebs->dla;
+ if (x86_pmu.intel_cap.pebs_format >= 2) {
+ /* Only set the TSX weight when no memory weight. */
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+ data.weight = intel_hsw_weight(pebs);
+
+ if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+ data.txn = intel_hsw_transaction(pebs);
+ }
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -886,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
- void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct perf_event *event = NULL;
+ void *at, *top;
u64 status = 0;
int bit;
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
ds->pebs_index = ds->pebs_buffer_base;
+ if (unlikely(at > top))
+ return;
+
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+ "Unexpected number of pebs records %ld\n",
+ (long)(top - at) / x86_pmu.pebs_record_size);
+
for (; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
@@ -924,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
}
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct pebs_record_nhm *at, *top;
- int n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
- ds->pebs_index = ds->pebs_buffer_base;
-
- n = top - at;
- if (n <= 0)
- return;
-
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events,
- "Unexpected number of pebs records %d\n", n);
-
- return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct pebs_record_hsw *at, *top;
- int n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
- n = top - at;
- if (n <= 0)
- return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events,
- "Unexpected number of pebs records %d\n", n);
-
- return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
/*
* BTS, PEBS probe and setup
*/
@@ -1013,7 +1067,7 @@ void intel_ds_init(void)
case 2:
pr_cont("PEBS fmt2%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
default:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a5005e..d82d155aca8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
+ int out = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
}
from = (u64)((((s64)from) << skip) >> skip);
- cpuc->lbr_entries[i].from = from;
- cpuc->lbr_entries[i].to = to;
- cpuc->lbr_entries[i].mispred = mis;
- cpuc->lbr_entries[i].predicted = pred;
- cpuc->lbr_entries[i].in_tx = in_tx;
- cpuc->lbr_entries[i].abort = abort;
- cpuc->lbr_entries[i].reserved = 0;
+ /*
+ * Some CPUs report duplicated abort records,
+ * with the second entry not having an abort bit set.
+ * Skip them here. This loop runs backwards,
+ * so we need to undo the previous record.
+ * If the abort just happened outside the window
+ * the extra entry cannot be removed.
+ */
+ if (abort && x86_pmu.lbr_double_abort && out > 0)
+ out--;
+
+ cpuc->lbr_entries[out].from = from;
+ cpuc->lbr_entries[out].to = to;
+ cpuc->lbr_entries[out].mispred = mis;
+ cpuc->lbr_entries[out].predicted = pred;
+ cpuc->lbr_entries[out].in_tx = in_tx;
+ cpuc->lbr_entries[out].abort = abort;
+ cpuc->lbr_entries[out].reserved = 0;
+ out++;
}
- cpuc->lbr_stack.nr = i;
+ cpuc->lbr_stack.nr = out;
}
void intel_pmu_lbr_read(void)
@@ -478,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
/* may fail if text not present */
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
- if (bytes != size)
+ if (bytes != 0)
return X86_BR_NONE;
addr = buf;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 1fb6c72717bd..29c248799ced 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -6,6 +6,8 @@ static struct intel_uncore_type **pci_uncores = empty_uncore;
/* pci bus to socket mapping */
static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+
static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
@@ -45,6 +47,24 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
@@ -281,7 +301,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
};
static struct attribute *snbep_uncore_pcu_formats_attr[] = {
- &format_attr_event.attr,
+ &format_attr_event_ext.attr,
&format_attr_occ_sel.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
@@ -301,6 +321,24 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
NULL,
};
@@ -356,13 +394,16 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = {
SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
};
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
+ .init_box = snbep_uncore_pci_init_box, \
+ .disable_box = snbep_uncore_pci_disable_box, \
+ .enable_box = snbep_uncore_pci_enable_box, \
+ .disable_event = snbep_uncore_pci_disable_event, \
+ .read_counter = snbep_uncore_pci_read_counter
+
static struct intel_uncore_ops snbep_uncore_pci_ops = {
- .init_box = snbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_uncore_pci_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_uncore_pci_enable_event, \
};
static struct event_constraint snbep_uncore_cbox_constraints[] = {
@@ -726,6 +767,61 @@ static struct intel_uncore_type *snbep_msr_uncores[] = {
NULL,
};
+enum {
+ SNBEP_PCI_QPI_PORT0_FILTER,
+ SNBEP_PCI_QPI_PORT1_FILTER,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+ reg1->idx = 0;
+ reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+ reg1->config = event->attr.config1;
+ reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+ reg2->config = event->attr.config2;
+ }
+ return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+ struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx];
+ WARN_ON_ONCE(!filter_pdev);
+ if (filter_pdev) {
+ pci_write_config_dword(filter_pdev, reg1->reg,
+ (u32)reg1->config);
+ pci_write_config_dword(filter_pdev, reg1->reg + 4,
+ (u32)(reg1->config >> 32));
+ pci_write_config_dword(filter_pdev, reg2->reg,
+ (u32)reg2->config);
+ pci_write_config_dword(filter_pdev, reg2->reg + 4,
+ (u32)(reg2->config >> 32));
+ }
+ }
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_qpi_enable_event,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
#define SNBEP_UNCORE_PCI_COMMON_INIT() \
.perf_ctr = SNBEP_PCI_PMON_CTR0, \
.event_ctl = SNBEP_PCI_PMON_CTL0, \
@@ -755,17 +851,18 @@ static struct intel_uncore_type snbep_uncore_imc = {
};
static struct intel_uncore_type snbep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &snbep_uncore_pci_ops,
- .event_descs = snbep_uncore_qpi_events,
- .format_group = &snbep_uncore_qpi_format_group,
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .event_descs = snbep_uncore_qpi_events,
+ .format_group = &snbep_uncore_qpi_format_group,
};
@@ -807,43 +904,53 @@ static struct intel_uncore_type *snbep_pci_uncores[] = {
static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
{ /* Home Agent */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
- .driver_data = SNBEP_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
},
{ /* MC Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
},
{ /* MC Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
},
{ /* MC Channel 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
},
{ /* MC Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
},
{ /* QPI Port 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
- .driver_data = SNBEP_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
},
{ /* QPI Port 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
- .driver_data = SNBEP_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
},
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
- .driver_data = SNBEP_PCI_UNCORE_R2PCIE,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
},
{ /* R3QPI Link 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
- .driver_data = SNBEP_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
},
{ /* R3QPI Link 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
- .driver_data = SNBEP_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* end: all zeroes */ }
};
@@ -890,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid)
}
}
+ if (!err) {
+ /*
+ * For PCI bus with no UBOX device, find the next bus
+ * that has UBOX device and use its mapping.
+ */
+ i = -1;
+ for (bus = 255; bus >= 0; bus--) {
+ if (pcibus_to_physid[bus] >= 0)
+ i = pcibus_to_physid[bus];
+ else
+ pcibus_to_physid[bus] = i;
+ }
+ }
+
if (ubox_dev)
pci_dev_put(ubox_dev);
@@ -992,6 +1113,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = {
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
NULL,
};
@@ -1205,17 +1344,83 @@ static struct intel_uncore_type ivt_uncore_imc = {
IVT_UNCORE_PCI_COMMON_INIT(),
};
+/* registers in IRP boxes are not properly aligned */
+static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx],
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count = 0;
+
+ pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+ pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static struct intel_uncore_ops ivt_uncore_irp_ops = {
+ .init_box = ivt_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = ivt_uncore_irp_disable_event,
+ .enable_event = ivt_uncore_irp_enable_event,
+ .read_counter = ivt_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivt_uncore_irp = {
+ .name = "irp",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_mask = IVT_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &ivt_uncore_irp_ops,
+ .format_group = &ivt_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivt_uncore_qpi_ops = {
+ .init_box = ivt_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_qpi_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
static struct intel_uncore_type ivt_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 3,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &ivt_uncore_pci_ops,
- .format_group = &ivt_uncore_qpi_format_group,
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &ivt_uncore_qpi_ops,
+ .format_group = &ivt_uncore_qpi_format_group,
};
static struct intel_uncore_type ivt_uncore_r2pcie = {
@@ -1239,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = {
enum {
IVT_PCI_UNCORE_HA,
IVT_PCI_UNCORE_IMC,
+ IVT_PCI_UNCORE_IRP,
IVT_PCI_UNCORE_QPI,
IVT_PCI_UNCORE_R2PCIE,
IVT_PCI_UNCORE_R3QPI,
@@ -1247,6 +1453,7 @@ enum {
static struct intel_uncore_type *ivt_pci_uncores[] = {
[IVT_PCI_UNCORE_HA] = &ivt_uncore_ha,
[IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc,
+ [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp,
[IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi,
[IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie,
[IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi,
@@ -1256,71 +1463,85 @@ static struct intel_uncore_type *ivt_pci_uncores[] = {
static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
- .driver_data = IVT_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0),
},
{ /* Home Agent 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
- .driver_data = IVT_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1),
},
{ /* MC0 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0),
},
{ /* MC0 Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1),
},
{ /* MC0 Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2),
},
{ /* MC0 Channel 4 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3),
},
{ /* MC1 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4),
},
{ /* MC1 Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5),
},
{ /* MC1 Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6),
},
{ /* MC1 Channel 4 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0),
},
{ /* QPI0 Port 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
},
{ /* QPI0 Port 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1),
},
{ /* QPI1 Port 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2),
},
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
- .driver_data = IVT_PCI_UNCORE_R2PCIE,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0),
},
{ /* R3QPI0 Link 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0),
},
{ /* R3QPI0 Link 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1),
},
{ /* R3QPI1 Link 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* end: all zeroes */ }
};
@@ -2599,14 +2820,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
struct intel_uncore_box *box;
int i, size;
size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
- box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
+ box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
@@ -2701,7 +2922,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
return c;
}
- if (event->hw.config == ~0ULL)
+ if (event->attr.config == UNCORE_FIXED_EVENT)
return &constraint_fixed;
if (type->constraints) {
@@ -2924,7 +3145,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
- fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -3005,7 +3226,9 @@ static int uncore_pmu_event_init(struct perf_event *event)
*/
if (pmu->type->single_fixed && pmu->pmu_idx > 0)
return -EINVAL;
- hwc->config = ~0ULL;
+
+ /* fixed counters have event field hardcoded to zero */
+ hwc->config = 0ULL;
} else {
hwc->config = event->attr.config & pmu->type->event_mask;
if (pmu->type->ops->hw_config) {
@@ -3167,17 +3390,25 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
-static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, phys_id;
+ struct intel_uncore_type *type;
+ int phys_id;
phys_id = pcibus_to_physid[pdev->bus->number];
if (phys_id < 0)
return -ENODEV;
- box = uncore_alloc_box(type, 0);
+ if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+ extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev;
+ pci_set_drvdata(pdev, NULL);
+ return 0;
+ }
+
+ type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3185,21 +3416,11 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
* for performance monitoring unit with multiple boxes,
* each box has a different function id.
*/
- for (i = 0; i < type->num_boxes; i++) {
- pmu = &type->pmus[i];
- if (pmu->func_id == pdev->devfn)
- break;
- if (pmu->func_id < 0) {
- pmu->func_id = pdev->devfn;
- break;
- }
- pmu = NULL;
- }
-
- if (!pmu) {
- kfree(box);
- return -EINVAL;
- }
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ if (pmu->func_id < 0)
+ pmu->func_id = pdev->devfn;
+ else
+ WARN_ON_ONCE(pmu->func_id != pdev->devfn);
box->phys_id = phys_id;
box->pci_dev = pdev;
@@ -3217,9 +3438,22 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
- struct intel_uncore_pmu *pmu = box->pmu;
- int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+ struct intel_uncore_pmu *pmu;
+ int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+
+ box = pci_get_drvdata(pdev);
+ if (!box) {
+ for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+ if (extra_pci_dev[phys_id][i] == pdev) {
+ extra_pci_dev[phys_id][i] = NULL;
+ break;
+ }
+ }
+ WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+ return;
+ }
+ pmu = box->pmu;
if (WARN_ON_ONCE(phys_id != box->phys_id))
return;
@@ -3240,12 +3474,6 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}
-static int uncore_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- return uncore_pci_add(pci_uncores[id->driver_data], pdev);
-}
-
static int __init uncore_pci_init(void)
{
int ret;
@@ -3385,7 +3613,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;
- box = uncore_alloc_box(type, cpu);
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 47b3d00c9d89..a80ab71a883d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -12,6 +12,15 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV 0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX 2
+
+/* support up to 8 sockets */
+#define UNCORE_SOCKET_MAX 8
+
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
/* SNB event control */
@@ -108,6 +117,7 @@
(SNBEP_PMON_CTL_EV_SEL_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
SNBEP_PMON_CTL_INVERT | \
SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index aee6317b902f..06fe3ed8b851 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -11,15 +11,12 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
unsigned int cpu)
{
#ifdef CONFIG_SMP
- if (c->x86_max_cores * smp_num_siblings > 1) {
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n",
- cpumask_weight(cpu_core_mask(cpu)));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- seq_printf(m, "apicid\t\t: %d\n", c->apicid);
- seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
- }
+ seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+ seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu)));
+ seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+ seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+ seq_printf(m, "apicid\t\t: %d\n", c->apicid);
+ seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
#endif
}
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index 202759a14121..75c5ad5d35cc 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -11,8 +11,8 @@
static const struct cpu_dev umc_cpu_dev = {
.c_vendor = "UMC",
.c_ident = { "UMC UMC UMC" },
- .c_models = {
- { .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[1] = "U5D",
[2] = "U5S",
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 7076878404ec..628a059a9a06 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -93,7 +93,7 @@ static void __init vmware_platform_setup(void)
* serial key should be enough, as this will always have a VMware
* specific string when running under VMware hypervisor.
*/
-static bool __init vmware_platform(void)
+static uint32_t __init vmware_platform(void)
{
if (cpu_has_hypervisor) {
unsigned int eax;
@@ -102,12 +102,12 @@ static bool __init vmware_platform(void)
cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
&hyper_vendor_id[1], &hyper_vendor_id[2]);
if (!memcmp(hyper_vendor_id, "VMwareVMware", 12))
- return true;
+ return CPUID_VMWARE_INFO_LEAF;
} else if (dmi_available && dmi_name_in_serial("VMware") &&
__vmware_platform())
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 74467feb4dc5..18677a90d6a3 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -127,10 +127,12 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
- lapic_shutdown();
-#if defined(CONFIG_X86_IO_APIC)
+#ifdef CONFIG_X86_IO_APIC
+ /* Prevent crash_kexec() from deadlocking on ioapic_lock. */
+ ioapic_zap_locks();
disable_IO_APIC();
#endif
+ lapic_shutdown();
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 69eb2fa25494..376dc7873447 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abeabbda5..174da5fc5a7b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -658,15 +658,18 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata)
+void __init parse_e820_ext(u64 phys_addr, u32 data_len)
{
int entries;
struct e820entry *extmap;
+ struct setup_data *sdata;
+ sdata = early_memremap(phys_addr, data_len);
entries = sdata->len / sizeof(struct e820entry);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ early_iounmap(sdata, data_len);
printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 63bdb29b2549..b3cd3ebae077 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/pci_ids.h>
+#include <drm/i915_drm.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/io_apic.h>
@@ -216,6 +217,157 @@ static void __init intel_remapping_check(int num, int slot, int func)
}
+/*
+ * Systems with Intel graphics controllers set aside memory exclusively
+ * for gfx driver use. This memory is not marked in the E820 as reserved
+ * or as RAM, and so is subject to overlap from E820 manipulation later
+ * in the boot process. On some systems, MMIO space is allocated on top,
+ * despite the efforts of the "RAM buffer" approach, which simply rounds
+ * memory boundaries up to 64M to try to catch space that may decode
+ * as RAM and so is not suitable for MMIO.
+ *
+ * And yes, so far on current devices the base addr is always under 4G.
+ */
+static u32 __init intel_stolen_base(int num, int slot, int func)
+{
+ u32 base;
+
+ /*
+ * For the PCI IDs in this quirk, the stolen base is always
+ * in 0x5c, aka the BDSM register (yes that's really what
+ * it's called).
+ */
+ base = read_pci_config(num, slot, func, 0x5c);
+ base &= ~((1<<20) - 1);
+
+ return base;
+}
+
+#define KB(x) ((x) * 1024)
+#define MB(x) (KB (KB (x)))
+#define GB(x) (MB (KB (x)))
+
+static size_t __init gen3_stolen_size(int num, int slot, int func)
+{
+ size_t stolen_size;
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
+
+ switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
+ case I855_GMCH_GMS_STOLEN_1M:
+ stolen_size = MB(1);
+ break;
+ case I855_GMCH_GMS_STOLEN_4M:
+ stolen_size = MB(4);
+ break;
+ case I855_GMCH_GMS_STOLEN_8M:
+ stolen_size = MB(8);
+ break;
+ case I855_GMCH_GMS_STOLEN_16M:
+ stolen_size = MB(16);
+ break;
+ case I855_GMCH_GMS_STOLEN_32M:
+ stolen_size = MB(32);
+ break;
+ case I915_GMCH_GMS_STOLEN_48M:
+ stolen_size = MB(48);
+ break;
+ case I915_GMCH_GMS_STOLEN_64M:
+ stolen_size = MB(64);
+ break;
+ case G33_GMCH_GMS_STOLEN_128M:
+ stolen_size = MB(128);
+ break;
+ case G33_GMCH_GMS_STOLEN_256M:
+ stolen_size = MB(256);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_96M:
+ stolen_size = MB(96);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_160M:
+ stolen_size = MB(160);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_224M:
+ stolen_size = MB(224);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_352M:
+ stolen_size = MB(352);
+ break;
+ default:
+ stolen_size = 0;
+ break;
+ }
+
+ return stolen_size;
+}
+
+static size_t __init gen6_stolen_size(int num, int slot, int func)
+{
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
+ gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GMS_MASK;
+
+ return gmch_ctrl << 25; /* 32 MB units */
+}
+
+typedef size_t (*stolen_size_fn)(int num, int slot, int func);
+
+static struct pci_device_id intel_stolen_ids[] __initdata = {
+ INTEL_I915G_IDS(gen3_stolen_size),
+ INTEL_I915GM_IDS(gen3_stolen_size),
+ INTEL_I945G_IDS(gen3_stolen_size),
+ INTEL_I945GM_IDS(gen3_stolen_size),
+ INTEL_VLV_M_IDS(gen3_stolen_size),
+ INTEL_VLV_D_IDS(gen3_stolen_size),
+ INTEL_PINEVIEW_IDS(gen3_stolen_size),
+ INTEL_I965G_IDS(gen3_stolen_size),
+ INTEL_G33_IDS(gen3_stolen_size),
+ INTEL_I965GM_IDS(gen3_stolen_size),
+ INTEL_GM45_IDS(gen3_stolen_size),
+ INTEL_G45_IDS(gen3_stolen_size),
+ INTEL_IRONLAKE_D_IDS(gen3_stolen_size),
+ INTEL_IRONLAKE_M_IDS(gen3_stolen_size),
+ INTEL_SNB_D_IDS(gen6_stolen_size),
+ INTEL_SNB_M_IDS(gen6_stolen_size),
+ INTEL_IVB_M_IDS(gen6_stolen_size),
+ INTEL_IVB_D_IDS(gen6_stolen_size),
+ INTEL_HSW_D_IDS(gen6_stolen_size),
+ INTEL_HSW_M_IDS(gen6_stolen_size),
+};
+
+static void __init intel_graphics_stolen(int num, int slot, int func)
+{
+ size_t size;
+ int i;
+ u32 start;
+ u16 device, subvendor, subdevice;
+
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
+ subvendor = read_pci_config_16(num, slot, func,
+ PCI_SUBSYSTEM_VENDOR_ID);
+ subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID);
+
+ for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) {
+ if (intel_stolen_ids[i].device == device) {
+ stolen_size_fn stolen_size =
+ (stolen_size_fn)intel_stolen_ids[i].driver_data;
+ size = stolen_size(num, slot, func);
+ start = intel_stolen_base(num, slot, func);
+ if (size && start) {
+ /* Mark this space as reserved */
+ e820_add_region(start, size, E820_RESERVED);
+ sanitize_e820_map(e820.map,
+ ARRAY_SIZE(e820.map),
+ &e820.nr_map);
+ }
+ return;
+ }
+ }
+}
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -251,6 +403,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+ { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
+ QFLAG_APPLY_ONCE, intel_graphics_stolen },
{}
};
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index d15f575a861b..01d1c187c9f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -14,9 +14,11 @@
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
+#include <linux/efi.h>
+#include <asm/efi.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -234,6 +236,11 @@ static int __init setup_early_printk(char *buf)
early_console_register(&early_hsu_console, keep);
}
#endif
+#ifdef CONFIG_EARLY_PRINTK_EFI
+ if (!strncmp(buf, "efi", 3))
+ early_console_register(&early_efi_console, keep);
+#endif
+
buf++;
}
return 0;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2cfbc3a3a2dd..fd1bc1b15e6d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -362,12 +362,9 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_all
need_resched:
- movl TI_flags(%ebp), %ecx # need_resched set ?
- testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ cmpl $0,PER_CPU_VAR(__preempt_count)
+ jnz restore_all
testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
@@ -1176,6 +1173,9 @@ ftrace_restore_flags:
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $__PAGE_OFFSET, %esp
+ jb ftrace_stub /* Paging not enabled yet? */
+
cmpl $0, function_trace_stop
jne ftrace_stub
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1b69951a81e2..603be7c70675 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64)
TRACE_IRQS_OFF
.endm
-ENTRY(save_rest)
- PARTIAL_FRAME 1 (REST_SKIP+8)
- movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
- movq %r11, 8(%rsp) /* return address */
- FIXUP_TOP_OF_STACK %r11, 16
- ret
- CFI_ENDPROC
-END(save_rest)
-
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
@@ -1118,10 +1103,8 @@ retint_signal:
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
- cmpl $0,TI_preempt_count(%rcx)
+ cmpl $0,PER_CPU_VAR(__preempt_count)
jnz retint_restore_args
- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
- jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
@@ -1357,7 +1340,7 @@ bad_gs:
.previous
/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
+ENTRY(do_softirq_own_stack)
CFI_STARTPROC
pushq_cfi %rbp
CFI_REL_OFFSET rbp,0
@@ -1374,7 +1357,7 @@ ENTRY(call_softirq)
decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
-END(call_softirq)
+END(do_softirq_own_stack)
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 138463a24877..c61a14a4a310 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,14 +29,14 @@ static void __init i386_default_early_setup(void)
reserve_ebda_region();
}
-void __init i386_start_kernel(void)
+asmlinkage void __init i386_start_kernel(void)
{
sanitize_boot_params(&boot_params);
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
- case X86_SUBARCH_MRST:
- x86_mrst_early_setup();
+ case X86_SUBARCH_INTEL_MID:
+ x86_intel_mid_early_setup();
break;
case X86_SUBARCH_CE4100:
x86_ce4100_early_setup();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 55b67614ed94..1be8e43b669e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-void __init x86_64_start_kernel(char * real_mode_data)
+asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5dd87a89f011..81ba27679f18 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -409,6 +409,7 @@ enable_paging:
/*
* Check if it is 486
*/
+ movb $4,X86 # at least 486
cmpl $-1,X86_CPUID
je is486
@@ -436,7 +437,6 @@ enable_paging:
movl %edx,X86_CAPABILITY
is486:
- movb $4,X86
movl $0x50022,%ecx # set AM, WP, NE and MP
movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 0fa69127209a..05fd74f537d6 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 9a5c460404dc..2e977b5d61dd 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -312,8 +312,7 @@ static void init_8259A(int auto_eoi)
*/
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
- /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
- to 0x20-0x27 on i386 */
+ /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
/* 8259A-1 (the master) has a slave on IR2 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3a8185c042a2..22d0687e7fda 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -177,7 +177,7 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -215,7 +215,7 @@ void __smp_x86_platform_ipi(void)
x86_platform_ipi_callback();
}
-void smp_x86_platform_ipi(struct pt_regs *regs)
+__visible void smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -229,7 +229,7 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
-void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -247,7 +247,7 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
}
#endif
-void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4186755f1d7c..d7fcbedc9c43 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
-
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -131,7 +128,6 @@ void irq_ctx_init(int cpu)
THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(hardirq_ctx, cpu) = irqctx;
@@ -149,35 +145,21 @@ void irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
{
- unsigned long flags;
struct thread_info *curctx;
union irq_ctx *irqctx;
u32 *isp;
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- curctx = current_thread_info();
- irqctx = __this_cpu_read(softirq_ctx);
- irqctx->tinfo.task = curctx->task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- /* build the stack frame on the softirq stack */
- isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
+ curctx = current_thread_info();
+ irqctx = __this_cpu_read(softirq_ctx);
+ irqctx->tinfo.task = curctx->task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
- call_on_stack(__do_softirq, isp);
- /*
- * Shouldn't happen, we returned above if in_interrupt():
- */
- WARN_ON_ONCE(softirq_count());
- }
+ /* build the stack frame on the softirq stack */
+ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
- local_irq_restore(flags);
+ call_on_stack(__do_softirq, isp);
}
bool handle_irq(unsigned irq, struct pt_regs *regs)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..4d1c746892eb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -87,24 +87,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
generic_handle_irq_desc(irq, desc);
return true;
}
-
-
-extern void call_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- __u32 pending;
- unsigned long flags;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
- pending = local_softirq_pending();
- /* Switch to interrupt stack */
- if (pending) {
- call_softirq();
- WARN_ON_ONCE(softirq_count());
- }
- local_irq_restore(flags);
-}
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 636a55e4a13c..1de84e3ab4e0 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -22,14 +22,14 @@ static inline void __smp_irq_work_interrupt(void)
irq_work_run();
}
-void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible void smp_irq_work_interrupt(struct pt_regs *regs)
{
irq_work_entering_irq();
__smp_irq_work_interrupt();
exiting_irq();
}
-void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
irq_work_entering_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 2889b3d43882..ee11b7dfbfbb 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,20 +24,71 @@ union jump_code_union {
} __attribute__((packed));
};
+static void bug_at(unsigned char *ip, int line)
+{
+ /*
+ * The location is not an op that we were expecting.
+ * Something went wrong. Crash the box, as something could be
+ * corrupting the kernel.
+ */
+ pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
+ ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+ BUG();
+}
+
static void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
- void *(*poker)(void *, const void *, size_t))
+ void *(*poker)(void *, const void *, size_t),
+ int init)
{
union jump_code_union code;
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
if (type == JUMP_LABEL_ENABLE) {
+ /*
+ * We are enabling this jump label. If it is not a nop
+ * then something must have gone wrong.
+ */
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);
- } else
+ } else {
+ /*
+ * We are disabling this jump label. If it is not what
+ * we think it is, then something must have gone wrong.
+ * If this is the first initialization call, then we
+ * are converting the default nop to the ideal nop.
+ */
+ if (init) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ } else {
+ code.jump = 0xe9;
+ code.offset = entry->target -
+ (entry->code + JUMP_LABEL_NOP_SIZE);
+ if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+ }
- (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+ /*
+ * Make text_poke_bp() a default fallback poker.
+ *
+ * At the time the change is being done, just ignore whether we
+ * are doing nop -> jump or jump -> nop transition, and assume
+ * always nop being the 'currently valid' instruction
+ *
+ */
+ if (poker)
+ (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+ else
+ text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
+ (void *)entry->code + JUMP_LABEL_NOP_SIZE);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -45,15 +96,38 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
get_online_cpus();
mutex_lock(&text_mutex);
- __jump_label_transform(entry, type, text_poke_smp);
+ __jump_label_transform(entry, type, NULL, 0);
mutex_unlock(&text_mutex);
put_online_cpus();
}
+static enum {
+ JL_STATE_START,
+ JL_STATE_NO_UPDATE,
+ JL_STATE_UPDATE,
+} jlstate __initdata_or_module = JL_STATE_START;
+
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- __jump_label_transform(entry, type, text_poke_early);
+ /*
+ * This function is called at boot up and when modules are
+ * first loaded. Check if the default nop, the one that is
+ * inserted at compile time, is the ideal nop. If it is, then
+ * we do not need to update the nop, and we can leave it as is.
+ * If it is not, then we need to update the nop to the ideal nop.
+ */
+ if (jlstate == JL_STATE_START) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+
+ if (memcmp(ideal_nop, default_nop, 5) != 0)
+ jlstate = JL_STATE_UPDATE;
+ else
+ jlstate = JL_STATE_NO_UPDATE;
+ }
+ if (jlstate == JL_STATE_UPDATE)
+ __jump_label_transform(entry, type, text_poke_early, 1);
}
#endif
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2e9d4b5af036..c6ee63f927ab 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -82,14 +82,9 @@ extern void synthesize_reljump(void *from, void *to);
extern void synthesize_relcall(void *from, void *to);
#ifdef CONFIG_OPTPROBES
-extern int arch_init_optprobes(void);
extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
#else /* !CONFIG_OPTPROBES */
-static inline int arch_init_optprobes(void)
-{
- return 0;
-}
static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
return 0;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 211bce445522..79a3f9682871 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -661,7 +661,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
/*
* Called from kretprobe_trampoline
*/
-static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
+__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
@@ -1068,7 +1068,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
int __init arch_init_kprobes(void)
{
- return arch_init_optprobes();
+ return 0;
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 76dc6f095724..898160b42e43 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -88,9 +88,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long v
*(unsigned long *)addr = val;
}
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
- asm volatile (
+asm (
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
@@ -129,7 +127,6 @@ static void __used __kprobes kprobes_optinsn_template_holder(void)
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n");
-}
#define TMPL_MOVE_IDX \
((long)&optprobe_template_val - (long)&optprobe_template_entry)
@@ -371,31 +368,6 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
return 0;
}
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
- u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
- /* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
-
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
/*
* Replace breakpoints (int3) with relative jumps.
* Caller must call with locking kprobe_mutex and text_mutex.
@@ -403,37 +375,38 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- int c = 0;
+ u8 insn_buf[RELATIVEJUMP_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
WARN_ON(kprobe_disabled(&op->kp));
- /* Setup param */
- setup_optimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ op->optinsn.insn);
+
list_del_init(&op->list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
}
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
}
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
+ u8 insn_buf[RELATIVEJUMP_SIZE];
+
/* Set int3 to first byte for kprobes */
insn_buf[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
+ text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ op->optinsn.insn);
}
/*
@@ -444,34 +417,11 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
struct list_head *done_list)
{
struct optimized_kprobe *op, *tmp;
- int c = 0;
list_for_each_entry_safe(op, tmp, oplist, list) {
- /* Setup param */
- setup_unoptimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
+ arch_unoptimize_kprobe(op);
list_move(&op->list, done_list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
}
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3). */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
- u8 buf[RELATIVEJUMP_SIZE];
-
- /* Set int3 to first byte for kprobes */
- buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
}
int __kprobes
@@ -491,22 +441,3 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
}
return 0;
}
-
-int __kprobes arch_init_optprobes(void)
-{
- /* Allocate code buffer and parameter array */
- jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_bufs)
- return -ENOMEM;
-
- jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_params) {
- kfree(jump_poke_bufs);
- jump_poke_bufs = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32cc55b8..b2046e4d0b59 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
+#include <linux/debugfs.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
WARN_ON(kvm_register_clock("primary cpu clock"));
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
+ kvm_spinlock_init();
}
static void kvm_guest_cpu_online(void *dummy)
@@ -498,11 +500,9 @@ void __init kvm_guest_init(void)
#endif
}
-static bool __init kvm_detect(void)
+static uint32_t __init kvm_detect(void)
{
- if (!kvm_para_available())
- return false;
- return true;
+ return kvm_cpuid_base();
}
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
@@ -523,3 +523,274 @@ static __init int activate_jump_labels(void)
return 0;
}
arch_initcall(activate_jump_labels);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+static void kvm_kick_cpu(int cpu)
+{
+ int apicid;
+ unsigned long flags = 0;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+enum kvm_contention_stat {
+ TAKEN_SLOW,
+ TAKEN_SLOW_PICKUP,
+ RELEASED_SLOW,
+ RELEASED_SLOW_KICKED,
+ NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS 30
+
+static struct kvm_spinlock_stats
+{
+ u32 contention_stats[NR_CONTENTION_STATS];
+ u32 histo_spin_blocked[HISTO_BUCKETS+1];
+ u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+ u8 ret;
+ u8 old;
+
+ old = ACCESS_ONCE(zero_stats);
+ if (unlikely(old)) {
+ ret = cmpxchg(&zero_stats, old, 0);
+ /* This ensures only one fellow resets the stat */
+ if (ret == old)
+ memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+ }
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+ check_zero();
+ spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+ return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+ unsigned index;
+
+ index = ilog2(delta);
+ check_zero();
+
+ if (index < HISTO_BUCKETS)
+ array[index]++;
+ else
+ array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+ u32 delta;
+
+ delta = sched_clock() - start;
+ __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+ spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+ d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
+ if (!d_kvm_debug)
+ printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+ return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+ struct dentry *d_kvm;
+
+ d_kvm = kvm_init_debugfs();
+ if (d_kvm == NULL)
+ return -ENOMEM;
+
+ d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+ debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+ debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW]);
+ debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+ debugfs_create_u32("released_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW]);
+ debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+ debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+ &spinlock_stats.time_blocked);
+
+ debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+ return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+ return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+ struct arch_spinlock *lock;
+ __ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+ struct kvm_lock_waiting *w;
+ int cpu;
+ u64 start;
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ w = &__get_cpu_var(klock_waiting);
+ cpu = smp_processor_id();
+ start = spin_time_start();
+
+ /*
+ * Make sure an interrupt handler can't upset things in a
+ * partially setup state.
+ */
+ local_irq_save(flags);
+
+ /*
+ * The ordering protocol on this is that the "lock" pointer
+ * may only be set non-NULL if the "want" ticket is correct.
+ * If we're updating "want", we must first clear "lock".
+ */
+ w->lock = NULL;
+ smp_wmb();
+ w->want = want;
+ smp_wmb();
+ w->lock = lock;
+
+ add_stats(TAKEN_SLOW, 1);
+
+ /*
+ * This uses set_bit, which is atomic but we should not rely on its
+ * reordering gurantees. So barrier is needed after this call.
+ */
+ cpumask_set_cpu(cpu, &waiting_cpus);
+
+ barrier();
+
+ /*
+ * Mark entry to slowpath before doing the pickup test to make
+ * sure we don't deadlock with an unlocker.
+ */
+ __ticket_enter_slowpath(lock);
+
+ /*
+ * check again make sure it didn't become free while
+ * we weren't looking.
+ */
+ if (ACCESS_ONCE(lock->tickets.head) == want) {
+ add_stats(TAKEN_SLOW_PICKUP, 1);
+ goto out;
+ }
+
+ /*
+ * halt until it's our turn and kicked. Note that we do safe halt
+ * for irq enabled case to avoid hang when lock info is overwritten
+ * in irq spinlock slowpath and no spurious interrupt occur to save us.
+ */
+ if (arch_irqs_disabled_flags(flags))
+ halt();
+ else
+ safe_halt();
+
+out:
+ cpumask_clear_cpu(cpu, &waiting_cpus);
+ w->lock = NULL;
+ local_irq_restore(flags);
+ spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+ int cpu;
+
+ add_stats(RELEASED_SLOW, 1);
+ for_each_cpu(cpu, &waiting_cpus) {
+ const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
+ if (ACCESS_ONCE(w->lock) == lock &&
+ ACCESS_ONCE(w->want) == ticket) {
+ add_stats(RELEASED_SLOW_KICKED, 1);
+ kvm_kick_cpu(cpu);
+ break;
+ }
+ }
+}
+
+/*
+ * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+ /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ return;
+
+ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+ pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+
+static __init int kvm_spinlock_init_jump(void)
+{
+ if (!kvm_para_available())
+ return 0;
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ return 0;
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+ printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+ return 0;
+}
+early_initcall(kvm_spinlock_init_jump);
+
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7123b5df479d..af99f71aeb7f 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
+ uci->cpu_sig.rev = rev;
return 0;
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 88458faea2f8..05266b5aae22 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -46,7 +46,7 @@ static struct class *msr_class;
static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
mutex_lock(&inode->i_mutex);
switch (orig) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ba77ebc2c353..6fcb49ce50a1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
u64 before, delta, whole_msecs;
int remainder_ns, decimal_msecs, thishandled;
- before = local_clock();
+ before = sched_clock();
thishandled = a->handler(type, regs);
handled += thishandled;
- delta = local_clock() - before;
+ delta = sched_clock() - before;
trace_nmi_handler(a->handler, (int)delta, thishandled);
if (delta < nmi_longest_ns)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..bbb6c7316341 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
*/
#include <linux/spinlock.h>
#include <linux/module.h>
+#include <linux/jump_label.h>
#include <asm/paravirt.h>
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
- arch_spin_lock(lock);
-}
-
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
- .spin_is_locked = __ticket_spin_is_locked,
- .spin_is_contended = __ticket_spin_is_contended,
-
- .spin_lock = __ticket_spin_lock,
- .spin_lock_flags = default_spin_lock_flags,
- .spin_trylock = __ticket_spin_trylock,
- .spin_unlock = __ticket_spin_unlock,
+ .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
+ .unlock_kick = paravirt_nop,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
+struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index cd6de64cc480..1b10af835c31 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -62,11 +62,6 @@ void __init default_banner(void)
pv_info.name);
}
-/* Simple instruction patching code. */
-#define DEF_NATIVE(ops, name, code) \
- extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
-
/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
@@ -324,7 +319,7 @@ struct pv_time_ops pv_time_ops = {
.steal_clock = native_steal_clock,
};
-struct pv_irq_ops pv_irq_ops = {
+__visible struct pv_irq_ops pv_irq_ops = {
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
@@ -336,7 +331,7 @@ struct pv_irq_ops pv_irq_ops = {
#endif
};
-struct pv_cpu_ops pv_cpu_ops = {
+__visible struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S
new file mode 100644
index 000000000000..ca7f0d58a87d
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/asm.h>
+#include <asm/calling.h>
+
+ENTRY(___preempt_schedule)
+ CFI_STARTPROC
+ SAVE_ALL
+ call preempt_schedule
+ RESTORE_ALL
+ ret
+ CFI_ENDPROC
+
+#ifdef CONFIG_CONTEXT_TRACKING
+
+ENTRY(___preempt_schedule_context)
+ CFI_STARTPROC
+ SAVE_ALL
+ call preempt_schedule_context
+ RESTORE_ALL
+ ret
+ CFI_ENDPROC
+
+#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 83369e5a1d27..3fb8d95ab8b5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -36,7 +36,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
* The switch back from broadcast mode needs to be
* called with interrupts disabled.
*/
- local_irq_disable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
- local_irq_enable();
+ local_irq_disable();
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+ local_irq_enable();
} else
default_idle();
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8adefca71dc..c2ec1aa6d454 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(start_thread);
* the task-switch, and shows up in ret_from_fork in entry.S,
* for example.
*/
-__notrace_funcgraph struct task_struct *
+__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
@@ -292,6 +292,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
set_iopl_mask(next->iopl);
/*
+ * If it were not for PREEMPT_ACTIVE we could guarantee that the
+ * preempt_count of all tasks was equal here and this would not be
+ * needed.
+ */
+ task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+ this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
+ /*
* Now maybe handle debug registers and/or IO bitmaps
*/
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 05646bab4ca6..45ab4d6fc8a7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-DEFINE_PER_CPU(unsigned long, old_rsp);
+asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@@ -274,7 +274,7 @@ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
* Kprobes not supported here. Set the probe on schedule instead.
* Function graph tracer not supported too.
*/
-__notrace_funcgraph struct task_struct *
+__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
@@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p);
+ /*
+ * If it were not for PREEMPT_ACTIVE we could guarantee that the
+ * preempt_count of all tasks was equal here and this would not be
+ * needed.
+ */
+ task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+ this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2cb9470ea85b..a16bae3f83b3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
- if (!pvclock_vdso_info) {
- BUG();
- return NULL;
- }
-
- return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
- return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
#ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
- void *v)
-{
- struct task_migration_notifier *mn = v;
- struct pvclock_vsyscall_time_info *pvti;
-
- pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
- /* this is NULL when pvclock vsyscall is not initialized */
- if (unlikely(pvti == NULL))
- return NOTIFY_DONE;
-
- pvti->migrate_count++;
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
- .notifier_call = pvclock_task_migrate,
-};
-
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
- pvclock_vdso_info = i;
-
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
-
- register_task_migration_notifier(&pvclock_migrate);
-
return 0;
}
#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 563ed91e6faa..da3c599584a3 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -61,7 +61,7 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_BIOS) {
reboot_type = BOOT_BIOS;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
- "BIOS", d->ident);
+ d->ident, "BIOS");
}
return 0;
}
@@ -117,7 +117,7 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_CF9) {
reboot_type = BOOT_CF9;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
- "PCI", d->ident);
+ d->ident, "PCI");
}
return 0;
}
@@ -127,7 +127,7 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_KBD) {
reboot_type = BOOT_KBD;
pr_info("%s series board detected. Selecting %s-method for reboot.\n",
- "KBD", d->ident);
+ d->ident, "KBD");
}
return 0;
}
@@ -136,228 +136,256 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
* This is a single dmi_table handling all reboot quirks.
*/
static struct dmi_system_id __initdata reboot_dmi_table[] = {
- { /* Handle problems with rebooting on Dell E520's */
- .callback = set_bios_reboot,
- .ident = "Dell E520",
+
+ /* Acer */
+ { /* Handle reboot issue on Acer Aspire one */
+ .callback = set_kbd_reboot,
+ .ident = "Acer Aspire One A110",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
- { /* Handle problems with rebooting on Dell 1300's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 1300",
+
+ /* Apple */
+ { /* Handle problems with rebooting on Apple MacBook5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBook5",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
- { /* Handle problems with rebooting on Dell 300's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 300",
+ { /* Handle problems with rebooting on Apple MacBookPro5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBookPro5",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
- .callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ { /* Handle problems with rebooting on Apple Macmini3,1 */
+ .callback = set_pci_reboot,
+ .ident = "Apple Macmini3,1",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
+ { /* Handle problems with rebooting on the iMac9,1. */
+ .callback = set_pci_reboot,
+ .ident = "Apple iMac9,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
+ },
+ },
+
+ /* ASUS */
+ { /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ .ident = "ASUS P4S800",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
- DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+
+ /* Dell */
+ { /* Handle problems with rebooting on Dell DXP061 */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ .ident = "Dell DXP061",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
- DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
+ { /* Handle problems with rebooting on Dell E520's */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 330",
+ .ident = "Dell E520",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
- DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
- .callback = set_bios_reboot,
- .ident = "Dell OptiPlex 360",
+ { /* Handle problems with rebooting on the Latitude E5410. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E5410",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
- DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"),
},
},
- { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
- .callback = set_bios_reboot,
- .ident = "Dell OptiPlex 760",
+ { /* Handle problems with rebooting on the Latitude E5420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E5420",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
- DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
},
},
- { /* Handle problems with rebooting on Dell 2400's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 2400",
+ { /* Handle problems with rebooting on the Latitude E6320. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E6320",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
},
},
- { /* Handle problems with rebooting on Dell T5400's */
- .callback = set_bios_reboot,
- .ident = "Dell Precision T5400",
+ { /* Handle problems with rebooting on the Latitude E6420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E6420",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
},
},
- { /* Handle problems with rebooting on Dell T7400's */
+ { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
.callback = set_bios_reboot,
- .ident = "Dell Precision T7400",
+ .ident = "Dell OptiPlex 330",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
},
},
- { /* Handle problems with rebooting on HP laptops */
+ { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
.callback = set_bios_reboot,
- .ident = "HP Compaq Laptop",
+ .ident = "Dell OptiPlex 360",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
+ DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
},
},
- { /* Handle problems with rebooting on Dell XPS710 */
+ { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
.callback = set_bios_reboot,
- .ident = "Dell XPS710",
+ .ident = "Dell OptiPlex 745",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
},
},
- { /* Handle problems with rebooting on Dell DXP061 */
+ { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
.callback = set_bios_reboot,
- .ident = "Dell DXP061",
+ .ident = "Dell OptiPlex 745",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
},
},
- { /* Handle problems with rebooting on Sony VGN-Z540N */
+ { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
.callback = set_bios_reboot,
- .ident = "Sony VGN-Z540N",
+ .ident = "Dell OptiPlex 745",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
},
},
- { /* Handle problems with rebooting on ASUS P4S800 */
+ { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
.callback = set_bios_reboot,
- .ident = "ASUS P4S800",
+ .ident = "Dell OptiPlex 760",
.matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
+ DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
},
},
-
- { /* Handle reboot issue on Acer Aspire one */
- .callback = set_kbd_reboot,
- .ident = "Acer Aspire One A110",
+ { /* Handle problems with rebooting on the OptiPlex 990. */
+ .callback = set_pci_reboot,
+ .ident = "Dell OptiPlex 990",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
- DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
},
},
- { /* Handle problems with rebooting on Apple MacBook5 */
- .callback = set_pci_reboot,
- .ident = "Apple MacBook5",
+ { /* Handle problems with rebooting on Dell 300's */
+ .callback = set_bios_reboot,
+ .ident = "Dell PowerEdge 300",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
},
},
- { /* Handle problems with rebooting on Apple MacBookPro5 */
- .callback = set_pci_reboot,
- .ident = "Apple MacBookPro5",
+ { /* Handle problems with rebooting on Dell 1300's */
+ .callback = set_bios_reboot,
+ .ident = "Dell PowerEdge 1300",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
},
},
- { /* Handle problems with rebooting on Apple Macmini3,1 */
- .callback = set_pci_reboot,
- .ident = "Apple Macmini3,1",
+ { /* Handle problems with rebooting on Dell 2400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell PowerEdge 2400",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
},
},
- { /* Handle problems with rebooting on the iMac9,1. */
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
.callback = set_pci_reboot,
- .ident = "Apple iMac9,1",
+ .ident = "Dell PowerEdge C6100",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
},
},
- { /* Handle problems with rebooting on the Latitude E6320. */
+ { /* Handle problems with rebooting on the Precision M6600. */
.callback = set_pci_reboot,
- .ident = "Dell Latitude E6320",
+ .ident = "Dell Precision M6600",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
- { /* Handle problems with rebooting on the Latitude E5420. */
- .callback = set_pci_reboot,
- .ident = "Dell Latitude E5420",
+ { /* Handle problems with rebooting on Dell T5400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T5400",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
},
},
- { /* Handle problems with rebooting on the Latitude E6420. */
- .callback = set_pci_reboot,
- .ident = "Dell Latitude E6420",
+ { /* Handle problems with rebooting on Dell T7400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T7400",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
},
},
- { /* Handle problems with rebooting on the OptiPlex 990. */
- .callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ { /* Handle problems with rebooting on Dell XPS710 */
+ .callback = set_bios_reboot,
+ .ident = "Dell XPS710",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
},
},
- { /* Handle problems with rebooting on the Precision M6600. */
- .callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+
+ /* Hewlett-Packard */
+ { /* Handle problems with rebooting on HP laptops */
+ .callback = set_bios_reboot,
+ .ident = "HP Compaq Laptop",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
+ },
+ },
+
+ /* Sony */
+ { /* Handle problems with rebooting on Sony VGN-Z540N */
+ .callback = set_bios_reboot,
+ .ident = "Sony VGN-Z540N",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
},
},
+
{ }
};
@@ -511,10 +539,13 @@ static void native_machine_emergency_restart(void)
case BOOT_CF9_COND:
if (port_cf9_safe) {
- u8 cf9 = inb(0xcf9) & ~6;
+ u8 reboot_code = reboot_mode == REBOOT_WARM ?
+ 0x06 : 0x0E;
+ u8 cf9 = inb(0xcf9) & ~reboot_code;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
- outb(cf9|6, 0xcf9); /* Actually do the reset */
+ /* Actually do the reset */
+ outb(cf9|reboot_code, 0xcf9);
udelay(50);
}
reboot_type = BOOT_KBD;
@@ -526,6 +557,10 @@ static void native_machine_emergency_restart(void)
void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
+#ifdef CONFIG_X86_IO_APIC
+ disable_IO_APIC();
+#endif
+
#ifdef CONFIG_SMP
/*
* Stop all of the others. Also disable the local irq to
@@ -538,10 +573,6 @@ void native_machine_shutdown(void)
lapic_shutdown();
-#ifdef CONFIG_X86_IO_APIC
- disable_IO_APIC();
-#endif
-
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0aa29394ed6f..ca9622a25e95 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -12,7 +12,7 @@
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/time.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/rtc.h>
#ifdef CONFIG_X86_32
@@ -189,9 +189,17 @@ static __init int add_rtc_cmos(void)
return 0;
/* Intel MID platforms don't have ioport rtc */
- if (mrst_identify_cpu())
+ if (intel_mid_identify_cpu())
return -ENODEV;
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
+ /* This warning can likely go away again in a year or two. */
+ pr_info("ACPI: not registering RTC platform device\n");
+ return -ENODEV;
+ }
+#endif
+
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
"registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f8ec57815c05..918d489fa53d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -206,9 +206,9 @@ EXPORT_SYMBOL(boot_cpu_data);
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features;
#else
-unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -426,25 +426,23 @@ static void __init reserve_initrd(void)
static void __init parse_setup_data(void)
{
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- u32 data_len, map_len;
+ u32 data_len, map_len, data_type;
map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
(u64)sizeof(struct setup_data));
data = early_memremap(pa_data, map_len);
data_len = data->len + sizeof(struct setup_data);
- if (data_len > map_len) {
- early_iounmap(data, map_len);
- data = early_memremap(pa_data, data_len);
- map_len = data_len;
- }
+ data_type = data->type;
+ pa_next = data->next;
+ early_iounmap(data, map_len);
- switch (data->type) {
+ switch (data_type) {
case SETUP_E820_EXT:
- parse_e820_ext(data);
+ parse_e820_ext(pa_data, data_len);
break;
case SETUP_DTB:
add_dtb(pa_data);
@@ -452,8 +450,7 @@ static void __init parse_setup_data(void)
default:
break;
}
- pa_data = data->next;
- early_iounmap(data, map_len);
+ pa_data = pa_next;
}
}
@@ -996,6 +993,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
dmi_scan_machine();
+ dmi_memdev_walk();
dmi_set_dump_stack_arch_desc();
/*
@@ -1070,7 +1068,7 @@ void __init setup_arch(char **cmdline_p)
cleanup_highmap();
- memblock.current_limit = ISA_END_ADDRESS;
+ memblock_set_current_limit(ISA_END_ADDRESS);
memblock_x86_fill();
/*
@@ -1103,7 +1101,7 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
- memblock.current_limit = get_max_mapped();
+ memblock_set_current_limit(get_max_mapped());
dma_contiguous_reserve(0);
/*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cf913587d4dd..9e5de6813e1f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -358,7 +358,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -423,7 +423,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -490,7 +490,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+ compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
@@ -533,7 +533,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
-unsigned long sys_sigreturn(void)
+asmlinkage unsigned long sys_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
@@ -562,7 +562,7 @@ badframe:
}
#endif /* CONFIG_X86_32 */
-long sys_rt_sigreturn(void)
+asmlinkage long sys_rt_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
@@ -728,7 +728,7 @@ static void do_signal(struct pt_regs *regs)
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
-void
+__visible void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
user_exit();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index cdaa347dfcad..7c3a5a61f2e4 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -256,7 +256,7 @@ static inline void __smp_reschedule_interrupt(void)
scheduler_ipi();
}
-void smp_reschedule_interrupt(struct pt_regs *regs)
+__visible void smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
__smp_reschedule_interrupt();
@@ -271,7 +271,7 @@ static inline void smp_entering_irq(void)
irq_enter();
}
-void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
/*
* Need to call irq_enter() before calling the trace point.
@@ -295,14 +295,14 @@ static inline void __smp_call_function_interrupt(void)
inc_irq_stat(irq_call_count);
}
-void smp_call_function_interrupt(struct pt_regs *regs)
+__visible void smp_call_function_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
__smp_call_function_interrupt();
exiting_irq();
}
-void smp_trace_call_function_interrupt(struct pt_regs *regs)
+__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
@@ -317,14 +317,14 @@ static inline void __smp_call_function_single_interrupt(void)
inc_irq_stat(irq_call_count);
}
-void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
-void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
smp_entering_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index aecc98a93d1b..2a165580fa16 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,11 +73,10 @@
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
-
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-
#include <asm/realmode.h>
+#include <asm/misc.h>
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -648,21 +647,46 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
+void smp_announce(void)
+{
+ int num_nodes = num_online_nodes();
+
+ printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
+ num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
+}
+
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
int node = early_cpu_to_node(cpu);
+ static int width, node_width;
+
+ if (!width)
+ width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
+
+ if (!node_width)
+ node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
+
+ if (cpu == 1)
+ printk(KERN_INFO "x86: Booting SMP configuration:\n");
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
if (current_node > (-1))
- pr_cont(" OK\n");
+ pr_cont("\n");
current_node = node;
- pr_info("Booting Node %3d, Processors ", node);
+
+ printk(KERN_INFO ".... node %*s#%d, CPUs: ",
+ node_width - num_digits(node), " ", node);
}
- pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
- return;
+
+ /* Add padding for the BSP */
+ if (cpu == 1)
+ pr_cont("%*s", width + 1, " ");
+
+ pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
+
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
node, cpu, apicid);
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
index 147fcd4941c4..e9bcd57d8a9e 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/kernel/syscall_32.c
@@ -15,7 +15,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void);
-const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 5c7f8c20da74..4ac730b37f0b 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -4,6 +4,7 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/asm-offsets.h>
+#include <asm/syscall.h>
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
@@ -19,11 +20,9 @@
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
-typedef void (*sys_call_ptr_t)(void);
-
extern void sys_ni_syscall(void);
-const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c
new file mode 100644
index 000000000000..193ec2ce46c7
--- /dev/null
+++ b/arch/x86/kernel/sysfb.c
@@ -0,0 +1,74 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * Simple-Framebuffer support for x86 systems
+ * Create a platform-device for any available boot framebuffer. The
+ * simple-framebuffer platform device is already available on DT systems, so
+ * this module parses the global "screen_info" object and creates a suitable
+ * platform device compatible with the "simple-framebuffer" DT object. If
+ * the framebuffer is incompatible, we instead create a legacy
+ * "vesa-framebuffer", "efi-framebuffer" or "platform-framebuffer" device and
+ * pass the screen_info as platform_data. This allows legacy drivers
+ * to pick these devices up without messing with simple-framebuffer drivers.
+ * The global "screen_info" is still valid at all times.
+ *
+ * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer"
+ * platform devices, but only use legacy framebuffer devices for
+ * backwards compatibility.
+ *
+ * TODO: We set the dev_id field of all platform-devices to 0. This allows
+ * other x86 OF/DT parsers to create such devices, too. However, they must
+ * start at offset 1 for this to work.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/platform_data/simplefb.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
+#include <asm/sysfb.h>
+
+static __init int sysfb_init(void)
+{
+ struct screen_info *si = &screen_info;
+ struct simplefb_platform_data mode;
+ struct platform_device *pd;
+ const char *name;
+ bool compatible;
+ int ret;
+
+ sysfb_apply_efi_quirks();
+
+ /* try to create a simple-framebuffer device */
+ compatible = parse_mode(si, &mode);
+ if (compatible) {
+ ret = create_simplefb(si, &mode);
+ if (!ret)
+ return 0;
+ }
+
+ /* if the FB is incompatible, create a legacy framebuffer device */
+ if (si->orig_video_isVGA == VIDEO_TYPE_EFI)
+ name = "efi-framebuffer";
+ else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+ name = "vesa-framebuffer";
+ else
+ name = "platform-framebuffer";
+
+ pd = platform_device_register_resndata(NULL, name, 0,
+ NULL, 0, si, sizeof(*si));
+ return IS_ERR(pd) ? PTR_ERR(pd) : 0;
+}
+
+/* must execute after PCI subsystem for EFI quirks */
+device_initcall(sysfb_init);
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
new file mode 100644
index 000000000000..b285d4e8c68e
--- /dev/null
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -0,0 +1,214 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * EFI Quirks
+ * Several EFI systems do not correctly advertise their boot framebuffers.
+ * Hence, we use this static table of known broken machines and fix up the
+ * information so framebuffer drivers can load corectly.
+ */
+
+#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/screen_info.h>
+#include <video/vga.h>
+#include <asm/sysfb.h>
+
+enum {
+ OVERRIDE_NONE = 0x0,
+ OVERRIDE_BASE = 0x1,
+ OVERRIDE_STRIDE = 0x2,
+ OVERRIDE_HEIGHT = 0x4,
+ OVERRIDE_WIDTH = 0x8,
+};
+
+struct efifb_dmi_info efifb_dmi_list[] = {
+ [M_I17] = { "i17", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, /* guess */
+ [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE },
+ [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, /* guess */
+ [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080, OVERRIDE_NONE },
+ [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440, OVERRIDE_NONE },
+ [M_MINI]= { "mini", 0x80000000, 2048 * 4, 1024, 768, OVERRIDE_NONE },
+ [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768, OVERRIDE_NONE },
+ [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ /* 11" Macbook Air 3,1 passes the wrong stride */
+ [M_MBA_3] = { "mba3", 0, 2048 * 4, 0, 0, OVERRIDE_STRIDE },
+ [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_2] = { "mbp2", 0, 0, 0, 0, OVERRIDE_NONE }, /* placeholder */
+ [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050, OVERRIDE_NONE },
+ [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MBP_8_2] = { "mbp82", 0x90010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
+};
+
+#define choose_value(dmivalue, fwvalue, field, flags) ({ \
+ typeof(fwvalue) _ret_ = fwvalue; \
+ if ((flags) & (field)) \
+ _ret_ = dmivalue; \
+ else if ((fwvalue) == 0) \
+ _ret_ = dmivalue; \
+ _ret_; \
+ })
+
+static int __init efifb_set_system(const struct dmi_system_id *id)
+{
+ struct efifb_dmi_info *info = id->driver_data;
+
+ if (info->base == 0 && info->height == 0 && info->width == 0 &&
+ info->stride == 0)
+ return 0;
+
+ /* Trust the bootloader over the DMI tables */
+ if (screen_info.lfb_base == 0) {
+#if defined(CONFIG_PCI)
+ struct pci_dev *dev = NULL;
+ int found_bar = 0;
+#endif
+ if (info->base) {
+ screen_info.lfb_base = choose_value(info->base,
+ screen_info.lfb_base, OVERRIDE_BASE,
+ info->flags);
+
+#if defined(CONFIG_PCI)
+ /* make sure that the address in the table is actually
+ * on a VGA device's PCI BAR */
+
+ for_each_pci_dev(dev) {
+ int i;
+ if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+ continue;
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ resource_size_t start, end;
+
+ start = pci_resource_start(dev, i);
+ if (start == 0)
+ break;
+ end = pci_resource_end(dev, i);
+ if (screen_info.lfb_base >= start &&
+ screen_info.lfb_base < end) {
+ found_bar = 1;
+ }
+ }
+ }
+ if (!found_bar)
+ screen_info.lfb_base = 0;
+#endif
+ }
+ }
+ if (screen_info.lfb_base) {
+ screen_info.lfb_linelength = choose_value(info->stride,
+ screen_info.lfb_linelength, OVERRIDE_STRIDE,
+ info->flags);
+ screen_info.lfb_width = choose_value(info->width,
+ screen_info.lfb_width, OVERRIDE_WIDTH,
+ info->flags);
+ screen_info.lfb_height = choose_value(info->height,
+ screen_info.lfb_height, OVERRIDE_HEIGHT,
+ info->flags);
+ if (screen_info.orig_video_isVGA == 0)
+ screen_info.orig_video_isVGA = VIDEO_TYPE_EFI;
+ } else {
+ screen_info.lfb_linelength = 0;
+ screen_info.lfb_width = 0;
+ screen_info.lfb_height = 0;
+ screen_info.orig_video_isVGA = 0;
+ return 0;
+ }
+
+ printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x "
+ "(%dx%d, stride %d)\n", id->ident,
+ screen_info.lfb_base, screen_info.lfb_width,
+ screen_info.lfb_height, screen_info.lfb_linelength);
+
+ return 1;
+}
+
+#define EFIFB_DMI_SYSTEM_ID(vendor, name, enumid) \
+ { \
+ efifb_set_system, \
+ name, \
+ { \
+ DMI_MATCH(DMI_BIOS_VENDOR, vendor), \
+ DMI_MATCH(DMI_PRODUCT_NAME, name) \
+ }, \
+ &efifb_dmi_list[enumid] \
+ }
+
+static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac5,1", M_I20),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook2,1", M_MB),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir3,1", M_MBA_3),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro8,2", M_MBP_8_2),
+ {},
+};
+
+__init void sysfb_apply_efi_quirks(void)
+{
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
+ !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
+ dmi_check_system(efifb_dmi_system_table);
+}
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
new file mode 100644
index 000000000000..86179d409893
--- /dev/null
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -0,0 +1,95 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * simple-framebuffer probing
+ * Try to convert "screen_info" into a "simple-framebuffer" compatible mode.
+ * If the mode is incompatible, we return "false" and let the caller create
+ * legacy nodes instead.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/platform_data/simplefb.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
+#include <asm/sysfb.h>
+
+static const char simplefb_resname[] = "BOOTFB";
+static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
+
+/* try parsing x86 screen_info into a simple-framebuffer mode struct */
+__init bool parse_mode(const struct screen_info *si,
+ struct simplefb_platform_data *mode)
+{
+ const struct simplefb_format *f;
+ __u8 type;
+ unsigned int i;
+
+ type = si->orig_video_isVGA;
+ if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(formats); ++i) {
+ f = &formats[i];
+ if (si->lfb_depth == f->bits_per_pixel &&
+ si->red_size == f->red.length &&
+ si->red_pos == f->red.offset &&
+ si->green_size == f->green.length &&
+ si->green_pos == f->green.offset &&
+ si->blue_size == f->blue.length &&
+ si->blue_pos == f->blue.offset &&
+ si->rsvd_size == f->transp.length &&
+ si->rsvd_pos == f->transp.offset) {
+ mode->format = f->name;
+ mode->width = si->lfb_width;
+ mode->height = si->lfb_height;
+ mode->stride = si->lfb_linelength;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+__init int create_simplefb(const struct screen_info *si,
+ const struct simplefb_platform_data *mode)
+{
+ struct platform_device *pd;
+ struct resource res;
+ unsigned long len;
+
+ /* don't use lfb_size as it may contain the whole VMEM instead of only
+ * the part that is occupied by the framebuffer */
+ len = mode->height * mode->stride;
+ len = PAGE_ALIGN(len);
+ if (len > (u64)si->lfb_size << 16) {
+ printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
+ return -EINVAL;
+ }
+
+ /* setup IORESOURCE_MEM as framebuffer memory */
+ memset(&res, 0, sizeof(res));
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res.name = simplefb_resname;
+ res.start = si->lfb_base;
+ res.end = si->lfb_base + len - 1;
+ if (res.end <= res.start)
+ return -EINVAL;
+
+ pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0,
+ &res, 1, mode, sizeof(*mode));
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ return 0;
+}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index addf7b58f4e8..91a4496db434 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -301,6 +301,15 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
return 0;
}
+static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
+{
+ if (!tboot_enabled())
+ return 0;
+
+ pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
+ return -ENODEV;
+}
+
static atomic_t ap_wfs_count;
static int tboot_wait_for_aps(int num_aps)
@@ -422,6 +431,7 @@ static __init int tboot_late_init(void)
#endif
acpi_os_set_prepare_sleep(&tboot_sleep);
+ acpi_os_set_prepare_extended_sleep(&tboot_extended_sleep);
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1b23a1c92746..729aa779ff75 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,6 +58,7 @@
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
+#include <asm/alternative.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -87,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs)
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- inc_preempt_count();
+ preempt_count_inc();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -102,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- dec_preempt_count();
+ preempt_count_dec();
}
static int __kprobes
@@ -327,6 +328,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
+ if (poke_int3_handler(regs))
+ return;
+
prev_state = exception_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6ff49247edf8..930e5d48f560 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -89,6 +89,12 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
+int check_tsc_disabled(void)
+{
+ return tsc_disabled;
+}
+EXPORT_SYMBOL_GPL(check_tsc_disabled);
+
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 10c4f3006afd..da6b35a98260 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -199,6 +199,15 @@ SECTIONS
__x86_cpu_dev_end = .;
}
+#ifdef CONFIG_X86_INTEL_MID
+ .x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
+ LOAD_OFFSET) {
+ __x86_intel_mid_dev_start = .;
+ *(.x86_intel_mid_dev.init)
+ __x86_intel_mid_dev_end = .;
+ }
+#endif
+
/*
* start address and size of operations which during runtime
* can be patched with virtualization friendly instructions or
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b014d9414d08..040681928e9d 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
EXPORT_SYMBOL(native_load_gs_index);
#endif
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 5f24c71accaa..8ce0072cd700 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -107,6 +107,8 @@ struct x86_platform_ops x86_platform = {
};
EXPORT_SYMBOL_GPL(x86_platform);
+
+#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
.compose_msi_msg = native_compose_msi_msg,
@@ -116,6 +118,28 @@ struct x86_msi_ops x86_msi = {
.setup_hpet_msi = default_setup_hpet_msi,
};
+/* MSI arch specific hooks */
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+ x86_msi.teardown_msi_irqs(dev);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+ x86_msi.teardown_msi_irq(irq);
+}
+
+void arch_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ x86_msi.restore_msi_irqs(dev, irq);
+}
+#endif
+
struct x86_io_apic_ops x86_io_apic_ops = {
.init = native_io_apic_init_mappings,
.read = native_io_apic_read,