147 files changed, 2951 insertions, 1566 deletions
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 10dcec7e7321..f7b84aced654 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -43,8 +43,8 @@
 
 	pmu {
 		compatible = "arm,cortex-a9-pmu";
-		interrupts = <0 8 0x04
-			      0 9 0x04>;
+		interrupts = <0 6 0x04
+			      0 7 0x04>;
 	};
 
 	L2: l2-cache {
@@ -119,8 +119,8 @@
 		gmac0: eth@e2000000 {
 			compatible = "st,spear600-gmac";
 			reg = <0xe2000000 0x8000>;
-			interrupts = <0 23 0x4
-				      0 24 0x4>;
+			interrupts = <0 33 0x4
+				      0 34 0x4>;
 			interrupt-names = "macirq", "eth_wake_irq";
 			status = "disabled";
 		};
@@ -202,6 +202,7 @@
 			kbd@e0300000 {
 				compatible = "st,spear300-kbd";
 				reg = <0xe0300000 0x1000>;
+				interrupts = <0 52 0x4>;
 				status = "disabled";
 			};
 
@@ -224,7 +225,7 @@
 			serial@e0000000 {
 				compatible = "arm,pl011", "arm,primecell";
 				reg = <0xe0000000 0x1000>;
-				interrupts = <0 36 0x4>;
+				interrupts = <0 35 0x4>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index c13fd1f3b09f..e4e912f95024 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -15,8 +15,8 @@
 /include/ "spear320.dtsi"
 
 / {
-	model = "ST SPEAr300 Evaluation Board";
-	compatible = "st,spear300-evb", "st,spear300";
+	model = "ST SPEAr320 Evaluation Board";
+	compatible = "st,spear320-evb", "st,spear320";
 	#address-cells = <1>;
 	#size-cells = <1>;
 
@@ -26,7 +26,7 @@
 
 	ahb {
 		pinmux@b3000000 {
-			st,pinmux-mode = <3>;
+			st,pinmux-mode = <4>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&state_default>;
 
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index 089f0a42c50e..a3c36e47d7ef 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -181,6 +181,7 @@
 			timer@f0000000 {
 				compatible = "st,spear-timer";
 				reg = <0xf0000000 0x400>;
+				interrupt-parent = <&vic0>;
 				interrupts = <16>;
 			};
 		};
diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index 0f41bd1c47c3..66db5f13af84 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -87,7 +87,7 @@ void __init spear3xx_map_io(void)
 
 static void __init spear3xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
 
 	spear3xx_clk_init();
diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c
index 2e2e3596583e..9af67d003c62 100644
--- a/arch/arm/mach-spear6xx/spear6xx.c
+++ b/arch/arm/mach-spear6xx/spear6xx.c
@@ -423,7 +423,7 @@ void __init spear6xx_map_io(void)
 
 static void __init spear6xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
 
 	spear6xx_clk_init();
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index f7264621e58d..149fbefc1a4d 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,9 +180,7 @@ void __cpuinit start_secondary(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	local_irq_enable();
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1113b8aba07f..963d2db53bfa 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -382,7 +382,6 @@ smp_callin (void)
 	set_numa_node(cpu_to_node_map[cpuid]);
 	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
 
-	ipi_call_lock_irq();
 	spin_lock(&vector_lock);
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
@@ -390,7 +389,6 @@ smp_callin (void)
 	set_cpu_online(cpuid, true);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
-	ipi_call_unlock_irq();
 
 	smp_setup_percpu_timer();
 
diff --git a/arch/m32r/boot/compressed/Makefile b/arch/m32r/boot/compressed/Makefile
index 177716b1d613..01729c2979ba 100644
--- a/arch/m32r/boot/compressed/Makefile
+++ b/arch/m32r/boot/compressed/Makefile
@@ -43,9 +43,9 @@ endif
 
 OBJCOPYFLAGS += -R .empty_zero_page
 
-suffix_$(CONFIG_KERNEL_GZIP)	= gz
-suffix_$(CONFIG_KERNEL_BZIP2)	= bz2
-suffix_$(CONFIG_KERNEL_LZMA)	= lzma
+suffix-$(CONFIG_KERNEL_GZIP)	= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	= lzma
 
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 370d60881977..28a09529f206 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -28,7 +28,7 @@ static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;
 
 #ifdef CONFIG_KERNEL_BZIP2
-static void *memset(void *s, int c, size_t n)
+void *memset(void *s, int c, size_t n)
 {
 	char *ss = s;
 
@@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n)
 #endif
 
 #ifdef CONFIG_KERNEL_GZIP
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	char *d = dest;
+	const char *s = src;
+	while (n--)
+		*d++ = *s++;
+
+	return dest;
+}
+
 #define BOOT_HEAP_SIZE             0x10000
 #include "../../../../lib/decompress_inflate.c"
 #endif
diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index 527527584dd0..4313aa62b51b 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -113,9 +113,6 @@ struct pt_regs {
 
 #define PTRACE_OLDSETOPTIONS	21
 
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD	0x00000001
-
 #ifdef __KERNEL__
 
 #include <asm/m32r.h>		/* M32R_PSW_BSM, M32R_PSW_BPM */
diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index cf7829a61551..c689b828dfe2 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -79,11 +79,6 @@ static __inline__ int cpu_number_map(int cpu)
 	return cpu;
 }
 
-static __inline__ unsigned int num_booting_cpus(void)
-{
-	return cpumask_weight(&cpu_callout_map);
-}
-
 extern void smp_send_timer(void);
 extern unsigned long send_IPI_mask_phys(const cpumask_t*, int, int);
 
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 4c03361537aa..51f5e9aa4901 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child)
 
 	if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
 	    != sizeof(insn))
-		return -EIO;
+		return;
 
 	compute_next_pc(insn, pc, &next_pc, child);
 	if (next_pc & 0x80000000)
-		return -EIO;
+		return;
 
 	if (embed_debug_trap(child, next_pc))
-		return -EIO;
+		return;
 
 	invalidate_cache();
-	return 0;
 }
 
 void user_disable_single_step(struct task_struct *child)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index f3fb2c029cfc..d0f60b97bbc5 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 			case -ERESTARTNOINTR:
 				regs->r0 = regs->orig_r0;
 				if (prev_insn(regs) < 0)
-					return -EFAULT;
+					return;
 		}
 	}
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09ab87ee6fef..b3e10fdd3898 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -288,6 +288,7 @@ config MIPS_MALTA
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_HAS_CPU_MIPS64_R1
+	select SYS_HAS_CPU_MIPS64_R2
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_CPU_RM7000
 	select SYS_HAS_EARLY_PRINTK
@@ -1423,6 +1424,7 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
+	select ARCH_SPARSEMEM_ENABLE
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 6210b8d84109..b311be45a720 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -21,6 +21,7 @@ config BCM47XX_BCMA
 	select BCMA
 	select BCMA_HOST_SOC
 	select BCMA_DRIVER_MIPS
+	select BCMA_HOST_PCI if PCI
 	select BCMA_DRIVER_PCI_HOSTMODE if PCI
 	default y
 	help
diff --git a/arch/mips/bcm63xx/dev-pcmcia.c b/arch/mips/bcm63xx/dev-pcmcia.c
index de4d917fd54d..a551bab5ecb9 100644
--- a/arch/mips/bcm63xx/dev-pcmcia.c
+++ b/arch/mips/bcm63xx/dev-pcmcia.c
@@ -79,11 +79,11 @@ static int __init config_pcmcia_cs(unsigned int cs,
 	return ret;
 }
 
-static const __initdata struct {
+static const struct {
 	unsigned int	cs;
 	unsigned int	base;
 	unsigned int	size;
-} pcmcia_cs[3] = {
+} pcmcia_cs[3] __initconst = {
 	{
 		.cs	= MPI_CS_PCMCIA_COMMON,
 		.base	= BCM_PCMCIA_COMMON_BASE_PA,
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index f9e275a50d98..2f4f6d5e05b6 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -82,10 +82,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 	help
 	  Lock the kernel's implementation of memcpy() into L2.
 
-config ARCH_SPARSEMEM_ENABLE
-	def_bool y
-	select SPARSEMEM_STATIC
-
 config IOMMU_HELPER
 	bool
 
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4b93048044eb..ee1fb9f7f517 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -185,7 +185,6 @@ static void __cpuinit octeon_init_secondary(void)
 	octeon_init_cvmcount();
 
 	octeon_irq_setup_secondary();
-	raw_local_irq_enable();
 }
 
 /**
@@ -233,6 +232,7 @@ static void octeon_smp_finish(void)
 
 	/* to generate the first CPU timer interrupt */
 	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+	local_irq_enable();
 }
 
 /**
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 2e1ad4c652b7..82ad35ce2b45 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -17,7 +17,6 @@
 #include <linux/irqflags.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
-#include <asm/bug.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/cpu-features.h>
 #include <asm/sgidefs.h>
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 285a41fa0b18..eee10dc07ac1 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_CMPXCHG_H
 #define __ASM_CMPXCHG_H
 
+#include <linux/bug.h>
 #include <linux/irqflags.h>
 #include <asm/war.h>
 
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index f9fa2a479dd0..95e40c1e8ed1 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -94,6 +94,7 @@
 #define PRID_IMP_24KE		0x9600
 #define PRID_IMP_74K		0x9700
 #define PRID_IMP_1004K		0x9900
+#define PRID_IMP_M14KC		0x9c00
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -260,12 +261,12 @@ enum cpu_type_enum {
 	 */
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
-	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC,
+	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_M14KC,
 
 	/*
 	 * MIPS64 class processors
 	 */
-	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
+	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
 	CPU_XLR, CPU_XLP,
 
@@ -288,7 +289,7 @@ enum cpu_type_enum {
 #define MIPS_CPU_ISA_M64R2	0x00000100
 
 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_I | MIPS_CPU_ISA_II | \
-	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 )
+	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2)
 #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \
 	MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)
 
diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h
index 86548da650e7..991b659e2548 100644
--- a/arch/mips/include/asm/gic.h
+++ b/arch/mips/include/asm/gic.h
@@ -206,7 +206,7 @@
 
 #define GIC_VPE_EIC_SHADOW_SET_BASE	0x0100
 #define GIC_VPE_EIC_SS(intr) \
-	(GIC_EIC_SHADOW_SET_BASE + (4 * intr))
+	(GIC_VPE_EIC_SHADOW_SET_BASE + (4 * intr))
 
 #define GIC_VPE_EIC_VEC_BASE		0x0800
 #define GIC_VPE_EIC_VEC(intr) \
@@ -330,6 +330,17 @@ struct gic_intr_map {
 #define GIC_FLAG_TRANSPARENT   0x02
 };
 
+/*
+ * This is only used in EIC mode. This helps to figure out which
+ * shared interrupts we need to process when we get a vector interrupt.
+ */
+#define GIC_MAX_SHARED_INTR  0x5
+struct gic_shared_intr_map {
+	unsigned int num_shared_intr;
+	unsigned int intr_list[GIC_MAX_SHARED_INTR];
+	unsigned int local_intr_mask;
+};
+
 extern void gic_init(unsigned long gic_base_addr,
 	unsigned long gic_addrspace_size, struct gic_intr_map *intrmap,
 	unsigned int intrmap_size, unsigned int irqbase);
@@ -338,5 +349,7 @@ extern unsigned int gic_get_int(void);
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
+extern void gic_bind_eic_interrupt(int irq, int set);
+extern unsigned int gic_get_timer_pending(void);
 
 #endif /* _ASM_GICREGS_H */
diff --git a/arch/mips/include/asm/inst.h b/arch/mips/include/asm/inst.h
index 7ebfc392e58d..ab84064283db 100644
--- a/arch/mips/include/asm/inst.h
+++ b/arch/mips/include/asm/inst.h
@@ -251,7 +251,7 @@ struct f_format {	/* FPU register format */
 	unsigned int func : 6;
 };
 
-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int opcode : 6;
 	unsigned int fr : 5;
 	unsigned int ft : 5;
@@ -324,7 +324,7 @@ struct f_format {	/* FPU register format */
 	unsigned int opcode : 6;
 };
 
-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int fmt : 2;
 	unsigned int func : 4;
 	unsigned int fd : 5;
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a58f22998a86..29d9c23c20c7 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
+#include <asm/bug.h>
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index fb698dc09bc9..78dbb8a86da2 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -136,6 +136,7 @@ extern void free_irqno(unsigned int irq);
  * IE7.  Since R2 their number has to be read from the c0_intctl register.
  */
 #define CP0_LEGACY_COMPARE_IRQ 7
+#define CP0_LEGACY_PERFCNT_IRQ 7
 
 extern int cp0_compare_irq;
 extern int cp0_compare_irq_shift;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 94d4faad29a1..fdcd78ca1b03 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -99,7 +99,7 @@
 #define CKCTL_6368_USBH_CLK_EN		(1 << 15)
 #define CKCTL_6368_DISABLE_GLESS_EN	(1 << 16)
 #define CKCTL_6368_NAND_CLK_EN		(1 << 17)
-#define CKCTL_6368_IPSEC_CLK_EN		(1 << 17)
+#define CKCTL_6368_IPSEC_CLK_EN		(1 << 18)
 
 #define CKCTL_6368_ALL_SAFE_EN		(CKCTL_6368_SWPKT_USB_EN |	\
 					CKCTL_6368_SWPKT_SAR_EN |	\
diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h
index d11aa02a956a..5447d9fc4219 100644
--- a/arch/mips/include/asm/mips-boards/maltaint.h
+++ b/arch/mips/include/asm/mips-boards/maltaint.h
@@ -86,6 +86,16 @@
 #define GIC_CPU_INT4		4 /* .			*/
 #define GIC_CPU_INT5		5 /* Core Interrupt 5   */
 
+/* MALTA GIC local interrupts */
+#define GIC_INT_TMR             (GIC_CPU_INT5)
+#define GIC_INT_PERFCTR         (GIC_CPU_INT5)
+
+/* GIC constants */
+/* Add 2 to convert non-eic hw int # to eic vector # */
+#define GIC_CPU_TO_VEC_OFFSET   (2)
+/* If we map an intr to pin X, GIC will actually generate vector X+1 */
+#define GIC_PIN_TO_VEC_OFFSET   (1)
+
 #define GIC_EXT_INTR(x)		x
 
 /* External Interrupts used for IPI */
diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index c9420aa97e32..e71ff4c317f2 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h
@@ -48,7 +48,7 @@
 #define CP0_VPECONF0		$1, 2
 #define CP0_VPECONF1		$1, 3
 #define CP0_YQMASK		$1, 4
-#define CP0_VPESCHEDULE	$1, 5
+#define CP0_VPESCHEDULE		$1, 5
 #define CP0_VPESCHEFBK		$1, 6
 #define CP0_TCSTATUS		$2, 1
 #define CP0_TCBIND		$2, 2
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 5d33621b5658..4f8ddba8c360 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -22,7 +22,7 @@ struct task_struct;
  * switch_to(n) should switch tasks to task nr n, first
  * checking that n isn't the current task, in which case it does nothing.
  */
-extern asmlinkage void *resume(void *last, void *next, void *next_ti);
+extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu);
 
 extern unsigned int ll_bit;
 extern struct task_struct *ll_task;
@@ -66,11 +66,13 @@ do {									\
 
 #define switch_to(prev, next, last)					\
 do {									\
+	u32 __usedfpu;							\
 	__mips_mt_fpaff_switch_to(prev);				\
 	if (cpu_has_dsp)						\
 		__save_dsp(prev);					\
 	__clear_software_ll_bit();					\
-	(last) = resume(prev, next, task_thread_info(next));		\
+	__usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU);	\
+	(last) = resume(prev, next, task_thread_info(next), __usedfpu);	\
 } while (0)
 
 #define finish_arch_switch(prev)					\
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index e2eca7d10598..ca97e0ecb64b 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -60,6 +60,8 @@ struct thread_info {
 register struct thread_info *__current_thread_info __asm__("$28");
 #define current_thread_info()  __current_thread_info
 
+#endif /* !__ASSEMBLY__ */
+
 /* thread information allocation */
 #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
 #define THREAD_SIZE_ORDER (1)
@@ -85,8 +87,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#endif /* !__ASSEMBLY__ */
-
 #define PREEMPT_ACTIVE		0x10000000
 
 /*
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6ae7ce4ac63e..f4630e1082ab 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -4,7 +4,7 @@
  * Copyright (C) xxxx  the Anonymous
  * Copyright (C) 1994 - 2006 Ralf Baechle
  * Copyright (C) 2003, 2004  Maciej W. Rozycki
- * Copyright (C) 2001, 2004  MIPS Inc.
+ * Copyright (C) 2001, 2004, 2011, 2012  MIPS Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -199,6 +199,7 @@ void __init check_wait(void)
 		cpu_wait = rm7k_wait_irqoff;
 		break;
 
+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_1004K:
@@ -810,6 +811,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_5KC;
 		__cpu_name[cpu] = "MIPS 5Kc";
 		break;
+	case PRID_IMP_5KE:
+		c->cputype = CPU_5KE;
+		__cpu_name[cpu] = "MIPS 5KE";
+		break;
 	case PRID_IMP_20KC:
 		c->cputype = CPU_20KC;
 		__cpu_name[cpu] = "MIPS 20Kc";
@@ -831,6 +836,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_74K;
 		__cpu_name[cpu] = "MIPS 74Kc";
 		break;
+	case PRID_IMP_M14KC:
+		c->cputype = CPU_M14KC;
+		__cpu_name[cpu] = "MIPS M14Kc";
+		break;
 	case PRID_IMP_1004K:
 		c->cputype = CPU_1004K;
 		__cpu_name[cpu] = "MIPS 1004Kc";
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 57ba13edb03a..3fc1691110dc 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05 by Ralf Baechle
+ * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05, 12 by Ralf Baechle
  * Copyright (C) 1999, 2000, 01 Silicon Graphics, Inc.
  */
 #include <linux/interrupt.h>
@@ -35,6 +35,12 @@ EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(kernel_thread);
 
 /*
+ * Functions that operate on entire pages.  Mostly used by memory management.
+ */
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+/*
  * Userspace access stuff.
  */
 EXPORT_SYMBOL(__copy_user);
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index ce89c8061708..0441f54b2a6a 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -31,7 +31,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	7
 	LEAF(resume)
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index f29099b104c4..eb5e394a4650 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -162,11 +162,6 @@ static unsigned int counters_total_to_per_cpu(unsigned int counters)
 	return counters >> vpe_shift();
 }
 
-static unsigned int counters_per_cpu_to_total(unsigned int counters)
-{
-	return counters << vpe_shift();
-}
-
 #else /* !CONFIG_MIPS_MT_SMP */
 #define vpe_id()	0
 
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 293898391e67..9c51be5a163a 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -43,7 +43,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti) )
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 LEAF(resume)
 	mfc0	t1, CP0_STATUS
@@ -51,18 +51,9 @@ LEAF(resume)
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)
 
-	/*
-	 * check if we need to save FPU registers
-	 */
-	lw	t3, TASK_THREAD_INFO(a0)
-	lw	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
+	beqz	a3, 1f
 
-	and	t0, t0, t1
-	sw	t0, TI_FLAGS(t3)
+	PTR_L	t3, TASK_THREAD_INFO(a0)
 
 	/*
 	 * clear saved user stack CU1 bit
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 9414f9354469..42d2a3938420 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -41,7 +41,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	5
 	LEAF(resume)
@@ -53,16 +53,10 @@
 	/*
 	 * check if we need to save FPU registers
 	 */
-	PTR_L	t3, TASK_THREAD_INFO(a0)
-	LONG_L	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
 
-	and	t0, t0, t1
-	LONG_S	t0, TI_FLAGS(t3)
+	beqz    a3, 1f
 
+	PTR_L	t3, TASK_THREAD_INFO(a0)
 	/*
 	 * clear saved user stack CU1 bit
 	 */
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 3046e2986006..8e393b8443f7 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -15,7 +15,6 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
@@ -197,13 +196,6 @@ static void bmips_init_secondary(void)
 
 	write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
 #endif
-
-	/* make sure there won't be a timer interrupt for a little while */
-	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
-
-	irq_enable_hazard();
-	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
-	irq_enable_hazard();
 }
 
 /*
@@ -212,6 +204,13 @@ static void bmips_init_secondary(void)
 static void bmips_smp_finish(void)
 {
 	pr_info("SMP: CPU%d is running\n", smp_processor_id());
+
+	/* make sure there won't be a timer interrupt for a little while */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+
+	irq_enable_hazard();
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
+	irq_enable_hazard();
 }
 
 /*
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 48650c818040..1268392f1d27 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -122,13 +122,21 @@ asmlinkage __cpuinit void start_secondary(void)
 
 	notify_cpu_starting(cpu);
 
-	mp_ops->smp_finish();
+	set_cpu_online(cpu, true);
+
 	set_cpu_sibling_map(cpu);
 
 	cpu_set(cpu, cpu_callin_map);
 
 	synchronise_count_slave();
 
+	/*
+	 * irq will be enabled in ->smp_finish(), enabling it too early
+	 * is dangerous.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+	mp_ops->smp_finish();
+
 	cpu_idle();
 }
 
@@ -196,8 +204,6 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	while (!cpu_isset(cpu, cpu_callin_map))
 		udelay(100);
 
-	set_cpu_online(cpu, true);
-
 	return 0;
 }
 
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index f5dd38f1d015..15b5f3cfd20c 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -322,7 +322,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
 
 /*
  * Common setup before any secondaries are started
- * Make sure all CPU's are in a sensible state before we boot any of the
+ * Make sure all CPUs are in a sensible state before we boot any of the
  * secondaries.
  *
  * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly
@@ -340,12 +340,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	/*
 	 * TCContext gets an offset from the base of the IPIQ array
 	 * to be used in low-level code to detect the presence of
-	 * an active IPI queue
+	 * an active IPI queue.
 	 */
 	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
-	/* In general, all TCs should have the same cpu_data indications */
+	/* In general, all TCs should have the same cpu_data indications. */
 	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
 	/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
 	if (cpu_data[0].cputype == CPU_34K ||
@@ -358,8 +358,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 }
 
 /*
- * Tweak to get Count registes in as close a sync as possible.
- * Value seems good for 34K-class cores.
+ * Tweak to get Count registes in as close a sync as possible.  The
+ * value seems good for 34K-class cores.
  */
 
 #define CP0_SKEW 8
@@ -615,7 +615,6 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 
 void smtc_init_secondary(void)
 {
-	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
@@ -631,6 +630,8 @@ void smtc_smp_finish(void)
 	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
 		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
 
+	local_irq_enable();
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index 99f913c8d7a6..842d55e411fd 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -111,7 +111,6 @@ void __cpuinit synchronise_count_master(void)
 void __cpuinit synchronise_count_slave(void)
 {
 	int i;
-	unsigned long flags;
 	unsigned int initcount;
 	int ncpus;
 
@@ -123,8 +122,6 @@ void __cpuinit synchronise_count_slave(void)
 	return;
 #endif
 
-	local_irq_save(flags);
-
 	/*
 	 * Not every cpu is online at the time this gets called,
 	 * so we first wait for the master to say everyone is ready
@@ -154,7 +151,5 @@ void __cpuinit synchronise_count_slave(void)
 	}
 	/* Arrange for an interrupt in a short while */
 	write_c0_compare(read_c0_count() + COUNTON);
-
-	local_irq_restore(flags);
 }
 #undef NR_LOOPS
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 2d0c2a277f52..c3c293543703 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -132,6 +132,9 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
 	unsigned long ra = regs->regs[31];
 	unsigned long pc = regs->cp0_epc;
 
+	if (!task)
+		task = current;
+
 	if (raw_show_trace || !__kernel_text_address(pc)) {
 		show_raw_backtrace(sp);
 		return;
@@ -1249,6 +1252,7 @@ static inline void parity_protection_init(void)
 		break;
 
 	case CPU_5KC:
+	case CPU_5KE:
 		write_c0_ecc(0x80000000);
 		back_to_back_c0_hazard();
 		/* Set the PE bit (bit 31) in the c0_errctl register. */
@@ -1498,6 +1502,7 @@ extern void flush_tlb_handlers(void);
  * Timer interrupt
  */
 int cp0_compare_irq;
+EXPORT_SYMBOL_GPL(cp0_compare_irq);
 int cp0_compare_irq_shift;
 
 /*
@@ -1597,7 +1602,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu)
 			cp0_perfcount_irq = -1;
 	} else {
 		cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ;
-		cp0_compare_irq_shift = cp0_compare_irq;
+		cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ;
 		cp0_perfcount_irq = -1;
 	}
 
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 924da5eb7031..df243a64f430 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm-generic/vmlinux.lds.h>
 
 #undef mips
@@ -72,7 +73,7 @@ SECTIONS
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */
 
-		INIT_TASK_DATA(PAGE_SIZE)
+		INIT_TASK_DATA(THREAD_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index 4aa20280613e..fd6203f14f1f 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,8 +3,8 @@
 #
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   gup.o init.o mmap.o page.o tlbex.o \
-				   tlbex-fault.o uasm.o
+				   gup.o init.o mmap.o page.o page-funcs.o \
+				   tlbex.o tlbex-fault.o uasm.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 5109be96d98d..f092c265dc63 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -977,7 +977,7 @@ static void __cpuinit probe_pcache(void)
 			c->icache.linesz = 2 << lsize;
 		else
 			c->icache.linesz = lsize;
-		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);
 		c->icache.ways = 1 + ((config1 >> 16) & 7);
 
 		icache_size = c->icache.sets *
@@ -997,7 +997,7 @@ static void __cpuinit probe_pcache(void)
 			c->dcache.linesz = 2 << lsize;
 		else
 			c->dcache.linesz= lsize;
-		c->dcache.sets = 64 << ((config1 >> 13) & 7);
+		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);
 		c->dcache.ways = 1 + ((config1 >> 7) & 7);
 
 		dcache_size = c->dcache.sets *
@@ -1051,6 +1051,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_R14000:
 		break;
 
+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_74K:
diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S
new file mode 100644
index 000000000000..48a6b38ff13e
--- /dev/null
+++ b/arch/mips/mm/page-funcs.S
@@ -0,0 +1,50 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Micro-assembler generated clear_page/copy_page functions.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#define cpu_clear_page_function_name	clear_page_cpu
+#define cpu_copy_page_function_name	copy_page_cpu
+#else
+#define cpu_clear_page_function_name	clear_page
+#define cpu_copy_page_function_name	copy_page
+#endif
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x058 bytes
+ * R4600 v1.7:				0x05c bytes
+ * R4600 v2.0:				0x060 bytes
+ * With prefetching, 16 word strides	0x120 bytes
+ */
+EXPORT(__clear_page_start)
+LEAF(cpu_clear_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 288
+END(cpu_clear_page_function_name)
+EXPORT(__clear_page_end)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 word strides	0x540 bytes
+ */
+EXPORT(__copy_page_start)
+LEAF(cpu_copy_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 1344
+END(cpu_copy_page_function_name)
+EXPORT(__copy_page_end)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cc0b626858b3..98f530e18216 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2007  Maciej W. Rozycki
  * Copyright (C) 2008  Thiemo Seufer
+ * Copyright (C) 2012  MIPS Technologies, Inc.
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -71,45 +72,6 @@ static struct uasm_reloc __cpuinitdata relocs[5];
 #define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
 #define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
 
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x058 bytes
- * R4600 v1.7:				0x05c bytes
- * R4600 v2.0:				0x060 bytes
- * With prefetching, 16 word strides	0x120 bytes
- */
-
-static u32 clear_page_array[0x120 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
-#else
-void clear_page(void *page) __attribute__((alias("clear_page_array")));
-#endif
-
-EXPORT_SYMBOL(clear_page);
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x11c bytes
- * R4600 v1.7:				0x080 bytes
- * R4600 v2.0:				0x07c bytes
- * With prefetching, 16 word strides	0x540 bytes
- */
-static u32 copy_page_array[0x540 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void
-copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
-#else
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
-#endif
-
-EXPORT_SYMBOL(copy_page);
-
-
 static int pref_bias_clear_store __cpuinitdata;
 static int pref_bias_copy_load __cpuinitdata;
 static int pref_bias_copy_store __cpuinitdata;
@@ -282,10 +244,15 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
 		}
 }
 
+extern u32 __clear_page_start;
+extern u32 __clear_page_end;
+extern u32 __copy_page_start;
+extern u32 __copy_page_end;
+
 void __cpuinit build_clear_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&clear_page_array;
+	u32 *buf = &__clear_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -356,17 +323,17 @@ void __cpuinit build_clear_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);
 
-	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
+	BUG_ON(buf > &__clear_page_end);
 
 	uasm_resolve_relocs(relocs, labels);
 
 	pr_debug("Synthesized clear page handler (%u instructions).\n",
-		 (u32)(buf - clear_page_array));
+		 (u32)(buf - &__clear_page_start));
 
 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - clear_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
+	for (i = 0; i < (buf - &__clear_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }
 
@@ -427,7 +394,7 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 void __cpuinit build_copy_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&copy_page_array;
+	u32 *buf = &__copy_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -595,21 +562,23 @@ void __cpuinit build_copy_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);
 
-	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
+	BUG_ON(buf > &__copy_page_end);
 
 	uasm_resolve_relocs(relocs, labels);
 
 	pr_debug("Synthesized copy page handler (%u instructions).\n",
-		 (u32)(buf - copy_page_array));
+		 (u32)(buf - &__copy_page_start));
 
 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - copy_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
+	for (i = 0; i < (buf - &__copy_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+extern void clear_page_cpu(void *page);
+extern void copy_page_cpu(void *to, void *from);
 
 /*
  * Pad descriptors to cacheline, since each is exclusively owned by a
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 0bc485b3cd60..03eb0ef91580 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -9,6 +9,7 @@
  * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
  * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2008, 2009 Cavium Networks, Inc.
+ * Copyright (C) 2011  MIPS Technologies, Inc.
  *
  * ... and the days got worse and worse and now you see
  * I've gone completly out of my mind.
@@ -494,6 +495,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_R14000:
 	case CPU_4KC:
 	case CPU_4KEC:
+	case CPU_M14KC:
 	case CPU_SB1:
 	case CPU_SB1A:
 	case CPU_4KSC:
diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c
index bf80921f2f56..284dea54faf5 100644
--- a/arch/mips/mti-malta/malta-pci.c
+++ b/arch/mips/mti-malta/malta-pci.c
@@ -241,8 +241,9 @@ void __init mips_pcibios_init(void)
 		return;
 	}
 
-	if (controller->io_resource->start < 0x00001000UL)	/* FIXME */
-		controller->io_resource->start = 0x00001000UL;
+	/* Change start address to avoid conflicts with ACPI and SMB devices */
+	if (controller->io_resource->start < 0x00002000UL)
+		controller->io_resource->start = 0x00002000UL;
 
 	iomem_resource.end &= 0xfffffffffULL;			/* 64 GB */
 	ioport_resource.end = controller->io_resource->end;
@@ -253,7 +254,7 @@ void __init mips_pcibios_init(void)
 }
 
 /* Enable PCI 2.1 compatibility in PIIX4 */
-static void __init quirk_dlcsetup(struct pci_dev *dev)
+static void __devinit quirk_dlcsetup(struct pci_dev *dev)
 {
 	u8 odlc, ndlc;
 	(void) pci_read_config_byte(dev, 0x82, &odlc);
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index b7f37d4982fa..2e28f653f66d 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -111,7 +111,7 @@ static void __init pci_clock_check(void)
 	unsigned int __iomem *jmpr_p =
 		(unsigned int *) ioremap(MALTA_JMPRS_REG, sizeof(unsigned int));
 	int jmpr = (__raw_readl(jmpr_p) >> 2) & 0x07;
-	static const int pciclocks[] __initdata = {
+	static const int pciclocks[] __initconst = {
 		33, 20, 25, 30, 12, 16, 37, 10
 	};
 	int pciclock = pciclocks[jmpr];
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index acb677a1227c..b3df7c2aad1e 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -82,8 +82,10 @@ void __init prom_free_prom_memory(void)
 
 void xlp_mmu_init(void)
 {
+	/* enable extended TLB and Large Fixed TLB */
 	write_c0_config6(read_c0_config6() | 0x24);
-	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
+	/* set page mask of Fixed TLB in config7 */
 	write_c0_config7(PM_DEFAULT_MASK >>
 		(13 + (ffz(PM_DEFAULT_MASK >> 13) / 2)));
 }
@@ -100,6 +102,10 @@ void __init prom_init(void)
 	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
 #ifdef CONFIG_SMP
 	nlm_wakeup_secondary_cpus(0xffffffff);
+
+	/* update TLB size after waking up threads */
+	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
 	register_smp_ops(&nlm_smp_ops);
 #endif
 }
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index d1f2d4c52d42..b6e378211a2c 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -78,6 +78,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
 	switch (current_cpu_type()) {
 	case CPU_5KC:
+	case CPU_M14KC:
 	case CPU_20KC:
 	case CPU_24K:
 	case CPU_25KF:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index baba3bcaa3c2..4d80a856048d 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -322,6 +322,10 @@ static int __init mipsxx_init(void)
 
 	op_model_mipsxx_ops.num_counters = counters;
 	switch (current_cpu_type()) {
+	case CPU_M14KC:
+		op_model_mipsxx_ops.cpu_type = "mips/M14Kc";
+		break;
+
 	case CPU_20KC:
 		op_model_mipsxx_ops.cpu_type = "mips/20K";
 		break;
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index d5d4c018fb04..0857ab8c3919 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -48,7 +48,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 
@@ -60,7 +60,7 @@ static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, 0xe4, 1 << 5);
 }
 
-static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char c;
 
@@ -135,7 +135,7 @@ static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: ISA bridge done\n");
 }
 
-static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 {
 	printk(KERN_INFO"via686b fix: IDE\n");
 
@@ -168,19 +168,19 @@ static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: IDE done\n");
 }
 
-static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func2_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10);
 }
 
-static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func3_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11);
 }
 
-static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func5_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 	unsigned char c;
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 4b9768d5d729..a7b917dcf604 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -96,21 +96,21 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 }
 
 /* CS5536 SPEC. fixup */
-static void __init loongson_cs5536_isa_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_isa_fixup(struct pci_dev *pdev)
 {
 	/* the uart1 and uart2 interrupt in PIC is enabled as default */
 	pci_write_config_dword(pdev, PCI_UART1_INT_REG, 1);
 	pci_write_config_dword(pdev, PCI_UART2_INT_REG, 1);
 }
 
-static void __init loongson_cs5536_ide_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ide_fixup(struct pci_dev *pdev)
 {
 	/* setting the mutex pin as IDE function */
 	pci_write_config_dword(pdev, PCI_IDE_CFG_REG,
 			       CS5536_IDE_FLASH_SIGNATURE);
 }
 
-static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_acc_fixup(struct pci_dev *pdev)
 {
 	/* enable the AUDIO interrupt in PIC  */
 	pci_write_config_dword(pdev, PCI_ACC_INT_REG, 1);
@@ -118,14 +118,14 @@ static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev)
 	pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xc0);
 }
 
-static void __init loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
 {
 	/* enable the OHCI interrupt in PIC */
 	/* THE OHCI, EHCI, UDC, OTG are shared with interrupt in PIC */
 	pci_write_config_dword(pdev, PCI_OHCI_INT_REG, 1);
 }
 
-static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 {
 	u32 hi, lo;
 
@@ -137,7 +137,7 @@ static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, PCI_EHCI_FLADJ_REG, 0x2000);
 }
 
-static void __init loongson_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 0f48498bc231..70073c98ed32 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -49,10 +49,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
-	static int piixirqmap[16] __initdata = {  /* PIIX PIRQC[A:D] irq mappings */
+	static int piixirqmap[16] __devinitdata = {  /* PIIX PIRQC[A:D] irq mappings */
 		0,  0, 	0,  3,
 		4,  5,  6,  7,
 		0,  9, 10, 11,
@@ -83,7 +83,7 @@ static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0,
 	 malta_piix_func0_fixup);
 
-static void __init malta_piix_func1_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func1_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
 
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index e08f49cb6875..8e4f8288eca2 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -22,13 +22,13 @@
 
 #include <asm/vr41xx/mpc30x.h>
 
-static const int internal_func_irqs[] __initdata = {
+static const int internal_func_irqs[] __initconst = {
 	VRC4173_CASCADE_IRQ,
 	VRC4173_AC97_IRQ,
 	VRC4173_USB_IRQ,
 };
 
-static const int irq_tab_mpc30x[] __initdata = {
+static const int irq_tab_mpc30x[] __initconst = {
  [12] = VRC4173_PCMCIA1_IRQ,
  [13] = VRC4173_PCMCIA2_IRQ,
  [29] = MQ200_IRQ,
diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index f0bb9146e6c0..d02900a72916 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -15,7 +15,7 @@
  * Set the BCM1250, etc. PCI host bridge's TRDY timeout
  * to the finite max.
  */
-static void __init quirk_sb1250_pci(struct pci_dev *dev)
+static void __devinit quirk_sb1250_pci(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, 0x40, 0xff);
 }
@@ -25,7 +25,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
 /*
  * The BCM1250, etc. PCI/HT bridge reports as a host bridge.
  */
-static void __init quirk_sb1250_ht(struct pci_dev *dev)
+static void __devinit quirk_sb1250_ht(struct pci_dev *dev)
 {
 	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
 }
@@ -35,7 +35,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT,
 /*
  * Set the SP1011 HT/PCI bridge's TRDY timeout to the finite max.
  */
-static void __init quirk_sp1011(struct pci_dev *dev)
+static void __devinit quirk_sp1011(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, 0x64, 0xff);
 }
diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c
index a1e7e6d80c8c..bc13e29d2bb3 100644
--- a/arch/mips/pci/ops-tx4927.c
+++ b/arch/mips/pci/ops-tx4927.c
@@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id)
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
-static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
 {
 	struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus);
 
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 0fbe4c0c170a..fdc24440294c 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -212,7 +212,7 @@ static inline void pci_enable_swapping(struct pci_dev *dev)
 	bridge->b_widget.w_tflush;	/* Flush */
 }
 
-static void __init pci_fixup_ioc3(struct pci_dev *d)
+static void __devinit pci_fixup_ioc3(struct pci_dev *d)
 {
 	pci_disable_swapping(d);
 }
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 1644805a6730..172af1cd5867 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -41,6 +41,7 @@
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 #include <linux/console.h>
+#include <linux/pci_regs.h>
 
 #include <asm/io.h>
 
@@ -156,35 +157,55 @@ struct pci_controller nlm_pci_controller = {
 	.io_offset      = 0x00000000UL,
 };
 
+/*
+ * The top level PCIe links on the XLS PCIe controller appear as
+ * bridges. Given a device, this function finds which link it is
+ * on.
+ */
+static struct pci_dev *xls_get_pcie_link(const struct pci_dev *dev)
+{
+	struct pci_bus *bus, *p;
+
+	/* Find the bridge on bus 0 */
+	bus = dev->bus;
+	for (p = bus->parent; p && p->number != 0; p = p->parent)
+		bus = p;
+
+	return p ? bus->self : NULL;
+}
+
 static int get_irq_vector(const struct pci_dev *dev)
 {
+	struct pci_dev *lnk;
+
 	if (!nlm_chip_is_xls())
-		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/
+		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ */
 
 	/*
 	 * For XLS PCIe, there is an IRQ per Link, find out which
 	 * link the device is on to assign interrupts
-	*/
-	if (dev->bus->self == NULL)
+	 */
+	lnk = xls_get_pcie_link(dev);
+	if (lnk == NULL)
 		return 0;
 
-	switch	(dev->bus->self->devfn) {
-	case 0x0:
+	switch	(PCI_SLOT(lnk->devfn)) {
+	case 0:
 		return PIC_PCIE_LINK0_IRQ;
-	case 0x8:
+	case 1:
 		return PIC_PCIE_LINK1_IRQ;
-	case 0x10:
+	case 2:
 		if (nlm_chip_is_xls_b())
 			return PIC_PCIE_XLSB0_LINK2_IRQ;
 		else
 			return PIC_PCIE_LINK2_IRQ;
-	case 0x18:
+	case 3:
 		if (nlm_chip_is_xls_b())
 			return PIC_PCIE_XLSB0_LINK3_IRQ;
 		else
 			return PIC_PCIE_LINK3_IRQ;
 	}
-	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn);
+	WARN(1, "Unexpected devfn %d\n", lnk->devfn);
 	return 0;
 }
 
@@ -202,7 +223,27 @@ void arch_teardown_msi_irq(unsigned int irq)
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
 	struct msi_msg msg;
+	struct pci_dev *lnk;
 	int irq, ret;
+	u16 val;
+
+	/* MSI not supported on XLR */
+	if (!nlm_chip_is_xls())
+		return 1;
+
+	/*
+	 * Enable MSI on the XLS PCIe controller bridge which was disabled
+	 * at enumeration, the bridge MSI capability is at 0x50
+	 */
+	lnk = xls_get_pcie_link(dev);
+	if (lnk == NULL)
+		return 1;
+
+	pci_read_config_word(lnk, 0x50 + PCI_MSI_FLAGS, &val);
+	if ((val & PCI_MSI_FLAGS_ENABLE) == 0) {
+		val |= PCI_MSI_FLAGS_ENABLE;
+		pci_write_config_word(lnk, 0x50 + PCI_MSI_FLAGS, val);
+	}
 
 	irq = get_irq_vector(dev);
 	if (irq <= 0)
@@ -327,7 +368,7 @@ static int __init pcibios_init(void)
 		}
 	} else {
 		/* XLR PCI controller ACK */
-		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack);
+		irq_set_handler_data(PIC_PCIX_IRQ, xlr_pci_ack);
 	}
 
 	return 0;
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index b71fae231049..5edab2bc6fc0 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -115,11 +115,11 @@ static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
  */
 static void __cpuinit yos_init_secondary(void)
 {
-	set_c0_status(ST0_CO | ST0_IE | ST0_IM);
 }
 
 static void __cpuinit yos_smp_finish(void)
 {
+	set_c0_status(ST0_CO | ST0_IM | ST0_IE);
 }
 
 /* Hook for after all CPUs are online */
diff --git a/arch/mips/powertv/asic/asic-calliope.c b/arch/mips/powertv/asic/asic-calliope.c
index 0a170e0ffeaa..7773f3d956b0 100644
--- a/arch/mips/powertv/asic/asic-calliope.c
+++ b/arch/mips/powertv/asic/asic-calliope.c
@@ -28,7 +28,7 @@
 
 #define CALLIOPE_ADDR(x)	(CALLIOPE_IO_BASE + (x))
 
-const struct register_map calliope_register_map __initdata = {
+const struct register_map calliope_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = CALLIOPE_ADDR(0x800000)},
 	.eic_cfg_bits = {.phys = CALLIOPE_ADDR(0x800038)},
 	.eic_ready_status = {.phys = CALLIOPE_ADDR(0x80004c)},
diff --git a/arch/mips/powertv/asic/asic-cronus.c b/arch/mips/powertv/asic/asic-cronus.c
index bbc0c122be5e..da076db7b7ed 100644
--- a/arch/mips/powertv/asic/asic-cronus.c
+++ b/arch/mips/powertv/asic/asic-cronus.c
@@ -28,7 +28,7 @@
 
 #define CRONUS_ADDR(x)	(CRONUS_IO_BASE + (x))
 
-const struct register_map cronus_register_map __initdata = {
+const struct register_map cronus_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = CRONUS_ADDR(0x000000)},
 	.eic_cfg_bits = {.phys = CRONUS_ADDR(0x000038)},
 	.eic_ready_status = {.phys = CRONUS_ADDR(0x00004C)},
diff --git a/arch/mips/powertv/asic/asic-gaia.c b/arch/mips/powertv/asic/asic-gaia.c
index 91dda682752c..47683b370e74 100644
--- a/arch/mips/powertv/asic/asic-gaia.c
+++ b/arch/mips/powertv/asic/asic-gaia.c
@@ -23,7 +23,7 @@
 #include <linux/init.h>
 #include <asm/mach-powertv/asic.h>
 
-const struct register_map gaia_register_map __initdata = {
+const struct register_map gaia_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = GAIA_IO_BASE + 0x000000},
 	.eic_cfg_bits = {.phys = GAIA_IO_BASE + 0x000038},
 	.eic_ready_status = {.phys = GAIA_IO_BASE + 0x00004C},
diff --git a/arch/mips/powertv/asic/asic-zeus.c b/arch/mips/powertv/asic/asic-zeus.c
index 4a05bb096476..6ff4b10f09da 100644
--- a/arch/mips/powertv/asic/asic-zeus.c
+++ b/arch/mips/powertv/asic/asic-zeus.c
@@ -28,7 +28,7 @@
 
 #define ZEUS_ADDR(x)	(ZEUS_IO_BASE + (x))
 
-const struct register_map zeus_register_map __initdata = {
+const struct register_map zeus_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = ZEUS_ADDR(0x000000)},
 	.eic_cfg_bits = {.phys = ZEUS_ADDR(0x000038)},
 	.eic_ready_status = {.phys = ZEUS_ADDR(0x00004c)},
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 682efb0c108d..64eb71b15280 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq)
 	return err;
 }
 
-static void __init quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev)
 {
 	int irq;	/* PCI/ISA Bridge interrupt */
 	u8 reg_64;
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 6ab0bee2a54f..4d584ae29ae1 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -459,10 +459,11 @@ static int handle_signal(int sig,
 	else
 		ret = setup_frame(sig, ka, oldset, regs);
 	if (ret)
-		return;
+		return ret;
 
 	signal_delivered(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
+			 test_thread_flag(TIF_SINGLESTEP));
+	return 0;
 }
 
 /*
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 090d35d36973..e62c223e4c45 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -876,9 +876,7 @@ static void __init smp_online(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	local_irq_enable();
 }
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index a47828d31fe6..6266730efd61 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -300,9 +300,7 @@ smp_cpu_init(int cpunum)
 
 	notify_cpu_starting(cpunum);
 
-	ipi_call_lock();
 	set_cpu_online(cpunum, true);
-	ipi_call_unlock();
 
 	/* Initialise the idle task for this CPU */
 	atomic_inc(&init_mm.mm_count);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de4..e1417c42155c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
 #endif
-	ipi_call_lock();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 	/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
-	ipi_call_unlock();
 
 	local_irq_enable();
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 15cca26ccb6c..8dca9c248ac7 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -717,9 +717,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)
 	init_cpu_vtimer();
 	pfault_init();
 	notify_cpu_starting(smp_processor_id());
-	ipi_call_lock();
 	set_cpu_online(smp_processor_id(), true);
-	ipi_call_unlock();
 	local_irq_enable();
 	/* cpu_idle will call schedule for us */
 	cpu_idle();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f591598d92f6..781bcb10b8bd 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -103,8 +103,6 @@ void __cpuinit smp_callin(void)
 	if (cheetah_pcache_forced_on)
 		cheetah_enable_pcache();
 
-	local_irq_enable();
-
 	callin_flag = 1;
 	__asm__ __volatile__("membar #Sync\n\t"
 			     "flush  %%g6" : : : "memory");
@@ -124,9 +122,8 @@ void __cpuinit smp_callin(void)
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		rmb();
 
-	ipi_call_lock_irq();
 	set_cpu_online(cpuid, true);
-	ipi_call_unlock_irq();
+	local_irq_enable();
 
 	/* idle thread is expected to have preempt disabled */
 	preempt_disable();
@@ -1308,9 +1305,7 @@ int __cpu_disable(void)
 	mdelay(1);
 	local_irq_disable();
 
-	ipi_call_lock();
 	set_cpu_online(cpu, false);
-	ipi_call_unlock();
 
 	cpu_map_rebuild();
 
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 84873fbe8f27..e686c5ac90be 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -198,17 +198,7 @@ void __cpuinit online_secondary(void)
 
 	notify_cpu_starting(smp_processor_id());
 
-	/*
-	 * We need to hold call_lock, so there is no inconsistency
-	 * between the time smp_call_function() determines number of
-	 * IPI recipients, and the time when the determination is made
-	 * for which cpus receive the IPI. Holding this
-	 * lock helps us to not include this cpu in a currently in progress
-	 * smp_call_function().
-	 */
-	ipi_call_lock();
 	set_cpu_online(smp_processor_id(), 1);
-	ipi_call_unlock();
 	__get_cpu_var(cpu_state) = CPU_ONLINE;
 
 	/* Set up tile-specific state for this cpu. */
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f2521434554..b0c5276861ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -49,6 +49,9 @@ else
         KBUILD_AFLAGS += -m64
         KBUILD_CFLAGS += -m64
 
+	# Use -mpreferred-stack-boundary=3 if supported.
+	KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 49331bedc158..70780689599a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif	/* CONFIG_SMP */
 
+#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number)	"663"#number
+#define e_replacement(number)	"664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number)					      \
+	" .long 661b - .\n"				/* label           */ \
+	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
+	" .word " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte " alt_slen "\n"				/* source len      */ \
+	" .byte " alt_rlen(number) "\n"			/* replacement len */
+
+#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
+	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
+	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
-									\
-      "661:\n\t" oldinstr "\n662:\n"					\
-      ".section .altinstructions,\"a\"\n"				\
-      "	 .long 661b - .\n"			/* label           */	\
-      "	 .long 663f - .\n"			/* new instruction */	\
-      "	 .word " __stringify(feature) "\n"	/* feature bit     */	\
-      "	 .byte 662b-661b\n"			/* sourcelen       */	\
-      "	 .byte 664f-663f\n"			/* replacementlen  */	\
-      ".previous\n"							\
-      ".section .discard,\"aw\",@progbits\n"				\
-      "	 .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */	\
-      ".previous\n"							\
-      ".section .altinstr_replacement, \"ax\"\n"			\
-      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
-      ".previous"
+	OLDINSTR(oldinstr)						\
+	".section .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature, 1)					\
+	".previous\n"							\
+	".section .discard,\"aw\",@progbits\n"				\
+	DISCARD_ENTRY(1)						\
+	".previous\n"							\
+	".section .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
+	".previous"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+	OLDINSTR(oldinstr)						\
+	".section .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature1, 1)					\
+	ALTINSTR_ENTRY(feature2, 2)					\
+	".previous\n"							\
+	".section .discard,\"aw\",@progbits\n"				\
+	DISCARD_ENTRY(1)						\
+	DISCARD_ENTRY(2)						\
+	".previous\n"							\
+	".section .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
+	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
+	".previous"
 
 /*
  * This must be included *after* the definition of ALTERNATIVE due to
@@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
 		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
 
 /*
+ * Like alternative_call, but there are two features and respective functions.
+ * If CPU has feature2, function2 is used.
+ * Otherwise, if CPU has feature1, function1 is used.
+ * Otherwise, old function is used.
+ */
+#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
+			   output, input...)				      \
+	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+		"call %P[new2]", feature2)				      \
+		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \
+		[new2] "i" (newfunc2), ## input)
+
+/*
  * use this macro(s) if you need more than one output parameter
  * in alternative_io
  */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eaff4790ed96..88093c1d44fd 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,8 @@ struct apic {
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
-	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
+					 const struct cpumask *mask);
 	void (*init_apic_ldr)(void);
 
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -331,9 +332,9 @@ struct apic {
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
 
-	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
-					       const struct cpumask *andmask);
+	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+				      const struct cpumask *andmask,
+				      unsigned int *apicid);
 
 	/* ipi */
 	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
@@ -537,6 +538,11 @@ static inline const struct cpumask *default_target_cpus(void)
 #endif
 }
 
+static inline const struct cpumask *online_target_cpus(void)
+{
+	return cpu_online_mask;
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
 
@@ -586,21 +592,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 
 #endif
 
-static inline unsigned int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			    const struct cpumask *andmask,
+			    unsigned int *apicid)
 {
-	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
+				 cpumask_bits(andmask)[0] &
+				 cpumask_bits(cpu_online_mask)[0] &
+				 APIC_ALL_CPUS;
+
+	if (likely(cpu_mask)) {
+		*apicid = (unsigned int)cpu_mask;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
 }
 
-static inline unsigned int
+extern int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask)
+			       const struct cpumask *andmask,
+			       unsigned int *apicid);
+
+static inline void
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+			      const struct cpumask *mask)
 {
-	unsigned long mask1 = cpumask_bits(cpumask)[0];
-	unsigned long mask2 = cpumask_bits(andmask)[0];
-	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
 
-	return (unsigned int)(mask1 & mask2 & mask3);
+static inline void
+default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+				 const struct cpumask *mask)
+{
+	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index cc70c1c78ca4..75ce3f47d204 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -4,9 +4,7 @@
 enum reboot_type {
 	BOOT_TRIPLE = 't',
 	BOOT_KBD = 'k',
-#ifdef CONFIG_X86_32
 	BOOT_BIOS = 'b',
-#endif
 	BOOT_ACPI = 'a',
 	BOOT_EFI = 'e',
 	BOOT_CF9 = 'p',
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index dbe82a5c5eac..d3d74698dce9 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
 		virtual_dma_residue += virtual_dma_count;
 		virtual_dma_count = 0;
 #ifdef TRACE_FLPY_INT
-		printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
+		printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
 		       virtual_dma_count, virtual_dma_residue, calls, bytes,
 		       dma_wait);
 		calls = 0;
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index dc580c42851c..c0fa356e90de 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,28 +44,14 @@ struct nmiaction {
 	const char		*name;
 };
 
-#define register_nmi_handler(t, fn, fg, n)		\
+#define register_nmi_handler(t, fn, fg, n, init...)	\
 ({							\
-	static struct nmiaction fn##_na = {		\
+	static struct nmiaction init fn##_na = {	\
 		.handler = (fn),			\
 		.name = (n),				\
 		.flags = (fg),				\
 	};						\
-	__register_nmi_handler((t), &fn##_na);	\
-})
-
-/*
- * For special handlers that register/unregister in the
- * init section only.  This should be considered rare.
- */
-#define register_nmi_handler_initonly(t, fn, fg, n)		\
-({							\
-	static struct nmiaction fn##_na __initdata = {		\
-		.handler = (fn),			\
-		.name = (n),				\
-		.flags = (fg),				\
-	};						\
-	__register_nmi_handler((t), &fn##_na);	\
+	__register_nmi_handler((t), &fn##_na);		\
 })
 
 int __register_nmi_handler(unsigned int, struct nmiaction *);
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b3a531746026..5ad24a89b19b 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -7,9 +7,13 @@
 #undef DEBUG
 
 #ifdef DEBUG
-#define DBG(x...) printk(x)
+#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
 #else
-#define DBG(x...)
+#define DBG(fmt, ...)				\
+do {						\
+	if (0)					\
+		printk(fmt, ##__VA_ARGS__);	\
+} while (0)
 #endif
 
 #define PCI_PROBE_BIOS		0x0001
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c78f14a0df00..dab39350e51e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -234,7 +234,7 @@ extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
 #else
-static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
 	*nr = 0;
 	return NULL;
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db840c6..f2b489cf1602 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -2,9 +2,9 @@
 #define _ASM_X86_PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+	pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
  * Certain architectures need to do special things when PTEs
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cb00ccc7d571..4cc9f2b7cdc3 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -9,13 +9,13 @@
  */
 
 #define pte_ERROR(e)							\
-	printk("%s:%d: bad pte %p(%08lx%08lx).\n",			\
+	pr_err("%s:%d: bad pte %p(%08lx%08lx)\n",			\
 	       __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
 #define pmd_ERROR(e)							\
-	printk("%s:%d: bad pmd %p(%016Lx).\n",				\
+	pr_err("%s:%d: bad pmd %p(%016Lx)\n",				\
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pgd_ERROR(e)							\
-	printk("%s:%d: bad pgd %p(%016Lx).\n",				\
+	pr_err("%s:%d: bad pgd %p(%016Lx)\n",				\
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 /* Rules for using set_pte: the pte being assigned *must* be
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709e09ae..8251be02301e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[];
 extern void paging_init(void);
 
 #define pte_ERROR(e)					\
-	printk("%s:%d: bad pte %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pte %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pte_val(e))
 #define pmd_ERROR(e)					\
-	printk("%s:%d: bad pmd %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pmd %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pud_ERROR(e)					\
-	printk("%s:%d: bad pud %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pud %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e)					\
-	printk("%s:%d: bad pgd %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pgd %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 struct mm_struct;
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fce3f4ae5bd6..fe1ec5bcd846 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -21,8 +21,9 @@ struct real_mode_header {
 	u32	wakeup_header;
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	u32	machine_real_restart_asm;
+#ifdef CONFIG_X86_64
+	u32	machine_real_restart_seg;
 #endif
 };
 
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 92f297069e87..a82c4f1b4d83 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops;
 
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
-void machine_real_restart(unsigned int type);
-/* These must match dispatch_table in reboot_32.S */
+void __noreturn machine_real_restart(unsigned int type);
+/* These must match dispatch in arch/x86/realmore/rm/reboot.S */
 #define MRR_BIOS	0
 #define MRR_APM		1
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f48394513c37..2ffa95dc2333 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
-static inline int num_booting_cpus(void)
-{
-	return cpumask_weight(cpu_callout_mask);
-}
 #else /* !CONFIG_SMP */
 #define wbinvd_on_cpu(cpu)     wbinvd()
 static inline int wbinvd_on_all_cpus(void)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8e796fbbf9c6..d8def8b3dba0 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
+copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
 copy_user_generic_string(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
 {
 	unsigned ret;
 
-	alternative_call(copy_user_generic_unrolled,
+	/*
+	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
+	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+	 * Otherwise, use copy_user_generic_unrolled.
+	 */
+	alternative_call_2(copy_user_generic_unrolled,
 			 copy_user_generic_string,
 			 X86_FEATURE_REP_GOOD,
+			 copy_user_enhanced_fast_string,
+			 X86_FEATURE_ERMS,
 			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
 				     "=d" (len)),
 			 "1" (to), "2" (from), "3" (len)
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 6149b476d9df..a06983cdc125 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
 #define IPI_RESET_LIMIT			1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD		5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+   the BAU for a period of time */
+#define GIVEUP_LIMIT			100
 
 #define UV_LB_SUBNODEID			0x10
 
@@ -166,7 +169,6 @@
 #define FLUSH_RETRY_TIMEOUT		2
 #define FLUSH_GIVEUP			3
 #define FLUSH_COMPLETE			4
-#define FLUSH_RETRY_BUSYBUG		5
 
 /*
  * tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
 						   microseconds */
 #define CONGESTED_REPS			10	/* long delays averaged over
 						   this many broadcasts */
-#define CONGESTED_PERIOD		30	/* time for the bau to be
+#define DISABLED_PERIOD			10	/* time for the bau to be
 						   disabled, in seconds */
 /* see msg_type: */
 #define MSG_NOOP			0
@@ -520,6 +522,12 @@ struct ptc_stats {
 	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
 	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
 	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
+	unsigned long	s_overipilimit;		/* over the ipi reset limit */
+	unsigned long	s_giveuplimit;		/* disables, over giveup limit*/
+	unsigned long	s_enters;		/* entries to the driver */
+	unsigned long	s_ipifordisabled;	/* fall back to IPI; disabled */
+	unsigned long	s_plugged;		/* plugged by h/w bug*/
+	unsigned long	s_congested;		/* giveup on long wait */
 	/* destination statistics */
 	unsigned long	d_alltlb;		/* times all tlb's on this
 						   cpu were flushed */
@@ -586,8 +594,8 @@ struct bau_control {
 	int			timeout_tries;
 	int			ipi_attempts;
 	int			conseccompletes;
-	int			baudisabled;
-	int			set_bau_off;
+	short			nobau;
+	short			baudisabled;
 	short			cpu;
 	short			osnode;
 	short			uvhub_cpu;
@@ -596,14 +604,16 @@ struct bau_control {
 	short			cpus_in_socket;
 	short			cpus_in_uvhub;
 	short			partition_base_pnode;
-	short			using_desc; /* an index, like uvhub_cpu */
-	unsigned int		inuse_map;
+	short			busy;       /* all were busy (war) */
 	unsigned short		message_number;
 	unsigned short		uvhub_quiesce;
 	short			socket_acknowledge_count[DEST_Q_SIZE];
 	cycles_t		send_message;
+	cycles_t		period_end;
+	cycles_t		period_time;
 	spinlock_t		uvhub_lock;
 	spinlock_t		queue_lock;
+	spinlock_t		disable_lock;
 	/* tunables */
 	int			max_concurr;
 	int			max_concurr_const;
@@ -614,9 +624,9 @@ struct bau_control {
 	int			complete_threshold;
 	int			cong_response_us;
 	int			cong_reps;
-	int			cong_period;
-	unsigned long		clocks_per_100_usec;
-	cycles_t		period_time;
+	cycles_t		disabled_period;
+	int			period_giveups;
+	int			giveup_limit;
 	long			period_requests;
 	struct hub_and_pnode	*thp;
 };
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 92e54abf89e0..f90f0a587c66 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -9,15 +9,6 @@
 #include <asm/ipi.h>
 #include <linux/cpumask.h>
 
-/*
- * Need to use more than cpu 0, because we need more vectors
- * when MSI-X are used.
- */
-static const struct cpumask *x2apic_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static int x2apic_apic_id_valid(int apicid)
 {
 	return 1;
@@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-/*
- * For now each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void
 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
 {
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c090af10ac7d..38155f667144 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -156,7 +156,6 @@ struct x86_cpuinit_ops {
 /**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc:		calibrate TSC
- * @wallclock_init:		init the wallclock device
  * @get_wallclock:		get time from HW clock like RTC etc.
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
@@ -164,10 +163,10 @@ struct x86_cpuinit_ops {
  * @i8042_detect		pre-detect if i8042 controller exists
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
+ * @apic_post_init:		adjust apic if neeeded
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
-	void (*wallclock_init)(void);
 	unsigned long (*get_wallclock)(void);
 	int (*set_wallclock)(unsigned long nowtime);
 	void (*iommu_shutdown)(void);
@@ -177,6 +176,7 @@ struct x86_platform_ops {
 	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
+	void (*apic_post_init)(void);
 };
 
 struct pci_dev;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1f84794f0759..931280ff8299 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "SMP alternatives: " fmt
+
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, args...) if (debug_alternative) \
-	printk(KERN_DEBUG fmt, args)
+#define DPRINTK(fmt, ...)				\
+do {							\
+	if (debug_alternative)				\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+} while (0)
 
 /*
  * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)
 	 * If this still occurs then you should see a hang
 	 * or crash shortly after this line:
 	 */
-	printk("lockdep: fixing up alternatives.\n");
+	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
 	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)
 	if (smp == smp_mode) {
 		/* nothing */
 	} else if (smp) {
-		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+		pr_info("switching to SMP code\n");
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
-		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+		pr_info("switching to UP code\n");
 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
@@ -546,7 +551,7 @@ void __init alternative_instructions(void)
 #ifdef CONFIG_SMP
 	if (smp_alt_once) {
 		if (1 == num_possible_cpus()) {
-			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+			pr_info("switching to UP code\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 
@@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data)
 	struct text_poke_param *p;
 	int i;
 
-	if (atomic_dec_and_test(&stop_machine_first)) {
+	if (atomic_xchg(&stop_machine_first, 0)) {
 		for (i = 0; i < tpp->nparams; i++) {
 			p = &tpp->params[i];
 			text_poke(p->addr, p->opcode, p->len);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index be16854591cc..f29f6dd6bc08 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -2,6 +2,9 @@
  * Shared support code for AMD K8 northbridges and derivates.
  * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -258,7 +261,7 @@ void amd_flush_garts(void)
 	}
 	spin_unlock_irqrestore(&gart_lock, flags);
 	if (!flushed)
-		printk("nothing to flush?\n");
+		pr_notice("nothing to flush?\n");
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
@@ -269,11 +272,10 @@ static __init int init_amd_nbs(void)
 	err = amd_cache_northbridges();
 
 	if (err < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+		pr_notice("Cannot enumerate AMD northbridges\n");
 
 	if (amd_cache_gart() < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
-		       "GART support disabled.\n");
+		pr_notice("Cannot initialize GART flush words, GART support disabled\n");
 
 	return err;
 }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 39a222e094af..c421512ca5eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2123,6 +2123,25 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
+int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				   const struct cpumask *andmask,
+				   unsigned int *apicid)
+{
+	unsigned int cpu;
+
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+
+	if (likely(cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0e881c46e8c8..00c77cf78e9e 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static const struct cpumask *flat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /*
  * Set up the logical destination ID.
  *
@@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 }
 
 static void
- flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
 {
 	unsigned long mask = cpumask_bits(cpumask)[0];
 	int cpu = smp_processor_id();
@@ -186,7 +167,7 @@ static struct apic apic_flat =  {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= flat_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
@@ -210,8 +191,7 @@ static struct apic apic_flat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= flat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
@@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static const struct cpumask *physflat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	default_send_IPI_mask_sequence_phys(cpumask, vector);
@@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector)
 	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int physflat_probe(void)
 {
 	if (apic == &apic_physflat || num_possible_cpus() > 8)
@@ -345,13 +282,13 @@ static struct apic apic_physflat =  {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= physflat_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= physflat_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	/* not needed, but shouldn't hurt: */
 	.init_apic_ldr			= flat_init_apic_ldr,
 
@@ -370,8 +307,7 @@ static struct apic apic_physflat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= physflat_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= physflat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= physflat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index a6e4c6e06c08..e145f28b4099 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
+	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
 static u32 noop_apic_read(u32 reg)
@@ -159,8 +159,7 @@ struct apic apic_noop = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= noop_send_IPI_mask,
 	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 6ec6d5d297c3..bc552cff2578 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-static const struct cpumask *numachip_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 	union numachip_csr_g3_ext_irq_gen int_gen;
@@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector)
 	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
-static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if (likely((unsigned)cpu < nr_cpu_ids))
-		return per_cpu(x86_cpu_to_apicid, cpu);
-
-	return BAD_APICID;
-}
-
-static unsigned int
-numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int __init numachip_probe(void)
 {
 	return apic == &apic_numachip;
@@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= numachip_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= numachip_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xffU << 24,
 
-	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= numachip_send_IPI_mask,
 	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 31fbdbfbf960..d50e3640d5ae 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)
 	return 1;
 }
 
-static const struct cpumask *bigsmp_target_cpus(void)
-{
-#ifdef CONFIG_SMP
-	return cpu_online_mask;
-#else
-	return cpumask_of(0);
-#endif
-}
-
 static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return 0;
@@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
 	return 1;
 }
 
-/* As we are using single CPU as destination, pick only one CPU here */
-static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu = cpumask_first(cpumask);
-
-	if (cpu < nr_cpu_ids)
-		return cpu_physical_id(cpu);
-	return BAD_APICID;
-}
-
-static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			return cpu_physical_id(cpu);
-	}
-	return BAD_APICID;
-}
-
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
@@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	{ } /* NULL entry stops DMI scanning */
 };
 
-static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int probe_bigsmp(void)
 {
 	if (def_to_bigsmp)
@@ -205,13 +164,13 @@ static struct apic apic_bigsmp = {
 	/* phys delivery to target CPU: */
 	.irq_dest_mode			= 0,
 
-	.target_cpus			= bigsmp_target_cpus,
+	.target_cpus			= default_target_cpus,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= bigsmp_check_apicid_used,
 	.check_apicid_present		= bigsmp_check_apicid_present,
 
-	.vector_allocation_domain	= bigsmp_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= bigsmp_init_apic_ldr,
 
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
@@ -229,8 +188,7 @@ static struct apic apic_bigsmp = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= bigsmp_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= bigsmp_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= bigsmp_send_IPI_mask,
 	.send_IPI_mask_allbutself	= NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index db4ab1be3c79..0874799a98c6 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)
 		WARN(1, "Command failed, status = %x\n", mip_status);
 }
 
-static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-
 static void es7000_wait_for_init_deassert(atomic_t *deassert)
 {
 	while (!atomic_read(deassert))
@@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
 	return 1;
 }
 
-static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
-	int cpu, uninitialized_var(apicid);
+	unsigned int cpu, uninitialized_var(apicid);
 
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
-	for_each_cpu(cpu, cpumask) {
+	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
 		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			WARN(1, "Not a valid mask!");
 
-			return BAD_APICID;
+			return -EINVAL;
 		}
-		apicid = new_apicid;
+		apicid |= new_apicid;
 		round++;
 	}
-	return apicid;
+	if (!round)
+		return -EINVAL;
+	*dest_id = apicid;
+	return 0;
 }
 
-static unsigned int
+static int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-			      const struct cpumask *andmask)
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
-	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return apicid;
+		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
-	cpumask_and(cpumask, cpumask, cpu_online_mask);
-	apicid = es7000_cpu_mask_to_apicid(cpumask);
+	es7000_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
 
-	return apicid;
+	return 0;
 }
 
 static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = {
 	.check_apicid_used		= es7000_check_apicid_used,
 	.check_apicid_present		= es7000_check_apicid_present,
 
-	.vector_allocation_domain	= es7000_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= es7000_init_apic_ldr_cluster,
 
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
@@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= es7000_send_IPI_mask,
@@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = {
 	.check_apicid_used		= es7000_check_apicid_used,
 	.check_apicid_present		= es7000_check_apicid_present,
 
-	.vector_allocation_domain	= es7000_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= es7000_init_apic_ldr,
 
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
@@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= es7000_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5f0ff597437c..406eee784684 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
-		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
-				node, apic, pin);
+		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
+		       node, apic, pin);
 		return -ENOMEM;
 	}
 	entry->apic = apic;
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	ioapic_mask_entry(apic, pin);
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.irr)
-		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+		pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
 		       mpc_ioapic_id(apic), pin);
 }
 
@@ -895,7 +895,7 @@ static int irq_polarity(int idx)
 		}
 		case 2: /* reserved */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			polarity = 1;
 			break;
 		}
@@ -906,7 +906,7 @@ static int irq_polarity(int idx)
 		}
 		default: /* invalid */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			polarity = 1;
 			break;
 		}
@@ -948,7 +948,7 @@ static int irq_trigger(int idx)
 				}
 				default:
 				{
-					printk(KERN_WARNING "broken BIOS!!\n");
+					pr_warn("broken BIOS!!\n");
 					trigger = 1;
 					break;
 				}
@@ -962,7 +962,7 @@ static int irq_trigger(int idx)
 		}
 		case 2: /* reserved */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			trigger = 1;
 			break;
 		}
@@ -973,7 +973,7 @@ static int irq_trigger(int idx)
 		}
 		default: /* invalid */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			trigger = 0;
 			break;
 		}
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 	 * Debugging check, we are in big trouble if this message pops up!
 	 */
 	if (mp_irqs[idx].dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
 
 	if (test_bit(bus, mp_bus_not_pci)) {
 		irq = mp_irqs[idx].srcbusirq;
@@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 8;
-	unsigned int old_vector;
+	static int current_offset = VECTOR_OFFSET_START % 16;
 	int cpu, err;
 	cpumask_var_t tmp_mask;
 
@@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return -ENOMEM;
 
-	old_vector = cfg->vector;
-	if (old_vector) {
-		cpumask_and(tmp_mask, mask, cpu_online_mask);
-		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
-		if (!cpumask_empty(tmp_mask)) {
-			free_cpumask_var(tmp_mask);
-			return 0;
-		}
-	}
-
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
-	for_each_cpu_and(cpu, mask, cpu_online_mask) {
-		int new_cpu;
-		int vector, offset;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
 
-		apic->vector_allocation_domain(cpu, tmp_mask);
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress = 1;
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
 
 		vector = current_vector;
 		offset = current_offset;
 next:
-		vector += 8;
+		vector += 16;
 		if (vector >= first_system_vector) {
-			/* If out of vectors on large boxen, must share them. */
-			offset = (offset + 1) % 8;
+			offset = (offset + 1) % 16;
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
-		if (unlikely(current_vector == vector))
+
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
 			continue;
+		}
 
 		if (test_bit(vector, used_vectors))
 			goto next;
@@ -1162,7 +1171,7 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (old_vector) {
+		if (cfg->vector) {
 			cfg->move_in_progress = 1;
 			cpumask_copy(cfg->old_domain, cfg->domain);
 		}
@@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 
 	if (!IO_APIC_IRQ(irq))
 		return;
-	/*
-	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
-	 * controllers like 8259. Now that IO-APIC can handle this irq, update
-	 * the cfg->domain.
-	 */
-	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
-		apic->vector_allocation_domain(0, cfg->domain);
 
 	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
+					 &dest)) {
+		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+		__clear_irq_vector(irq, cfg);
+
+		return;
+	}
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
 	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
-		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
 
@@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
 static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
-					 unsigned int pin, int vector)
+					unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
+	unsigned int dest;
 
 	if (irq_remapping_enabled)
 		return;
@@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 	 * We use logical delivery to get the timer IRQ
 	 * to the first CPU.
 	 */
+	if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
+						  apic->target_cpus(), &dest)))
+		dest = BAD_APICID;
+
 	entry.dest_mode = apic->irq_dest_mode;
 	entry.mask = 0;			/* don't mask IRQ for edge */
-	entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+	entry.dest = dest;
 	entry.delivery_mode = apic->irq_delivery_mode;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 		reg_03.raw = io_apic_read(ioapic_idx, 3);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-	printk("\n");
 	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
@@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 				i,
 				ir_entry->index
 			);
-			printk("%1d   %1d    %1d    %1d   %1d   "
+			pr_cont("%1d   %1d    %1d    %1d   %1d   "
 				"%1d    %1d     %X    %02X\n",
 				ir_entry->format,
 				ir_entry->mask,
@@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 				i,
 				entry.dest
 			);
-			printk("%1d    %1d    %1d   %1d   %1d    "
+			pr_cont("%1d    %1d    %1d   %1d   %1d    "
 				"%1d    %1d    %02X\n",
 				entry.mask,
 				entry.trigger,
@@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
-			printk("-> %d:%d", entry->apic, entry->pin);
-		printk("\n");
+			pr_cont("-> %d:%d", entry->apic, entry->pin);
+		pr_cont("\n");
 	}
 
 	printk(KERN_INFO ".................................... done.\n");
@@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base)
 	printk(KERN_DEBUG);
 
 	for (i = 0; i < 8; i++)
-		printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+		pr_cont("%08x", apic_read(base + i*0x10));
 
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
 		}
 	}
-	printk("\n");
+	pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APICs(int maxcpu)
@@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
-			printk("could not set ID!\n");
+			pr_cont("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
@@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 	cfg->move_in_progress = 0;
 }
 
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		unsigned int reg;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(cfg))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-	}
-}
-
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -1;
-
-	if (assign_irq_vector(data->irq, data->chip_data, mask))
-		return -1;
-
-	cpumask_copy(data->affinity, mask);
-
-	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
-	return 0;
-}
-
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		    bool force)
-{
-	unsigned int dest, irq = data->irq;
-	unsigned long flags;
-	int ret;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (!ret) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
-	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	return ret;
-}
-
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq)
 static inline void irq_complete_move(struct irq_cfg *cfg) { }
 #endif
 
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		unsigned int reg;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(cfg))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+	}
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			  unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = data->chip_data;
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -1;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
+{
+	unsigned int dest, irq = data->irq;
+	unsigned long flags;
+	int ret;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -1;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (!ret) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		ret = IRQ_SET_MASK_OK_NOCOPY;
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	return ret;
+}
+
 static void ack_apic_edge(struct irq_data *data)
 {
 	irq_complete_move(data->chip_data);
@@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
 	chip->irq_ack = ir_ack_apic_edge;
 	chip->irq_eoi = ir_ack_apic_level;
 
-#ifdef CONFIG_SMP
 	chip->irq_set_affinity = set_remapped_irq_affinity;
-#endif
 }
 #endif /* CONFIG_IRQ_REMAP */
 
@@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_unmask		= unmask_ioapic_irq,
 	.irq_ack		= ack_apic_edge,
 	.irq_eoi		= ack_apic_level,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= ioapic_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 	if (err)
 		return err;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
 
 	if (irq_remapped(cfg)) {
 		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 	return err;
 }
 
-#ifdef CONFIG_SMP
 static int
 msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
@@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 
 	__write_msi_msg(data->msi_desc, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
-#endif /* CONFIG_SMP */
 
 /*
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
@@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = {
 	.irq_unmask		= unmask_msi_irq,
 	.irq_mask		= mask_msi_irq,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= msi_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_DMAR_TABLE
-#ifdef CONFIG_SMP
 static int
 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 		      bool force)
@@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 
 	dmar_msi_write(irq, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip dmar_msi_type = {
 	.name			= "DMAR_MSI",
 	.irq_unmask		= dmar_msi_unmask,
 	.irq_mask		= dmar_msi_mask,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= dmar_msi_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq)
 
 #ifdef CONFIG_HPET_TIMER
 
-#ifdef CONFIG_SMP
 static int hpet_msi_set_affinity(struct irq_data *data,
 				 const struct cpumask *mask, bool force)
 {
@@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data,
 
 	hpet_msi_write(data->handler_data, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
 	.irq_unmask = hpet_msi_unmask,
 	.irq_mask = hpet_msi_mask,
 	.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity = hpet_msi_set_affinity,
-#endif
 	.irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
  */
 #ifdef CONFIG_HT_IRQ
 
-#ifdef CONFIG_SMP
-
 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	struct ht_irq_msg msg;
@@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 		return -1;
 
 	target_ht_irq(data->irq, dest, cfg->vector);
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif
-
 static struct irq_chip ht_irq_chip = {
 	.name			= "PCI-HT",
 	.irq_mask		= mask_ht_irq,
 	.irq_unmask		= unmask_ht_irq,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= ht_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
 	int err;
 
 	if (disable_apic)
@@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 	cfg = irq_cfg(irq);
 	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (!err) {
-		struct ht_irq_msg msg;
-		unsigned dest;
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
 
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(cfg->vector) |
-			((apic->irq_dest_mode == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((apic->irq_delivery_mode != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
+	write_ht_irq_msg(irq, &msg);
 
-		write_ht_irq_msg(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
 
-		irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
+	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 
-		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-	}
-	return err;
+	return 0;
 }
 #endif /* CONFIG_HT_IRQ */
 
@@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
 		/* Sanity check */
 		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
+			       ioapic);
 			return -1;
 		}
 	}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f00a68cca37a..d661ee95cabf 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	return 0x0F;
-}
-
-static inline unsigned int
+static int
 numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			     const struct cpumask *andmask)
+			     const struct cpumask *andmask,
+			     unsigned int *apicid)
 {
-	return 0x0F;
+	*apicid = 0x0F;
+	return 0;
 }
 
 /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
@@ -441,20 +438,6 @@ static int probe_numaq(void)
 	return found_numaq;
 }
 
-static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 static void numaq_setup_portio_remap(void)
 {
 	int num_quads = num_online_nodes();
@@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = {
 	.check_apicid_used		= numaq_check_apicid_used,
 	.check_apicid_present		= numaq_check_apicid_present,
 
-	.vector_allocation_domain	= numaq_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= numaq_init_apic_ldr,
 
 	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map,
@@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= numaq_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= numaq_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= numaq_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1b291da09e60..eb35ef9ee63f 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)
 #endif
 }
 
-static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/*
-	 * Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /* should be called last. */
 static int probe_default(void)
 {
@@ -105,7 +90,7 @@ static struct apic apic_default = {
 	.check_apicid_used		= default_check_apicid_used,
 	.check_apicid_present		= default_check_apicid_present,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= default_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
@@ -123,8 +108,7 @@ static struct apic apic_default = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= default_send_IPI_mask_logical,
 	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical,
@@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void)
 
 	if (apic->setup_apic_routing)
 		apic->setup_apic_routing();
+
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
 }
 
 void __init generic_apic_probe(void)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 3fe986698929..1793dba7a741 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -23,11 +23,6 @@
 #include <asm/ipi.h>
 #include <asm/setup.h>
 
-static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
-{
-	return hard_smp_processor_id() >> index_msb;
-}
-
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)
 		}
 	}
 
-	if (is_vsmp_box()) {
-		/* need to update phys_pkg_id */
-		apic->phys_pkg_id = apicid_phys_pkg_id;
-	}
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 659897c00755..77c95c0e1bf7 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) "summit: %s: " fmt, __func__
+
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/io.h>
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)
 
 static void summit_setup_apic_routing(void)
 {
-	printk("Enabling APIC mode:  Summit.  Using %d I/O APICs\n",
-						nr_ioapics);
+	pr_info("Enabling APIC mode:  Summit.  Using %d I/O APICs\n",
+		nr_ioapics);
 }
 
 static int summit_cpu_present_to_apicid(int mps_cpu)
@@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid)
 	return 1;
 }
 
-static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
-	int cpu, apicid = 0;
+	unsigned int cpu, apicid = 0;
 
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
-	for_each_cpu(cpu, cpumask) {
+	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
 		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-			printk("%s: Not a valid mask!\n", __func__);
-			return BAD_APICID;
+			pr_err("Not a valid mask!\n");
+			return -EINVAL;
 		}
 		apicid |= new_apicid;
 		round++;
 	}
-	return apicid;
+	if (!round)
+		return -EINVAL;
+	*dest_id = apicid;
+	return 0;
 }
 
-static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-			      const struct cpumask *andmask)
+static int
+summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
-	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return apicid;
+		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
-	cpumask_and(cpumask, cpumask, cpu_online_mask);
-	apicid = summit_cpu_mask_to_apicid(cpumask);
+	summit_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
 
-	return apicid;
+	return 0;
 }
 
 /*
@@ -320,20 +327,6 @@ static int probe_summit(void)
 	return 0;
 }
 
-static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 #ifdef CONFIG_X86_SUMMIT_NUMA
 static struct rio_table_hdr *rio_table_hdr;
 static struct scal_detail   *scal_devs[MAX_NUMNODES];
@@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		}
 	}
 	if (i == rio_table_hdr->num_rio_dev) {
-		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+		pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
 		return last_bus;
 	}
 
@@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		}
 	}
 	if (i == rio_table_hdr->num_scal_dev) {
-		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+		pr_err("Couldn't find owner Twister for Cyclone!\n");
 		return last_bus;
 	}
 
@@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		num_buses = 9;
 		break;
 	default:
-		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+		pr_info("Unsupported Winnipeg type!\n");
 		return last_bus;
 	}
 
@@ -411,13 +404,15 @@ static int build_detail_arrays(void)
 	int i, scal_detail_size, rio_detail_size;
 
 	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
-		printk(KERN_WARNING "%s: MAX_NUMNODES too low!  Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		pr_warn("MAX_NUMNODES too low!  Defined as %d, but system has %d nodes\n",
+			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
 		return 0;
 	}
 
 	switch (rio_table_hdr->version) {
 	default:
-		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+		pr_warn("Invalid Rio Grande Table Version: %d\n",
+			rio_table_hdr->version);
 		return 0;
 	case 2:
 		scal_detail_size = 11;
@@ -462,7 +457,7 @@ void setup_summit(void)
 		offset = *((unsigned short *)(ptr + offset));
 	}
 	if (!rio_table_hdr) {
-		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+		pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
 		return;
 	}
 
@@ -509,7 +504,7 @@ static struct apic apic_summit = {
 	.check_apicid_used		= summit_check_apicid_used,
 	.check_apicid_present		= summit_check_apicid_present,
 
-	.vector_allocation_domain	= summit_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= summit_init_apic_ldr,
 
 	.ioapic_phys_id_map		= summit_ioapic_phys_id_map,
@@ -527,7 +522,6 @@ static struct apic apic_summit = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= summit_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= summit_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= summit_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index ff35cff0e1a7..c88baa4ff0e5 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 }
 
 static void
- x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
 }
@@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector)
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
-	/*
-	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
-	 * May as well be the first.
-	 */
-	int cpu = cpumask_first(cpumask);
+	u32 dest = 0;
+	u16 cluster;
+	int i;
 
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_logical_apicid, cpu);
-	else
-		return BAD_APICID;
-}
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		dest = per_cpu(x86_cpu_to_logical_apicid, i);
+		cluster = x2apic_cluster(i);
+		break;
+	}
 
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
-{
-	int cpu;
+	if (!dest)
+		return -EINVAL;
 
-	/*
-	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		if (cluster != x2apic_cluster(i))
+			continue;
+		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
 	}
 
-	return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	*apicid = dest;
+
+	return 0;
 }
 
 static void init_x2apic_ldr(void)
@@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void)
 		return 0;
 }
 
+static const struct cpumask *x2apic_cluster_target_cpus(void)
+{
+	return cpu_all_mask;
+}
+
+/*
+ * Each x2apic cluster is an allocation domain.
+ */
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					     const struct cpumask *mask)
+{
+	/*
+	 * To minimize vector pressure, default case of boot, device bringup
+	 * etc will use a single cpu for the interrupt destination.
+	 *
+	 * On explicit migration requests coming from irqbalance etc,
+	 * interrupts will be routed to the x2apic cluster (cluster-id
+	 * derived from the first cpu in the mask) members specified
+	 * in the mask.
+	 */
+	if (mask == x2apic_cluster_target_cpus())
+		cpumask_copy(retmask, cpumask_of(cpu));
+	else
+		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+}
+
 static struct apic apic_x2apic_cluster = {
 
 	.name				= "cluster x2apic",
@@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= x2apic_target_cpus,
+	.target_cpus			= x2apic_cluster_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= x2apic_vector_allocation_domain,
+	.vector_allocation_domain	= cluster_vector_allocation_domain,
 	.init_apic_ldr			= init_x2apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = {
 	.set_apic_id			= x2apic_set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index c17e982db275..e03a1e180e81 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	int cpu = cpumask_first(cpumask);
-
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static void init_x2apic_ldr(void)
 {
 }
@@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= x2apic_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= x2apic_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= init_x2apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = {
 	.set_apic_id			= x2apic_set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= x2apic_send_IPI_mask,
 	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c6d03f7a4401..8cfade9510a4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
-static const struct cpumask *uv_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 #ifdef CONFIG_SMP
@@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	int cpu = cpumask_first(cpumask);
-
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
+static int
 uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			  const struct cpumask *andmask)
+			  const struct cpumask *andmask,
+			  unsigned int *apicid)
 {
-	int cpu;
+	int unsigned cpu;
 
 	/*
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	}
-	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+
+	if (likely(cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+		return 0;
+	}
+
+	return -EINVAL;
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= uv_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= uv_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= uv_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= uv_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= uv_send_IPI_mask,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 07b0c0db466c..d65464e43503 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -201,6 +201,8 @@
  *    http://www.microsoft.com/whdc/archive/amp_12.mspx]
  */
 
+#define pr_fmt(fmt) "apm: " fmt
+
 #include <linux/module.h>
 
 #include <linux/poll.h>
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err)
 		if (error_table[i].key == err)
 			break;
 	if (i < ERROR_COUNT)
-		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
+		pr_notice("%s: %s\n", str, error_table[i].msg);
 	else if (err < 0)
-		printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err);
+		pr_notice("%s: linux error code %i\n", str, err);
 	else
-		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
+		pr_notice("%s: unknown error code %#2.2x\n",
 		       str, err);
 }
 
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
 			static int notified;
 
 			if (notified++ == 0)
-			    printk(KERN_ERR "apm: an event queue overflowed\n");
+				pr_err("an event queue overflowed\n");
 			if (++as->event_tail >= APM_MAX_EVENTS)
 				as->event_tail = 0;
 		}
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void)
 static int check_apm_user(struct apm_user *as, const char *func)
 {
 	if (as == NULL || as->magic != APM_BIOS_MAGIC) {
-		printk(KERN_ERR "apm: %s passed bad filp\n", func);
+		pr_err("%s passed bad filp\n", func);
 		return 1;
 	}
 	return 0;
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)
 		     as1 = as1->next)
 			;
 		if (as1 == NULL)
-			printk(KERN_ERR "apm: filp not in user list\n");
+			pr_err("filp not in user list\n");
 		else
 			as1->next = as->next;
 	}
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)
 	struct apm_user *as;
 
 	as = kmalloc(sizeof(*as), GFP_KERNEL);
-	if (as == NULL) {
-		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
-		       sizeof(*as));
+	if (as == NULL)
 		return -ENOMEM;
-	}
+
 	as->magic = APM_BIOS_MAGIC;
 	as->event_tail = as->event_head = 0;
 	as->suspends_pending = as->standbys_pending = 0;
@@ -2313,16 +2313,16 @@ static int __init apm_init(void)
 	}
 
 	if (apm_info.disabled) {
-		printk(KERN_NOTICE "apm: disabled on user request.\n");
+		pr_notice("disabled on user request.\n");
 		return -ENODEV;
 	}
 	if ((num_online_cpus() > 1) && !power_off && !smp) {
-		printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+		pr_notice("disabled - APM is not SMP safe.\n");
 		apm_info.disabled = 1;
 		return -ENODEV;
 	}
 	if (!acpi_disabled) {
-		printk(KERN_NOTICE "apm: overridden by ACPI.\n");
+		pr_notice("overridden by ACPI.\n");
 		apm_info.disabled = 1;
 		return -ENODEV;
 	}
@@ -2356,8 +2356,7 @@ static int __init apm_init(void)
 
 	kapmd_task = kthread_create(apm, NULL, "kapmd");
 	if (IS_ERR(kapmd_task)) {
-		printk(KERN_ERR "apm: disabled - Unable to start kernel "
-				"thread.\n");
+		pr_err("disabled - Unable to start kernel thread\n");
 		err = PTR_ERR(kapmd_task);
 		kapmd_task = NULL;
 		remove_proc_entry("apm", NULL);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 46674fbb62ba..c97bb7b5a9f8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -55,8 +55,8 @@ static void __init check_fpu(void)
 
 	if (!boot_cpu_data.hard_math) {
 #ifndef CONFIG_MATH_EMULATION
-		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
-		printk(KERN_EMERG "Giving up.\n");
+		pr_emerg("No coprocessor found and no math emulation present\n");
+		pr_emerg("Giving up\n");
 		for (;;) ;
 #endif
 		return;
@@ -86,7 +86,7 @@ static void __init check_fpu(void)
 
 	boot_cpu_data.fdiv_bug = fdiv_bug;
 	if (boot_cpu_data.fdiv_bug)
-		printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
+		pr_warn("Hmm, FPU with FDIV bug\n");
 }
 
 static void __init check_hlt(void)
@@ -94,16 +94,16 @@ static void __init check_hlt(void)
 	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
 		return;
 
-	printk(KERN_INFO "Checking 'hlt' instruction... ");
+	pr_info("Checking 'hlt' instruction... ");
 	if (!boot_cpu_data.hlt_works_ok) {
-		printk("disabled\n");
+		pr_cont("disabled\n");
 		return;
 	}
 	halt();
 	halt();
 	halt();
 	halt();
-	printk(KERN_CONT "OK.\n");
+	pr_cont("OK\n");
 }
 
 /*
@@ -116,7 +116,7 @@ static void __init check_popad(void)
 #ifndef CONFIG_X86_POPAD_OK
 	int res, inp = (int) &res;
 
-	printk(KERN_INFO "Checking for popad bug... ");
+	pr_info("Checking for popad bug... ");
 	__asm__ __volatile__(
 	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
 	  : "=&a" (res)
@@ -127,9 +127,9 @@ static void __init check_popad(void)
 	 * CPU hard. Too bad.
 	 */
 	if (res != 12345678)
-		printk(KERN_CONT "Buggy.\n");
+		pr_cont("Buggy\n");
 	else
-		printk(KERN_CONT "OK.\n");
+		pr_cont("OK\n");
 #endif
 }
 
@@ -161,7 +161,7 @@ void __init check_bugs(void)
 {
 	identify_boot_cpu();
 #ifndef CONFIG_SMP
-	printk(KERN_INFO "CPU: ");
+	pr_info("CPU: ");
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index da27c5d2168a..5a5a5dc1ff15 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -7,6 +7,9 @@
  * Copyright 2008 Intel Corporation
  * Author: Andi Kleen
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/thread_info.h>
 #include <linux/capability.h>
 #include <linux/miscdevice.h>
@@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)
 				cpu_relax();
 
 				if (!m->finished && retries >= 4) {
-					pr_err("MCE: skipping error being logged currently!\n");
+					pr_err("skipping error being logged currently!\n");
 					break;
 				}
 			}
@@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 {
 	/* mce_severity() should not hand us an ACTION_REQUIRED error */
 	BUG_ON(flags & MF_ACTION_REQUIRED);
-	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
-		"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+	pr_err("Uncorrected memory error in page 0x%lx ignored\n"
+	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
+	       pfn);
 
 	return 0;
 }
@@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
 
 	b = cap & MCG_BANKCNT_MASK;
 	if (!banks)
-		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+		pr_info("CPU supports %d MCE banks\n", b);
 
 	if (b > MAX_NR_BANKS) {
-		printk(KERN_WARNING
-		       "MCE: Using only %u machine check banks out of %u\n",
+		pr_warn("Using only %u machine check banks out of %u\n",
 			MAX_NR_BANKS, b);
 		b = MAX_NR_BANKS;
 	}
@@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void)
 static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
-		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
+		pr_info("unknown CPU type - not enabling MCE support\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void)
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
 	       smp_processor_id());
 }
 
@@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str)
 			get_option(&str, &monarch_timeout);
 		}
 	} else {
-		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
-		       str);
+		pr_info("mce argument %s ignored. Please use /sys\n", str);
 		return 0;
 	}
 	return 1;
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index bdda2e6c673b..35ffda5d0727 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
 
 		/* Compute the maximum size with which we can make a range: */
 		if (range_startk)
-			max_align = ffs(range_startk) - 1;
+			max_align = __ffs(range_startk);
 		else
-			max_align = 32;
+			max_align = BITS_PER_LONG - 1;
 
-		align = fls(range_sizek) - 1;
+		align = __fls(range_sizek);
 		if (align > max_align)
 			align = max_align;
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 75772ae6c65f..e9fe907cd249 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)
 	}
 	pr_debug("MTRR variable ranges %sabled:\n",
 		 mtrr_state.enabled & 2 ? "en" : "dis");
-	if (size_or_mask & 0xffffffffUL)
-		high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
-	else
-		high_width = ffs(size_or_mask>>32) + 32 - 1;
-	high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
+	high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
 
 	for (i = 0; i < num_var_ranges; ++i) {
 		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a15df4be151f..821d53b696d1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -374,7 +374,7 @@ struct x86_pmu {
 	/*
 	 * Intel DebugStore bits
 	 */
-	int		bts		:1,
+	unsigned int	bts		:1,
 			bts_active	:1,
 			pebs		:1,
 			pebs_active	:1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1f4c8add6759..382366977d4c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -5,6 +5,8 @@
  * among events on a single PMU.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -136,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event)
 	return intel_perfmon_event_map[hw_event];
 }
 
+#define SNB_DMND_DATA_RD	(1ULL << 0)
+#define SNB_DMND_RFO		(1ULL << 1)
+#define SNB_DMND_IFETCH		(1ULL << 2)
+#define SNB_DMND_WB		(1ULL << 3)
+#define SNB_PF_DATA_RD		(1ULL << 4)
+#define SNB_PF_RFO		(1ULL << 5)
+#define SNB_PF_IFETCH		(1ULL << 6)
+#define SNB_LLC_DATA_RD		(1ULL << 7)
+#define SNB_LLC_RFO		(1ULL << 8)
+#define SNB_LLC_IFETCH		(1ULL << 9)
+#define SNB_BUS_LOCKS		(1ULL << 10)
+#define SNB_STRM_ST		(1ULL << 11)
+#define SNB_OTHER		(1ULL << 15)
+#define SNB_RESP_ANY		(1ULL << 16)
+#define SNB_NO_SUPP		(1ULL << 17)
+#define SNB_LLC_HITM		(1ULL << 18)
+#define SNB_LLC_HITE		(1ULL << 19)
+#define SNB_LLC_HITS		(1ULL << 20)
+#define SNB_LLC_HITF		(1ULL << 21)
+#define SNB_LOCAL		(1ULL << 22)
+#define SNB_REMOTE		(0xffULL << 23)
+#define SNB_SNP_NONE		(1ULL << 31)
+#define SNB_SNP_NOT_NEEDED	(1ULL << 32)
+#define SNB_SNP_MISS		(1ULL << 33)
+#define SNB_NO_FWD		(1ULL << 34)
+#define SNB_SNP_FWD		(1ULL << 35)
+#define SNB_HITM		(1ULL << 36)
+#define SNB_NON_DRAM		(1ULL << 37)
+
+#define SNB_DMND_READ		(SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE		(SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+				 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+				 SNB_HITM)
+
+#define SNB_DRAM_ANY		(SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE		(SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS		SNB_RESP_ANY
+#define SNB_L3_MISS		(SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+	},
+ },
+};
+
 static __initconst const u64 snb_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -233,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids
  },
  [ C(NODE) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
 
@@ -1000,7 +1080,7 @@ static void intel_pmu_reset(void)
 
 	local_irq_save(flags);
 
-	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
@@ -1707,7 +1787,7 @@ static __init void intel_clovertown_quirk(void)
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
 	 */
-	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+	pr_warn("PEBS disabled due to CPU errata\n");
 	x86_pmu.pebs = 0;
 	x86_pmu.pebs_constraints = NULL;
 }
@@ -1781,8 +1861,8 @@ static __init void intel_arch_events_quirk(void)
 	/* disable event that reported as not presend by cpuid */
 	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
 		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
-		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
-				intel_arch_events_map[bit].name);
+		pr_warn("CPUID marked event: \'%s\' unavailable\n",
+			intel_arch_events_map[bit].name);
 	}
 }
 
@@ -1801,7 +1881,7 @@ static __init void intel_nehalem_quirk(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
 		ebx.split.no_branch_misses_retired = 0;
 		x86_pmu.events_maskl = ebx.full;
-		printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+		pr_info("CPU erratum AAJ80 worked around\n");
 	}
 }
 
@@ -1962,6 +2042,8 @@ __init int intel_pmu_init(void)
 	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_snb();
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 19faffc60886..7563fda9f033 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -18,6 +18,7 @@ static struct event_constraint constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
 DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
 DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
 DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -33,10 +34,81 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
 DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
 DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
 DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+
+static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+	u64 count;
+
+	rdmsrl(event->hw.event_base, count);
+
+	return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ */
+static struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct intel_uncore_extra_reg *er;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+	unsigned long flags;
+	bool ok = false;
+
+	/*
+	 * reg->alloc can be set due to existing state, so for fake box we
+	 * need to ignore this, otherwise we might fail to allocate proper
+	 * fake state for this extra reg constraint.
+	 */
+	if (reg1->idx == EXTRA_REG_NONE ||
+	    (!uncore_box_is_fake(box) && reg1->alloc))
+		return NULL;
+
+	er = &box->shared_regs[reg1->idx];
+	raw_spin_lock_irqsave(&er->lock, flags);
+	if (!atomic_read(&er->ref) ||
+	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
+		atomic_inc(&er->ref);
+		er->config1 = reg1->config;
+		er->config2 = reg2->config;
+		ok = true;
+	}
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+
+	if (ok) {
+		if (!uncore_box_is_fake(box))
+			reg1->alloc = 1;
+		return NULL;
+	}
+
+	return &constraint_empty;
+}
+
+static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct intel_uncore_extra_reg *er;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+	/*
+	 * Only put constraint if extra reg was actually allocated. Also
+	 * takes care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake box we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent box
+	 * state either since it will be thrown out.
+	 */
+	if (uncore_box_is_fake(box) || !reg1->alloc)
+		return;
+
+	er = &box->shared_regs[reg1->idx];
+	atomic_dec(&er->ref);
+	reg1->alloc = 0;
+}
 
 /* Sandy Bridge-EP uncore support */
 static struct intel_uncore_type snbep_uncore_cbox;
@@ -64,18 +136,15 @@ static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
 	pci_write_config_dword(pdev, box_ctl, config);
 }
 
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct pci_dev *pdev = box->pci_dev;
 	struct hw_perf_event *hwc = &event->hw;
 
-	pci_write_config_dword(pdev, hwc->config_base, hwc->config |
-				SNBEP_PMON_CTL_EN);
+	pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
 }
 
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct pci_dev *pdev = box->pci_dev;
 	struct hw_perf_event *hwc = &event->hw;
@@ -83,8 +152,7 @@ static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
 	pci_write_config_dword(pdev, hwc->config_base, hwc->config);
 }
 
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
-					struct perf_event *event)
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct pci_dev *pdev = box->pci_dev;
 	struct hw_perf_event *hwc = &event->hw;
@@ -92,14 +160,15 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
 
 	pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
 	pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
 	return count;
 }
 
 static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
 {
 	struct pci_dev *pdev = box->pci_dev;
-	pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL,
-				SNBEP_PMON_BOX_CTL_INT);
+
+	pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
 }
 
 static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
@@ -112,7 +181,6 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
 		rdmsrl(msr, config);
 		config |= SNBEP_PMON_BOX_CTL_FRZ;
 		wrmsrl(msr, config);
-		return;
 	}
 }
 
@@ -126,12 +194,10 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
 		rdmsrl(msr, config);
 		config &= ~SNBEP_PMON_BOX_CTL_FRZ;
 		wrmsrl(msr, config);
-		return;
 	}
 }
 
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
@@ -150,68 +216,15 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
 	wrmsrl(hwc->config_base, hwc->config);
 }
 
-static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box,
-					struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	u64 count;
-
-	rdmsrl(hwc->event_base, count);
-	return count;
-}
-
 static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
 {
 	unsigned msr = uncore_msr_box_ctl(box);
+
 	if (msr)
 		wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
 }
 
-static struct event_constraint *
-snbep_uncore_get_constraint(struct intel_uncore_box *box,
-			    struct perf_event *event)
-{
-	struct intel_uncore_extra_reg *er;
-	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-	unsigned long flags;
-	bool ok = false;
-
-	if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
-		return NULL;
-
-	er = &box->shared_regs[reg1->idx];
-	raw_spin_lock_irqsave(&er->lock, flags);
-	if (!atomic_read(&er->ref) || er->config1 == reg1->config) {
-		atomic_inc(&er->ref);
-		er->config1 = reg1->config;
-		ok = true;
-	}
-	raw_spin_unlock_irqrestore(&er->lock, flags);
-
-	if (ok) {
-		if (box->phys_id >= 0)
-			reg1->alloc = 1;
-		return NULL;
-	}
-	return &constraint_empty;
-}
-
-static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
-					struct perf_event *event)
-{
-	struct intel_uncore_extra_reg *er;
-	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-
-	if (box->phys_id < 0 || !reg1->alloc)
-		return;
-
-	er = &box->shared_regs[reg1->idx];
-	atomic_dec(&er->ref);
-	reg1->alloc = 0;
-}
-
-static int snbep_uncore_hw_config(struct intel_uncore_box *box,
-				  struct perf_event *event)
+static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
@@ -221,14 +234,16 @@ static int snbep_uncore_hw_config(struct intel_uncore_box *box,
 			SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
 		reg1->config = event->attr.config1 &
 			SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
-	} else if (box->pmu->type == &snbep_uncore_pcu) {
-		reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
-		reg1->config = event->attr.config1 &
-			SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
 	} else {
-		return 0;
+		if (box->pmu->type == &snbep_uncore_pcu) {
+			reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+			reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
+		} else {
+			return 0;
+		}
 	}
 	reg1->idx = 0;
+
 	return 0;
 }
 
@@ -272,10 +287,19 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
 	&format_attr_thresh5.attr,
 	&format_attr_occ_invert.attr,
 	&format_attr_occ_edge.attr,
-	&format_attr_filter_brand0.attr,
-	&format_attr_filter_brand1.attr,
-	&format_attr_filter_brand2.attr,
-	&format_attr_filter_brand3.attr,
+	&format_attr_filter_band0.attr,
+	&format_attr_filter_band1.attr,
+	&format_attr_filter_band2.attr,
+	&format_attr_filter_band3.attr,
+	NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+	&format_attr_event_ext.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
 	NULL,
 };
 
@@ -314,15 +338,20 @@ static struct attribute_group snbep_uncore_pcu_format_group = {
 	.attrs = snbep_uncore_pcu_formats_attr,
 };
 
+static struct attribute_group snbep_uncore_qpi_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_qpi_formats_attr,
+};
+
 static struct intel_uncore_ops snbep_uncore_msr_ops = {
 	.init_box	= snbep_uncore_msr_init_box,
 	.disable_box	= snbep_uncore_msr_disable_box,
 	.enable_box	= snbep_uncore_msr_enable_box,
 	.disable_event	= snbep_uncore_msr_disable_event,
 	.enable_event	= snbep_uncore_msr_enable_event,
-	.read_counter	= snbep_uncore_msr_read_counter,
-	.get_constraint = snbep_uncore_get_constraint,
-	.put_constraint = snbep_uncore_put_constraint,
+	.read_counter	= uncore_msr_read_counter,
+	.get_constraint = uncore_get_constraint,
+	.put_constraint = uncore_put_constraint,
 	.hw_config	= snbep_uncore_hw_config,
 };
 
@@ -485,8 +514,13 @@ static struct intel_uncore_type snbep_uncore_qpi = {
 	.num_counters   = 4,
 	.num_boxes	= 2,
 	.perf_ctr_bits	= 48,
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
+	.ops		= &snbep_uncore_pci_ops,
 	.event_descs	= snbep_uncore_qpi_events,
-	SNBEP_UNCORE_PCI_COMMON_INIT(),
+	.format_group	= &snbep_uncore_qpi_format_group,
 };
 
 
@@ -603,10 +637,8 @@ static void snbep_pci2phy_map_init(void)
 }
 /* end of Sandy Bridge-EP uncore support */
 
-
 /* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -616,20 +648,11 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
 		wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
 }
 
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	wrmsrl(event->hw.config_base, 0);
 }
 
-static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
-					struct perf_event *event)
-{
-	u64 count;
-	rdmsrl(event->hw.event_base, count);
-	return count;
-}
-
 static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0) {
@@ -648,15 +671,15 @@ static struct attribute *snb_uncore_formats_attr[] = {
 };
 
 static struct attribute_group snb_uncore_format_group = {
-	.name = "format",
-	.attrs = snb_uncore_formats_attr,
+	.name		= "format",
+	.attrs		= snb_uncore_formats_attr,
 };
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
-	.read_counter	= snb_uncore_msr_read_counter,
+	.read_counter	= uncore_msr_read_counter,
 };
 
 static struct event_constraint snb_uncore_cbox_constraints[] = {
@@ -697,12 +720,10 @@ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
 
 static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
 {
-	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
-		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
 }
 
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -744,7 +765,7 @@ static struct intel_uncore_ops nhm_uncore_msr_ops = {
 	.enable_box	= nhm_uncore_msr_enable_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= nhm_uncore_msr_enable_event,
-	.read_counter	= snb_uncore_msr_read_counter,
+	.read_counter	= uncore_msr_read_counter,
 };
 
 static struct intel_uncore_type nhm_uncore = {
@@ -769,8 +790,1041 @@ static struct intel_uncore_type *nhm_msr_uncores[] = {
 };
 /* end of Nehalem uncore support */
 
-static void uncore_assign_hw_event(struct intel_uncore_box *box,
-				struct perf_event *event, int idx)
+/* Nehalem-EX uncore support */
+#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
+				((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(mm_cfg, mm_cfg, "config:63");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+	unsigned msr = uncore_msr_box_ctl(box);
+	u64 config;
+
+	if (msr) {
+		rdmsrl(msr, config);
+		config &= ~((1ULL << uncore_num_counters(box)) - 1);
+		/* WBox has a fixed counter */
+		if (uncore_msr_fixed_ctl(box))
+			config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+		wrmsrl(msr, config);
+	}
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	unsigned msr = uncore_msr_box_ctl(box);
+	u64 config;
+
+	if (msr) {
+		rdmsrl(msr, config);
+		config |= (1ULL << uncore_num_counters(box)) - 1;
+		/* WBox has a fixed counter */
+		if (uncore_msr_fixed_ctl(box))
+			config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+		wrmsrl(msr, config);
+	}
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+		wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+	else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+		wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+	else
+		wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT()				\
+	.init_box	= nhmex_uncore_msr_init_box,		\
+	.disable_box	= nhmex_uncore_msr_disable_box,		\
+	.enable_box	= nhmex_uncore_msr_enable_box,		\
+	.disable_event	= nhmex_uncore_msr_disable_event,	\
+	.read_counter	= uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+	NHMEX_UNCORE_OPS_COMMON_INIT(),
+	.enable_event	= nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_edge.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+	.name		= "format",
+	.attrs		= nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+	.name		= "ubox",
+	.num_counters	= 1,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.event_ctl	= NHMEX_U_MSR_PMON_EV_SEL,
+	.perf_ctr	= NHMEX_U_MSR_PMON_CTR,
+	.event_mask	= NHMEX_U_PMON_RAW_EVENT_MASK,
+	.box_ctl	= NHMEX_U_MSR_PMON_GLOBAL_CTL,
+	.ops		= &nhmex_uncore_ops,
+	.format_group	= &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+	.name = "format",
+	.attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+	.name			= "cbox",
+	.num_counters		= 6,
+	.num_boxes		= 8,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_C0_MSR_PMON_EV_SEL0,
+	.perf_ctr		= NHMEX_C0_MSR_PMON_CTR0,
+	.event_mask		= NHMEX_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+	.msr_offset		= NHMEX_C_MSR_OFFSET,
+	.pair_ctr_ctl		= 1,
+	.ops			= &nhmex_uncore_ops,
+	.format_group		= &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+	.name			= "wbox",
+	.num_counters		= 4,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_W_MSR_PMON_CNT0,
+	.perf_ctr		= NHMEX_W_MSR_PMON_EVT_SEL0,
+	.fixed_ctr		= NHMEX_W_MSR_PMON_FIXED_CTR,
+	.fixed_ctl		= NHMEX_W_MSR_PMON_FIXED_CTL,
+	.event_mask		= NHMEX_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_W_MSR_GLOBAL_CTL,
+	.pair_ctr_ctl		= 1,
+	.event_descs		= nhmex_uncore_wbox_events,
+	.ops			= &nhmex_uncore_ops,
+	.format_group		= &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+	int ctr, ev_sel;
+
+	ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+		NHMEX_B_PMON_CTR_SHIFT;
+	ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+		  NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+	/* events that do not use the match/mask registers */
+	if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+	    (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+		return 0;
+
+	if (box->pmu->pmu_idx == 0)
+		reg1->reg = NHMEX_B0_MSR_MATCH;
+	else
+		reg1->reg = NHMEX_B1_MSR_MATCH;
+	reg1->idx = 0;
+	reg1->config = event->attr.config1;
+	reg2->config = event->attr.config2;
+	return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+	if (reg1->idx != EXTRA_REG_NONE) {
+		wrmsrl(reg1->reg, reg1->config);
+		wrmsrl(reg1->reg + 1, reg2->config);
+	}
+	wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+		(hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+	EVENT_CONSTRAINT(0 , 1, 0xc0),
+	EVENT_CONSTRAINT(0x40, 2, 0xc0),
+	EVENT_CONSTRAINT(0x80, 4, 0xc0),
+	EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+	EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+	&format_attr_event5.attr,
+	&format_attr_counter.attr,
+	&format_attr_match.attr,
+	&format_attr_mask.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+	.name = "format",
+	.attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+	NHMEX_UNCORE_OPS_COMMON_INIT(),
+	.enable_event		= nhmex_bbox_msr_enable_event,
+	.hw_config		= nhmex_bbox_hw_config,
+	.get_constraint		= uncore_get_constraint,
+	.put_constraint		= uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+	.name			= "bbox",
+	.num_counters		= 4,
+	.num_boxes		= 2,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_B0_MSR_PMON_CTL0,
+	.perf_ctr		= NHMEX_B0_MSR_PMON_CTR0,
+	.event_mask		= NHMEX_B_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+	.msr_offset		= NHMEX_B_MSR_OFFSET,
+	.pair_ctr_ctl		= 1,
+	.num_shared_regs	= 1,
+	.constraints		= nhmex_uncore_bbox_constraints,
+	.ops			= &nhmex_uncore_bbox_ops,
+	.format_group		= &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+	if (event->attr.config & NHMEX_S_PMON_MM_CFG_EN) {
+		reg1->config = event->attr.config1;
+		reg2->config = event->attr.config2;
+	} else {
+		reg1->config = ~0ULL;
+		reg2->config = ~0ULL;
+	}
+
+	if (box->pmu->pmu_idx == 0)
+		reg1->reg = NHMEX_S0_MSR_MM_CFG;
+	else
+		reg1->reg = NHMEX_S1_MSR_MM_CFG;
+
+	reg1->idx = 0;
+
+	return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+	wrmsrl(reg1->reg, 0);
+	if (reg1->config != ~0ULL || reg2->config != ~0ULL) {
+		wrmsrl(reg1->reg + 1, reg1->config);
+		wrmsrl(reg1->reg + 2, reg2->config);
+		wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+	}
+	wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	&format_attr_mm_cfg.attr,
+	&format_attr_match.attr,
+	&format_attr_mask.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+	.name			= "format",
+	.attrs			= nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+	NHMEX_UNCORE_OPS_COMMON_INIT(),
+	.enable_event		= nhmex_sbox_msr_enable_event,
+	.hw_config		= nhmex_sbox_hw_config,
+	.get_constraint		= uncore_get_constraint,
+	.put_constraint		= uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+	.name			= "sbox",
+	.num_counters		= 4,
+	.num_boxes		= 2,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_S0_MSR_PMON_CTL0,
+	.perf_ctr		= NHMEX_S0_MSR_PMON_CTR0,
+	.event_mask		= NHMEX_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+	.msr_offset		= NHMEX_S_MSR_OFFSET,
+	.pair_ctr_ctl		= 1,
+	.num_shared_regs	= 1,
+	.ops			= &nhmex_uncore_sbox_ops,
+	.format_group		= &nhmex_uncore_sbox_format_group
+};
+
+enum {
+	EXTRA_REG_NHMEX_M_FILTER,
+	EXTRA_REG_NHMEX_M_DSP,
+	EXTRA_REG_NHMEX_M_ISS,
+	EXTRA_REG_NHMEX_M_MAP,
+	EXTRA_REG_NHMEX_M_MSC_THR,
+	EXTRA_REG_NHMEX_M_PGT,
+	EXTRA_REG_NHMEX_M_PLD,
+	EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+	MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+	MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+	MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+	MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+	/* event 0xa uses two extra registers */
+	MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+	MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+	MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+	/* events 0xd ~ 0x10 use the same extra register */
+	MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+	MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+	MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+	MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+	MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+	MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+	MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+	MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+	MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+	EVENT_EXTRA_END
+};
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+	struct intel_uncore_extra_reg *er;
+	unsigned long flags;
+	bool ret = false;
+	u64 mask;
+
+	if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+		er = &box->shared_regs[idx];
+		raw_spin_lock_irqsave(&er->lock, flags);
+		if (!atomic_read(&er->ref) || er->config == config) {
+			atomic_inc(&er->ref);
+			er->config = config;
+			ret = true;
+		}
+		raw_spin_unlock_irqrestore(&er->lock, flags);
+
+		return ret;
+	}
+	/*
+	 * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+	 * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+	 * fields which are shared.
+	 */
+	idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+	if (WARN_ON_ONCE(idx >= 4))
+		return false;
+
+	/* mask of the shared fields */
+	mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+	er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+	raw_spin_lock_irqsave(&er->lock, flags);
+	/* add mask of the non-shared field if it's in use */
+	if (__BITS_VALUE(atomic_read(&er->ref), idx, 8))
+		mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+
+	if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+		atomic_add(1 << (idx * 8), &er->ref);
+		mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+			NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+		er->config &= ~mask;
+		er->config |= (config & mask);
+		ret = true;
+	}
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+
+	return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_extra_reg *er;
+
+	if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+		er = &box->shared_regs[idx];
+		atomic_dec(&er->ref);
+		return;
+	}
+
+	idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+	er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+	atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+	u64 config = reg1->config;
+
+	/* get the non-shared control bits and shift them */
+	idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+	config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+	if (new_idx > orig_idx) {
+		idx = new_idx - orig_idx;
+		config <<= 3 * idx;
+	} else {
+		idx = orig_idx - new_idx;
+		config >>= 3 * idx;
+	}
+
+	/* add the shared control bits back */
+	config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+	if (modify) {
+		/* adjust the main event selector */
+		if (new_idx > orig_idx)
+			hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+		else
+			hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+		reg1->config = config;
+		reg1->idx = ~0xff | new_idx;
+	}
+	return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+	int i, idx[2], alloc = 0;
+	u64 config1 = reg1->config;
+
+	idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+	idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+	for (i = 0; i < 2; i++) {
+		if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+			idx[i] = 0xff;
+
+		if (idx[i] == 0xff)
+			continue;
+
+		if (!nhmex_mbox_get_shared_reg(box, idx[i],
+				__BITS_VALUE(config1, i, 32)))
+			goto fail;
+		alloc |= (0x1 << i);
+	}
+
+	/* for the match/mask registers */
+	if ((uncore_box_is_fake(box) || !reg2->alloc) &&
+	    !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+		goto fail;
+
+	/*
+	 * If it's a fake box -- as per validate_{group,event}() we
+	 * shouldn't touch event state and we can avoid doing so
+	 * since both will only call get_event_constraints() once
+	 * on each event, this avoids the need for reg->alloc.
+	 */
+	if (!uncore_box_is_fake(box)) {
+		if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+			nhmex_mbox_alter_er(event, idx[0], true);
+		reg1->alloc |= alloc;
+		reg2->alloc = 1;
+	}
+	return NULL;
+fail:
+	if (idx[0] != 0xff && !(alloc & 0x1) &&
+	    idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+		/*
+		 * events 0xd ~ 0x10 are functional identical, but are
+		 * controlled by different fields in the ZDP_CTL_FVC
+		 * register. If we failed to take one field, try the
+		 * rest 3 choices.
+		 */
+		BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+		idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+		idx[0] = (idx[0] + 1) % 4;
+		idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+		if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+			config1 = nhmex_mbox_alter_er(event, idx[0], false);
+			goto again;
+		}
+	}
+
+	if (alloc & 0x1)
+		nhmex_mbox_put_shared_reg(box, idx[0]);
+	if (alloc & 0x2)
+		nhmex_mbox_put_shared_reg(box, idx[1]);
+	return &constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+	if (uncore_box_is_fake(box))
+		return;
+
+	if (reg1->alloc & 0x1)
+		nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+	if (reg1->alloc & 0x2)
+		nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+	reg1->alloc = 0;
+
+	if (reg2->alloc) {
+		nhmex_mbox_put_shared_reg(box, reg2->idx);
+		reg2->alloc = 0;
+	}
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+	if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+		return er->idx;
+	return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+	struct extra_reg *er;
+	unsigned msr;
+	int reg_idx = 0;
+
+	if (WARN_ON_ONCE(reg1->idx != -1))
+		return -EINVAL;
+	/*
+	 * The mbox events may require 2 extra MSRs at the most. But only
+	 * the lower 32 bits in these MSRs are significant, so we can use
+	 * config1 to pass two MSRs' config.
+	 */
+	for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+		if (er->event != (event->hw.config & er->config_mask))
+			continue;
+		if (event->attr.config1 & ~er->valid_mask)
+			return -EINVAL;
+		if (er->idx == __BITS_VALUE(reg1->idx, 0, 8) ||
+		    er->idx == __BITS_VALUE(reg1->idx, 1, 8))
+			continue;
+		if (WARN_ON_ONCE(reg_idx >= 2))
+			return -EINVAL;
+
+		msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+		if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+			return -EINVAL;
+
+		/* always use the 32~63 bits to pass the PLD config */
+		if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+			reg_idx = 1;
+
+		reg1->idx &= ~(0xff << (reg_idx * 8));
+		reg1->reg &= ~(0xffff << (reg_idx * 16));
+		reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+		reg1->reg |= msr << (reg_idx * 16);
+		reg1->config = event->attr.config1;
+		reg_idx++;
+	}
+	/* use config2 to pass the filter config */
+	reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+	if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+		reg2->config = event->attr.config2;
+	else
+		reg2->config = ~0ULL;
+	if (box->pmu->pmu_idx == 0)
+		reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+	else
+		reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+
+	return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_extra_reg *er;
+	unsigned long flags;
+	u64 config;
+
+	if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+		return box->shared_regs[idx].config;
+
+	er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+	raw_spin_lock_irqsave(&er->lock, flags);
+	config = er->config;
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+	return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+	int idx;
+
+	idx = __BITS_VALUE(reg1->idx, 0, 8);
+	if (idx != 0xff)
+		wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+			nhmex_mbox_shared_reg_config(box, idx));
+	idx = __BITS_VALUE(reg1->idx, 1, 8);
+	if (idx != 0xff)
+		wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+			nhmex_mbox_shared_reg_config(box, idx));
+
+	wrmsrl(reg2->reg, 0);
+	if (reg2->config != ~0ULL) {
+		wrmsrl(reg2->reg + 1,
+			reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+		wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+			(reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+		wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+	}
+
+	wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode,	count_mode,	"config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode,	"config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode,	wrap_mode,	"config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode,	flag_mode,	"config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel,	inc_sel,	"config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel,	set_flag_sel,	"config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg,	filter_cfg,	"config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match,	filter_match,	"config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask,	filter_mask,	"config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp,		dsp,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr,		thr,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc,		fvc,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt,		pgt,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map,		map,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss,		iss,		"config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld,		pld,		"config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+	&format_attr_count_mode.attr,
+	&format_attr_storage_mode.attr,
+	&format_attr_wrap_mode.attr,
+	&format_attr_flag_mode.attr,
+	&format_attr_inc_sel.attr,
+	&format_attr_set_flag_sel.attr,
+	&format_attr_filter_cfg.attr,
+	&format_attr_filter_match.attr,
+	&format_attr_filter_mask.attr,
+	&format_attr_dsp.attr,
+	&format_attr_thr.attr,
+	&format_attr_fvc.attr,
+	&format_attr_pgt.attr,
+	&format_attr_map.attr,
+	&format_attr_iss.attr,
+	&format_attr_pld.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+	.name		= "format",
+	.attrs		= nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+	INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+	INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+	NHMEX_UNCORE_OPS_COMMON_INIT(),
+	.enable_event	= nhmex_mbox_msr_enable_event,
+	.hw_config	= nhmex_mbox_hw_config,
+	.get_constraint	= nhmex_mbox_get_constraint,
+	.put_constraint	= nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+	.name			= "mbox",
+	.num_counters		= 6,
+	.num_boxes		= 2,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_M0_MSR_PMU_CTL0,
+	.perf_ctr		= NHMEX_M0_MSR_PMU_CNT0,
+	.event_mask		= NHMEX_M_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_M0_MSR_GLOBAL_CTL,
+	.msr_offset		= NHMEX_M_MSR_OFFSET,
+	.pair_ctr_ctl		= 1,
+	.num_shared_regs	= 8,
+	.event_descs		= nhmex_uncore_mbox_events,
+	.ops			= &nhmex_uncore_mbox_ops,
+	.format_group		= &nhmex_uncore_mbox_format_group,
+};
+
+void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	int port;
+
+	/* adjust the main event selector */
+	if (reg1->idx % 2) {
+		reg1->idx--;
+		hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+	} else {
+		reg1->idx++;
+		hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+	}
+
+	/* adjust address or config of extra register */
+	port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
+	switch (reg1->idx % 6) {
+	case 0:
+		reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
+		break;
+	case 1:
+		reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
+		break;
+	case 2:
+		/* the 8~15 bits to the 0~7 bits */
+		reg1->config >>= 8;
+		break;
+	case 3:
+		/* the 0~7 bits to the 8~15 bits */
+		reg1->config <<= 8;
+		break;
+	case 4:
+		reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
+		break;
+	case 5:
+		reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
+		break;
+	};
+}
+
+/*
+ * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7.
+ * An event set consists of 6 events, the 3rd and 4th events in
+ * an event set use the same extra register. So an event set uses
+ * 5 extra registers.
+ */
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+	struct intel_uncore_extra_reg *er;
+	unsigned long flags;
+	int idx, er_idx;
+	u64 config1;
+	bool ok = false;
+
+	if (!uncore_box_is_fake(box) && reg1->alloc)
+		return NULL;
+
+	idx = reg1->idx % 6;
+	config1 = reg1->config;
+again:
+	er_idx = idx;
+	/* the 3rd and 4th events use the same extra register */
+	if (er_idx > 2)
+		er_idx--;
+	er_idx += (reg1->idx / 6) * 5;
+
+	er = &box->shared_regs[er_idx];
+	raw_spin_lock_irqsave(&er->lock, flags);
+	if (idx < 2) {
+		if (!atomic_read(&er->ref) || er->config == reg1->config) {
+			atomic_inc(&er->ref);
+			er->config = reg1->config;
+			ok = true;
+		}
+	} else if (idx == 2 || idx == 3) {
+		/*
+		 * these two events use different fields in a extra register,
+		 * the 0~7 bits and the 8~15 bits respectively.
+		 */
+		u64 mask = 0xff << ((idx - 2) * 8);
+		if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+				!((er->config ^ config1) & mask)) {
+			atomic_add(1 << ((idx - 2) * 8), &er->ref);
+			er->config &= ~mask;
+			er->config |= config1 & mask;
+			ok = true;
+		}
+	} else {
+		if (!atomic_read(&er->ref) ||
+				(er->config == (hwc->config >> 32) &&
+				 er->config1 == reg1->config &&
+				 er->config2 == reg2->config)) {
+			atomic_inc(&er->ref);
+			er->config = (hwc->config >> 32);
+			er->config1 = reg1->config;
+			er->config2 = reg2->config;
+			ok = true;
+		}
+	}
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+
+	if (!ok) {
+		/*
+		 * The Rbox events are always in pairs. The paired
+		 * events are functional identical, but use different
+		 * extra registers. If we failed to take an extra
+		 * register, try the alternative.
+		 */
+		if (idx % 2)
+			idx--;
+		else
+			idx++;
+		if (idx != reg1->idx % 6) {
+			if (idx == 2)
+				config1 >>= 8;
+			else if (idx == 3)
+				config1 <<= 8;
+			goto again;
+		}
+	} else {
+		if (!uncore_box_is_fake(box)) {
+			if (idx != reg1->idx % 6)
+				nhmex_rbox_alter_er(box, event);
+			reg1->alloc = 1;
+		}
+		return NULL;
+	}
+	return &constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct intel_uncore_extra_reg *er;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	int idx, er_idx;
+
+	if (uncore_box_is_fake(box) || !reg1->alloc)
+		return;
+
+	idx = reg1->idx % 6;
+	er_idx = idx;
+	if (er_idx > 2)
+		er_idx--;
+	er_idx += (reg1->idx / 6) * 5;
+
+	er = &box->shared_regs[er_idx];
+	if (idx == 2 || idx == 3)
+		atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+	else
+		atomic_dec(&er->ref);
+
+	reg1->alloc = 0;
+}
+
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+	int port, idx;
+
+	idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+		NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+	if (idx >= 0x18)
+		return -EINVAL;
+
+	reg1->idx = idx;
+	reg1->config = event->attr.config1;
+
+	port = idx / 6 + box->pmu->pmu_idx * 4;
+	idx %= 6;
+	switch (idx) {
+	case 0:
+		reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
+		break;
+	case 1:
+		reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
+		break;
+	case 2:
+	case 3:
+		reg1->reg = NHMEX_R_MSR_PORTN_QLX_CFG(port);
+		break;
+	case 4:
+	case 5:
+		if (idx == 4)
+			reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
+		else
+			reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
+		reg2->config = event->attr.config2;
+		hwc->config |= event->attr.config & (~0ULL << 32);
+		break;
+	};
+	return 0;
+}
+
+static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_extra_reg *er;
+	unsigned long flags;
+	u64 config;
+
+	er = &box->shared_regs[idx];
+
+	raw_spin_lock_irqsave(&er->lock, flags);
+	config = er->config;
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+
+	return config;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+	int idx, er_idx;
+
+	idx = reg1->idx % 6;
+	er_idx = idx;
+	if (er_idx > 2)
+		er_idx--;
+	er_idx += (reg1->idx / 6) * 5;
+
+	switch (idx) {
+	case 0:
+	case 1:
+		wrmsrl(reg1->reg, reg1->config);
+		break;
+	case 2:
+	case 3:
+		wrmsrl(reg1->reg, nhmex_rbox_shared_reg_config(box, er_idx));
+		break;
+	case 4:
+	case 5:
+		wrmsrl(reg1->reg, reg1->config);
+		wrmsrl(reg1->reg + 1, hwc->config >> 32);
+		wrmsrl(reg1->reg + 2, reg2->config);
+		break;
+	};
+
+	wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+		(hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+	&format_attr_event5.attr,
+	&format_attr_xbr_mm_cfg.attr,
+	&format_attr_xbr_match.attr,
+	&format_attr_xbr_mask.attr,
+	&format_attr_qlx_cfg.attr,
+	&format_attr_iperf_cfg.attr,
+	NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+	.name = "format",
+	.attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+	INTEL_UNCORE_EVENT_DESC(qpi0_flit_send,		"event=0x0,iperf_cfg=0x80000000"),
+	INTEL_UNCORE_EVENT_DESC(qpi1_filt_send,		"event=0x6,iperf_cfg=0x80000000"),
+	INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt,		"event=0x0,iperf_cfg=0x40000000"),
+	INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt,		"event=0x6,iperf_cfg=0x40000000"),
+	INTEL_UNCORE_EVENT_DESC(qpi0_date_response,	"event=0x0,iperf_cfg=0xc4"),
+	INTEL_UNCORE_EVENT_DESC(qpi1_date_response,	"event=0x6,iperf_cfg=0xc4"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+	NHMEX_UNCORE_OPS_COMMON_INIT(),
+	.enable_event		= nhmex_rbox_msr_enable_event,
+	.hw_config		= nhmex_rbox_hw_config,
+	.get_constraint		= nhmex_rbox_get_constraint,
+	.put_constraint		= nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+	.name			= "rbox",
+	.num_counters		= 8,
+	.num_boxes		= 2,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= NHMEX_R_MSR_PMON_CTL0,
+	.perf_ctr		= NHMEX_R_MSR_PMON_CNT0,
+	.event_mask		= NHMEX_R_PMON_RAW_EVENT_MASK,
+	.box_ctl		= NHMEX_R_MSR_GLOBAL_CTL,
+	.msr_offset		= NHMEX_R_MSR_OFFSET,
+	.pair_ctr_ctl		= 1,
+	.num_shared_regs	= 20,
+	.event_descs		= nhmex_uncore_rbox_events,
+	.ops			= &nhmex_uncore_rbox_ops,
+	.format_group		= &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+	&nhmex_uncore_ubox,
+	&nhmex_uncore_cbox,
+	&nhmex_uncore_bbox,
+	&nhmex_uncore_sbox,
+	&nhmex_uncore_mbox,
+	&nhmex_uncore_rbox,
+	&nhmex_uncore_wbox,
+	NULL,
+};
+/* end of Nehalem-EX uncore support */
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -787,8 +1841,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
 	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
 }
 
-static void uncore_perf_event_update(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
 {
 	u64 prev_count, new_count, delta;
 	int shift;
@@ -858,14 +1911,12 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
 	box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
-					  int cpu)
+struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
 {
 	struct intel_uncore_box *box;
 	int i, size;
 
-	size = sizeof(*box) + type->num_shared_regs *
-		sizeof(struct intel_uncore_extra_reg);
+	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
 
 	box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
 	if (!box)
@@ -915,12 +1966,11 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
 	 * perf core schedules event on the basis of cpu, uncore events are
 	 * collected by one of the cpus inside a physical package.
 	 */
-	return uncore_pmu_to_box(uncore_event_to_pmu(event),
-				 smp_processor_id());
+	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
 }
 
-static int uncore_collect_events(struct intel_uncore_box *box,
-				struct perf_event *leader, bool dogrp)
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
 {
 	struct perf_event *event;
 	int n, max_count;
@@ -952,8 +2002,7 @@ static int uncore_collect_events(struct intel_uncore_box *box,
 }
 
 static struct event_constraint *
-uncore_get_event_constraint(struct intel_uncore_box *box,
-			    struct perf_event *event)
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
 {
 	struct intel_uncore_type *type = box->pmu->type;
 	struct event_constraint *c;
@@ -977,15 +2026,13 @@ uncore_get_event_constraint(struct intel_uncore_box *box,
 	return &type->unconstrainted;
 }
 
-static void uncore_put_event_constraint(struct intel_uncore_box *box,
-					struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
 {
 	if (box->pmu->type->ops->put_constraint)
 		box->pmu->type->ops->put_constraint(box, event);
 }
 
-static int uncore_assign_events(struct intel_uncore_box *box,
-				int assign[], int n)
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 {
 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
@@ -1407,8 +2454,7 @@ static bool pcidrv_registered;
 /*
  * add a pci uncore device
  */
-static int __devinit uncore_pci_add(struct intel_uncore_type *type,
-				    struct pci_dev *pdev)
+static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
 {
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
@@ -1485,6 +2531,7 @@ static int __devinit uncore_pci_probe(struct pci_dev *pdev,
 	struct intel_uncore_type *type;
 
 	type = (struct intel_uncore_type *)id->driver_data;
+
 	return uncore_pci_add(type, pdev);
 }
 
@@ -1612,8 +2659,8 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
 	return 0;
 }
 
-static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
-					    int old_cpu, int new_cpu)
+static void __cpuinit
+uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
 {
 	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu;
@@ -1694,8 +2741,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu)
 	uncore_change_context(pci_uncores, -1, cpu);
 }
 
-static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
-					 unsigned long action, void *hcpu)
+static int
+ __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
@@ -1732,12 +2779,12 @@ static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
 }
 
 static struct notifier_block uncore_cpu_nb __cpuinitdata = {
-	.notifier_call = uncore_cpu_notifier,
+	.notifier_call	= uncore_cpu_notifier,
 	/*
 	 * to migrate uncore events, our notifier should be executed
 	 * before perf core's notifier.
 	 */
-	.priority = CPU_PRI_PERF + 1,
+	.priority	= CPU_PRI_PERF + 1,
 };
 
 static void __init uncore_cpu_setup(void *dummy)
@@ -1767,6 +2814,9 @@ static int __init uncore_cpu_init(void)
 			snbep_uncore_cbox.num_boxes = max_cores;
 		msr_uncores = snbep_msr_uncores;
 		break;
+	case 46:
+		msr_uncores = nhmex_msr_uncores;
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index b13e9ea81def..f3851892e077 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -5,8 +5,6 @@
 #include "perf_event.h"
 
 #define UNCORE_PMU_NAME_LEN		32
-#define UNCORE_BOX_HASH_SIZE		8
-
 #define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC)
 
 #define UNCORE_FIXED_EVENT		0xff
@@ -115,6 +113,10 @@
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
 
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK	\
+				(SNBEP_PMON_RAW_EVENT_MASK | \
+				 SNBEP_PMON_CTL_EV_SEL_EXT)
+
 /* SNB-EP pci control register */
 #define SNBEP_PCI_PMON_BOX_CTL			0xf4
 #define SNBEP_PCI_PMON_CTL0			0xd8
@@ -158,6 +160,193 @@
 #define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc
 #define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd
 
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK	0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK	0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0		(1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET		(1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN		(1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22		(1 << 22)
+#define NHMEX_PMON_CTL_INVERT		(1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK	0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK	(NHMEX_PMON_CTL_EV_SEL_MASK | \
+					 NHMEX_PMON_CTL_UMASK_MASK | \
+					 NHMEX_PMON_CTL_EDGE_DET | \
+					 NHMEX_PMON_CTL_INVERT | \
+					 NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL		0xc00
+#define NHMEX_U_MSR_PMON_CTR			0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL			0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN			(1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL	0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL		(1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL		(1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL		(1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK		\
+		(NHMEX_PMON_CTL_EV_SEL_MASK |	\
+		 NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL		0xd00
+#define NHMEX_C0_MSR_PMON_CTR0			0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0		0xd10
+#define NHMEX_C_MSR_OFFSET			0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL		0xc20
+#define NHMEX_B0_MSR_PMON_CTR0			0xc31
+#define NHMEX_B0_MSR_PMON_CTL0			0xc30
+#define NHMEX_B_MSR_OFFSET			0x40
+#define NHMEX_B0_MSR_MATCH			0xe45
+#define NHMEX_B0_MSR_MASK			0xe46
+#define NHMEX_B1_MSR_MATCH			0xe4d
+#define NHMEX_B1_MSR_MASK			0xe4e
+
+#define NHMEX_B_PMON_CTL_EN			(1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT		1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK		\
+		(0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT		6
+#define NHMEX_B_PMON_CTR_MASK		\
+		(0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK		\
+		(NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+		 NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL		0xc40
+#define NHMEX_S0_MSR_PMON_CTR0			0xc51
+#define NHMEX_S0_MSR_PMON_CTL0			0xc50
+#define NHMEX_S_MSR_OFFSET			0x80
+#define NHMEX_S0_MSR_MM_CFG			0xe48
+#define NHMEX_S0_MSR_MATCH			0xe49
+#define NHMEX_S0_MSR_MASK			0xe4a
+#define NHMEX_S1_MSR_MM_CFG			0xe58
+#define NHMEX_S1_MSR_MATCH			0xe59
+#define NHMEX_S1_MSR_MASK			0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN			(0x1ULL << 63)
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL			0xca0
+#define NHMEX_M0_MSR_PMU_DSP			0xca5
+#define NHMEX_M0_MSR_PMU_ISS			0xca6
+#define NHMEX_M0_MSR_PMU_MAP			0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR		0xca8
+#define NHMEX_M0_MSR_PMU_PGT			0xca9
+#define NHMEX_M0_MSR_PMU_PLD			0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC		0xcab
+#define NHMEX_M0_MSR_PMU_CTL0			0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0			0xcb1
+#define NHMEX_M_MSR_OFFSET			0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG			0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG			0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN			(1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK		0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK		0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT		34
+
+#define NHMEX_M_PMON_CTL_EN			(1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN			(1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT	2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK	\
+	(0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT	4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK	\
+	(0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE		(1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE		(1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT		9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK		\
+	(0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT	19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK	\
+	(0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK			\
+		(NHMEX_M_PMON_CTL_COUNT_MODE_MASK |	\
+		 NHMEX_M_PMON_CTL_STORAGE_MODE_MASK |	\
+		 NHMEX_M_PMON_CTL_WRAP_MODE |		\
+		 NHMEX_M_PMON_CTL_FLAG_MODE |		\
+		 NHMEX_M_PMON_CTL_INC_SEL_MASK |	\
+		 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK	0x1f
+#define NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK	(0x7 << 5)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK	(0x7 << 8)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR	(1 << 23)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK			\
+		(NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK |	\
+		 NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK |	\
+		 NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK  |	\
+		 NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n)	(0x7 << (11 + 3 * (n)))
+
+/*
+ * use the 9~13 bits to select event If the 7th bit is not set,
+ * otherwise use the 19~21 bits to select event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+				NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+			   NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+				NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+		EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+				MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+		EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+				MBOX_SET_FLAG_SEL_MASK, \
+				(u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL			0xe00
+#define NHMEX_R_MSR_PMON_CTL0			0xe10
+#define NHMEX_R_MSR_PMON_CNT0			0xe11
+#define NHMEX_R_MSR_OFFSET			0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n)		\
+		((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n)		(0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n)		(0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n)		\
+		(((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n)	\
+		(0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n)	\
+		(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n)	\
+		(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n)	\
+		(0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n)	\
+		(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n)	\
+		(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN			(1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT		1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK		\
+		(0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN			(1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK		NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL			0xc80
+#define NHMEX_W_MSR_PMON_CNT0			0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0		0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR		0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL		0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN		(1ULL << 31)
+
 struct intel_uncore_ops;
 struct intel_uncore_pmu;
 struct intel_uncore_box;
@@ -178,6 +367,7 @@ struct intel_uncore_type {
 	unsigned msr_offset;
 	unsigned num_shared_regs:8;
 	unsigned single_fixed:1;
+	unsigned pair_ctr_ctl:1;
 	struct event_constraint unconstrainted;
 	struct event_constraint *constraints;
 	struct intel_uncore_pmu *pmus;
@@ -213,7 +403,7 @@ struct intel_uncore_pmu {
 
 struct intel_uncore_extra_reg {
 	raw_spinlock_t lock;
-	u64 config1;
+	u64 config, config1, config2;
 	atomic_t ref;
 };
 
@@ -323,14 +513,16 @@ unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
 static inline
 unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
 {
-	return idx + box->pmu->type->event_ctl +
+	return box->pmu->type->event_ctl +
+		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
 		box->pmu->type->msr_offset * box->pmu->pmu_idx;
 }
 
 static inline
 unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
 {
-	return idx + box->pmu->type->perf_ctr +
+	return box->pmu->type->perf_ctr +
+		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
 		box->pmu->type->msr_offset * box->pmu->pmu_idx;
 }
 
@@ -422,3 +614,8 @@ static inline void uncore_box_init(struct intel_uncore_box *box)
 			box->pmu->type->ops->init_box(box);
 	}
 }
+
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+	return (box->phys_id < 0);
+}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 571246d81edf..ae42418bc50f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,8 +27,8 @@ static int die_counter;
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%p>] %s%pB\n", (void *) address,
-			reliable ? "" : "? ", (void *) address);
+	pr_cont(" [<%p>] %s%pB\n",
+		(void *)address, reliable ? "" : "? ", (void *)address);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 		return 1;
 
+	print_modules();
 	show_regs(regs);
 #ifdef CONFIG_X86_32
 	if (user_mode_vm(regs)) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0b1d783daab..1038a417ea53 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (kstack_end(stack))
 			break;
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk(KERN_CONT "\n");
-		printk(KERN_CONT " %08lx", *stack++);
+			pr_cont("\n");
+		pr_cont(" %08lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs)
 {
 	int i;
 
-	print_modules();
 	__show_regs(regs, !user_mode_vm(regs));
 
-	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
-		TASK_COMM_LEN, current->comm, task_pid_nr(current),
-		current_thread_info(), current, task_thread_info(current));
+	pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
+		 TASK_COMM_LEN, current->comm, task_pid_nr(current),
+		 current_thread_info(), current, task_thread_info(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs)
 		unsigned char c;
 		u8 *ip;
 
-		printk(KERN_EMERG "Stack:\n");
+		pr_emerg("Stack:\n");
 		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
 
-		printk(KERN_EMERG "Code: ");
+		pr_emerg("Code:");
 
 		ip = (u8 *)regs->ip - code_prologue;
 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
@@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs)
 		for (i = 0; i < code_len; i++, ip++) {
 			if (ip < (u8 *)PAGE_OFFSET ||
 					probe_kernel_address(ip, c)) {
-				printk(KERN_CONT " Bad EIP value.");
+				pr_cont("  Bad EIP value.");
 				break;
 			}
 			if (ip == (u8 *)regs->ip)
-				printk(KERN_CONT "<%02x> ", c);
+				pr_cont(" <%02x>", c);
 			else
-				printk(KERN_CONT "%02x ", c);
+				pr_cont(" %02x", c);
 		}
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 791b76122aa8..b653675d5288 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (stack >= irq_stack && stack <= irq_stack_end) {
 			if (stack == irq_stack_end) {
 				stack = (unsigned long *) (irq_stack_end[-1]);
-				printk(KERN_CONT " <EOI> ");
+				pr_cont(" <EOI> ");
 			}
 		} else {
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk(KERN_CONT "\n");
-		printk(KERN_CONT " %016lx", *stack++);
+			pr_cont("\n");
+		pr_cont(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
 
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs)
 
 	sp = regs->sp;
 	printk("CPU %d ", cpu);
-	print_modules();
 	__show_regs(regs, 1);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n",
+	       cur->comm, cur->pid, task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs)
 		for (i = 0; i < code_len; i++, ip++) {
 			if (ip < (u8 *)PAGE_OFFSET ||
 					probe_kernel_address(ip, c)) {
-				printk(KERN_CONT " Bad RIP value.");
+				pr_cont(" Bad RIP value.");
 				break;
 			}
 			if (ip == (u8 *)regs->ip)
-				printk(KERN_CONT "<%02x> ", c);
+				pr_cont("<%02x> ", c);
 			else
-				printk(KERN_CONT "%02x ", c);
+				pr_cont("%02x ", c);
 		}
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38308fa7d7e5..459d4a0dca8d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1834,10 +1834,30 @@ end_repeat_nmi:
 	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
+
+	/*
+	 * Save off the CR2 register. If we take a page fault in the NMI then
+	 * it could corrupt the CR2 value. If the NMI preempts a page fault
+	 * handler before it was able to read the CR2 register, and then the
+	 * NMI itself takes a page fault, the page fault that was preempted
+	 * will read the information from the NMI page fault and not the
+	 * origin fault. Save it off and restore it if it changes.
+	 * Use the r12 callee-saved register.
+	 */
+	movq %cr2, %r12
+
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
+
+	/* Did the NMI take a page fault? Restore cr2 if it did */
+	movq %cr2, %rcx
+	cmpq %rcx, %r12
+	je 1f
+	movq %r12, %cr2
+1:
+	
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
 nmi_swapgs:
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3dafc6003b7c..1f5f1d5d2a02 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -294,9 +294,9 @@ void fixup_irqs(void)
 		raw_spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
-			printk("Broke affinity for irq %i\n", irq);
+			pr_notice("Broke affinity for irq %i\n", irq);
 		else if (!set_affinity)
-			printk("Cannot set affinity for irq %i\n", irq);
+			pr_notice("Cannot set affinity for irq %i\n", irq);
 	}
 
 	/*
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1649cf899ad6..4873e62db6a1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -514,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {
 
 #ifdef MODULE
 /* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id microcode_id[] = {
+static const struct x86_cpu_id __initconst microcode_id[] = {
 #ifdef CONFIG_MICROCODE_INTEL
 	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
 #endif
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f21fd94ac897..202494d2ec6e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -15,6 +15,9 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
@@ -30,9 +33,14 @@
 #include <asm/pgtable.h>
 
 #if 0
-#define DEBUGP printk
+#define DEBUGP(fmt, ...)				\
+	printk(KERN_DEBUG fmt, ##__VA_ARGS__)
 #else
-#define DEBUGP(fmt...)
+#define DEBUGP(fmt, ...)				\
+do {							\
+	if (0)						\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+} while (0)
 #endif
 
 void *module_alloc(unsigned long size)
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 	Elf32_Sym *sym;
 	uint32_t *location;
 
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
 		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 			*location += sym->st_value - (uint32_t)location;
 			break;
 		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			pr_err("%s: Unknown relocation: %u\n",
 			       me->name, ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	void *loc;
 	u64 val;
 
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
 		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			+ ELF64_R_SYM(rel[i].r_info);
 
 		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
-			(int)ELF64_R_TYPE(rel[i].r_info),
-			sym->st_value, rel[i].r_addend, (u64)loc);
+		       (int)ELF64_R_TYPE(rel[i].r_info),
+		       sym->st_value, rel[i].r_addend, (u64)loc);
 
 		val = sym->st_value + rel[i].r_addend;
 
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 #endif
 			break;
 		default:
-			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
+			pr_err("%s: Unknown rela relocation: %llu\n",
 			       me->name, ELF64_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	return 0;
 
 overflow:
-	printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
+	pr_err("overflow in relocation type %d val %Lx\n",
 	       (int)ELF64_R_TYPE(rel[i].r_info), val);
-	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
+	pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
 	       me->name);
 	return -ENOEXEC;
 }
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457be..f84f5c57de35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 /*
  * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C. Simply have 3 states
- * the NMI can be in.
+ * add a workaround to the iret problem in C (preventing nested
+ * NMIs if an NMI takes a trap). Simply have 3 states the NMI
+ * can be in:
  *
  *  1) not running
  *  2) executing
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
  * If an NMI hits a breakpoint that executes an iret, another
  * NMI can preempt it. We do not want to allow this new NMI
  * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the first NMI will perform
- * an cmpxchg on the state, and if it doesn't successfully
- * reset the state to "not running" it will restart the next
- * NMI.
+ * We set the state to "latched", and the exit of the first NMI will
+ * perform a dec_return, if the result is zero (NOT_RUNNING), then
+ * it will simply exit the NMI handler. If not, the dec_return
+ * would have set the state to NMI_EXECUTING (what we want it to
+ * be when we are running). In this case, we simply jump back
+ * to rerun the NMI handler again, and restart the 'latched' NMI.
+ *
+ * No trap (breakpoint or page fault) should be hit before nmi_restart,
+ * thus there is no race between the first check of state for NOT_RUNNING
+ * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
+ * at this point.
+ *
+ * In case the NMI takes a page fault, we need to save off the CR2
+ * because the NMI could have preempted another page fault and corrupt
+ * the CR2 that is about to be read. As nested NMIs must be restarted
+ * and they can not take breakpoints or page faults, the update of the
+ * CR2 must be done before converting the nmi state back to NOT_RUNNING.
+ * Otherwise, there would be a race of another nested NMI coming in
+ * after setting state to NOT_RUNNING but before updating the nmi_cr2.
  */
 enum nmi_states {
-	NMI_NOT_RUNNING,
+	NMI_NOT_RUNNING = 0,
 	NMI_EXECUTING,
 	NMI_LATCHED,
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
 #define nmi_nesting_preprocess(regs)					\
 	do {								\
-		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
-			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
+			this_cpu_write(nmi_state, NMI_LATCHED);		\
 			return;						\
 		}							\
-	nmi_restart:							\
-		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
-	} while (0)
+		this_cpu_write(nmi_state, NMI_EXECUTING);		\
+		this_cpu_write(nmi_cr2, read_cr2());			\
+	} while (0);							\
+	nmi_restart:
 
 #define nmi_nesting_postprocess()					\
 	do {								\
-		if (cmpxchg(&__get_cpu_var(nmi_state),			\
-		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
+			write_cr2(this_cpu_read(nmi_cr2));		\
+		if (this_cpu_dec_return(nmi_state))			\
 			goto nmi_restart;				\
 	} while (0)
 #else /* x86_64 */
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 149b8d9c6ad4..6d9582ec0324 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
+			__initdata);
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;
 
-	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
-				 NMI_FLAG_FIRST, "nmi_selftest")) {
+	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
 		nmi_fail = FAILURE;
 		return;
 	}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b72838bae64a..299d49302e7d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -22,6 +22,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#define pr_fmt(fmt) "Calgary: " fmt
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
 		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
 					  npages, 0, boundary_size, 0);
 		if (offset == ~0UL) {
-			printk(KERN_WARNING "Calgary: IOMMU full.\n");
+			pr_warn("IOMMU full\n");
 			spin_unlock_irqrestore(&tbl->it_lock, flags);
 			if (panic_on_overflow)
 				panic("Calgary: fix the allocator.\n");
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 	entry = iommu_range_alloc(dev, tbl, npages);
 
 	if (unlikely(entry == DMA_ERROR_CODE)) {
-		printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
-		       "iommu %p\n", npages, tbl);
+		pr_warn("failed to allocate %u pages in iommu %p\n",
+			npages, tbl);
 		return DMA_ERROR_CODE;
 	}
 
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)
 		i++;
 	} while ((val & 0xff) != 0xff && i < 100);
 	if (i == 100)
-		printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
-		       "continuing anyway\n");
+		pr_warn("PCI bus not quiesced, continuing anyway\n");
 
 	/* invalidate TCE cache */
 	target = calgary_reg(bbar, tar_offset(tbl->it_busno));
@@ -604,8 +605,7 @@ begin:
 		i++;
 	} while ((val64 & 0xff) != 0xff && i < 100);
 	if (i == 100)
-		printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, "
-		       "continuing anyway\n");
+		pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
 
 	/* 3. poll Page Migration DEBUG for SoftStopFault */
 	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
@@ -617,8 +617,7 @@ begin:
 		if (++count < 100)
 			goto begin;
 		else {
-			printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, "
-			       "aborting TCE cache flush sequence!\n");
+			pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
 			return; /* pray for the best */
 		}
 	}
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)
 	plssr = be32_to_cpu(readl(target));
 
 	/* If no error, the agent ID in the CSR is not valid */
-	printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, "
-	       "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr);
+	pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
+		 tbl->it_busno, csr, plssr);
 }
 
 static void calioc2_dump_error_regs(struct iommu_table *tbl)
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)
 	target = calgary_reg(bbar, phboff | 0x800);
 	mck = be32_to_cpu(readl(target));
 
-	printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n",
-	       tbl->it_busno);
+	pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
 
-	printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
-	       csr, plssr, csmr, mck);
+	pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
+		 csr, plssr, csmr, mck);
 
 	/* dump rest of error regs */
-	printk(KERN_EMERG "Calgary: ");
+	pr_emerg("");
 	for (i = 0; i < ARRAY_SIZE(errregs); i++) {
 		/* err regs are at 0x810 - 0x870 */
 		erroff = (0x810 + (i * 0x10));
 		target = calgary_reg(bbar, phboff | erroff);
 		errregs[i] = be32_to_cpu(readl(target));
-		printk("0x%08x@0x%lx ", errregs[i], erroff);
+		pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
 	}
-	printk("\n");
+	pr_cont("\n");
 
 	/* root complex status */
 	target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 735279e54e59..ef6a8456f719 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -145,16 +147,14 @@ void show_regs_common(void)
 	/* Board Name is optional */
 	board = dmi_get_system_info(DMI_BOARD_NAME);
 
-	printk(KERN_CONT "\n");
-	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
-		current->pid, current->comm, print_tainted(),
-		init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	printk(KERN_CONT " %s %s", vendor, product);
-	if (board)
-		printk(KERN_CONT "/%s", board);
-	printk(KERN_CONT "\n");
+	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n",
+	       current->pid, current->comm, print_tainted(),
+	       init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version,
+	       vendor, product,
+	       board ? "/" : "",
+	       board ? board : "");
 }
 
 void flush_thread(void)
@@ -645,7 +645,7 @@ static void amd_e400_idle(void)
 			amd_e400_c1e_detected = true;
 			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
-			printk(KERN_INFO "System has AMD C1E enabled\n");
+			pr_info("System has AMD C1E enabled\n");
 		}
 	}
 
@@ -659,8 +659,7 @@ static void amd_e400_idle(void)
 			 */
 			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
 					   &cpu);
-			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
-			       cpu);
+			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
 		}
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	if (pm_idle == poll_idle && smp_num_siblings > 1) {
-		printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
-			" performance may degrade.\n");
+		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
 	}
 #endif
 	if (pm_idle)
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 		/*
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
-		printk(KERN_INFO "using mwait in idle threads.\n");
+		pr_info("using mwait in idle threads\n");
 		pm_idle = mwait_idle;
 	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
-		printk(KERN_INFO "using AMD E400 aware idle routine\n");
+		pr_info("using AMD E400 aware idle routine\n");
 		pm_idle = amd_e400_idle;
 	} else
 		pm_idle = default_idle;
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str)
 		return -EINVAL;
 
 	if (!strcmp(str, "poll")) {
-		printk("using polling idle threads.\n");
+		pr_info("using polling idle threads\n");
 		pm_idle = poll_idle;
 		boot_option_idle_override = IDLE_POLL;
 	} else if (!strcmp(str, "mwait")) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3e215ba68766..0a980c9d7cb8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)
 {
 	if (dead_task->mm) {
 		if (dead_task->mm->context.size) {
-			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
-					dead_task->comm,
-					dead_task->mm->context.ldt,
-					dead_task->mm->context.size);
+			pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+				dead_task->comm,
+				dead_task->mm->context.ldt,
+				dead_task->mm->context.size);
 			BUG();
 		}
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5de92f1abd76..52190a938b4a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/init.h>
@@ -20,14 +22,12 @@
 #include <asm/virtext.h>
 #include <asm/cpu.h>
 #include <asm/nmi.h>
+#include <asm/smp.h>
 
-#ifdef CONFIG_X86_32
-# include <linux/ctype.h>
-# include <linux/mc146818rtc.h>
-# include <asm/realmode.h>
-#else
-# include <asm/x86_init.h>
-#endif
+#include <linux/ctype.h>
+#include <linux/mc146818rtc.h>
+#include <asm/realmode.h>
+#include <asm/x86_init.h>
 
 /*
  * Power off function, if any
@@ -49,7 +49,7 @@ int reboot_force;
  */
 static int reboot_default = 1;
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 static int reboot_cpu = -1;
 #endif
 
@@ -67,8 +67,8 @@ bool port_cf9_safe = false;
  * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
  * warm   Don't set the cold reboot flag
  * cold   Set the cold reboot flag
- * bios   Reboot by jumping through the BIOS (only for X86_32)
- * smp    Reboot by executing reset on BSP or other CPU (only for X86_32)
+ * bios   Reboot by jumping through the BIOS
+ * smp    Reboot by executing reset on BSP or other CPU
  * triple Force a triple fault (init)
  * kbd    Use the keyboard controller. cold reset (default)
  * acpi   Use the RESET_REG in the FADT
@@ -95,7 +95,6 @@ static int __init reboot_setup(char *str)
 			reboot_mode = 0;
 			break;
 
-#ifdef CONFIG_X86_32
 #ifdef CONFIG_SMP
 		case 's':
 			if (isdigit(*(str+1))) {
@@ -112,7 +111,6 @@ static int __init reboot_setup(char *str)
 #endif /* CONFIG_SMP */
 
 		case 'b':
-#endif
 		case 'a':
 		case 'k':
 		case 't':
@@ -138,7 +136,6 @@ static int __init reboot_setup(char *str)
 __setup("reboot=", reboot_setup);
 
 
-#ifdef CONFIG_X86_32
 /*
  * Reboot options and system auto-detection code provided by
  * Dell Inc. so their systems "just work". :-)
@@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_BIOS) {
 		reboot_type = BOOT_BIOS;
-		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+			"BIOS", d->ident);
 	}
 	return 0;
 }
 
-void machine_real_restart(unsigned int type)
+void __noreturn machine_real_restart(unsigned int type)
 {
-	void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
-		real_mode_header->machine_real_restart_asm;
-
 	local_irq_disable();
 
 	/*
@@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type)
 	/*
 	 * Switch back to the initial page table.
 	 */
+#ifdef CONFIG_X86_32
 	load_cr3(initial_page_table);
-
-	/*
-	 * Write 0x1234 to absolute memory location 0x472.  The BIOS reads
-	 * this on booting to tell it to "Bypass memory test (also warm
-	 * boot)".  This seems like a fairly standard thing that gets set by
-	 * REBOOT.COM programs, and the previous reset routine did this
-	 * too. */
-	*((unsigned short *)0x472) = reboot_mode;
+#else
+	write_cr3(real_mode_header->trampoline_pgd);
+#endif
 
 	/* Jump to the identity-mapped low memory code */
-	restart_lowmem(type);
+#ifdef CONFIG_X86_32
+	asm volatile("jmpl *%0" : :
+		     "rm" (real_mode_header->machine_real_restart_asm),
+		     "a" (type));
+#else
+	asm volatile("ljmpl *%0" : :
+		     "m" (real_mode_header->machine_real_restart_asm),
+		     "D" (type));
+#endif
+	unreachable();
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
 
-#endif /* CONFIG_X86_32 */
-
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
  */
@@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_CF9) {
 		reboot_type = BOOT_CF9;
-		printk(KERN_INFO "%s series board detected. "
-		       "Selecting PCI-method for reboots.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+			"PCI", d->ident);
 	}
 	return 0;
 }
@@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_KBD) {
 		reboot_type = BOOT_KBD;
-		printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboot.\n",
+			"KBD", d->ident);
 	}
 	return 0;
 }
 
 /*
- * This is a single dmi_table handling all reboot quirks.  Note that
- * REBOOT_BIOS is only available for 32bit
+ * This is a single dmi_table handling all reboot quirks.
  */
 static struct dmi_system_id __initdata reboot_dmi_table[] = {
-#ifdef CONFIG_X86_32
 	{	/* Handle problems with rebooting on Dell E520's */
 		.callback = set_bios_reboot,
 		.ident = "Dell E520",
@@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
-#endif /* CONFIG_X86_32 */
 
 	{	/* Handle reboot issue on Acer Aspire one */
 		.callback = set_kbd_reboot,
@@ -584,13 +580,11 @@ static void native_machine_emergency_restart(void)
 			reboot_type = BOOT_KBD;
 			break;
 
-#ifdef CONFIG_X86_32
 		case BOOT_BIOS:
 			machine_real_restart(MRR_BIOS);
 
 			reboot_type = BOOT_KBD;
 			break;
-#endif
 
 		case BOOT_ACPI:
 			acpi_reboot();
@@ -632,12 +626,10 @@ void native_machine_shutdown(void)
 	/* The boot cpu is always logical cpu 0 */
 	int reboot_cpu_id = 0;
 
-#ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
-#endif
 
 	/* Make certain the cpu I'm about to reboot on is online */
 	if (!cpu_online(reboot_cpu_id))
@@ -678,7 +670,7 @@ static void __machine_emergency_restart(int emergency)
 
 static void native_machine_restart(char *__unused)
 {
-	printk("machine restart\n");
+	pr_notice("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16be6dc14db1..f4b9b80e1b95 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.timers.wallclock_init();
 
-	x86_platform.wallclock_init();
-
 	mcheck_init();
 
 	arch_init_ideal_nops();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 21af737053aa..b280908a376e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,6 +6,9 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 		       me->comm, me->pid, where, frame,
 		       regs->ip, regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	}
 
 	force_sig(SIGSEGV, me);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7bd8a0823654..c1a310fb8309 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,4 +1,4 @@
-/*
+ /*
  *	x86 SMP booting functions
  *
  *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
@@ -39,6 +39,8 @@
  *	Glauber Costa		:	i386 and x86_64 integration
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/module.h>
@@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)
 	 * boards)
 	 */
 
-	pr_debug("CALLIN, before setup_local_APIC().\n");
+	pr_debug("CALLIN, before setup_local_APIC()\n");
 	if (apic->smp_callin_clear_local_apic)
 		apic->smp_callin_clear_local_apic();
 	setup_local_APIC();
@@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
-	 * We need to hold call_lock, so there is no inconsistency
-	 * between the time smp_call_function() determines number of
-	 * IPI recipients, and the time when the determination is made
-	 * for which cpus receive the IPI. Holding this
-	 * lock helps us to not include this cpu in a currently in progress
-	 * smp_call_function().
-	 *
 	 * We need to hold vector_lock so there the set of online cpus
 	 * does not change while we are assigning vectors to cpus.  Holding
 	 * this lock ensures we don't half assign or remove an irq from a cpu.
 	 */
-	ipi_call_lock();
 	lock_vector_lock();
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
-	ipi_call_unlock();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	x86_platform.nmi_init();
 
@@ -432,17 +425,16 @@ static void impress_friends(void)
 	/*
 	 * Allow the user to impress friends.
 	 */
-	pr_debug("Before bogomips.\n");
+	pr_debug("Before bogomips\n");
 	for_each_possible_cpu(cpu)
 		if (cpumask_test_cpu(cpu, cpu_callout_mask))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
-	printk(KERN_INFO
-		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
 		num_online_cpus(),
 		bogosum/(500000/HZ),
 		(bogosum/(5000/HZ))%100);
 
-	pr_debug("Before bogocount - setting activated=1.\n");
+	pr_debug("Before bogocount - setting activated=1\n");
 }
 
 void __inquire_remote_apic(int apicid)
@@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid)
 	int timeout;
 	u32 status;
 
-	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
+	pr_info("Inquiring remote APIC 0x%x...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
+		pr_info("... APIC 0x%x %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
 		 */
 		status = safe_apic_wait_icr_idle();
 		if (status)
-			printk(KERN_CONT
-			       "a previous APIC delivery may have failed\n");
+			pr_cont("a previous APIC delivery may have failed\n");
 
 		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
 
@@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid)
 		switch (status) {
 		case APIC_ICR_RR_VALID:
 			status = apic_read(APIC_RRR);
-			printk(KERN_CONT "%08x\n", status);
+			pr_cont("%08x\n", status);
 			break;
 		default:
-			printk(KERN_CONT "failed\n");
+			pr_cont("failed\n");
 		}
 	}
 }
@@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 			apic_write(APIC_ESR, 0);
 		accept_status = (apic_read(APIC_ESR) & 0xEF);
 	}
-	pr_debug("NMI sent.\n");
+	pr_debug("NMI sent\n");
 
 	if (send_status)
-		printk(KERN_ERR "APIC never delivered???\n");
+		pr_err("APIC never delivered???\n");
 	if (accept_status)
-		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+		pr_err("APIC delivery error (%lx)\n", accept_status);
 
 	return (send_status | accept_status);
 }
@@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		apic_read(APIC_ESR);
 	}
 
-	pr_debug("Asserting INIT.\n");
+	pr_debug("Asserting INIT\n");
 
 	/*
 	 * Turn INIT on target chip
@@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 
 	mdelay(10);
 
-	pr_debug("Deasserting INIT.\n");
+	pr_debug("Deasserting INIT\n");
 
 	/* Target chip */
 	/* Send IPI */
@@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	/*
 	 * Run STARTUP IPI loop.
 	 */
-	pr_debug("#startup loops: %d.\n", num_starts);
+	pr_debug("#startup loops: %d\n", num_starts);
 
 	for (j = 1; j <= num_starts; j++) {
-		pr_debug("Sending STARTUP #%d.\n", j);
+		pr_debug("Sending STARTUP #%d\n", j);
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
-		pr_debug("After apic_write.\n");
+		pr_debug("After apic_write\n");
 
 		/*
 		 * STARTUP IPI
@@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		 */
 		udelay(300);
 
-		pr_debug("Startup point 1.\n");
+		pr_debug("Startup point 1\n");
 
 		pr_debug("Waiting for send to finish...\n");
 		send_status = safe_apic_wait_icr_idle();
@@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		if (send_status || accept_status)
 			break;
 	}
-	pr_debug("After Startup.\n");
+	pr_debug("After Startup\n");
 
 	if (send_status)
-		printk(KERN_ERR "APIC never delivered???\n");
+		pr_err("APIC never delivered???\n");
 	if (accept_status)
-		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+		pr_err("APIC delivery error (%lx)\n", accept_status);
 
 	return (send_status | accept_status);
 }
@@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
 	if (system_state == SYSTEM_BOOTING) {
 		if (node != current_node) {
 			if (current_node > (-1))
-				pr_cont(" Ok.\n");
+				pr_cont(" OK\n");
 			current_node = node;
 			pr_info("Booting Node %3d, Processors ", node);
 		}
-		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
 		return;
 	} else
 		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
@@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		/*
 		 * allow APs to start initializing.
 		 */
-		pr_debug("Before Callout %d.\n", cpu);
+		pr_debug("Before Callout %d\n", cpu);
 		cpumask_set_cpu(cpu, cpu_callout_mask);
-		pr_debug("After Callout %d.\n", cpu);
+		pr_debug("After Callout %d\n", cpu);
 
 		/*
 		 * Wait 5s total for a response
@@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 				pr_err("CPU%d: Stuck ??\n", cpu);
 			else
 				/* trampoline code not run */
-				pr_err("CPU%d: Not responding.\n", cpu);
+				pr_err("CPU%d: Not responding\n", cpu);
 			if (apic->inquire_remote_apic)
 				apic->inquire_remote_apic(apicid);
 		}
@@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
 	    !physid_isset(apicid, phys_cpu_present_map) ||
 	    !apic->apic_id_valid(apicid)) {
-		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
+		pr_err("%s: bad cpu %d\n", __func__, cpu);
 		return -EINVAL;
 	}
 
@@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		unsigned int cpu;
 		unsigned nr;
 
-		printk(KERN_WARNING
-		       "More than 8 CPUs detected - skipping them.\n"
-		       "Use CONFIG_X86_BIGSMP.\n");
+		pr_warn("More than 8 CPUs detected - skipping them\n"
+			"Use CONFIG_X86_BIGSMP\n");
 
 		nr = 0;
 		for_each_present_cpu(cpu) {
@@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 #endif
 
 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
-		printk(KERN_WARNING
-			"weird, boot CPU (#%d) not listed by the BIOS.\n",
+		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
 			hard_smp_processor_id());
 
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
@@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 */
 	if (!smp_found_config && !acpi_lapic) {
 		preempt_enable();
-		printk(KERN_NOTICE "SMP motherboard not detected.\n");
+		pr_notice("SMP motherboard not detected\n");
 		disable_smp();
 		if (APIC_init_uniprocessor())
-			printk(KERN_NOTICE "Local APIC not detected."
-					   " Using dummy APIC emulation.\n");
+			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
 		return -1;
 	}
 
@@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 * CPU too, but we do it for the sake of robustness anyway.
 	 */
 	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
-		printk(KERN_NOTICE
-			"weird, boot CPU (#%d) not listed by the BIOS.\n",
-			boot_cpu_physical_apicid);
+		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
+			  boot_cpu_physical_apicid);
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 	}
 	preempt_enable();
@@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		if (!disable_apic) {
 			pr_err("BIOS bug, local APIC #%d not detected!...\n",
 				boot_cpu_physical_apicid);
-			pr_err("... forcing use of dummy APIC emulation."
-				"(tell your hw vendor)\n");
+			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
 		}
 		smpboot_clear_io_apic();
 		disable_ioapic_support();
@@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 * If SMP should be disabled, then really disable it!
 	 */
 	if (!max_cpus) {
-		printk(KERN_INFO "SMP mode deactivated.\n");
+		pr_info("SMP mode deactivated\n");
 		smpboot_clear_io_apic();
 
 		connect_bsp_APIC();
@@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 
 	if (smp_sanity_check(max_cpus) < 0) {
-		printk(KERN_INFO "SMP disabled\n");
+		pr_info("SMP disabled\n");
 		disable_smp();
 		goto out;
 	}
@@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 * Set up local APIC timer on boot CPU.
 	 */
 
-	printk(KERN_INFO "CPU%d: ", 0);
+	pr_info("CPU%d: ", 0);
 	print_cpu_info(&cpu_data(0));
 	x86_init.timers.setup_percpu_clockev();
 
@@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void)
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
 {
-	pr_debug("Boot done.\n");
+	pr_debug("Boot done\n");
 
 	nmi_selftest();
 	impress_friends();
@@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void)
 
 	/* nr_cpu_ids could be reduced via nr_cpus= */
 	if (possible > nr_cpu_ids) {
-		printk(KERN_WARNING
-			"%d Processors exceeds NR_CPUS limit of %d\n",
+		pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",
 			possible, nr_cpu_ids);
 		possible = nr_cpu_ids;
 	}
@@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void)
 	if (!setup_max_cpus)
 #endif
 	if (possible > i) {
-		printk(KERN_WARNING
-			"%d Processors exceeds max_cpus limit of %u\n",
+		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
 			possible, setup_max_cpus);
 		possible = i;
 	}
 
-	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
 
 	for (i = 0; i < possible; i++)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 05b31d92f69c..b481341c9369 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -9,6 +9,9 @@
 /*
  * Handle hardware traps and faults.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
 #include <linux/spinlock.h>
@@ -143,12 +146,11 @@ trap_signal:
 #ifdef CONFIG_X86_64
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
-		printk(KERN_INFO
-		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-		       tsk->comm, tsk->pid, str,
-		       regs->ip, regs->sp, error_code);
+		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+			tsk->comm, tsk->pid, str,
+			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		pr_cont("\n");
 	}
 #endif
 
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 			printk_ratelimit()) {
-		printk(KERN_INFO
-			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
+		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
 			tsk->comm, task_pid_nr(tsk),
 			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		pr_cont("\n");
 	}
 
 	force_sig(SIGSEGV, tsk);
@@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 #if 0
 	/* No need to warn about this any longer. */
-	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 #endif
 }
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fc0a147e3727..cfa5d4f7ca56 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/init.h>
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
 #ifdef CONFIG_X86_TSC
 int __init notsc_setup(char *str)
 {
-	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-			"cannot disable TSC completely.\n");
+	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
 	tsc_disabled = 1;
 	return 1;
 }
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)
 			goto success;
 		}
 	}
-	printk("Fast TSC calibration failed\n");
+	pr_err("Fast TSC calibration failed\n");
 	return 0;
 
 success:
@@ -392,7 +393,7 @@ success:
 	 */
 	delta *= PIT_TICK_RATE;
 	do_div(delta, i*256*1000);
-	printk("Fast TSC calibration using PIT\n");
+	pr_info("Fast TSC calibration using PIT\n");
 	return delta;
 }
 
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)
 		 * use the reference value, as it is more precise.
 		 */
 		if (delta >= 90 && delta <= 110) {
-			printk(KERN_INFO
-			       "TSC: PIT calibration matches %s. %d loops\n",
-			       hpet ? "HPET" : "PMTIMER", i + 1);
+			pr_info("PIT calibration matches %s. %d loops\n",
+				hpet ? "HPET" : "PMTIMER", i + 1);
 			return tsc_ref_min;
 		}
 
@@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void)
 	 */
 	if (tsc_pit_min == ULONG_MAX) {
 		/* PIT gave no useful value */
-		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+		pr_warn("Unable to calibrate against PIT\n");
 
 		/* We don't have an alternative source, disable TSC */
 		if (!hpet && !ref1 && !ref2) {
-			printk("TSC: No reference (HPET/PMTIMER) available\n");
+			pr_notice("No reference (HPET/PMTIMER) available\n");
 			return 0;
 		}
 
 		/* The alternative source failed as well, disable TSC */
 		if (tsc_ref_min == ULONG_MAX) {
-			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
-			       "failed.\n");
+			pr_warn("HPET/PMTIMER calibration failed\n");
 			return 0;
 		}
 
 		/* Use the alternative source */
-		printk(KERN_INFO "TSC: using %s reference calibration\n",
-		       hpet ? "HPET" : "PMTIMER");
+		pr_info("using %s reference calibration\n",
+			hpet ? "HPET" : "PMTIMER");
 
 		return tsc_ref_min;
 	}
 
 	/* We don't have an alternative source, use the PIT calibration value */
 	if (!hpet && !ref1 && !ref2) {
-		printk(KERN_INFO "TSC: Using PIT calibration value\n");
+		pr_info("Using PIT calibration value\n");
 		return tsc_pit_min;
 	}
 
 	/* The alternative source failed, use the PIT calibration value */
 	if (tsc_ref_min == ULONG_MAX) {
-		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
-		       "Using PIT calibration\n");
+		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
 		return tsc_pit_min;
 	}
 
@@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void)
 	 * the PIT value as we know that there are PMTIMERs around
 	 * running at double speed. At least we let the user know:
 	 */
-	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
-	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-	printk(KERN_INFO "TSC: Using PIT calibration value\n");
+	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
+		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+	pr_info("Using PIT calibration value\n");
 	return tsc_pit_min;
 }
 
@@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason)
 		tsc_unstable = 1;
 		sched_clock_stable = 0;
 		disable_sched_clock_irqtime();
-		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
+		pr_info("Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
 			clocksource_mark_unstable(&clocksource_tsc);
@@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 		goto out;
 
 	tsc_khz = freq;
-	printk(KERN_INFO "Refined TSC clocksource calibration: "
-		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
-					(unsigned long)tsc_khz % 1000);
+	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
+		(unsigned long)tsc_khz / 1000,
+		(unsigned long)tsc_khz % 1000);
 
 out:
 	clocksource_register_khz(&clocksource_tsc, tsc_khz);
@@ -970,9 +968,9 @@ void __init tsc_init(void)
 		return;
 	}
 
-	printk("Detected %lu.%03lu MHz processor.\n",
-			(unsigned long)cpu_khz / 1000,
-			(unsigned long)cpu_khz % 1000);
+	pr_info("Detected %lu.%03lu MHz processor\n",
+		(unsigned long)cpu_khz / 1000,
+		(unsigned long)cpu_khz % 1000);
 
 	/*
 	 * Secondary CPUs do not run through tsc_init(), so set up
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 255f58ae71e8..54abcc0baf23 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -28,6 +28,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 	local_irq_enable();
 
 	if (!current->thread.vm86_info) {
-		printk("no vm86_info: BAD\n");
+		pr_alert("no vm86_info: BAD\n");
 		do_exit(SIGSEGV);
 	}
 	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
 	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
 	tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
 	if (tmp) {
-		printk("vm86: could not access userspace vm86_info\n");
+		pr_alert("could not access userspace vm86_info\n");
 		do_exit(SIGSEGV);
 	}
 
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 8eeb55a551b4..992f890283e9 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -16,6 +16,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci_regs.h>
 #include <linux/smp.h>
+#include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/pci-direct.h>
@@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void)
 	ctl = readl(address + 4);
 	printk(KERN_INFO "vSMP CTL: capabilities:0x%08x  control:0x%08x\n",
 	       cap, ctl);
+
+	/* If possible, let the vSMP foundation route the interrupt optimally */
+#ifdef CONFIG_SMP
+	if (cap & ctl & BIT(8)) {
+		ctl &= ~BIT(8);
+#ifdef CONFIG_PROC_FS
+		/* Don't let users change irq affinity via procfs */
+		no_irq_affinity = 1;
+#endif
+	}
+#endif
+
 	if (cap & ctl & (1 << 4)) {
 		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */
 		pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
@@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void)
 		pv_irq_ops.save_fl  = PV_CALLEE_SAVE(vsmp_save_fl);
 		pv_irq_ops.restore_fl  = PV_CALLEE_SAVE(vsmp_restore_fl);
 		pv_init_ops.patch = vsmp_patch;
-
 		ctl &= ~(1 << 4);
-		writel(ctl, address + 4);
-		ctl = readl(address + 4);
-		printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
 	}
+	writel(ctl, address + 4);
+	ctl = readl(address + 4);
+	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
 
 	early_iounmap(address, 8);
 }
@@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void)
 #endif
 }
 
+static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return hard_smp_processor_id() >> index_msb;
+}
+
+/*
+ * In vSMP, all cpus should be capable of handling interrupts, regardless of
+ * the APIC used.
+ */
+static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
+{
+	cpumask_setall(retmask);
+}
+
+static void vsmp_apic_post_init(void)
+{
+	/* need to update phys_pkg_id */
+	apic->phys_pkg_id = apicid_phys_pkg_id;
+	apic->vector_allocation_domain = fill_vector_allocation_domain;
+}
+
 void __init vsmp_init(void)
 {
 	detect_vsmp_box();
 	if (!is_vsmp_box())
 		return;
 
+	x86_platform.apic_post_init = vsmp_apic_post_init;
+
 	vsmp_cap_cpus();
 
 	set_vsmp_pv_ops();
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 5db36caf4289..8d141b309046 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,6 +18,8 @@
  *  use the vDSO.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 			      const char *message)
 {
-	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
-	struct task_struct *tsk;
-
-	if (!show_unhandled_signals || !__ratelimit(&rs))
+	if (!show_unhandled_signals)
 		return;
 
-	tsk = current;
-
-	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
-	       level, tsk->comm, task_pid_nr(tsk),
-	       message, regs->ip, regs->cs,
-	       regs->sp, regs->ax, regs->si, regs->di);
+	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+			      level, current->comm, task_pid_nr(current),
+			      message, regs->ip, regs->cs,
+			      regs->sp, regs->ax, regs->si, regs->di);
 }
 
 static int addr_to_vsyscall_nr(unsigned long addr)
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f3d074..6020f6f5927c 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(copy_user_generic_string);
 EXPORT_SYMBOL(copy_user_generic_unrolled);
+EXPORT_SYMBOL(copy_user_enhanced_fast_string);
 EXPORT_SYMBOL(__copy_user_nocache);
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 35c5e543f550..9f3167e891ef 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
-void wallclock_init_noop(void) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
-	.wallclock_init			= wallclock_init_noop,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index bd18149b2b0f..3d3e20709119 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -3,6 +3,9 @@
  *
  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/bootmem.h>
 #include <linux/compat.h>
 #include <asm/i387.h>
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf)
 	BUG_ON(sig_xstate_size < xstate_size);
 
 	if ((unsigned long)buf % 64)
-		printk("save_i387_xstate: bad fpstate %p\n", buf);
+		pr_err("%s: bad fpstate %p\n", __func__, buf);
 
 	if (!used_math())
 		return 0;
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void)
 	pcntxt_mask = eax + ((u64)edx << 32);
 
 	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
-		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+		pr_err("FP/SSE not shown under xsave features 0x%llx\n",
 		       pcntxt_mask);
 		BUG();
 	}
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void)
 
 	setup_xstate_init();
 
-	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
-	       "cntxt size 0x%x\n",
-	       pcntxt_mask, xstate_size);
+	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
+		pcntxt_mask, xstate_size);
 }
 
 /*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bc4e9d84157f..e0e6990723e9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -385,7 +385,7 @@ void free_initmem(void)
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
+void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 	/*
 	 * end could be not aligned, and We can not align that,
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..71b5d5a07d7b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
 /*
  *	SGI UltraViolet TLB flush routines.
  *
- *	(c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ *	(c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  *	This code is released under the GNU General Public License version 2 or
  *	later.
@@ -38,8 +38,7 @@ static int timeout_base_ns[] = {
 
 static int timeout_us;
 static int nobau;
-static int baudisabled;
-static spinlock_t disable_lock;
+static int nobau_perm;
 static cycles_t congested_cycles;
 
 /* tunables: */
@@ -47,12 +46,13 @@ static int max_concurr		= MAX_BAU_CONCURRENT;
 static int max_concurr_const	= MAX_BAU_CONCURRENT;
 static int plugged_delay	= PLUGGED_DELAY;
 static int plugsb4reset		= PLUGSB4RESET;
+static int giveup_limit		= GIVEUP_LIMIT;
 static int timeoutsb4reset	= TIMEOUTSB4RESET;
 static int ipi_reset_limit	= IPI_RESET_LIMIT;
 static int complete_threshold	= COMPLETE_THRESHOLD;
 static int congested_respns_us	= CONGESTED_RESPONSE_US;
 static int congested_reps	= CONGESTED_REPS;
-static int congested_period	= CONGESTED_PERIOD;
+static int disabled_period	= DISABLED_PERIOD;
 
 static struct tunables tunables[] = {
 	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
@@ -63,7 +63,8 @@ static struct tunables tunables[] = {
 	{&complete_threshold, COMPLETE_THRESHOLD},
 	{&congested_respns_us, CONGESTED_RESPONSE_US},
 	{&congested_reps, CONGESTED_REPS},
-	{&congested_period, CONGESTED_PERIOD}
+	{&disabled_period, DISABLED_PERIOD},
+	{&giveup_limit, GIVEUP_LIMIT}
 };
 
 static struct dentry *tunables_dir;
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 
+static void
+set_bau_on(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	if (nobau_perm) {
+		pr_info("BAU not initialized; cannot be turned on\n");
+		return;
+	}
+	nobau = 0;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 0;
+	}
+	pr_info("BAU turned on\n");
+	return;
+}
+
+static void
+set_bau_off(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	nobau = 1;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 1;
+	}
+	pr_info("BAU turned off\n");
+	return;
+}
+
 /*
  * Determine the first node on a uvhub. 'Nodes' are used for kernel
  * memory allocation.
@@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 		 * Both sockets dump their completed count total into
 		 * the message's count.
 		 */
-		smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+		*sp = 0;
 		asp = (struct atomic_short *)&msg->acknowledge_count;
 		msg_ack_count = atom_asr(socket_ack_count, asp);
 
@@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 }
 
 /*
- * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * But not currently used.
  */
 static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
-	unsigned long descriptor_status2;
 
-	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
-	descriptor_status = (descriptor_status << 1) | descriptor_status2;
+	descriptor_status =
+		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 	return descriptor_status;
 }
 
@@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp)
  */
 int handle_uv2_busy(struct bau_control *bcp)
 {
-	int busy_one = bcp->using_desc;
-	int normal = bcp->uvhub_cpu;
-	int selected = -1;
-	int i;
-	unsigned long descriptor_status;
-	unsigned long status;
-	int mmr_offset;
-	struct bau_desc *bau_desc_old;
-	struct bau_desc *bau_desc_new;
-	struct bau_control *hmaster = bcp->uvhub_master;
 	struct ptc_stats *stat = bcp->statp;
-	cycles_t ttm;
 
 	stat->s_uv2_wars++;
-	spin_lock(&hmaster->uvhub_lock);
-	/* try for the original first */
-	if (busy_one != normal) {
-		if (!normal_busy(bcp))
-			selected = normal;
-	}
-	if (selected < 0) {
-		/* can't use the normal, select an alternate */
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		descriptor_status = read_lmmr(mmr_offset);
-
-		/* scan available descriptors 32-63 */
-		for (i = 0; i < UV_CPUS_PER_AS; i++) {
-			if ((hmaster->inuse_map & (1 << i)) == 0) {
-				status = ((descriptor_status >>
-						(i * UV_ACT_STATUS_SIZE)) &
-						UV_ACT_STATUS_MASK) << 1;
-				if (status != UV2H_DESC_BUSY) {
-					selected = i + UV_CPUS_PER_AS;
-					break;
-				}
-			}
-		}
-	}
-
-	if (busy_one != normal)
-		/* mark the busy alternate as not in-use */
-		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
-
-	if (selected >= 0) {
-		/* switch to the selected descriptor */
-		if (selected != normal) {
-			/* set the selected alternate as in-use */
-			hmaster->inuse_map |=
-					(1 << (selected - UV_CPUS_PER_AS));
-			if (selected > stat->s_uv2_wars_hw)
-				stat->s_uv2_wars_hw = selected;
-		}
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * busy_one);
-		bcp->using_desc = selected;
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * selected);
-		*bau_desc_new = *bau_desc_old;
-	} else {
-		/*
-		 * All are busy. Wait for the normal one for this cpu to
-		 * free up.
-		 */
-		stat->s_uv2_war_waits++;
-		spin_unlock(&hmaster->uvhub_lock);
-		ttm = get_cycles();
-		do {
-			cpu_relax();
-		} while (normal_busy(bcp));
-		spin_lock(&hmaster->uvhub_lock);
-		/* switch to the original descriptor */
-		bcp->using_desc = normal;
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
-		bcp->using_desc = (ITEMS_PER_DESC * normal);
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * normal);
-		*bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
-	}
-	spin_unlock(&hmaster->uvhub_lock);
-	return FLUSH_RETRY_BUSYBUG;
+	bcp->busy = 1;
+	return FLUSH_GIVEUP;
 }
 
 static int uv2_wait_completion(struct bau_desc *bau_desc,
@@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
@@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
-		/*
-		 * Our software ack messages may be blocked because
-		 * there are no swack resources available.  As long
-		 * as none of them has timed out hardware will NACK
-		 * our message and its state will stay IDLE.
-		 */
-		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
-		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
+			/*
+			 * A h/w bug on the destination side may
+			 * have prevented the message being marked
+			 * pending, thus it doesn't get replied to
+			 * and gets continually nacked until it times
+			 * out with a SOURCE_TIMEOUT.
+			 */
 			stat->s_stimeout++;
 			return FLUSH_GIVEUP;
-		} else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
-			stat->s_strongnacks++;
-			bcp->conseccompletes = 0;
-			return FLUSH_GIVEUP;
 		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+			ttm = get_cycles();
+
+			/*
+			 * Our retries may be blocked by all destination
+			 * swack resources being consumed, and a timeout
+			 * pending.  In that case hardware returns the
+			 * ERROR that looks like a destination timeout.
+			 * Without using the extended status we have to
+			 * deduce from the short time that this was a
+			 * strong nack.
+			 */
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+				bcp->conseccompletes = 0;
+				stat->s_plugged++;
+				/* FLUSH_RETRY_PLUGGED causes hang on boot */
+				return FLUSH_GIVEUP;
+			}
 			stat->s_dtimeout++;
 			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_TIMEOUT;
+			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
+			return FLUSH_GIVEUP;
 		} else {
 			busy_reps++;
 			if (busy_reps > 1000000) {
@@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 				busy_reps = 0;
 				ttm = get_cycles();
 				if ((ttm - bcp->send_message) >
-					(bcp->clocks_per_100_usec)) {
+						bcp->timeout_interval)
 					return handle_uv2_busy(bcp);
-				}
 			}
 			/*
 			 * descriptor_stat is still BUSY
@@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 
 	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
@@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,
 }
 
 /*
- * Completions are taking a very long time due to a congested numalink
- * network.
+ * Stop all cpus on a uvhub from using the BAU for a period of time.
+ * This is reversed by check_enable.
  */
-static void disable_for_congestion(struct bau_control *bcp,
-					struct ptc_stats *stat)
+static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
 {
-	/* let only one cpu do this disabling */
-	spin_lock(&disable_lock);
-
-	if (!baudisabled && bcp->period_requests &&
-	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
-		int tcpu;
-		struct bau_control *tbcp;
-		/* it becomes this cpu's job to turn on the use of the
-		   BAU again */
-		baudisabled = 1;
-		bcp->set_bau_off = 1;
-		bcp->set_bau_on_time = get_cycles();
-		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+	int tcpu;
+	struct bau_control *tbcp;
+	struct bau_control *hmaster;
+	cycles_t tm1;
+
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (!bcp->baudisabled) {
 		stat->s_bau_disabled++;
+		tm1 = get_cycles();
 		for_each_present_cpu(tcpu) {
 			tbcp = &per_cpu(bau_control, tcpu);
-			tbcp->baudisabled = 1;
+			if (tbcp->uvhub_master == hmaster) {
+				tbcp->baudisabled = 1;
+				tbcp->set_bau_on_time =
+					tm1 + bcp->disabled_period;
+			}
 		}
 	}
-
-	spin_unlock(&disable_lock);
+	spin_unlock(&hmaster->disable_lock);
 }
 
 static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
 			if ((elapsed > congested_cycles) &&
-			    (bcp->period_requests > bcp->cong_reps))
-				disable_for_congestion(bcp, stat);
+			    (bcp->period_requests > bcp->cong_reps) &&
+			    ((bcp->period_time / bcp->period_requests) >
+							congested_cycles)) {
+				stat->s_congested++;
+				disable_for_period(bcp, stat);
+			}
 		}
 	} else
 		stat->s_requestor--;
 
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP)
+	else if (completion_status == FLUSH_GIVEUP) {
 		stat->s_giveup++;
+		if (get_cycles() > bcp->period_end)
+			bcp->period_giveups = 0;
+		bcp->period_giveups++;
+		if (bcp->period_giveups == 1)
+			bcp->period_end = get_cycles() + bcp->disabled_period;
+		if (bcp->period_giveups > bcp->giveup_limit) {
+			disable_for_period(bcp, stat);
+			stat->s_giveuplimit++;
+		}
+	}
 }
 
 /*
@@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+	struct bau_desc *bau_desc)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
-	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1)
+	if (bcp->uvhub_version == 1) {
+		uv1 = 1;
 		uv1_throttle(hmaster, stat);
+	}
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
+	if (uv1)
+		uv1_hdr = &bau_desc->header.uv1_hdr;
+	else
+		uv2_hdr = &bau_desc->header.uv2_hdr;
+
 	do {
-		bau_desc = bcp->descriptor_base;
-		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
-		if (bcp->uvhub_version == 1) {
-			uv1 = 1;
-			uv1_hdr = &bau_desc->header.uv1_hdr;
-		} else
-			uv2_hdr = &bau_desc->header.uv2_hdr;
-		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+		if (try == 0) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
-		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
 		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
+			stat->s_overipilimit++;
 			completion_stat = FLUSH_GIVEUP;
 			break;
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 }
 
 /*
- * The BAU is disabled. When the disabled time period has expired, the cpu
- * that disabled it must re-enable it.
- * Return 0 if it is re-enabled for all cpus.
+ * The BAU is disabled for this uvhub. When the disabled time period has
+ * expired re-enable it.
+ * Return 0 if it is re-enabled for all cpus on this uvhub.
  */
 static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 {
 	int tcpu;
 	struct bau_control *tbcp;
+	struct bau_control *hmaster;
 
-	if (bcp->set_bau_off) {
-		if (get_cycles() >= bcp->set_bau_on_time) {
-			stat->s_bau_reenabled++;
-			baudisabled = 0;
-			for_each_present_cpu(tcpu) {
-				tbcp = &per_cpu(bau_control, tcpu);
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
+		stat->s_bau_reenabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			if (tbcp->uvhub_master == hmaster) {
 				tbcp->baudisabled = 0;
 				tbcp->period_requests = 0;
 				tbcp->period_time = 0;
+				tbcp->period_giveups = 0;
 			}
-			return 0;
 		}
+		spin_unlock(&hmaster->disable_lock);
+		return 0;
 	}
+	spin_unlock(&hmaster->disable_lock);
 	return -1;
 }
 
@@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
-
-	/* kernel was booted 'nobau' */
-	if (nobau)
-		return cpumask;
+	unsigned long descriptor_status;
+	unsigned long status;
 
 	bcp = &per_cpu(bau_control, cpu);
 	stat = bcp->statp;
+	stat->s_enters++;
+
+	if (bcp->nobau)
+		return cpumask;
+
+	if (bcp->busy) {
+		descriptor_status =
+			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
+		status = ((descriptor_status >> (bcp->uvhub_cpu *
+			UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
+		if (status == UV2H_DESC_BUSY)
+			return cpumask;
+		bcp->busy = 0;
+	}
 
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
-		if (check_enable(bcp, stat))
+		if (check_enable(bcp, stat)) {
+			stat->s_ipifordisabled++;
 			return cpumask;
+		}
 	}
 
 	/*
@@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
 		return NULL;
 	else
 		return cpumask;
 }
 
 /*
- * Search the message queue for any 'other' message with the same software
- * acknowledge resource bit vector.
+ * Search the message queue for any 'other' unprocessed message with the
+ * same software acknowledge resource bit vector as the 'msg' message.
  */
 struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-			struct bau_control *bcp, unsigned char swack_vec)
+					   struct bau_control *bcp)
 {
 	struct bau_pq_entry *msg_next = msg + 1;
+	unsigned char swack_vec = msg->swack_vec;
 
 	if (msg_next > bcp->queue_last)
 		msg_next = bcp->queue_first;
-	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
-		if (msg_next->swack_vec == swack_vec)
+	while (msg_next != msg) {
+		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
+				(msg_next->swack_vec == swack_vec))
 			return msg_next;
 		msg_next++;
 		if (msg_next > bcp->queue_last)
@@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
 		 * This message was assigned a swack resource, but no
 		 * reserved acknowlegment is pending.
 		 * The bug has prevented this message from setting the MMR.
-		 * And no other message has used the same sw_ack resource.
-		 * Do the requested shootdown but do not reply to the msg.
-		 * (the 0 means make no acknowledge)
 		 */
-		bau_process_message(mdp, bcp, 0);
-		return;
-	}
-
-	/*
-	 * Some message has set the MMR 'pending' bit; it might have been
-	 * another message.  Look for that message.
-	 */
-	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
-	if (other_msg) {
-		/* There is another.  Do not ack the current one. */
-		bau_process_message(mdp, bcp, 0);
 		/*
-		 * Let the natural processing of that message acknowledge
-		 * it. Don't get the processing of sw_ack's out of order.
+		 * Some message has set the MMR 'pending' bit; it might have
+		 * been another message.  Look for that message.
 		 */
-		return;
+		other_msg = find_another_by_swack(msg, bcp);
+		if (other_msg) {
+			/*
+			 * There is another. Process this one but do not
+			 * ack it.
+			 */
+			bau_process_message(mdp, bcp, 0);
+			/*
+			 * Let the natural processing of that other message
+			 * acknowledge it. Don't get the processing of sw_ack's
+			 * out of order.
+			 */
+			return;
+		}
 	}
 
 	/*
-	 * There is no other message using this sw_ack, so it is safe to
-	 * acknowledge it.
+	 * Either the MMR shows this one pending a reply or there is no
+	 * other message using this sw_ack, so it is safe to acknowledge it.
 	 */
 	bau_process_message(mdp, bcp, 1);
 
@@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void)
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image |= (1L << UV2_EXT_SHFT);
+			/* hw bug workaround; do not use extended status */
+			mmr_image &= ~(1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);
 	}
@@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec)
 static int ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
+	struct bau_control *bcp;
 	int cpu;
 
 	cpu = *(loff_t *)data;
 	if (!cpu) {
 		seq_printf(file,
-			"# cpu sent stime self locals remotes ncpus localhub ");
+		 "# cpu bauoff sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-		    "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
+		seq_printf(file,
+			"rok resetp resett giveup sto bz throt disable ");
 		seq_printf(file,
-			"resetp resett giveup sto bz throt swack recv rtime ");
+			"enable wars warshw warwaits enters ipidis plugged ");
 		seq_printf(file,
-			"all one mult none retry canc nocan reset rcan ");
+			"ipiover glim cong swack recv rtime all one mult ");
 		seq_printf(file,
-			"disable enable wars warshw warwaits\n");
+			"none retry canc nocan reset rcan\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
-		stat = &per_cpu(ptcstats, cpu);
+		bcp = &per_cpu(bau_control, cpu);
+		stat = bcp->statp;
 		/* source side statistics */
 		seq_printf(file,
-			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			   cpu, stat->s_requestor, cycles_2_us(stat->s_time),
+			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			   cpu, bcp->nobau, stat->s_requestor,
+			   cycles_2_us(stat->s_time),
 			   stat->s_ntargself, stat->s_ntarglocals,
 			   stat->s_ntargremotes, stat->s_ntargcpu,
 			   stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
@@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			   stat->s_resets_plug, stat->s_resets_timeout,
 			   stat->s_giveup, stat->s_stimeout,
 			   stat->s_busy, stat->s_throttles);
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			   stat->s_bau_disabled, stat->s_bau_reenabled,
+			   stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			   stat->s_uv2_war_waits, stat->s_enters,
+			   stat->s_ipifordisabled, stat->s_plugged,
+			   stat->s_overipilimit, stat->s_giveuplimit,
+			   stat->s_congested);
 
 		/* destination side statistics */
 		seq_printf(file,
-			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
 			   read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
 			   stat->d_requestee, cycles_2_us(stat->d_time),
 			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			   stat->d_nocanceled, stat->d_resets,
 			   stat->d_rcanceled);
-		seq_printf(file, "%ld %ld %ld %ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled,
-			stat->s_uv2_wars, stat->s_uv2_wars_hw,
-			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
 	char *buf;
 	int ret;
 
-	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-		"max_concur plugged_delay plugsb4reset",
-		"timeoutsb4reset ipi_reset_limit complete_threshold",
-		"congested_response_us congested_reps congested_period",
+	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
+		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
+		"ipi_reset_limit complete_threshold congested_response_us",
+		"congested_reps disabled_period giveup_limit",
 		max_concurr, plugged_delay, plugsb4reset,
 		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_respns_us, congested_reps, congested_period);
+		congested_respns_us, congested_reps, disabled_period,
+		giveup_limit);
 
 	if (!buf)
 		return -ENOMEM;
@@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 		return -EFAULT;
 	optstr[count - 1] = '\0';
 
+	if (!strcmp(optstr, "on")) {
+		set_bau_on();
+		return count;
+	} else if (!strcmp(optstr, "off")) {
+		set_bau_off();
+		return count;
+	}
+
 	if (strict_strtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
@@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
 		bcp->complete_threshold =	complete_threshold;
 		bcp->cong_response_us =		congested_respns_us;
 		bcp->cong_reps =		congested_reps;
-		bcp->cong_period =		congested_period;
+		bcp->disabled_period =		sec_2_cycles(disabled_period);
+		bcp->giveup_limit =		giveup_limit;
 	}
 	return count;
 }
@@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 			 *   fairness chaining multilevel count replied_to
 			 */
 		} else {
+			/*
+			 * BIOS uses legacy mode, but UV2 hardware always
+			 * uses native mode for selective broadcasts.
+			 */
 			uv2_hdr = &bd2->header.uv2_hdr;
 			uv2_hdr->swack_flag =	1;
 			uv2_hdr->base_dest_nasid =
@@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void)
 		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
 		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
 		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
-		base = timeout_base_ns[index];
-		ts_ns = base * mult1 * mult2;
+		ts_ns = timeout_base_ns[index];
+		ts_ns *= (mult1 * mult2);
 		ret = ts_ns / 1000;
 	} else {
 		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
@@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void)
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
 		bcp->baudisabled		= 0;
+		if (nobau)
+			bcp->nobau		= 1;
 		bcp->statp			= &per_cpu(ptcstats, cpu);
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval		= usec_2_cycles(2*timeout_us);
@@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)
 		bcp->complete_threshold		= complete_threshold;
 		bcp->cong_response_us		= congested_respns_us;
 		bcp->cong_reps			= congested_reps;
-		bcp->cong_period		= congested_period;
-		bcp->clocks_per_100_usec =	usec_2_cycles(100);
+		bcp->disabled_period =		sec_2_cycles(disabled_period);
+		bcp->giveup_limit =		giveup_limit;
 		spin_lock_init(&bcp->queue_lock);
 		spin_lock_init(&bcp->uvhub_lock);
+		spin_lock_init(&bcp->disable_lock);
 	}
 }
 
@@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
-		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
@@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void)
 	if (!is_uv_system())
 		return 0;
 
-	if (nobau)
-		return 0;
-
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
 	nuvhubs = uv_num_possible_blades();
-	spin_lock_init(&disable_lock);
 	congested_cycles = usec_2_cycles(congested_respns_us);
 
 	uv_base_pnode = 0x7fffffff;
@@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void)
 	enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		nobau = 1;
+		set_bau_off();
+		nobau_perm = 1;
 		return 0;
 	}
 
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index f25c2765a5c9..acf7752da952 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
+	unsigned int dest;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
 			sizeof(unsigned long));
@@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
+	err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
+	if (err != 0)
+		return err;
+
 	if (limit == UV_AFFINITY_CPU)
 		irq_set_status_flags(irq, IRQ_NO_BALANCING);
 	else
@@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	entry->polarity		= 0;
 	entry->trigger		= 0;
 	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->dest		= dest;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -222,7 +227,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 /*
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 5b84a2d30888..b2d534cab25f 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -22,7 +22,7 @@ wakeup-objs	+= video-bios.o
 realmode-y			+= header.o
 realmode-y			+= trampoline_$(BITS).o
 realmode-y			+= stack.o
-realmode-$(CONFIG_X86_32)	+= reboot_32.o
+realmode-y			+= reboot.o
 realmode-$(CONFIG_ACPI_SLEEP)	+= $(wakeup-objs)
 
 targets	+= $(realmode-y)
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index fadf48378ada..a28221d94e69 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/page_types.h>
+#include <asm/segment.h>
 
 #include "realmode.h"
 	
@@ -28,8 +29,9 @@ GLOBAL(real_mode_header)
 	.long	pa_wakeup_header
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	.long	pa_machine_real_restart_asm
+#ifdef CONFIG_X86_64
+	.long	__KERNEL32_CS
 #endif
 END(real_mode_header)
 
diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot.S
index 114044876b3d..f932ea61d1c8 100644
--- a/arch/x86/realmode/rm/reboot_32.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -2,6 +2,8 @@
 #include <linux/init.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
 #include "realmode.h"
 
 /*
@@ -12,13 +14,35 @@
  * doesn't work with at least one type of 486 motherboard.  It is easy
  * to stop this code working; hence the copious comments.
  *
- * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax.
+ * This code is called with the restart type (0 = BIOS, 1 = APM) in
+ * the primary argument register (%eax for 32 bit, %edi for 64 bit).
  */
 	.section ".text32", "ax"
 	.code32
-
-	.balign	16
 ENTRY(machine_real_restart_asm)
+
+#ifdef CONFIG_X86_64
+	/* Switch to trampoline GDT as it is guaranteed < 4 GiB */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	lgdtl	pa_tr_gdt
+
+	/* Disable paging to drop us out of long mode */
+	movl	%cr0, %eax
+	andl	$~X86_CR0_PG, %eax
+	movl	%eax, %cr0
+	ljmpl	$__KERNEL32_CS, $pa_machine_real_restart_paging_off
+
+GLOBAL(machine_real_restart_paging_off)
+	xorl	%eax, %eax
+	xorl	%edx, %edx
+	movl	$MSR_EFER, %ecx
+	wrmsr
+
+	movl	%edi, %eax
+	
+#endif /* CONFIG_X86_64 */
+	
 	/* Set up the IDT for real mode. */
 	lidtl	pa_machine_real_restart_idt
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index afb250d22a6b..f58dca7a6e52 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	this_cpu_write(cpu_state, CPU_ONLINE);