summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig18
-rw-r--r--arch/x86/boot/Makefile1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/crypto/chacha20-ssse3-x86_64.S6
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/include/asm/device.h10
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/intel_punit_ipc.h101
-rw-r--r--arch/x86/include/asm/intel_telemetry.h147
-rw-r--r--arch/x86/include/asm/irq.h5
-rw-r--r--arch/x86/include/asm/pci_x86.h10
-rw-r--r--arch/x86/include/asm/pgtable_types.h6
-rw-r--r--arch/x86/include/asm/pmem.h11
-rw-r--r--arch/x86/include/asm/uaccess.h78
-rw-r--r--arch/x86/include/asm/uaccess_64.h94
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/apic/vector.c221
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c5
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c20
-rw-r--r--arch/x86/kernel/head64.c8
-rw-r--r--arch/x86/kernel/irq.c11
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/mm/hugetlbpage.c4
-rw-r--r--arch/x86/mm/pageattr.c4
-rw-r--r--arch/x86/pci/Makefile2
-rw-r--r--arch/x86/pci/common.c38
-rw-r--r--arch/x86/pci/pcbios.c108
-rw-r--r--arch/x86/pci/vmd.c723
-rw-r--r--arch/x86/platform/efi/quirks.c17
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c8
-rw-r--r--arch/x86/platform/intel-quark/imr.c18
-rw-r--r--arch/x86/realmode/rm/Makefile1
36 files changed, 1463 insertions, 240 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4a10ba9e95da..9af2e6338400 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
@@ -99,7 +100,6 @@ config X86
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_API_DEBUG
- select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
@@ -509,11 +509,10 @@ config X86_INTEL_CE
config X86_INTEL_MID
bool "Intel MID platform support"
- depends on X86_32
depends on X86_EXTENDED_PLATFORM
depends on X86_PLATFORM_DEVICES
depends on PCI
- depends on PCI_GOANY
+ depends on X86_64 || (PCI_GOANY && X86_32)
depends on X86_IO_APIC
select SFI
select I2C
@@ -2699,6 +2698,19 @@ config PMC_ATOM
def_bool y
depends on PCI
+config VMD
+ depends on PCI_MSI
+ tristate "Volume Management Device Driver"
+ default N
+ ---help---
+ Adds support for the Intel Volume Management Device (VMD). VMD is a
+ secondary PCI host bridge that allows PCI Express root ports,
+ and devices attached to them, to be removed from the default
+ PCI domain and placed within the VMD domain. This provides
+ more bus resources than are otherwise possible with a
+ single domain. If you know your system provides one of these and
+ has devices attached to it, say Y; if you are not sure, say N.
+
source "net/Kconfig"
source "drivers/Kconfig"
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 2ee62dba0373..bbe1a62efc02 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -60,6 +60,7 @@ clean-files += cpustr.h
KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0a291cdfaf77..f9ce75d80101 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -33,6 +33,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE :=n
LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
index 712b13047b41..3a33124e9112 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S
@@ -157,7 +157,9 @@ ENTRY(chacha20_4block_xor_ssse3)
# done with the slightly better performing SSSE3 byte shuffling,
# 7/12-bit word rotation uses traditional shift+OR.
- sub $0x40,%rsp
+ mov %rsp,%r11
+ sub $0x80,%rsp
+ and $~63,%rsp
# x0..15[0-3] = s0..3[0..3]
movq 0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@ ENTRY(chacha20_4block_xor_ssse3)
pxor %xmm1,%xmm15
movdqu %xmm15,0xf0(%rsi)
- add $0x40,%rsp
+ mov %r11,%rsp
ret
ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 265c0ed68118..c854541d93ff 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -4,6 +4,7 @@
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
VDSO64-$(CONFIG_X86_64) := y
VDSOX32-$(CONFIG_X86_X32_ABI) := y
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 03dd72957d2f..684ed6c3aa67 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -10,6 +10,16 @@ struct dev_archdata {
#endif
};
+#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
+struct dma_domain {
+ struct list_head node;
+ struct dma_map_ops *dma_ops;
+ int domain_nr;
+};
+void add_dma_domain(struct dma_domain *domain);
+void del_dma_domain(struct dma_domain *domain);
+#endif
+
struct pdev_archdata {
};
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 953b7263f844..3a27b93e6261 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -46,8 +46,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define HAVE_ARCH_DMA_SUPPORTED 1
extern int dma_supported(struct device *hwdev, u64 mask);
-#include <asm-generic/dma-mapping-common.h>
-
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
struct dma_attrs *attrs);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1e3408e88604..1815b736269d 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -130,6 +130,11 @@ struct irq_alloc_info {
char *uv_name;
};
#endif
+#if IS_ENABLED(CONFIG_VMD)
+ struct {
+ struct msi_desc *desc;
+ };
+#endif
};
};
diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h
new file mode 100644
index 000000000000..201eb9dce595
--- /dev/null
+++ b/arch/x86/include/asm/intel_punit_ipc.h
@@ -0,0 +1,101 @@
+#ifndef _ASM_X86_INTEL_PUNIT_IPC_H_
+#define _ASM_X86_INTEL_PUNIT_IPC_H_
+
+/*
+ * Three types of 8bit P-Unit IPC commands are supported,
+ * bit[7:6]: [00]: BIOS; [01]: GTD; [10]: ISPD.
+ */
+typedef enum {
+ BIOS_IPC = 0,
+ GTDRIVER_IPC,
+ ISPDRIVER_IPC,
+ RESERVED_IPC,
+} IPC_TYPE;
+
+#define IPC_TYPE_OFFSET 6
+#define IPC_PUNIT_BIOS_CMD_BASE (BIOS_IPC << IPC_TYPE_OFFSET)
+#define IPC_PUNIT_GTD_CMD_BASE (GTDDRIVER_IPC << IPC_TYPE_OFFSET)
+#define IPC_PUNIT_ISPD_CMD_BASE (ISPDRIVER_IPC << IPC_TYPE_OFFSET)
+#define IPC_PUNIT_CMD_TYPE_MASK (RESERVED_IPC << IPC_TYPE_OFFSET)
+
+/* BIOS => Pcode commands */
+#define IPC_PUNIT_BIOS_ZERO (IPC_PUNIT_BIOS_CMD_BASE | 0x00)
+#define IPC_PUNIT_BIOS_VR_INTERFACE (IPC_PUNIT_BIOS_CMD_BASE | 0x01)
+#define IPC_PUNIT_BIOS_READ_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x02)
+#define IPC_PUNIT_BIOS_WRITE_PCS (IPC_PUNIT_BIOS_CMD_BASE | 0x03)
+#define IPC_PUNIT_BIOS_READ_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x04)
+#define IPC_PUNIT_BIOS_WRITE_PCU_CONFIG (IPC_PUNIT_BIOS_CMD_BASE | 0x05)
+#define IPC_PUNIT_BIOS_READ_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x06)
+#define IPC_PUNIT_BIOS_WRITE_PL1_SETTING (IPC_PUNIT_BIOS_CMD_BASE | 0x07)
+#define IPC_PUNIT_BIOS_TRIGGER_VDD_RAM (IPC_PUNIT_BIOS_CMD_BASE | 0x08)
+#define IPC_PUNIT_BIOS_READ_TELE_INFO (IPC_PUNIT_BIOS_CMD_BASE | 0x09)
+#define IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0a)
+#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0b)
+#define IPC_PUNIT_BIOS_READ_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0c)
+#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x0d)
+#define IPC_PUNIT_BIOS_READ_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0e)
+#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE (IPC_PUNIT_BIOS_CMD_BASE | 0x0f)
+#define IPC_PUNIT_BIOS_READ_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x10)
+#define IPC_PUNIT_BIOS_WRITE_TELE_EVENT (IPC_PUNIT_BIOS_CMD_BASE | 0x11)
+#define IPC_PUNIT_BIOS_READ_MODULE_TEMP (IPC_PUNIT_BIOS_CMD_BASE | 0x12)
+#define IPC_PUNIT_BIOS_RESERVED (IPC_PUNIT_BIOS_CMD_BASE | 0x13)
+#define IPC_PUNIT_BIOS_READ_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x14)
+#define IPC_PUNIT_BIOS_WRITE_VOLTAGE_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x15)
+#define IPC_PUNIT_BIOS_READ_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x16)
+#define IPC_PUNIT_BIOS_WRITE_RATIO_OVER (IPC_PUNIT_BIOS_CMD_BASE | 0x17)
+#define IPC_PUNIT_BIOS_READ_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x18)
+#define IPC_PUNIT_BIOS_WRITE_VF_GL_CTRL (IPC_PUNIT_BIOS_CMD_BASE | 0x19)
+#define IPC_PUNIT_BIOS_READ_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1a)
+#define IPC_PUNIT_BIOS_WRITE_FM_SOC_TEMP_THRESH (IPC_PUNIT_BIOS_CMD_BASE | 0x1b)
+
+/* GT Driver => Pcode commands */
+#define IPC_PUNIT_GTD_ZERO (IPC_PUNIT_GTD_CMD_BASE | 0x00)
+#define IPC_PUNIT_GTD_CONFIG (IPC_PUNIT_GTD_CMD_BASE | 0x01)
+#define IPC_PUNIT_GTD_READ_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x02)
+#define IPC_PUNIT_GTD_WRITE_ICCP_LIC_CDYN_SCAL (IPC_PUNIT_GTD_CMD_BASE | 0x03)
+#define IPC_PUNIT_GTD_GET_WM_VAL (IPC_PUNIT_GTD_CMD_BASE | 0x06)
+#define IPC_PUNIT_GTD_WRITE_CONFIG_WISHREQ (IPC_PUNIT_GTD_CMD_BASE | 0x07)
+#define IPC_PUNIT_GTD_READ_REQ_DUTY_CYCLE (IPC_PUNIT_GTD_CMD_BASE | 0x16)
+#define IPC_PUNIT_GTD_DIS_VOL_FREQ_CHG_REQUEST (IPC_PUNIT_GTD_CMD_BASE | 0x17)
+#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_CTRL (IPC_PUNIT_GTD_CMD_BASE | 0x1a)
+#define IPC_PUNIT_GTD_DYNA_DUTY_CYCLE_TUNING (IPC_PUNIT_GTD_CMD_BASE | 0x1c)
+
+/* ISP Driver => Pcode commands */
+#define IPC_PUNIT_ISPD_ZERO (IPC_PUNIT_ISPD_CMD_BASE | 0x00)
+#define IPC_PUNIT_ISPD_CONFIG (IPC_PUNIT_ISPD_CMD_BASE | 0x01)
+#define IPC_PUNIT_ISPD_GET_ISP_LTR_VAL (IPC_PUNIT_ISPD_CMD_BASE | 0x02)
+#define IPC_PUNIT_ISPD_ACCESS_IU_FREQ_BOUNDS (IPC_PUNIT_ISPD_CMD_BASE | 0x03)
+#define IPC_PUNIT_ISPD_READ_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x04)
+#define IPC_PUNIT_ISPD_WRITE_CDYN_LEVEL (IPC_PUNIT_ISPD_CMD_BASE | 0x05)
+
+/* Error codes */
+#define IPC_PUNIT_ERR_SUCCESS 0
+#define IPC_PUNIT_ERR_INVALID_CMD 1
+#define IPC_PUNIT_ERR_INVALID_PARAMETER 2
+#define IPC_PUNIT_ERR_CMD_TIMEOUT 3
+#define IPC_PUNIT_ERR_CMD_LOCKED 4
+#define IPC_PUNIT_ERR_INVALID_VR_ID 5
+#define IPC_PUNIT_ERR_VR_ERR 6
+
+#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC)
+
+int intel_punit_ipc_simple_command(int cmd, int para1, int para2);
+int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out);
+
+#else
+
+static inline int intel_punit_ipc_simple_command(int cmd,
+ int para1, int para2)
+{
+ return -ENODEV;
+}
+
+static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2,
+ u32 *in, u32 *out)
+{
+ return -ENODEV;
+}
+
+#endif /* CONFIG_INTEL_PUNIT_IPC */
+
+#endif
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
new file mode 100644
index 000000000000..ed65fe701de5
--- /dev/null
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -0,0 +1,147 @@
+/*
+ * Intel SOC Telemetry Driver Header File
+ * Copyright (C) 2015, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#ifndef INTEL_TELEMETRY_H
+#define INTEL_TELEMETRY_H
+
+#define TELEM_MAX_EVENTS_SRAM 28
+#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
+
+enum telemetry_unit {
+ TELEM_PSS = 0,
+ TELEM_IOSS,
+ TELEM_UNIT_NONE
+};
+
+struct telemetry_evtlog {
+ u32 telem_evtid;
+ u64 telem_evtlog;
+};
+
+struct telemetry_evtconfig {
+ /* Array of Event-IDs to Enable */
+ u32 *evtmap;
+
+ /* Number of Events (<29) in evtmap */
+ u8 num_evts;
+
+ /* Sampling period */
+ u8 period;
+};
+
+struct telemetry_evtmap {
+ const char *name;
+ u32 evt_id;
+};
+
+struct telemetry_unit_config {
+ struct telemetry_evtmap *telem_evts;
+ void __iomem *regmap;
+ u32 ssram_base_addr;
+ u8 ssram_evts_used;
+ u8 curr_period;
+ u8 max_period;
+ u8 min_period;
+ u32 ssram_size;
+
+};
+
+struct telemetry_plt_config {
+ struct telemetry_unit_config pss_config;
+ struct telemetry_unit_config ioss_config;
+ struct mutex telem_trace_lock;
+ struct mutex telem_lock;
+ bool telem_in_use;
+};
+
+struct telemetry_core_ops {
+ int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period,
+ u8 *ioss_min_period, u8 *ioss_max_period);
+
+ int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig,
+ struct telemetry_evtconfig *ioss_evtconfig,
+ int pss_len, int ioss_len);
+
+ int (*update_events)(struct telemetry_evtconfig pss_evtconfig,
+ struct telemetry_evtconfig ioss_evtconfig);
+
+ int (*set_sampling_period)(u8 pss_period, u8 ioss_period);
+
+ int (*get_trace_verbosity)(enum telemetry_unit telem_unit,
+ u32 *verbosity);
+
+ int (*set_trace_verbosity)(enum telemetry_unit telem_unit,
+ u32 verbosity);
+
+ int (*raw_read_eventlog)(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog,
+ int len, int log_all_evts);
+
+ int (*read_eventlog)(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog,
+ int len, int log_all_evts);
+
+ int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts,
+ u32 *pss_evtmap, u32 *ioss_evtmap);
+
+ int (*reset_events)(void);
+};
+
+int telemetry_set_pltdata(struct telemetry_core_ops *ops,
+ struct telemetry_plt_config *pltconfig);
+
+int telemetry_clear_pltdata(void);
+
+int telemetry_pltconfig_valid(void);
+
+int telemetry_get_evtname(enum telemetry_unit telem_unit,
+ const char **name, int len);
+
+int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig,
+ struct telemetry_evtconfig ioss_evtconfig);
+
+int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts,
+ u32 *pss_evtmap, u32 *ioss_evtmap);
+
+int telemetry_reset_events(void);
+
+int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config,
+ struct telemetry_evtconfig *ioss_config,
+ int pss_len, int ioss_len);
+
+int telemetry_read_events(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog, int len);
+
+int telemetry_raw_read_events(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog, int len);
+
+int telemetry_read_eventlog(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog, int len);
+
+int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit,
+ struct telemetry_evtlog *evtlog, int len);
+
+int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period,
+ u8 *ioss_min_period, u8 *ioss_max_period);
+
+int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period);
+
+int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit,
+ u32 verbosity);
+
+int telemetry_get_trace_verbosity(enum telemetry_unit telem_unit,
+ u32 *verbosity);
+
+#endif /* INTEL_TELEMETRY_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 881b4768644a..e7de5c9a4fbd 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
#define __ARCH_HAS_DO_SOFTIRQ
+struct irq_desc;
+
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
extern int check_irq_vectors_for_cpu_disable(void);
extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
#endif
#ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
-struct irq_desc;
extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..46873fbd44e1 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -151,11 +151,11 @@ extern struct list_head pci_mmcfg_list;
#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
/*
- * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
- * on their northbrige except through the * %eax register. As such, you MUST
- * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
- * accessor functions.
- * In fact just use pci_config_*, nothing else please.
+ * On AMD Fam10h CPUs, all PCI MMIO configuration space accesses must use
+ * %eax. No other source or target registers may be used. The following
+ * mmio_config_* accessors enforce this. See "BIOS and Kernel Developer's
+ * Guide (BKDG) For AMD Family 10h Processors", rev. 3.48, sec 2.11.1,
+ * "MMIO Configuration Coding Requirements".
*/
static inline unsigned char mmio_config_readb(void __iomem *pos)
{
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 04c27a013165..4432ab7f407c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -366,20 +366,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
}
static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
{
+ pgprotval_t val = pgprot_val(pgprot);
pgprot_t new;
- unsigned long val;
- val = pgprot_val(pgprot);
pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
return new;
}
static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
{
+ pgprotval_t val = pgprot_val(pgprot);
pgprot_t new;
- unsigned long val;
- val = pgprot_val(pgprot);
pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
((val & _PAGE_PAT_LARGE) >>
(_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 1544fabcd7f9..c57fd1ea9689 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
}
/**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
*/
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
+ void *vaddr = (void __force *)addr;
void *vend = vaddr + size;
void *p;
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
len = copy_from_iter_nocache(vaddr, bytes, i);
if (__iter_needs_pmem_wb(i))
- __arch_wb_cache_pmem(vaddr, bytes);
+ arch_wb_cache_pmem(addr, bytes);
return len;
}
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
void *vaddr = (void __force *)addr;
memset(vaddr, 0, size);
- __arch_wb_cache_pmem(vaddr, size);
+ arch_wb_cache_pmem(addr, size);
}
static inline bool __arch_has_wmb_pmem(void)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 660458af425d..a4a30e4b2d34 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -134,6 +134,9 @@ extern int __get_user_4(void);
extern int __get_user_8(void);
extern int __get_user_bad(void);
+#define __uaccess_begin() stac()
+#define __uaccess_end() clac()
+
/*
* This is a type: either unsigned long, if the argument fits into
* that type, or otherwise unsigned long long.
@@ -193,10 +196,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
#ifdef CONFIG_X86_32
#define __put_user_asm_u64(x, addr, err, errret) \
- asm volatile(ASM_STAC "\n" \
+ asm volatile("\n" \
"1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
- "3: " ASM_CLAC "\n" \
+ "3:" \
".section .fixup,\"ax\"\n" \
"4: movl %3,%0\n" \
" jmp 3b\n" \
@@ -207,10 +210,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
: "A" (x), "r" (addr), "i" (errret), "0" (err))
#define __put_user_asm_ex_u64(x, addr) \
- asm volatile(ASM_STAC "\n" \
+ asm volatile("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
- "3: " ASM_CLAC "\n" \
+ "3:" \
_ASM_EXTABLE_EX(1b, 2b) \
_ASM_EXTABLE_EX(2b, 3b) \
: : "A" (x), "r" (addr))
@@ -304,6 +307,10 @@ do { \
} \
} while (0)
+/*
+ * This doesn't do __uaccess_begin/end - the exception handling
+ * around it must do that.
+ */
#define __put_user_size_ex(x, ptr, size) \
do { \
__chk_user_ptr(ptr); \
@@ -358,9 +365,9 @@ do { \
} while (0)
#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
- asm volatile(ASM_STAC "\n" \
+ asm volatile("\n" \
"1: mov"itype" %2,%"rtype"1\n" \
- "2: " ASM_CLAC "\n" \
+ "2:\n" \
".section .fixup,\"ax\"\n" \
"3: mov %3,%0\n" \
" xor"itype" %"rtype"1,%"rtype"1\n" \
@@ -370,6 +377,10 @@ do { \
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
+/*
+ * This doesn't do __uaccess_begin/end - the exception handling
+ * around it must do that.
+ */
#define __get_user_size_ex(x, ptr, size) \
do { \
__chk_user_ptr(ptr); \
@@ -400,7 +411,9 @@ do { \
#define __put_user_nocheck(x, ptr, size) \
({ \
int __pu_err; \
+ __uaccess_begin(); \
__put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
+ __uaccess_end(); \
__builtin_expect(__pu_err, 0); \
})
@@ -408,7 +421,9 @@ do { \
({ \
int __gu_err; \
unsigned long __gu_val; \
+ __uaccess_begin(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__builtin_expect(__gu_err, 0); \
})
@@ -423,9 +438,9 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
- asm volatile(ASM_STAC "\n" \
+ asm volatile("\n" \
"1: mov"itype" %"rtype"1,%2\n" \
- "2: " ASM_CLAC "\n" \
+ "2:\n" \
".section .fixup,\"ax\"\n" \
"3: mov %3,%0\n" \
" jmp 2b\n" \
@@ -445,11 +460,11 @@ struct __large_struct { unsigned long buf[100]; };
*/
#define uaccess_try do { \
current_thread_info()->uaccess_err = 0; \
- stac(); \
+ __uaccess_begin(); \
barrier();
#define uaccess_catch(err) \
- clac(); \
+ __uaccess_end(); \
(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
} while (0)
@@ -547,12 +562,13 @@ extern void __cmpxchg_wrong_size(void)
__typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
+ __uaccess_begin(); \
switch (size) { \
case 1: \
{ \
- asm volatile("\t" ASM_STAC "\n" \
+ asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
- "2:\t" ASM_CLAC "\n" \
+ "2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
@@ -566,9 +582,9 @@ extern void __cmpxchg_wrong_size(void)
} \
case 2: \
{ \
- asm volatile("\t" ASM_STAC "\n" \
+ asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
- "2:\t" ASM_CLAC "\n" \
+ "2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
@@ -582,9 +598,9 @@ extern void __cmpxchg_wrong_size(void)
} \
case 4: \
{ \
- asm volatile("\t" ASM_STAC "\n" \
+ asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
- "2:\t" ASM_CLAC "\n" \
+ "2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
@@ -601,9 +617,9 @@ extern void __cmpxchg_wrong_size(void)
if (!IS_ENABLED(CONFIG_X86_64)) \
__cmpxchg_wrong_size(); \
\
- asm volatile("\t" ASM_STAC "\n" \
+ asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
- "2:\t" ASM_CLAC "\n" \
+ "2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
@@ -618,6 +634,7 @@ extern void __cmpxchg_wrong_size(void)
default: \
__cmpxchg_wrong_size(); \
} \
+ __uaccess_end(); \
*__uval = __old; \
__ret; \
})
@@ -754,5 +771,30 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
*/
#define __copy_from_user_nmi __copy_from_user_inatomic
+/*
+ * The "unsafe" user accesses aren't really "unsafe", but the naming
+ * is a big fat warning: you have to not only do the access_ok()
+ * checking before using them, but you have to surround them with the
+ * user_access_begin/end() pair.
+ */
+#define user_access_begin() __uaccess_begin()
+#define user_access_end() __uaccess_end()
+
+#define unsafe_put_user(x, ptr) \
+({ \
+ int __pu_err; \
+ __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
+ __builtin_expect(__pu_err, 0); \
+})
+
+#define unsafe_get_user(x, ptr) \
+({ \
+ int __gu_err; \
+ unsigned long __gu_val; \
+ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ __builtin_expect(__gu_err, 0); \
+})
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2f9b39b274a..b89c34c4019b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -56,35 +56,49 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
- case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src,
+ case 1:
+ __uaccess_begin();
+ __get_user_asm(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
+ __uaccess_end();
return ret;
- case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src,
+ case 2:
+ __uaccess_begin();
+ __get_user_asm(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
+ __uaccess_end();
return ret;
- case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src,
+ case 4:
+ __uaccess_begin();
+ __get_user_asm(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
+ __uaccess_end();
return ret;
- case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ case 8:
+ __uaccess_begin();
+ __get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
+ __uaccess_end();
return ret;
case 10:
+ __uaccess_begin();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
- if (unlikely(ret))
- return ret;
- __get_user_asm(*(u16 *)(8 + (char *)dst),
- (u16 __user *)(8 + (char __user *)src),
- ret, "w", "w", "=r", 2);
+ if (likely(!ret))
+ __get_user_asm(*(u16 *)(8 + (char *)dst),
+ (u16 __user *)(8 + (char __user *)src),
+ ret, "w", "w", "=r", 2);
+ __uaccess_end();
return ret;
case 16:
+ __uaccess_begin();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
- if (unlikely(ret))
- return ret;
- __get_user_asm(*(u64 *)(8 + (char *)dst),
- (u64 __user *)(8 + (char __user *)src),
- ret, "q", "", "=r", 8);
+ if (likely(!ret))
+ __get_user_asm(*(u64 *)(8 + (char *)dst),
+ (u64 __user *)(8 + (char __user *)src),
+ ret, "q", "", "=r", 8);
+ __uaccess_end();
return ret;
default:
return copy_user_generic(dst, (__force void *)src, size);
@@ -106,35 +120,51 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
- case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst,
+ case 1:
+ __uaccess_begin();
+ __put_user_asm(*(u8 *)src, (u8 __user *)dst,
ret, "b", "b", "iq", 1);
+ __uaccess_end();
return ret;
- case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst,
+ case 2:
+ __uaccess_begin();
+ __put_user_asm(*(u16 *)src, (u16 __user *)dst,
ret, "w", "w", "ir", 2);
+ __uaccess_end();
return ret;
- case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst,
+ case 4:
+ __uaccess_begin();
+ __put_user_asm(*(u32 *)src, (u32 __user *)dst,
ret, "l", "k", "ir", 4);
+ __uaccess_end();
return ret;
- case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst,
+ case 8:
+ __uaccess_begin();
+ __put_user_asm(*(u64 *)src, (u64 __user *)dst,
ret, "q", "", "er", 8);
+ __uaccess_end();
return ret;
case 10:
+ __uaccess_begin();
__put_user_asm(*(u64 *)src, (u64 __user *)dst,
ret, "q", "", "er", 10);
- if (unlikely(ret))
- return ret;
- asm("":::"memory");
- __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
- ret, "w", "w", "ir", 2);
+ if (likely(!ret)) {
+ asm("":::"memory");
+ __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
+ ret, "w", "w", "ir", 2);
+ }
+ __uaccess_end();
return ret;
case 16:
+ __uaccess_begin();
__put_user_asm(*(u64 *)src, (u64 __user *)dst,
ret, "q", "", "er", 16);
- if (unlikely(ret))
- return ret;
- asm("":::"memory");
- __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
- ret, "q", "", "er", 8);
+ if (likely(!ret)) {
+ asm("":::"memory");
+ __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
+ ret, "q", "", "er", 8);
+ }
+ __uaccess_end();
return ret;
default:
return copy_user_generic((__force void *)dst, src, size);
@@ -160,39 +190,47 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
switch (size) {
case 1: {
u8 tmp;
+ __uaccess_begin();
__get_user_asm(tmp, (u8 __user *)src,
ret, "b", "b", "=q", 1);
if (likely(!ret))
__put_user_asm(tmp, (u8 __user *)dst,
ret, "b", "b", "iq", 1);
+ __uaccess_end();
return ret;
}
case 2: {
u16 tmp;
+ __uaccess_begin();
__get_user_asm(tmp, (u16 __user *)src,
ret, "w", "w", "=r", 2);
if (likely(!ret))
__put_user_asm(tmp, (u16 __user *)dst,
ret, "w", "w", "ir", 2);
+ __uaccess_end();
return ret;
}
case 4: {
u32 tmp;
+ __uaccess_begin();
__get_user_asm(tmp, (u32 __user *)src,
ret, "l", "k", "=r", 4);
if (likely(!ret))
__put_user_asm(tmp, (u32 __user *)dst,
ret, "l", "k", "ir", 4);
+ __uaccess_end();
return ret;
}
case 8: {
u64 tmp;
+ __uaccess_begin();
__get_user_asm(tmp, (u64 __user *)src,
ret, "q", "", "=r", 8);
if (likely(!ret))
__put_user_asm(tmp, (u64 __user *)dst,
ret, "q", "", "er", 8);
+ __uaccess_end();
return ret;
}
default:
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f25321894ad2..fdb0fbfb1197 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
const struct cpumask *mask;
+ struct irq_desc *desc;
struct irq_data *idata;
struct irq_chip *chip;
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
continue;
- idata = irq_get_irq_data(irq);
+ desc = irq_to_desc(irq);
+ raw_spin_lock_irq(&desc->lock);
+ idata = irq_desc_get_irq_data(desc);
/*
* Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
/* Might be lapic_chip for irq 0 */
if (chip->irq_set_affinity)
chip->irq_set_affinity(idata, mask, false);
+ raw_spin_unlock_irq(&desc->lock);
}
}
#endif
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
struct irq_domain *x86_vector_domain;
EXPORT_SYMBOL_GPL(x86_vector_domain);
static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
static struct irq_chip lapic_controller;
#ifdef CONFIG_X86_IO_APIC
static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
*/
static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
static int current_offset = VECTOR_OFFSET_START % 16;
- int cpu, err;
+ int cpu, vector;
- if (d->move_in_progress)
+ /*
+ * If there is still a move in progress or the previous move has not
+ * been cleaned up completely, tell the caller to come back later.
+ */
+ if (d->move_in_progress ||
+ cpumask_intersects(d->old_domain, cpu_online_mask))
return -EBUSY;
/* Only try and allocate irqs on cpus that are present */
- err = -ENOSPC;
cpumask_clear(d->old_domain);
+ cpumask_clear(searched_cpumask);
cpu = cpumask_first_and(mask, cpu_online_mask);
while (cpu < nr_cpu_ids) {
- int new_cpu, vector, offset;
+ int new_cpu, offset;
+ /* Get the possible target cpus for @mask/@cpu from the apic */
apic->vector_allocation_domain(cpu, vector_cpumask, mask);
+ /*
+ * Clear the offline cpus from @vector_cpumask for searching
+ * and verify whether the result overlaps with @mask. If true,
+ * then the call to apic->cpu_mask_to_apicid_and() will
+ * succeed as well. If not, no point in trying to find a
+ * vector in this mask.
+ */
+ cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+ if (!cpumask_intersects(vector_searchmask, mask))
+ goto next_cpu;
+
if (cpumask_subset(vector_cpumask, d->domain)) {
- err = 0;
if (cpumask_equal(vector_cpumask, d->domain))
- break;
+ goto success;
/*
- * New cpumask using the vector is a proper subset of
- * the current in use mask. So cleanup the vector
- * allocation for the members that are not used anymore.
+ * Mark the cpus which are not longer in the mask for
+ * cleanup.
*/
- cpumask_andnot(d->old_domain, d->domain,
- vector_cpumask);
- d->move_in_progress =
- cpumask_intersects(d->old_domain, cpu_online_mask);
- cpumask_and(d->domain, d->domain, vector_cpumask);
- break;
+ cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+ vector = d->cfg.vector;
+ goto update;
}
vector = current_vector;
@@ -158,45 +170,60 @@ next:
vector = FIRST_EXTERNAL_VECTOR + offset;
}
- if (unlikely(current_vector == vector)) {
- cpumask_or(d->old_domain, d->old_domain,
- vector_cpumask);
- cpumask_andnot(vector_cpumask, mask, d->old_domain);
- cpu = cpumask_first_and(vector_cpumask,
- cpu_online_mask);
- continue;
- }
+ /* If the search wrapped around, try the next cpu */
+ if (unlikely(current_vector == vector))
+ goto next_cpu;
if (test_bit(vector, used_vectors))
goto next;
- for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+ for_each_cpu(new_cpu, vector_searchmask) {
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
goto next;
}
/* Found one! */
current_vector = vector;
current_offset = offset;
- if (d->cfg.vector) {
+ /* Schedule the old vector for cleanup on all cpus */
+ if (d->cfg.vector)
cpumask_copy(d->old_domain, d->domain);
- d->move_in_progress =
- cpumask_intersects(d->old_domain, cpu_online_mask);
- }
- for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+ for_each_cpu(new_cpu, vector_searchmask)
per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
- d->cfg.vector = vector;
- cpumask_copy(d->domain, vector_cpumask);
- err = 0;
- break;
- }
+ goto update;
- if (!err) {
- /* cache destination APIC IDs into cfg->dest_apicid */
- err = apic->cpu_mask_to_apicid_and(mask, d->domain,
- &d->cfg.dest_apicid);
+next_cpu:
+ /*
+ * We exclude the current @vector_cpumask from the requested
+ * @mask and try again with the next online cpu in the
+ * result. We cannot modify @mask, so we use @vector_cpumask
+ * as a temporary buffer here as it will be reassigned when
+ * calling apic->vector_allocation_domain() above.
+ */
+ cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+ cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+ cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+ continue;
}
+ return -ENOSPC;
- return err;
+update:
+ /*
+ * Exclude offline cpus from the cleanup mask and set the
+ * move_in_progress flag when the result is not empty.
+ */
+ cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+ d->move_in_progress = !cpumask_empty(d->old_domain);
+ d->cfg.vector = vector;
+ cpumask_copy(d->domain, vector_cpumask);
+success:
+ /*
+ * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+ * as we already established, that mask & d->domain & cpu_online_mask
+ * is not empty.
+ */
+ BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+ &d->cfg.dest_apicid));
+ return 0;
}
static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
static void clear_irq_vector(int irq, struct apic_chip_data *data)
{
struct irq_desc *desc;
- unsigned long flags;
int cpu, vector;
- raw_spin_lock_irqsave(&vector_lock, flags);
BUG_ON(!data->cfg.vector);
vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
data->cfg.vector = 0;
cpumask_clear(data->domain);
- if (likely(!data->move_in_progress)) {
- raw_spin_unlock_irqrestore(&vector_lock, flags);
+ /*
+ * If move is in progress or the old_domain mask is not empty,
+ * i.e. the cleanup IPI has not been processed yet, we need to remove
+ * the old references to desc from all cpus vector tables.
+ */
+ if (!data->move_in_progress && cpumask_empty(data->old_domain))
return;
- }
desc = irq_to_desc(irq);
for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
}
}
data->move_in_progress = 0;
- raw_spin_unlock_irqrestore(&vector_lock, flags);
}
void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
static void x86_vector_free_irqs(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
+ struct apic_chip_data *apic_data;
struct irq_data *irq_data;
+ unsigned long flags;
int i;
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
if (irq_data && irq_data->chip_data) {
+ raw_spin_lock_irqsave(&vector_lock, flags);
clear_irq_vector(virq + i, irq_data->chip_data);
- free_apic_chip_data(irq_data->chip_data);
+ apic_data = irq_data->chip_data;
+ irq_domain_reset_irq_data(irq_data);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ free_apic_chip_data(apic_data);
#ifdef CONFIG_X86_IO_APIC
if (virq + i < nr_legacy_irqs())
legacy_irq_data[virq + i] = NULL;
#endif
- irq_domain_reset_irq_data(irq_data);
}
}
}
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
return arch_early_ioapic_init();
}
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
return -EINVAL;
err = assign_irq_vector(irq, data, dest);
- if (err) {
- if (assign_irq_vector(irq, data,
- irq_data_get_affinity_mask(irq_data)))
- pr_err("Failed to recover vector for irq %d\n", irq);
- return err;
- }
-
- return IRQ_SET_MASK_OK;
+ return err ? err : IRQ_SET_MASK_OK;
}
static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
#ifdef CONFIG_SMP
static void __send_cleanup_vector(struct apic_chip_data *data)
{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
-
- for_each_cpu_and(i, data->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i),
- IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
+ raw_spin_lock(&vector_lock);
+ cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
data->move_in_progress = 0;
+ if (!cpumask_empty(data->old_domain))
+ apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+ raw_spin_unlock(&vector_lock);
}
void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
goto unlock;
/*
- * Check if the irq migration is in progress. If so, we
- * haven't received the cleanup request yet for this irq.
+ * Nothing to cleanup if irq migration is in progress
+ * or this cpu is not set in the cleanup mask.
*/
- if (data->move_in_progress)
+ if (data->move_in_progress ||
+ !cpumask_test_cpu(me, data->old_domain))
goto unlock;
+ /*
+ * We have two cases to handle here:
+ * 1) vector is unchanged but the target mask got reduced
+ * 2) vector and the target mask has changed
+ *
+ * #1 is obvious, but in #2 we have two vectors with the same
+ * irq descriptor: the old and the new vector. So we need to
+ * make sure that we only cleanup the old vector. The new
+ * vector has the current @vector number in the config and
+ * this cpu is part of the target mask. We better leave that
+ * one alone.
+ */
if (vector == data->cfg.vector &&
cpumask_test_cpu(me, data->domain))
goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
goto unlock;
}
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+ cpumask_clear_cpu(me, data->old_domain);
unlock:
raw_spin_unlock(&desc->lock);
}
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
}
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+ struct apic_chip_data *data = apic_chip_data(irqdata);
+ struct irq_cfg *cfg = data ? &data->cfg : NULL;
- if (cfg)
- __irq_complete_move(cfg, cfg->vector);
+ if (!cfg)
+ return;
+
+ __irq_complete_move(cfg, cfg->vector);
+
+ /*
+ * This is tricky. If the cleanup of @data->old_domain has not been
+ * done yet, then the following setaffinity call will fail with
+ * -EBUSY. This can leave the interrupt in a stale state.
+ *
+ * The cleanup cannot make progress because we hold @desc->lock. So in
+ * case @data->old_domain is not yet cleaned up, we need to drop the
+ * lock and acquire it again. @desc cannot go away, because the
+ * hotplug code holds the sparse irq lock.
+ */
+ raw_spin_lock(&vector_lock);
+ /* Clean out all offline cpus (including ourself) first. */
+ cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+ while (!cpumask_empty(data->old_domain)) {
+ raw_spin_unlock(&vector_lock);
+ raw_spin_unlock(&desc->lock);
+ cpu_relax();
+ raw_spin_lock(&desc->lock);
+ /*
+ * Reevaluate apic_chip_data. It might have been cleared after
+ * we dropped @desc->lock.
+ */
+ data = apic_chip_data(irqdata);
+ if (!data)
+ return;
+ raw_spin_lock(&vector_lock);
+ }
+ raw_spin_unlock(&vector_lock);
}
#endif
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d760c6bb37b5..624db00583f4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
return;
}
pr_info("UV: Found %s hub\n", hub);
- map_low_mmrs();
+
+ /* We now only need to map the MMRs on UV1 */
+ if (is_uv1_hub())
+ map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a667078a5180..fed2ab1f1065 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
static int intel_alt_er(int idx, u64 config)
{
- int alt_idx;
+ int alt_idx = idx;
+
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
return idx;
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
return;
if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
- void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
for_each_cpu(i, topology_sibling_cpumask(cpu)) {
struct intel_shared_regs *pc;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
- *onln = cpuc->shared_regs;
+ cpuc->kfree_on_online[0] = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index f97f8075bf04..3bf41d413775 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
case 87: /* Knights Landing */
ret = knl_uncore_pci_init();
break;
+ case 94: /* SkyLake */
+ ret = skl_uncore_pci_init();
+ break;
default:
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 07aa2d6bd710..a7086b862156 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
int ivb_uncore_pci_init(void);
int hsw_uncore_pci_init(void);
int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 0b934820fafd..2bd030ddd0db 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -8,6 +8,7 @@
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
{ /* end: all zeroes */ },
};
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
.id_table = bdw_uncore_pci_ids,
};
+static struct pci_driver skl_uncore_pci_driver = {
+ .name = "skl_uncore",
+ .id_table = skl_uncore_pci_ids,
+};
+
struct imc_uncore_pci_dev {
__u32 pci_id;
struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
+ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
{ /* end marker */ }
};
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
return imc_uncore_pci_init();
}
+int skl_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
/* end of Sandy Bridge uncore support */
/* Nehalem uncore support */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f129a9af6357..2c0f3407bd1f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
reserve_ebda_region();
+ switch (boot_params.hdr.hardware_subarch) {
+ case X86_SUBARCH_INTEL_MID:
+ x86_intel_mid_early_setup();
+ break;
+ default:
+ break;
+ }
+
start_kernel();
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f8062aaf5df9..61521dc19c10 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -462,7 +462,7 @@ void fixup_irqs(void)
* non intr-remapping case, we can't wait till this interrupt
* arrives at this cpu before completing the irq move.
*/
- irq_force_complete_move(irq);
+ irq_force_complete_move(desc);
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
@@ -470,6 +470,15 @@ void fixup_irqs(void)
}
chip = irq_data_get_irq_chip(data);
+ /*
+ * The interrupt descriptor might have been cleaned up
+ * already, but it is not yet removed from the radix tree
+ */
+ if (!chip) {
+ raw_spin_unlock(&desc->lock);
+ continue;
+ }
+
if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
chip->irq_mask(data);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 819ab3f9c9c7..ba7fbba9831b 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -385,6 +385,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
return image->fops->cleanup(image->image_loader_data);
}
+#ifdef CONFIG_KEXEC_VERIFY_SIG
int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
unsigned long kernel_len)
{
@@ -395,6 +396,7 @@ int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
return image->fops->verify_sig(kernel, kernel_len);
}
+#endif
/*
* Apply purgatory relocations.
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 42982b26e32b..740d7ac03a55 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt)
}
__setup("hugepagesz=", setup_hugepagesz);
-#ifdef CONFIG_CMA
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
static __init int gigantic_pages_init(void)
{
- /* With CMA we can allocate gigantic pages at runtime */
+ /* With compaction or CMA we can allocate gigantic pages at runtime */
if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fc6a4c8f6e2a..2440814b0069 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -33,7 +33,7 @@ struct cpa_data {
pgd_t *pgd;
pgprot_t mask_set;
pgprot_t mask_clr;
- int numpages;
+ unsigned long numpages;
int flags;
unsigned long pfn;
unsigned force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
* CPA operation. Either a large page has been
* preserved or a single page update happened.
*/
- BUG_ON(cpa->numpages > numpages);
+ BUG_ON(cpa->numpages > numpages || !cpa->numpages);
numpages -= cpa->numpages;
if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
cpa->curpage++;
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 5c6fc3577a49..97062a635b77 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -23,6 +23,8 @@ obj-y += bus_numa.o
obj-$(CONFIG_AMD_NB) += amd_bus.o
obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o
+obj-$(CONFIG_VMD) += vmd.o
+
ifeq ($(CONFIG_PCI_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index eccd4d99e6a4..2879efc73a96 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -641,6 +641,43 @@ unsigned int pcibios_assign_all_busses(void)
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
}
+#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
+static LIST_HEAD(dma_domain_list);
+static DEFINE_SPINLOCK(dma_domain_list_lock);
+
+void add_dma_domain(struct dma_domain *domain)
+{
+ spin_lock(&dma_domain_list_lock);
+ list_add(&domain->node, &dma_domain_list);
+ spin_unlock(&dma_domain_list_lock);
+}
+EXPORT_SYMBOL_GPL(add_dma_domain);
+
+void del_dma_domain(struct dma_domain *domain)
+{
+ spin_lock(&dma_domain_list_lock);
+ list_del(&domain->node);
+ spin_unlock(&dma_domain_list_lock);
+}
+EXPORT_SYMBOL_GPL(del_dma_domain);
+
+static void set_dma_domain_ops(struct pci_dev *pdev)
+{
+ struct dma_domain *domain;
+
+ spin_lock(&dma_domain_list_lock);
+ list_for_each_entry(domain, &dma_domain_list, node) {
+ if (pci_domain_nr(pdev->bus) == domain->domain_nr) {
+ pdev->dev.archdata.dma_ops = domain->dma_ops;
+ break;
+ }
+ }
+ spin_unlock(&dma_domain_list_lock);
+}
+#else
+static void set_dma_domain_ops(struct pci_dev *pdev) {}
+#endif
+
int pcibios_add_device(struct pci_dev *dev)
{
struct setup_data *data;
@@ -670,6 +707,7 @@ int pcibios_add_device(struct pci_dev *dev)
pa_data = data->next;
iounmap(data);
}
+ set_dma_domain_ops(dev);
return 0;
}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 9b83b9051ae7..9770e55e768f 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -180,6 +180,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
unsigned long result = 0;
unsigned long flags;
unsigned long bx = (bus << 8) | devfn;
+ u16 number = 0, mask = 0;
WARN_ON(seg);
if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
@@ -189,53 +190,35 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_BYTE),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- /*
- * Zero-extend the result beyond 8 bits, do not trust the
- * BIOS having done it:
- */
- *value &= 0xff;
+ number = PCIBIOS_READ_CONFIG_BYTE;
+ mask = 0xff;
break;
case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_WORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- /*
- * Zero-extend the result beyond 16 bits, do not trust the
- * BIOS having done it:
- */
- *value &= 0xffff;
+ number = PCIBIOS_READ_CONFIG_WORD;
+ mask = 0xffff;
break;
case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_DWORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
+ number = PCIBIOS_READ_CONFIG_DWORD;
break;
}
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (number),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ /*
+ * Zero-extend the result beyond 8 or 16 bits, do not trust the
+ * BIOS having done it:
+ */
+ if (mask)
+ *value &= mask;
+
raw_spin_unlock_irqrestore(&pci_config_lock, flags);
return (int)((result & 0xff00) >> 8);
@@ -247,6 +230,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,
unsigned long result = 0;
unsigned long flags;
unsigned long bx = (bus << 8) | devfn;
+ u16 number = 0;
WARN_ON(seg);
if ((bus > 255) || (devfn > 255) || (reg > 255))
@@ -256,43 +240,27 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
+ number = PCIBIOS_WRITE_CONFIG_BYTE;
break;
case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_WORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
+ number = PCIBIOS_WRITE_CONFIG_WORD;
break;
case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
+ number = PCIBIOS_WRITE_CONFIG_DWORD;
break;
}
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (number),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+
raw_spin_unlock_irqrestore(&pci_config_lock, flags);
return (int)((result & 0xff00) >> 8);
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
new file mode 100644
index 000000000000..d57e48016f15
--- /dev/null
+++ b/arch/x86/pci/vmd.c
@@ -0,0 +1,723 @@
+/*
+ * Volume Management Device driver
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+
+#include <asm/irqdomain.h>
+#include <asm/device.h>
+#include <asm/msi.h>
+#include <asm/msidef.h>
+
+#define VMD_CFGBAR 0
+#define VMD_MEMBAR1 2
+#define VMD_MEMBAR2 4
+
+/*
+ * Lock for manipulating VMD IRQ lists.
+ */
+static DEFINE_RAW_SPINLOCK(list_lock);
+
+/**
+ * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
+ * @node: list item for parent traversal.
+ * @rcu: RCU callback item for freeing.
+ * @irq: back pointer to parent.
+ * @virq: the virtual IRQ value provided to the requesting driver.
+ *
+ * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
+ * a VMD IRQ using this structure.
+ */
+struct vmd_irq {
+ struct list_head node;
+ struct rcu_head rcu;
+ struct vmd_irq_list *irq;
+ unsigned int virq;
+};
+
+/**
+ * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
+ * @irq_list: the list of irq's the VMD one demuxes to.
+ * @vmd_vector: the h/w IRQ assigned to the VMD.
+ * @index: index into the VMD MSI-X table; used for message routing.
+ * @count: number of child IRQs assigned to this vector; used to track
+ * sharing.
+ */
+struct vmd_irq_list {
+ struct list_head irq_list;
+ struct vmd_dev *vmd;
+ unsigned int vmd_vector;
+ unsigned int index;
+ unsigned int count;
+};
+
+struct vmd_dev {
+ struct pci_dev *dev;
+
+ spinlock_t cfg_lock;
+ char __iomem *cfgbar;
+
+ int msix_count;
+ struct msix_entry *msix_entries;
+ struct vmd_irq_list *irqs;
+
+ struct pci_sysdata sysdata;
+ struct resource resources[3];
+ struct irq_domain *irq_domain;
+ struct pci_bus *bus;
+
+#ifdef CONFIG_X86_DEV_DMA_OPS
+ struct dma_map_ops dma_ops;
+ struct dma_domain dma_domain;
+#endif
+};
+
+static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
+{
+ return container_of(bus->sysdata, struct vmd_dev, sysdata);
+}
+
+/*
+ * Drivers managing a device in a VMD domain allocate their own IRQs as before,
+ * but the MSI entry for the hardware it's driving will be programmed with a
+ * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its
+ * domain into one of its own, and the VMD driver de-muxes these for the
+ * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations
+ * and irq_chip to set this up.
+ */
+static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct vmd_irq *vmdirq = data->chip_data;
+ struct vmd_irq_list *irq = vmdirq->irq;
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index);
+ msg->data = 0;
+}
+
+/*
+ * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
+ */
+static void vmd_irq_enable(struct irq_data *data)
+{
+ struct vmd_irq *vmdirq = data->chip_data;
+
+ raw_spin_lock(&list_lock);
+ list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
+ raw_spin_unlock(&list_lock);
+
+ data->chip->irq_unmask(data);
+}
+
+static void vmd_irq_disable(struct irq_data *data)
+{
+ struct vmd_irq *vmdirq = data->chip_data;
+
+ data->chip->irq_mask(data);
+
+ raw_spin_lock(&list_lock);
+ list_del_rcu(&vmdirq->node);
+ raw_spin_unlock(&list_lock);
+}
+
+/*
+ * XXX: Stubbed until we develop acceptable way to not create conflicts with
+ * other devices sharing the same vector.
+ */
+static int vmd_irq_set_affinity(struct irq_data *data,
+ const struct cpumask *dest, bool force)
+{
+ return -EINVAL;
+}
+
+static struct irq_chip vmd_msi_controller = {
+ .name = "VMD-MSI",
+ .irq_enable = vmd_irq_enable,
+ .irq_disable = vmd_irq_disable,
+ .irq_compose_msi_msg = vmd_compose_msi_msg,
+ .irq_set_affinity = vmd_irq_set_affinity,
+};
+
+static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
+ msi_alloc_info_t *arg)
+{
+ return 0;
+}
+
+/*
+ * XXX: We can be even smarter selecting the best IRQ once we solve the
+ * affinity problem.
+ */
+static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd)
+{
+ int i, best = 0;
+
+ raw_spin_lock(&list_lock);
+ for (i = 1; i < vmd->msix_count; i++)
+ if (vmd->irqs[i].count < vmd->irqs[best].count)
+ best = i;
+ vmd->irqs[best].count++;
+ raw_spin_unlock(&list_lock);
+
+ return &vmd->irqs[best];
+}
+
+static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
+ unsigned int virq, irq_hw_number_t hwirq,
+ msi_alloc_info_t *arg)
+{
+ struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(arg->desc)->bus);
+ struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
+
+ if (!vmdirq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&vmdirq->node);
+ vmdirq->irq = vmd_next_irq(vmd);
+ vmdirq->virq = virq;
+
+ irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip,
+ vmdirq, handle_simple_irq, vmd, NULL);
+ return 0;
+}
+
+static void vmd_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq)
+{
+ struct vmd_irq *vmdirq = irq_get_chip_data(virq);
+
+ /* XXX: Potential optimization to rebalance */
+ raw_spin_lock(&list_lock);
+ vmdirq->irq->count--;
+ raw_spin_unlock(&list_lock);
+
+ kfree_rcu(vmdirq, rcu);
+}
+
+static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
+
+ if (nvec > vmd->msix_count)
+ return vmd->msix_count;
+
+ memset(arg, 0, sizeof(*arg));
+ return 0;
+}
+
+static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+{
+ arg->desc = desc;
+}
+
+static struct msi_domain_ops vmd_msi_domain_ops = {
+ .get_hwirq = vmd_get_hwirq,
+ .msi_init = vmd_msi_init,
+ .msi_free = vmd_msi_free,
+ .msi_prepare = vmd_msi_prepare,
+ .set_desc = vmd_set_desc,
+};
+
+static struct msi_domain_info vmd_msi_domain_info = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX,
+ .ops = &vmd_msi_domain_ops,
+ .chip = &vmd_msi_controller,
+};
+
+#ifdef CONFIG_X86_DEV_DMA_OPS
+/*
+ * VMD replaces the requester ID with its own. DMA mappings for devices in a
+ * VMD domain need to be mapped for the VMD, not the device requiring
+ * the mapping.
+ */
+static struct device *to_vmd_dev(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
+
+ return &vmd->dev->dev;
+}
+
+static struct dma_map_ops *vmd_dma_ops(struct device *dev)
+{
+ return to_vmd_dev(dev)->archdata.dma_ops;
+}
+
+static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
+ gfp_t flag, struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
+ attrs);
+}
+
+static void vmd_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t addr, struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
+ attrs);
+}
+
+static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
+ size, attrs);
+}
+
+static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
+ addr, size, attrs);
+}
+
+static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
+ dir, attrs);
+}
+
+static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
+}
+
+static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
+}
+
+static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
+}
+
+static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
+}
+
+static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size,
+ dir);
+}
+
+static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
+}
+
+static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
+}
+
+static int vmd_mapping_error(struct device *dev, dma_addr_t addr)
+{
+ return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr);
+}
+
+static int vmd_dma_supported(struct device *dev, u64 mask)
+{
+ return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
+}
+
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+static u64 vmd_get_required_mask(struct device *dev)
+{
+ return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
+}
+#endif
+
+static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
+{
+ struct dma_domain *domain = &vmd->dma_domain;
+
+ if (vmd->dev->dev.archdata.dma_ops)
+ del_dma_domain(domain);
+}
+
+#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \
+ do { \
+ if (source->fn) \
+ dest->fn = vmd_##fn; \
+ } while (0)
+
+static void vmd_setup_dma_ops(struct vmd_dev *vmd)
+{
+ const struct dma_map_ops *source = vmd->dev->dev.archdata.dma_ops;
+ struct dma_map_ops *dest = &vmd->dma_ops;
+ struct dma_domain *domain = &vmd->dma_domain;
+
+ domain->domain_nr = vmd->sysdata.domain;
+ domain->dma_ops = dest;
+
+ if (!source)
+ return;
+ ASSIGN_VMD_DMA_OPS(source, dest, alloc);
+ ASSIGN_VMD_DMA_OPS(source, dest, free);
+ ASSIGN_VMD_DMA_OPS(source, dest, mmap);
+ ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable);
+ ASSIGN_VMD_DMA_OPS(source, dest, map_page);
+ ASSIGN_VMD_DMA_OPS(source, dest, unmap_page);
+ ASSIGN_VMD_DMA_OPS(source, dest, map_sg);
+ ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg);
+ ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu);
+ ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
+ ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
+ ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
+ ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
+ ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+ ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
+#endif
+ add_dma_domain(domain);
+}
+#undef ASSIGN_VMD_DMA_OPS
+#else
+static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {}
+static void vmd_setup_dma_ops(struct vmd_dev *vmd) {}
+#endif
+
+static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
+ unsigned int devfn, int reg, int len)
+{
+ char __iomem *addr = vmd->cfgbar +
+ (bus->number << 20) + (devfn << 12) + reg;
+
+ if ((addr - vmd->cfgbar) + len >=
+ resource_size(&vmd->dev->resource[VMD_CFGBAR]))
+ return NULL;
+
+ return addr;
+}
+
+/*
+ * CPU may deadlock if config space is not serialized on some versions of this
+ * hardware, so all config space access is done under a spinlock.
+ */
+static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
+ int len, u32 *value)
+{
+ struct vmd_dev *vmd = vmd_from_bus(bus);
+ char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
+ unsigned long flags;
+ int ret = 0;
+
+ if (!addr)
+ return -EFAULT;
+
+ spin_lock_irqsave(&vmd->cfg_lock, flags);
+ switch (len) {
+ case 1:
+ *value = readb(addr);
+ break;
+ case 2:
+ *value = readw(addr);
+ break;
+ case 4:
+ *value = readl(addr);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&vmd->cfg_lock, flags);
+ return ret;
+}
+
+/*
+ * VMD h/w converts non-posted config writes to posted memory writes. The
+ * read-back in this function forces the completion so it returns only after
+ * the config space was written, as expected.
+ */
+static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
+ int len, u32 value)
+{
+ struct vmd_dev *vmd = vmd_from_bus(bus);
+ char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
+ unsigned long flags;
+ int ret = 0;
+
+ if (!addr)
+ return -EFAULT;
+
+ spin_lock_irqsave(&vmd->cfg_lock, flags);
+ switch (len) {
+ case 1:
+ writeb(value, addr);
+ readb(addr);
+ break;
+ case 2:
+ writew(value, addr);
+ readw(addr);
+ break;
+ case 4:
+ writel(value, addr);
+ readl(addr);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&vmd->cfg_lock, flags);
+ return ret;
+}
+
+static struct pci_ops vmd_ops = {
+ .read = vmd_pci_read,
+ .write = vmd_pci_write,
+};
+
+/*
+ * VMD domains start at 0x1000 to not clash with ACPI _SEG domains.
+ */
+static int vmd_find_free_domain(void)
+{
+ int domain = 0xffff;
+ struct pci_bus *bus = NULL;
+
+ while ((bus = pci_find_next_bus(bus)) != NULL)
+ domain = max_t(int, domain, pci_domain_nr(bus));
+ return domain + 1;
+}
+
+static int vmd_enable_domain(struct vmd_dev *vmd)
+{
+ struct pci_sysdata *sd = &vmd->sysdata;
+ struct resource *res;
+ u32 upper_bits;
+ unsigned long flags;
+ LIST_HEAD(resources);
+
+ res = &vmd->dev->resource[VMD_CFGBAR];
+ vmd->resources[0] = (struct resource) {
+ .name = "VMD CFGBAR",
+ .start = res->start,
+ .end = (resource_size(res) >> 20) - 1,
+ .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
+ };
+
+ res = &vmd->dev->resource[VMD_MEMBAR1];
+ upper_bits = upper_32_bits(res->end);
+ flags = res->flags & ~IORESOURCE_SIZEALIGN;
+ if (!upper_bits)
+ flags &= ~IORESOURCE_MEM_64;
+ vmd->resources[1] = (struct resource) {
+ .name = "VMD MEMBAR1",
+ .start = res->start,
+ .end = res->end,
+ .flags = flags,
+ };
+
+ res = &vmd->dev->resource[VMD_MEMBAR2];
+ upper_bits = upper_32_bits(res->end);
+ flags = res->flags & ~IORESOURCE_SIZEALIGN;
+ if (!upper_bits)
+ flags &= ~IORESOURCE_MEM_64;
+ vmd->resources[2] = (struct resource) {
+ .name = "VMD MEMBAR2",
+ .start = res->start + 0x2000,
+ .end = res->end,
+ .flags = flags,
+ };
+
+ sd->domain = vmd_find_free_domain();
+ if (sd->domain < 0)
+ return sd->domain;
+
+ sd->node = pcibus_to_node(vmd->dev->bus);
+
+ vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info,
+ NULL);
+ if (!vmd->irq_domain)
+ return -ENODEV;
+
+ pci_add_resource(&resources, &vmd->resources[0]);
+ pci_add_resource(&resources, &vmd->resources[1]);
+ pci_add_resource(&resources, &vmd->resources[2]);
+ vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd,
+ &resources);
+ if (!vmd->bus) {
+ pci_free_resource_list(&resources);
+ irq_domain_remove(vmd->irq_domain);
+ return -ENODEV;
+ }
+
+ vmd_setup_dma_ops(vmd);
+ dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
+ pci_rescan_bus(vmd->bus);
+
+ WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
+ "domain"), "Can't create symlink to domain\n");
+ return 0;
+}
+
+static irqreturn_t vmd_irq(int irq, void *data)
+{
+ struct vmd_irq_list *irqs = data;
+ struct vmd_irq *vmdirq;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
+ generic_handle_irq(vmdirq->virq);
+ rcu_read_unlock();
+
+ return IRQ_HANDLED;
+}
+
+static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct vmd_dev *vmd;
+ int i, err;
+
+ if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
+ return -ENOMEM;
+
+ vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
+ if (!vmd)
+ return -ENOMEM;
+
+ vmd->dev = dev;
+ err = pcim_enable_device(dev);
+ if (err < 0)
+ return err;
+
+ vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
+ if (!vmd->cfgbar)
+ return -ENOMEM;
+
+ pci_set_master(dev);
+ if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
+ dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
+ return -ENODEV;
+
+ vmd->msix_count = pci_msix_vec_count(dev);
+ if (vmd->msix_count < 0)
+ return -ENODEV;
+
+ vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
+ GFP_KERNEL);
+ if (!vmd->irqs)
+ return -ENOMEM;
+
+ vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count,
+ sizeof(*vmd->msix_entries),
+ GFP_KERNEL);
+ if (!vmd->msix_entries)
+ return -ENOMEM;
+ for (i = 0; i < vmd->msix_count; i++)
+ vmd->msix_entries[i].entry = i;
+
+ vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1,
+ vmd->msix_count);
+ if (vmd->msix_count < 0)
+ return vmd->msix_count;
+
+ for (i = 0; i < vmd->msix_count; i++) {
+ INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
+ vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector;
+ vmd->irqs[i].index = i;
+
+ err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector,
+ vmd_irq, 0, "vmd", &vmd->irqs[i]);
+ if (err)
+ return err;
+ }
+
+ spin_lock_init(&vmd->cfg_lock);
+ pci_set_drvdata(dev, vmd);
+ err = vmd_enable_domain(vmd);
+ if (err)
+ return err;
+
+ dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
+ vmd->sysdata.domain);
+ return 0;
+}
+
+static void vmd_remove(struct pci_dev *dev)
+{
+ struct vmd_dev *vmd = pci_get_drvdata(dev);
+
+ pci_set_drvdata(dev, NULL);
+ sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
+ pci_stop_root_bus(vmd->bus);
+ pci_remove_root_bus(vmd->bus);
+ vmd_teardown_dma_ops(vmd);
+ irq_domain_remove(vmd->irq_domain);
+}
+
+#ifdef CONFIG_PM
+static int vmd_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ pci_save_state(pdev);
+ return 0;
+}
+
+static int vmd_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ pci_restore_state(pdev);
+ return 0;
+}
+#endif
+static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
+
+static const struct pci_device_id vmd_ids[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),},
+ {0,}
+};
+MODULE_DEVICE_TABLE(pci, vmd_ids);
+
+static struct pci_driver vmd_drv = {
+ .name = "vmd",
+ .id_table = vmd_ids,
+ .probe = vmd_probe,
+ .remove = vmd_remove,
+ .driver = {
+ .pm = &vmd_dev_pm_ops,
+ },
+};
+module_pci_driver(vmd_drv);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION("0.6");
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 1c7380da65ff..2d66db8f80f9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -8,6 +8,7 @@
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
+#include <linux/dmi.h>
#include <asm/efi.h>
#include <asm/uv/uv.h>
@@ -248,6 +249,16 @@ out:
return ret;
}
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+ { NULL, "SGI UV1",
+ { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"),
+ }
+ },
+ { } /* NULL entry stops DMI scanning */
+};
+
void __init efi_apply_memmap_quirks(void)
{
/*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
efi_unmap_memmap();
}
- /*
- * UV doesn't support the new EFI pagetable mapping yet.
- */
- if (is_uv_system())
+ /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */
+ if (dmi_check_system(sgi_uv1_dmi))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
}
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 1bbc21e2e4ae..90bb997ed0a2 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
else {
intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
- pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+ pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
}
out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
else if (strcmp("lapic_and_apbt", arg) == 0)
intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
else {
- pr_warn("X86 INTEL_MID timer option %s not recognised"
- " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
- arg);
+ pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+ arg);
return -EINVAL;
}
return 0;
}
__setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index c1bdafaac3ca..c61b6c332e97 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -220,11 +220,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
if (imr_is_enabled(&imr)) {
base = imr_to_phys(imr.addr_lo);
end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+ size = end - base + 1;
} else {
base = 0;
end = 0;
+ size = 0;
}
- size = end - base;
seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
"rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
&base, &end, size, imr.rmask, imr.wmask,
@@ -579,6 +580,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
{
phys_addr_t base = virt_to_phys(&_text);
size_t size = virt_to_phys(&__end_rodata) - base;
+ unsigned long start, end;
int i;
int ret;
@@ -586,18 +588,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
for (i = 0; i < idev->max_imr; i++)
imr_clear(i);
+ start = (unsigned long)_text;
+ end = (unsigned long)__end_rodata - 1;
+
/*
* Setup a locked IMR around the physical extent of the kernel
* from the beginning of the .text secton to the end of the
* .rodata section as one physically contiguous block.
+ *
+ * We don't round up @size since it is already PAGE_SIZE aligned.
+ * See vmlinux.lds.S for details.
*/
ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
if (ret < 0) {
- pr_err("unable to setup IMR for kernel: (%p - %p)\n",
- &_text, &__end_rodata);
+ pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+ size / 1024, start, end);
} else {
- pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
- size / 1024, &_text, &__end_rodata);
+ pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+ size / 1024, start, end);
}
}
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 2730d775ef9a..3e75fcf6b836 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -70,3 +70,4 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
-I$(srctree)/arch/x86/boot
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+UBSAN_SANITIZE := n