summaryrefslogtreecommitdiff
path: root/arch/x86/platform
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/platform')
-rw-r--r--arch/x86/platform/Makefile9
-rw-r--r--arch/x86/platform/atom/Makefile2
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c207
-rw-r--r--arch/x86/platform/ce4100/Makefile1
-rw-r--r--arch/x86/platform/ce4100/ce4100.c131
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts9
-rw-r--r--arch/x86/platform/efi/Makefile10
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c79
-rw-r--r--arch/x86/platform/efi/efi.c1508
-rw-r--r--arch/x86/platform/efi/efi_32.c119
-rw-r--r--arch/x86/platform/efi/efi_64.c829
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S121
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S117
-rw-r--r--arch/x86/platform/efi/efi_thunk_64.S98
-rw-r--r--arch/x86/platform/efi/memmap.c250
-rw-r--r--arch/x86/platform/efi/quirks.c785
-rw-r--r--arch/x86/platform/efi/runtime-map.c194
-rw-r--r--arch/x86/platform/geode/Makefile2
-rw-r--r--arch/x86/platform/geode/alix.c96
-rw-r--r--arch/x86/platform/geode/geode-common.c178
-rw-r--r--arch/x86/platform/geode/geode-common.h21
-rw-r--r--arch/x86/platform/geode/geos.c87
-rw-r--r--arch/x86/platform/geode/net5501.c77
-rw-r--r--arch/x86/platform/goldfish/Makefile1
-rw-r--r--arch/x86/platform/goldfish/goldfish.c51
-rw-r--r--arch/x86/platform/intel-mid/Makefile2
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c126
-rw-r--r--arch/x86/platform/intel-mid/pwr.c485
-rw-r--r--arch/x86/platform/intel-quark/Makefile3
-rw-r--r--arch/x86/platform/intel-quark/imr.c597
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c129
-rw-r--r--arch/x86/platform/intel/Makefile2
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c558
-rw-r--r--arch/x86/platform/iris/Makefile1
-rw-r--r--arch/x86/platform/iris/iris.c21
-rw-r--r--arch/x86/platform/mrst/Makefile3
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c324
-rw-r--r--arch/x86/platform/mrst/mrst.c1052
-rw-r--r--arch/x86/platform/mrst/vrtc.c178
-rw-r--r--arch/x86/platform/olpc/Makefile1
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c23
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-rtc.c15
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c47
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c35
-rw-r--r--arch/x86/platform/olpc/olpc.c131
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c133
-rw-r--r--arch/x86/platform/olpc/olpc_ofw.c8
-rw-r--r--arch/x86/platform/olpc/xo1-wakeup.S10
-rw-r--r--arch/x86/platform/pvh/Makefile6
-rw-r--r--arch/x86/platform/pvh/enlighten.c142
-rw-r--r--arch/x86/platform/pvh/head.S311
-rw-r--r--arch/x86/platform/scx200/Makefile1
-rw-r--r--arch/x86/platform/scx200/scx200_32.c1
-rw-r--r--arch/x86/platform/sfi/Makefile1
-rw-r--r--arch/x86/platform/sfi/sfi.c109
-rw-r--r--arch/x86/platform/ts5500/Makefile1
-rw-r--r--arch/x86/platform/ts5500/ts5500.c110
-rw-r--r--arch/x86/platform/uv/Makefile3
-rw-r--r--arch/x86/platform/uv/bios_uv.c191
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2149
-rw-r--r--arch/x86/platform/uv/uv_irq.c313
-rw-r--r--arch/x86/platform/uv/uv_nmi.c1102
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c76
-rw-r--r--arch/x86/platform/uv/uv_time.c102
-rw-r--r--arch/x86/platform/visws/Makefile1
-rw-r--r--arch/x86/platform/visws/visws_quirks.c608
66 files changed, 7327 insertions, 6766 deletions
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 01e0231a113e..3ed03a2552d0 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,13 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
# Platform specific code goes here
+obj-y += atom/
obj-y += ce4100/
obj-y += efi/
obj-y += geode/
-obj-y += goldfish/
obj-y += iris/
-obj-y += mrst/
+obj-y += intel/
+obj-y += intel-mid/
+obj-y += intel-quark/
obj-y += olpc/
obj-y += scx200/
-obj-y += sfi/
obj-y += ts5500/
-obj-y += visws/
obj-y += uv/
diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile
new file mode 100644
index 000000000000..e06bbecd6358
--- /dev/null
+++ b/arch/x86/platform/atom/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
new file mode 100644
index 000000000000..44c30ce6360a
--- /dev/null
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel SOC Punit device state debug driver
+ * Punit controls power management for North Complex devices (Graphics
+ * blocks, Image Signal Processing, video processing, display, DSP etc.)
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) "punit_atom: " fmt
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/io.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/iosf_mbi.h>
+
+/* Subsystem config/status Video processor */
+#define VED_SS_PM0 0x32
+/* Subsystem config/status ISP (Image Signal Processor) */
+#define ISP_SS_PM0 0x39
+/* Subsystem config/status Input/output controller */
+#define MIO_SS_PM 0x3B
+/* Shift bits for getting status for video, isp and i/o */
+#define SSS_SHIFT 24
+
+/* Power gate status reg */
+#define PWRGT_STATUS 0x61
+/* Shift bits for getting status for graphics rendering */
+#define RENDER_POS 0
+/* Shift bits for getting status for media control */
+#define MEDIA_POS 2
+/* Shift bits for getting status for Valley View/Baytrail display */
+#define VLV_DISPLAY_POS 6
+
+/* Subsystem config/status display for Cherry Trail SOC */
+#define CHT_DSP_SSS 0x36
+/* Shift bits for getting status for display */
+#define CHT_DSP_SSS_POS 16
+
+struct punit_device {
+ char *name;
+ int reg;
+ int sss_pos;
+};
+
+static const struct punit_device punit_device_tng[] = {
+ { "DISPLAY", CHT_DSP_SSS, SSS_SHIFT },
+ { "VED", VED_SS_PM0, SSS_SHIFT },
+ { "ISP", ISP_SS_PM0, SSS_SHIFT },
+ { "MIO", MIO_SS_PM, SSS_SHIFT },
+ { NULL }
+};
+
+static const struct punit_device punit_device_byt[] = {
+ { "GFX RENDER", PWRGT_STATUS, RENDER_POS },
+ { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
+ { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS },
+ { "VED", VED_SS_PM0, SSS_SHIFT },
+ { "ISP", ISP_SS_PM0, SSS_SHIFT },
+ { "MIO", MIO_SS_PM, SSS_SHIFT },
+ { NULL }
+};
+
+static const struct punit_device punit_device_cht[] = {
+ { "GFX RENDER", PWRGT_STATUS, RENDER_POS },
+ { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
+ { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS },
+ { "VED", VED_SS_PM0, SSS_SHIFT },
+ { "ISP", ISP_SS_PM0, SSS_SHIFT },
+ { "MIO", MIO_SS_PM, SSS_SHIFT },
+ { NULL }
+};
+
+static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"};
+
+static int punit_dev_state_show(struct seq_file *seq_file, void *unused)
+{
+ u32 punit_pwr_status;
+ struct punit_device *punit_devp = seq_file->private;
+ int index;
+ int status;
+
+ seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n");
+ while (punit_devp->name) {
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ punit_devp->reg, &punit_pwr_status);
+ if (status) {
+ seq_printf(seq_file, "%9s : Read Failed\n",
+ punit_devp->name);
+ } else {
+ index = (punit_pwr_status >> punit_devp->sss_pos) & 3;
+ seq_printf(seq_file, "%9s : %s\n", punit_devp->name,
+ dstates[index]);
+ }
+ punit_devp++;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(punit_dev_state);
+
+static struct dentry *punit_dbg_file;
+
+static void punit_dbgfs_register(struct punit_device *punit_device)
+{
+ punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
+
+ debugfs_create_file("dev_power_state", 0444, punit_dbg_file,
+ punit_device, &punit_dev_state_fops);
+}
+
+static void punit_dbgfs_unregister(void)
+{
+ debugfs_remove_recursive(punit_dbg_file);
+}
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+static const struct punit_device *punit_dev;
+
+static void punit_s2idle_check(void)
+{
+ const struct punit_device *punit_devp;
+ u32 punit_pwr_status, dstate;
+ int status;
+
+ for (punit_devp = punit_dev; punit_devp->name; punit_devp++) {
+ /* Skip MIO, it is on till the very last moment */
+ if (punit_devp->reg == MIO_SS_PM)
+ continue;
+
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ punit_devp->reg, &punit_pwr_status);
+ if (status) {
+ pr_err("%s read failed\n", punit_devp->name);
+ } else {
+ dstate = (punit_pwr_status >> punit_devp->sss_pos) & 3;
+ if (!dstate)
+ pr_err("%s is in D0 prior to s2idle\n", punit_devp->name);
+ }
+ }
+}
+
+static struct acpi_s2idle_dev_ops punit_s2idle_ops = {
+ .check = punit_s2idle_check,
+};
+
+static void punit_s2idle_check_register(struct punit_device *punit_device)
+{
+ punit_dev = punit_device;
+ acpi_register_lps0_dev(&punit_s2idle_ops);
+}
+
+static void punit_s2idle_check_unregister(void)
+{
+ acpi_unregister_lps0_dev(&punit_s2idle_ops);
+}
+#else
+static void punit_s2idle_check_register(struct punit_device *punit_device) {}
+static void punit_s2idle_check_unregister(void) {}
+#endif
+
+#define X86_MATCH(vfm, data) \
+ X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_MWAIT, data)
+
+static const struct x86_cpu_id intel_punit_cpu_ids[] = {
+ X86_MATCH(INTEL_ATOM_SILVERMONT, &punit_device_byt),
+ X86_MATCH(INTEL_ATOM_SILVERMONT_MID, &punit_device_tng),
+ X86_MATCH(INTEL_ATOM_AIRMONT, &punit_device_cht),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
+
+static int __init punit_atom_debug_init(void)
+{
+ struct punit_device *punit_device;
+ const struct x86_cpu_id *id;
+
+ id = x86_match_cpu(intel_punit_cpu_ids);
+ if (!id)
+ return -ENODEV;
+
+ punit_device = (struct punit_device *)id->driver_data;
+ punit_dbgfs_register(punit_device);
+ punit_s2idle_check_register(punit_device);
+
+ return 0;
+}
+
+static void __exit punit_atom_debug_exit(void)
+{
+ punit_s2idle_check_unregister();
+ punit_dbgfs_unregister();
+}
+
+module_init(punit_atom_debug_init);
+module_exit(punit_atom_debug_exit);
+
+MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Driver for Punit devices states debugging");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
index 91fc92971d94..7b7f37dc80b1 100644
--- a/arch/x86/platform/ce4100/Makefile
+++ b/arch/x86/platform/ce4100/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index f8ab4945892e..aaa7017416f7 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -1,32 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel CE4100 platform specific setup code
*
* (C) Copyright 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/serial_reg.h>
-#include <linux/serial_8250.h>
+#include <linux/reboot.h>
#include <asm/ce4100.h>
#include <asm/prom.h>
#include <asm/setup.h>
-#include <asm/i8259.h>
#include <asm/io.h>
-#include <asm/io_apic.h>
-#include <asm/emergency-restart.h>
-
-static int ce4100_i8042_detect(void)
-{
- return 0;
-}
/*
* The CE4100 platform has an internal 8051 Microcontroller which is
@@ -40,107 +24,15 @@ static void ce4100_power_off(void)
outb(0x4, 0xcf9);
}
-#ifdef CONFIG_SERIAL_8250
-
-static unsigned int mem_serial_in(struct uart_port *p, int offset)
-{
- offset = offset << p->regshift;
- return readl(p->membase + offset);
-}
-
-/*
- * The UART Tx interrupts are not set under some conditions and therefore serial
- * transmission hangs. This is a silicon issue and has not been root caused. The
- * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT
- * bit of LSR register in interrupt handler to see whether at least one of these
- * two bits is set, if so then process the transmit request. If this workaround
- * is not applied, then the serial transmission may hang. This workaround is for
- * errata number 9 in Errata - B step.
-*/
-
-static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
-{
- unsigned int ret, ier, lsr;
-
- if (offset == UART_IIR) {
- offset = offset << p->regshift;
- ret = readl(p->membase + offset);
- if (ret & UART_IIR_NO_INT) {
- /* see if the TX interrupt should have really set */
- ier = mem_serial_in(p, UART_IER);
- /* see if the UART's XMIT interrupt is enabled */
- if (ier & UART_IER_THRI) {
- lsr = mem_serial_in(p, UART_LSR);
- /* now check to see if the UART should be
- generating an interrupt (but isn't) */
- if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
- ret &= ~UART_IIR_NO_INT;
- }
- }
- } else
- ret = mem_serial_in(p, offset);
- return ret;
-}
-
-static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
-{
- offset = offset << p->regshift;
- writel(value, p->membase + offset);
-}
-
-static void ce4100_serial_fixup(int port, struct uart_port *up,
- unsigned short *capabilites)
-{
-#ifdef CONFIG_EARLY_PRINTK
- /*
- * Over ride the legacy port configuration that comes from
- * asm/serial.h. Using the ioport driver then switching to the
- * PCI memmaped driver hangs the IOAPIC
- */
- if (up->iotype != UPIO_MEM32) {
- up->uartclk = 14745600;
- up->mapbase = 0xdffe0200;
- set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
- up->mapbase & PAGE_MASK);
- up->membase =
- (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
- up->membase += up->mapbase & ~PAGE_MASK;
- up->mapbase += port * 0x100;
- up->membase += port * 0x100;
- up->iotype = UPIO_MEM32;
- up->regshift = 2;
- up->irq = 4;
- }
-#endif
- up->iobase = 0;
- up->serial_in = ce4100_mem_serial_in;
- up->serial_out = ce4100_mem_serial_out;
-
- *capabilites |= (1 << 12);
-}
-
-static __init void sdv_serial_fixup(void)
-{
- serial8250_set_isa_configurator(ce4100_serial_fixup);
-}
-
-#else
-static inline void sdv_serial_fixup(void) {};
-#endif
-
static void __init sdv_arch_setup(void)
{
sdv_serial_fixup();
}
-#ifdef CONFIG_X86_IO_APIC
-static void __cpuinit sdv_pci_init(void)
+static void sdv_pci_init(void)
{
x86_of_pci_init();
- /* We can't set this earlier, because we need to calibrate the timer */
- legacy_pic = &null_legacy_pic;
}
-#endif
/*
* CE4100 specific x86_init function overrides and early setup
@@ -148,12 +40,12 @@ static void __cpuinit sdv_pci_init(void)
*/
void __init x86_ce4100_early_setup(void)
{
- x86_init.oem.arch_setup = sdv_arch_setup;
- x86_platform.i8042_detect = ce4100_i8042_detect;
- x86_init.resources.probe_roms = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.pci.init = ce4100_pci_init;
+ x86_init.oem.arch_setup = sdv_arch_setup;
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.pci.init = ce4100_pci_init;
+ x86_init.pci.init_irq = sdv_pci_init;
/*
* By default, the reboot method is ACPI which is supported by the
@@ -164,10 +56,5 @@ void __init x86_ce4100_early_setup(void)
*/
reboot_type = BOOT_KBD;
-#ifdef CONFIG_X86_IO_APIC
- x86_init.pci.init_irq = sdv_pci_init;
- x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
-#endif
-
pm_power_off = ce4100_power_off;
}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index ce874f872cc6..65fa3d866226 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* CE4100 on Falcon Falls
*
* (c) Copyright 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
*/
/dts-v1/;
/ {
@@ -252,7 +249,7 @@
gpio@26 {
#gpio-cells = <2>;
- compatible = "ti,pcf8575";
+ compatible = "nxp,pcf8575";
reg = <0x26>;
gpio-controller;
};
@@ -266,7 +263,7 @@
gpio@26 {
#gpio-cells = <2>;
- compatible = "ti,pcf8575";
+ compatible = "nxp,pcf8575";
reg = <0x26>;
gpio-controller;
};
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 6db1cc4c7534..500cab4a7f7c 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,2 +1,8 @@
-obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
-obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
+# SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE := n
+GCOV_PROFILE := n
+
+obj-$(CONFIG_EFI) += memmap.o quirks.o efi.o efi_$(BITS).o \
+ efi_stub_$(BITS).o
+obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
+obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
deleted file mode 100644
index 7145ec63c520..000000000000
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2012 Intel Corporation
- * Author: Josh Triplett <josh@joshtriplett.org>
- *
- * Based on the bgrt driver:
- * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
- * Author: Matthew Garrett
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/efi-bgrt.h>
-
-struct acpi_table_bgrt *bgrt_tab;
-void *__initdata bgrt_image;
-size_t __initdata bgrt_image_size;
-
-struct bmp_header {
- u16 id;
- u32 size;
-} __packed;
-
-void __init efi_bgrt_init(void)
-{
- acpi_status status;
- void __iomem *image;
- bool ioremapped = false;
- struct bmp_header bmp_header;
-
- if (acpi_disabled)
- return;
-
- status = acpi_get_table("BGRT", 0,
- (struct acpi_table_header **)&bgrt_tab);
- if (ACPI_FAILURE(status))
- return;
-
- if (bgrt_tab->header.length < sizeof(*bgrt_tab))
- return;
- if (bgrt_tab->version != 1)
- return;
- if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
- return;
-
- image = efi_lookup_mapped_addr(bgrt_tab->image_address);
- if (!image) {
- image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
- ioremapped = true;
- if (!image)
- return;
- }
-
- memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
- if (ioremapped)
- iounmap(image);
- bgrt_image_size = bmp_header.size;
-
- bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
- if (!bgrt_image)
- return;
-
- if (ioremapped) {
- image = ioremap(bgrt_tab->image_address, bmp_header.size);
- if (!image) {
- kfree(bgrt_image);
- bgrt_image = NULL;
- return;
- }
- }
-
- memcpy_fromio(bgrt_image, image, bgrt_image_size);
- if (ioremapped)
- iounmap(image);
-}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c8d5577044bb..463b784499a8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Common EFI (Extensible Firmware Interface) support functions
* Based on Extensible Firmware Interface Specification version 1.0
@@ -12,6 +13,8 @@
* Bibo Mao <bibo.mao@intel.com>
* Chandramouli Narayanan <mouli@linux.intel.com>
* Huang Ying <ying.huang@intel.com>
+ * Copyright (C) 2013 SuSE Labs
+ * Borislav Petkov <bp@suse.de> - runtime services VA mapping
*
* Copied from efi_32.c to eliminate the duplicated code between EFI
* 32/64 support code. --ying 2007-10-26
@@ -33,9 +36,8 @@
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
#include <linux/export.h>
-#include <linux/bootmem.h>
-#include <linux/slab.h>
#include <linux/memblock.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/time.h>
@@ -45,61 +47,57 @@
#include <asm/setup.h>
#include <asm/efi.h>
+#include <asm/e820/api.h>
#include <asm/time.h>
-#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
-#include <asm/rtc.h>
-
-#define EFI_DEBUG 1
-
-#define EFI_MIN_RESERVE 5120
+#include <asm/uv/uv.h>
-#define EFI_DUMMY_GUID \
- EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+static unsigned long efi_systab_phys __initdata;
+static unsigned long efi_runtime, efi_nr_tables;
-static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
+unsigned long efi_fw_vendor, efi_config_table;
-struct efi __read_mostly efi = {
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
+static const efi_config_table_type_t arch_tables[] __initconst = {
+#ifdef CONFIG_X86_UV
+ {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" },
+#endif
+ {},
};
-EXPORT_SYMBOL(efi);
-
-struct efi_memory_map memmap;
-
-static struct efi efi_phys __initdata;
-static efi_system_table_t efi_systab __initdata;
-
-unsigned long x86_efi_facility;
-
-/*
- * Returns 1 if 'facility' is enabled, 0 otherwise.
- */
-int efi_enabled(int facility)
-{
- return test_bit(facility, &x86_efi_facility) != 0;
-}
-EXPORT_SYMBOL(efi_enabled);
-static bool __initdata disable_runtime = false;
-static int __init setup_noefi(char *arg)
-{
- disable_runtime = true;
- return 0;
-}
-early_param("noefi", setup_noefi);
+static const unsigned long * const efi_tables[] = {
+ &efi.acpi,
+ &efi.acpi20,
+ &efi.smbios,
+ &efi.smbios3,
+#ifdef CONFIG_X86_UV
+ &uv_systab_phys,
+#endif
+ &efi_fw_vendor,
+ &efi_runtime,
+ &efi_config_table,
+ &efi.esrt,
+ &efi_mem_attr_table,
+#ifdef CONFIG_EFI_RCI2_TABLE
+ &rci2_table_phys,
+#endif
+ &efi.tpm_log,
+ &efi.tpm_final_log,
+ &efi_rng_seed,
+#ifdef CONFIG_LOAD_UEFI_KEYS
+ &efi.mokvar_table,
+#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+ &efi.coco_secret,
+#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ &efi.unaccepted,
+#endif
+};
-int add_efi_memmap;
-EXPORT_SYMBOL(add_efi_memmap);
+u64 efi_setup; /* efi setup_data physical address */
+static int add_efi_memmap __initdata;
static int __init setup_add_efi_memmap(char *arg)
{
add_efi_memmap = 1;
@@ -107,237 +105,21 @@ static int __init setup_add_efi_memmap(char *arg)
}
early_param("add_efi_memmap", setup_add_efi_memmap);
-static bool efi_no_storage_paranoia;
-
-static int __init setup_storage_paranoia(char *arg)
-{
- efi_no_storage_paranoia = true;
- return 0;
-}
-early_param("efi_no_storage_paranoia", setup_storage_paranoia);
-
-
-static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
- unsigned long flags;
- efi_status_t status;
-
- spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(get_time, tm, tc);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return status;
-}
-
-static efi_status_t virt_efi_set_time(efi_time_t *tm)
-{
- unsigned long flags;
- efi_status_t status;
-
- spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt1(set_time, tm);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return status;
-}
-
-static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
- efi_bool_t *pending,
- efi_time_t *tm)
-{
- unsigned long flags;
- efi_status_t status;
-
- spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt3(get_wakeup_time,
- enabled, pending, tm);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return status;
-}
-
-static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
-{
- unsigned long flags;
- efi_status_t status;
-
- spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(set_wakeup_time,
- enabled, tm);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return status;
-}
-
-static efi_status_t virt_efi_get_variable(efi_char16_t *name,
- efi_guid_t *vendor,
- u32 *attr,
- unsigned long *data_size,
- void *data)
-{
- return efi_call_virt5(get_variable,
- name, vendor, attr,
- data_size, data);
-}
-
-static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
- efi_char16_t *name,
- efi_guid_t *vendor)
-{
- return efi_call_virt3(get_next_variable,
- name_size, name, vendor);
-}
-
-static efi_status_t virt_efi_set_variable(efi_char16_t *name,
- efi_guid_t *vendor,
- u32 attr,
- unsigned long data_size,
- void *data)
-{
- return efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
-}
-
-static efi_status_t virt_efi_query_variable_info(u32 attr,
- u64 *storage_space,
- u64 *remaining_space,
- u64 *max_variable_size)
-{
- if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
- return EFI_UNSUPPORTED;
-
- return efi_call_virt4(query_variable_info, attr, storage_space,
- remaining_space, max_variable_size);
-}
-
-static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
-{
- return efi_call_virt1(get_next_high_mono_count, count);
-}
-
-static void virt_efi_reset_system(int reset_type,
- efi_status_t status,
- unsigned long data_size,
- efi_char16_t *data)
-{
- efi_call_virt4(reset_system, reset_type, status,
- data_size, data);
-}
-
-static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
- unsigned long count,
- unsigned long sg_list)
-{
- if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
- return EFI_UNSUPPORTED;
-
- return efi_call_virt3(update_capsule, capsules, count, sg_list);
-}
-
-static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
- unsigned long count,
- u64 *max_size,
- int *reset_type)
-{
- if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
- return EFI_UNSUPPORTED;
-
- return efi_call_virt4(query_capsule_caps, capsules, count, max_size,
- reset_type);
-}
-
-static efi_status_t __init phys_efi_set_virtual_address_map(
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- efi_status_t status;
-
- efi_call_phys_prelog();
- status = efi_call_phys4(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
- efi_call_phys_epilog();
- return status;
-}
-
-static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
- efi_time_cap_t *tc)
-{
- unsigned long flags;
- efi_status_t status;
-
- spin_lock_irqsave(&rtc_lock, flags);
- efi_call_phys_prelog();
- status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
- virt_to_phys(tc));
- efi_call_phys_epilog();
- spin_unlock_irqrestore(&rtc_lock, flags);
- return status;
-}
-
-int efi_set_rtc_mmss(const struct timespec *now)
-{
- unsigned long nowtime = now->tv_sec;
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
- struct rtc_time tm;
-
- status = efi.get_time(&eft, &cap);
- if (status != EFI_SUCCESS) {
- pr_err("Oops: efitime: can't read time!\n");
- return -1;
- }
-
- rtc_time_to_tm(nowtime, &tm);
- if (!rtc_valid_tm(&tm)) {
- eft.year = tm.tm_year + 1900;
- eft.month = tm.tm_mon + 1;
- eft.day = tm.tm_mday;
- eft.minute = tm.tm_min;
- eft.second = tm.tm_sec;
- eft.nanosecond = 0;
- } else {
- printk(KERN_ERR
- "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
- __FUNCTION__, nowtime);
- return -1;
- }
-
- status = efi.set_time(&eft);
- if (status != EFI_SUCCESS) {
- pr_err("Oops: efitime: can't write time!\n");
- return -1;
- }
- return 0;
-}
-
-void efi_get_time(struct timespec *now)
-{
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- status = efi.get_time(&eft, &cap);
- if (status != EFI_SUCCESS)
- pr_err("Oops: efitime: can't read time!\n");
-
- now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
- eft.minute, eft.second);
- now->tv_nsec = 0;
-}
-
/*
* Tell the kernel about the EFI memory map. This might include
- * more than the max 128 entries that can fit in the e820 legacy
- * (zeropage) memory map.
+ * more than the max 128 entries that can fit in the passed in e820
+ * legacy (zeropage) memory map, but the kernel's e820 table can hold
+ * E820_MAX_ENTRIES.
*/
static void __init do_add_efi_memmap(void)
{
- void *p;
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type;
@@ -348,19 +130,25 @@ static void __init do_add_efi_memmap(void)
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- if (md->attribute & EFI_MEMORY_WB)
- e820_type = E820_RAM;
+ if (efi_soft_reserve_enabled()
+ && (md->attribute & EFI_MEMORY_SP))
+ e820_type = E820_TYPE_SOFT_RESERVED;
+ else if (md->attribute & EFI_MEMORY_WB)
+ e820_type = E820_TYPE_RAM;
else
- e820_type = E820_RESERVED;
+ e820_type = E820_TYPE_RESERVED;
break;
case EFI_ACPI_RECLAIM_MEMORY:
- e820_type = E820_ACPI;
+ e820_type = E820_TYPE_ACPI;
break;
case EFI_ACPI_MEMORY_NVS:
- e820_type = E820_NVS;
+ e820_type = E820_TYPE_NVS;
break;
case EFI_UNUSABLE_MEMORY:
- e820_type = E820_UNUSABLE;
+ e820_type = E820_TYPE_UNUSABLE;
+ break;
+ case EFI_PERSISTENT_MEMORY:
+ e820_type = E820_TYPE_PMEM;
break;
default:
/*
@@ -368,220 +156,290 @@ static void __init do_add_efi_memmap(void)
* EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
* EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
*/
- e820_type = E820_RESERVED;
+ e820_type = E820_TYPE_RESERVED;
break;
}
- e820_add_region(start, size, e820_type);
+
+ e820__range_add(start, size, e820_type);
}
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ e820__update_table(e820_table);
+}
+
+/*
+ * Given add_efi_memmap defaults to 0 and there is no alternative
+ * e820 mechanism for soft-reserved memory, import the full EFI memory
+ * map if soft reservations are present and enabled. Otherwise, the
+ * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is
+ * the efi=nosoftreserve option.
+ */
+static bool do_efi_soft_reserve(void)
+{
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP))
+ return false;
+
+ if (!efi_soft_reserve_enabled())
+ return false;
+
+ for_each_efi_memory_desc(md)
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ (md->attribute & EFI_MEMORY_SP))
+ return true;
+ return false;
}
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
- unsigned long pmap;
+ struct efi_memory_map_data data;
+ phys_addr_t pmap;
+ int rv;
-#ifdef CONFIG_X86_32
- /* Can't handle data above 4GB at this time */
- if (e->efi_memmap_hi) {
+ if (efi_enabled(EFI_PARAVIRT))
+ return 0;
+
+ /* Can't handle firmware tables above 4GB on i386 */
+ if (IS_ENABLED(CONFIG_X86_32) && e->efi_memmap_hi > 0) {
pr_err("Memory map is above 4GB, disabling EFI.\n");
return -EINVAL;
}
- pmap = e->efi_memmap;
-#else
- pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
-#endif
- memmap.phys_map = (void *)pmap;
- memmap.nr_map = e->efi_memmap_size /
- e->efi_memdesc_size;
- memmap.desc_size = e->efi_memdesc_size;
- memmap.desc_version = e->efi_memdesc_version;
+ pmap = (phys_addr_t)(e->efi_memmap | ((u64)e->efi_memmap_hi << 32));
+
+ data.phys_map = pmap;
+ data.size = e->efi_memmap_size;
+ data.desc_size = e->efi_memdesc_size;
+ data.desc_version = e->efi_memdesc_version;
+
+ if (!efi_enabled(EFI_PARAVIRT)) {
+ rv = efi_memmap_init_early(&data);
+ if (rv)
+ return rv;
+ }
+
+ if (add_efi_memmap || do_efi_soft_reserve())
+ do_add_efi_memmap();
+
+ WARN(efi.memmap.desc_version != 1,
+ "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+ efi.memmap.desc_version);
- memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
+ set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags);
return 0;
}
-#if EFI_DEBUG
-static void __init print_efi_memmap(void)
+#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT)
+#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT)
+#define U64_HIGH_BIT (~(U64_MAX >> 1))
+
+static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i)
{
- efi_memory_desc_t *md;
- void *p;
- int i;
+ u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1;
+ u64 end_hi = 0;
+ char buf[64];
- for (p = memmap.map, i = 0;
- p < memmap.map_end;
- p += memmap.desc_size, i++) {
- md = p;
- pr_info("mem%02u: type=%u, attr=0x%llx, "
- "range=[0x%016llx-0x%016llx) (%lluMB)\n",
- i, md->type, md->attribute, md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+ if (md->num_pages == 0) {
+ end = 0;
+ } else if (md->num_pages > EFI_PAGES_MAX ||
+ EFI_PAGES_MAX - md->num_pages <
+ (md->phys_addr >> EFI_PAGE_SHIFT)) {
+ end_hi = (md->num_pages & OVERFLOW_ADDR_MASK)
+ >> OVERFLOW_ADDR_SHIFT;
+
+ if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT))
+ end_hi += 1;
+ } else {
+ return true;
+ }
+
+ pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n");
+
+ if (end_hi) {
+ pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n",
+ i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ md->phys_addr, end_hi, end);
+ } else {
+ pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n",
+ i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ md->phys_addr, end);
}
+ return false;
}
-#endif /* EFI_DEBUG */
-void __init efi_reserve_boot_services(void)
+static void __init efi_clean_memmap(void)
{
- void *p;
+ efi_memory_desc_t *out = efi.memmap.map;
+ const efi_memory_desc_t *in = out;
+ const efi_memory_desc_t *end = efi.memmap.map_end;
+ int i, n_removal;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- u64 start = md->phys_addr;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
+ for (i = n_removal = 0; in < end; i++) {
+ if (efi_memmap_entry_valid(in, i)) {
+ if (out != in)
+ memcpy(out, in, efi.memmap.desc_size);
+ out = (void *)out + efi.memmap.desc_size;
+ } else {
+ n_removal++;
+ }
+ in = (void *)in + efi.memmap.desc_size;
+ }
- if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- /* Only reserve where possible:
- * - Not within any already allocated areas
- * - Not over any memory area (really needed, if above?)
- * - Not within any part of the kernel
- * - Not the bios reserved area
- */
- if ((start+size >= __pa_symbol(_text)
- && start <= __pa_symbol(_end)) ||
- !e820_all_mapped(start, start+size, E820_RAM) ||
- memblock_is_region_reserved(start, size)) {
- /* Could not reserve, skip it */
- md->num_pages = 0;
- memblock_dbg("Could not reserve boot range "
- "[0x%010llx-0x%010llx]\n",
- start, start+size-1);
- } else
- memblock_reserve(start, size);
+ if (n_removal > 0) {
+ struct efi_memory_map_data data = {
+ .phys_map = efi.memmap.phys_map,
+ .desc_version = efi.memmap.desc_version,
+ .desc_size = efi.memmap.desc_size,
+ .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal),
+ .flags = 0,
+ };
+
+ pr_warn("Removing %d invalid memory map entries.\n", n_removal);
+ efi_memmap_install(&data);
+ }
+}
+
+/*
+ * Firmware can use EfiMemoryMappedIO to request that MMIO regions be
+ * mapped by the OS so they can be accessed by EFI runtime services, but
+ * should have no other significance to the OS (UEFI r2.10, sec 7.2).
+ * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO
+ * regions to E820_TYPE_RESERVED entries, which prevent Linux from
+ * allocating space from them (see remove_e820_regions()).
+ *
+ * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and
+ * PCI host bridge windows, which means Linux can't allocate BAR space for
+ * hot-added devices.
+ *
+ * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this
+ * problem.
+ *
+ * Retain small EfiMemoryMappedIO regions because on some platforms, these
+ * describe non-window space that's included in host bridge _CRS. If we
+ * assign that space to PCI devices, they don't work.
+ */
+static void __init efi_remove_e820_mmio(void)
+{
+ efi_memory_desc_t *md;
+ u64 size, start, end;
+ int i = 0;
+
+ for_each_efi_memory_desc(md) {
+ if (md->type == EFI_MEMORY_MAPPED_IO) {
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ start = md->phys_addr;
+ end = start + size - 1;
+ if (size >= 256*1024) {
+ pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n",
+ i, start, end, size >> 20);
+ e820__range_remove(start, size,
+ E820_TYPE_RESERVED, 1);
+ } else {
+ pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n",
+ i, start, end, size >> 10);
+ }
+ }
+ i++;
}
}
-void __init efi_unmap_memmap(void)
+void __init efi_print_memmap(void)
{
- clear_bit(EFI_MEMMAP, &x86_efi_facility);
- if (memmap.map) {
- early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
- memmap.map = NULL;
+ efi_memory_desc_t *md;
+ int i = 0;
+
+ for_each_efi_memory_desc(md) {
+ char buf[64];
+
+ pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
+ i++, efi_md_typeattr_format(buf, sizeof(buf), md),
+ md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
+ (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
}
-void __init efi_free_boot_services(void)
+static int __init efi_systab_init(unsigned long phys)
{
+ int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t)
+ : sizeof(efi_system_table_32_t);
+ const efi_table_hdr_t *hdr;
+ bool over4g = false;
void *p;
+ int ret;
- if (!efi_is_native())
- return;
+ hdr = p = early_memremap_ro(phys, size);
+ if (p == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- unsigned long long start = md->phys_addr;
- unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+ ret = efi_systab_check_header(hdr);
+ if (ret) {
+ early_memunmap(p, size);
+ return ret;
+ }
- if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+ if (efi_enabled(EFI_64BIT)) {
+ const efi_system_table_64_t *systab64 = p;
- /* Could not reserve boot area */
- if (!size)
- continue;
+ efi_runtime = systab64->runtime;
+ over4g = systab64->runtime > U32_MAX;
- free_bootmem_late(start, size);
- }
+ if (efi_setup) {
+ struct efi_setup_data *data;
- efi_unmap_memmap();
-}
+ data = early_memremap_ro(efi_setup, sizeof(*data));
+ if (!data) {
+ early_memunmap(p, size);
+ return -ENOMEM;
+ }
-static int __init efi_systab_init(void *phys)
-{
- if (efi_enabled(EFI_64BIT)) {
- efi_system_table_64_t *systab64;
- u64 tmp = 0;
-
- systab64 = early_ioremap((unsigned long)phys,
- sizeof(*systab64));
- if (systab64 == NULL) {
- pr_err("Couldn't map the system table!\n");
- return -ENOMEM;
- }
+ efi_fw_vendor = (unsigned long)data->fw_vendor;
+ efi_config_table = (unsigned long)data->tables;
- efi_systab.hdr = systab64->hdr;
- efi_systab.fw_vendor = systab64->fw_vendor;
- tmp |= systab64->fw_vendor;
- efi_systab.fw_revision = systab64->fw_revision;
- efi_systab.con_in_handle = systab64->con_in_handle;
- tmp |= systab64->con_in_handle;
- efi_systab.con_in = systab64->con_in;
- tmp |= systab64->con_in;
- efi_systab.con_out_handle = systab64->con_out_handle;
- tmp |= systab64->con_out_handle;
- efi_systab.con_out = systab64->con_out;
- tmp |= systab64->con_out;
- efi_systab.stderr_handle = systab64->stderr_handle;
- tmp |= systab64->stderr_handle;
- efi_systab.stderr = systab64->stderr;
- tmp |= systab64->stderr;
- efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
- tmp |= systab64->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
- tmp |= systab64->boottime;
- efi_systab.nr_tables = systab64->nr_tables;
- efi_systab.tables = systab64->tables;
- tmp |= systab64->tables;
-
- early_iounmap(systab64, sizeof(*systab64));
-#ifdef CONFIG_X86_32
- if (tmp >> 32) {
- pr_err("EFI data located above 4GB, disabling EFI.\n");
- return -EINVAL;
- }
-#endif
- } else {
- efi_system_table_32_t *systab32;
+ over4g |= data->fw_vendor > U32_MAX ||
+ data->tables > U32_MAX;
- systab32 = early_ioremap((unsigned long)phys,
- sizeof(*systab32));
- if (systab32 == NULL) {
- pr_err("Couldn't map the system table!\n");
- return -ENOMEM;
+ early_memunmap(data, sizeof(*data));
+ } else {
+ efi_fw_vendor = systab64->fw_vendor;
+ efi_config_table = systab64->tables;
+
+ over4g |= systab64->fw_vendor > U32_MAX ||
+ systab64->tables > U32_MAX;
}
+ efi_nr_tables = systab64->nr_tables;
+ } else {
+ const efi_system_table_32_t *systab32 = p;
- efi_systab.hdr = systab32->hdr;
- efi_systab.fw_vendor = systab32->fw_vendor;
- efi_systab.fw_revision = systab32->fw_revision;
- efi_systab.con_in_handle = systab32->con_in_handle;
- efi_systab.con_in = systab32->con_in;
- efi_systab.con_out_handle = systab32->con_out_handle;
- efi_systab.con_out = systab32->con_out;
- efi_systab.stderr_handle = systab32->stderr_handle;
- efi_systab.stderr = systab32->stderr;
- efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
- efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
- efi_systab.nr_tables = systab32->nr_tables;
- efi_systab.tables = systab32->tables;
-
- early_iounmap(systab32, sizeof(*systab32));
+ efi_fw_vendor = systab32->fw_vendor;
+ efi_runtime = systab32->runtime;
+ efi_config_table = systab32->tables;
+ efi_nr_tables = systab32->nr_tables;
}
- efi.systab = &efi_systab;
+ efi.runtime_version = hdr->revision;
- /*
- * Verify the EFI Table
- */
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
- pr_err("System table signature incorrect!\n");
+ efi_systab_report_header(hdr, efi_fw_vendor);
+ early_memunmap(p, size);
+
+ if (IS_ENABLED(CONFIG_X86_32) && over4g) {
+ pr_err("EFI data located above 4GB, disabling EFI.\n");
return -EINVAL;
}
- if ((efi.systab->hdr.revision >> 16) == 0)
- pr_err("Warning: System table version "
- "%d.%02d, expected 1.00 or greater!\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff);
return 0;
}
-static int __init efi_config_init(u64 tables, int nr_tables)
+static int __init efi_config_init(const efi_config_table_type_t *arch_tables)
{
- void *config_tables, *tablep;
- int i, sz;
+ void *config_tables;
+ int sz, ret;
+
+ if (efi_nr_tables == 0)
+ return 0;
if (efi_enabled(EFI_64BIT))
sz = sizeof(efi_config_table_64_t);
@@ -591,302 +449,69 @@ static int __init efi_config_init(u64 tables, int nr_tables)
/*
* Let's see what config tables the firmware passed to us.
*/
- config_tables = early_ioremap(tables, nr_tables * sz);
+ config_tables = early_memremap(efi_config_table, efi_nr_tables * sz);
if (config_tables == NULL) {
pr_err("Could not map Configuration table!\n");
return -ENOMEM;
}
- tablep = config_tables;
- pr_info("");
- for (i = 0; i < efi.systab->nr_tables; i++) {
- efi_guid_t guid;
- unsigned long table;
-
- if (efi_enabled(EFI_64BIT)) {
- u64 table64;
- guid = ((efi_config_table_64_t *)tablep)->guid;
- table64 = ((efi_config_table_64_t *)tablep)->table;
- table = table64;
-#ifdef CONFIG_X86_32
- if (table64 >> 32) {
- pr_cont("\n");
- pr_err("Table located above 4GB, disabling EFI.\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sz);
- return -EINVAL;
- }
-#endif
- } else {
- guid = ((efi_config_table_32_t *)tablep)->guid;
- table = ((efi_config_table_32_t *)tablep)->table;
- }
- if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
- efi.mps = table;
- pr_cont(" MPS=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
- efi.acpi20 = table;
- pr_cont(" ACPI 2.0=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
- efi.acpi = table;
- pr_cont(" ACPI=0x%lx ", table);
- } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
- efi.smbios = table;
- pr_cont(" SMBIOS=0x%lx ", table);
-#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = table;
- pr_cont(" UVsystab=0x%lx ", table);
-#endif
- } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
- efi.hcdp = table;
- pr_cont(" HCDP=0x%lx ", table);
- } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
- efi.uga = table;
- pr_cont(" UGA=0x%lx ", table);
- }
- tablep += sz;
- }
- pr_cont("\n");
- early_iounmap(config_tables, efi.systab->nr_tables * sz);
- return 0;
-}
-
-static int __init efi_runtime_init(void)
-{
- efi_runtime_services_t *runtime;
-
- /*
- * Check out the runtime services table. We need to map
- * the runtime services table so that we can grab the physical
- * address of several of the EFI runtime functions, needed to
- * set the firmware into virtual mode.
- */
- runtime = early_ioremap((unsigned long)efi.systab->runtime,
- sizeof(efi_runtime_services_t));
- if (!runtime) {
- pr_err("Could not map the runtime service table!\n");
- return -ENOMEM;
- }
- /*
- * We will only need *early* access to the following
- * two EFI runtime services before set_virtual_address_map
- * is invoked.
- */
- efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- runtime->set_virtual_address_map;
- /*
- * Make efi_get_time can be called before entering
- * virtual mode.
- */
- efi.get_time = phys_efi_get_time;
- early_iounmap(runtime, sizeof(efi_runtime_services_t));
-
- return 0;
-}
-
-static int __init efi_memmap_init(void)
-{
- /* Map the EFI memory map */
- memmap.map = early_ioremap((unsigned long)memmap.phys_map,
- memmap.nr_map * memmap.desc_size);
- if (memmap.map == NULL) {
- pr_err("Could not map the memory map!\n");
- return -ENOMEM;
- }
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-
- if (add_efi_memmap)
- do_add_efi_memmap();
+ ret = efi_config_parse_tables(config_tables, efi_nr_tables,
+ arch_tables);
- return 0;
+ early_memunmap(config_tables, efi_nr_tables * sz);
+ return ret;
}
void __init efi_init(void)
{
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- int i = 0;
- void *tmp;
-
-#ifdef CONFIG_X86_32
- if (boot_params.efi_info.efi_systab_hi ||
- boot_params.efi_info.efi_memmap_hi) {
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ (boot_params.efi_info.efi_systab_hi ||
+ boot_params.efi_info.efi_memmap_hi)) {
pr_info("Table located above 4GB, disabling EFI.\n");
return;
}
- efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-#else
- efi_phys.systab = (efi_system_table_t *)
- (boot_params.efi_info.efi_systab |
- ((__u64)boot_params.efi_info.efi_systab_hi<<32));
-#endif
- if (efi_systab_init(efi_phys.systab))
- return;
+ efi_systab_phys = boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi << 32);
- set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
+ if (efi_systab_init(efi_systab_phys))
+ return;
- /*
- * Show what we know for posterity
- */
- c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
- if (c16) {
- for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
- vendor[i] = *c16++;
- vendor[i] = '\0';
- } else
- pr_err("Could not map the firmware vendor!\n");
- early_iounmap(tmp, 2);
-
- pr_info("EFI v%u.%.02u by %s\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
-
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
+ if (efi_reuse_config(efi_config_table, efi_nr_tables))
return;
- set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
+ if (efi_config_init(arch_tables))
+ return;
/*
* Note: We currently don't support runtime services on an EFI
* that doesn't match the kernel 32/64-bit mode.
*/
- if (!efi_is_native())
- pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
- else {
- if (disable_runtime || efi_runtime_init())
- return;
- set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
- }
+ if (!efi_runtime_supported())
+ pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n");
- if (efi_memmap_init())
+ if (!efi_runtime_supported() || efi_runtime_disabled()) {
+ efi_memmap_unmap();
return;
-
- set_bit(EFI_MEMMAP, &x86_efi_facility);
-
-#ifdef CONFIG_X86_32
- if (efi_is_native()) {
- x86_platform.get_wallclock = efi_get_time;
- x86_platform.set_wallclock = efi_set_rtc_mmss;
- }
-#endif
-
-#if EFI_DEBUG
- print_efi_memmap();
-#endif
-}
-
-void __init efi_late_init(void)
-{
- efi_bgrt_init();
-}
-
-void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
-{
- u64 addr, npages;
-
- addr = md->virt_addr;
- npages = md->num_pages;
-
- memrange_efi_to_native(&addr, &npages);
-
- if (executable)
- set_memory_x(addr, npages);
- else
- set_memory_nx(addr, npages);
-}
-
-static void __init runtime_code_page_mkexec(void)
-{
- efi_memory_desc_t *md;
- void *p;
-
- /* Make EFI runtime service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if (md->type != EFI_RUNTIME_SERVICES_CODE)
- continue;
-
- efi_set_executable(md, true);
}
-}
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM. So, look it up in the existing EFI memory map instead. Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
- void *p;
- if (WARN_ON(!memmap.map))
- return NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
- u64 end = md->phys_addr + size;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- if (!md->virt_addr)
- continue;
- if (phys_addr >= md->phys_addr && phys_addr < end) {
- phys_addr += md->virt_addr - md->phys_addr;
- return (__force void __iomem *)(unsigned long)phys_addr;
- }
- }
- return NULL;
-}
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ efi_clean_memmap();
-void efi_memory_uc(u64 addr, unsigned long size)
-{
- unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
- u64 npages;
+ efi_remove_e820_mmio();
- npages = round_up(size, page_shift) / page_shift;
- memrange_efi_to_native(&addr, &npages);
- set_memory_uc(addr, npages);
+ if (efi_enabled(EFI_DBG))
+ efi_print_memmap();
}
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- */
-void __init efi_enter_virtual_mode(void)
+/* Merge contiguous regions of the same type and attribute */
+static void __init efi_merge_regions(void)
{
efi_memory_desc_t *md, *prev_md = NULL;
- efi_status_t status;
- unsigned long size;
- u64 end, systab, start_pfn, end_pfn;
- void *p, *va, *new_memmap = NULL;
- int count = 0;
-
- efi.systab = NULL;
- /*
- * We don't do virtual mode, since we don't do runtime services, on
- * non-native EFI
- */
-
- if (!efi_is_native()) {
- efi_unmap_memmap();
- return;
- }
-
- /* Merge contiguous regions of the same type and attribute */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ for_each_efi_memory_desc(md) {
u64 prev_size;
- md = p;
if (!prev_md) {
prev_md = md;
@@ -909,217 +534,394 @@ void __init efi_enter_virtual_mode(void)
}
prev_md = md;
}
+}
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+static void *realloc_pages(void *old_memmap, int old_shift)
+{
+ void *ret;
- size = md->num_pages << EFI_PAGE_SHIFT;
- end = md->phys_addr + size;
+ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
+ if (!ret)
+ goto out;
- start_pfn = PFN_DOWN(md->phys_addr);
- end_pfn = PFN_UP(end);
- if (pfn_range_is_mapped(start_pfn, end_pfn)) {
- va = __va(md->phys_addr);
+ /*
+ * A first-time allocation doesn't have anything to copy.
+ */
+ if (!old_memmap)
+ return ret;
- if (!(md->attribute & EFI_MEMORY_WB))
- efi_memory_uc((u64)(unsigned long)va, size);
- } else
- va = efi_ioremap(md->phys_addr, size,
- md->type, md->attribute);
+ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
- if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
- if (!va)
- pr_err("ioremap of 0x%llX failed!\n",
- (unsigned long long)md->phys_addr);
- continue;
- }
+out:
+ free_pages((unsigned long)old_memmap, old_shift);
+ return ret;
+}
- md->virt_addr = (u64) (unsigned long) va;
+/*
+ * Iterate the EFI memory map in reverse order because the regions
+ * will be mapped top-down. The end result is the same as if we had
+ * mapped things forward, but doesn't require us to change the
+ * existing implementation of efi_map_region().
+ */
+static inline void *efi_map_next_entry_reverse(void *entry)
+{
+ /* Initial call */
+ if (!entry)
+ return efi.memmap.map_end - efi.memmap.desc_size;
- if (!va) {
- pr_err("ioremap of 0x%llX failed!\n",
- (unsigned long long)md->phys_addr);
- continue;
- }
+ entry -= efi.memmap.desc_size;
+ if (entry < efi.memmap.map)
+ return NULL;
- systab = (u64) (unsigned long) efi_phys.systab;
- if (md->phys_addr <= systab && systab < end) {
- systab += md->virt_addr - md->phys_addr;
- efi.systab = (efi_system_table_t *) (unsigned long) systab;
- }
- new_memmap = krealloc(new_memmap,
- (count + 1) * memmap.desc_size,
- GFP_KERNEL);
- memcpy(new_memmap + (count * memmap.desc_size), md,
- memmap.desc_size);
- count++;
+ return entry;
+}
+
+/*
+ * efi_map_next_entry - Return the next EFI memory map descriptor
+ * @entry: Previous EFI memory map descriptor
+ *
+ * This is a helper function to iterate over the EFI memory map, which
+ * we do in different orders depending on the current configuration.
+ *
+ * To begin traversing the memory map @entry must be %NULL.
+ *
+ * Returns %NULL when we reach the end of the memory map.
+ */
+static void *efi_map_next_entry(void *entry)
+{
+ if (efi_enabled(EFI_64BIT)) {
+ /*
+ * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
+ * config table feature requires us to map all entries
+ * in the same order as they appear in the EFI memory
+ * map. That is to say, entry N must have a lower
+ * virtual address than entry N+1. This is because the
+ * firmware toolchain leaves relative references in
+ * the code/data sections, which are split and become
+ * separate EFI memory regions. Mapping things
+ * out-of-order leads to the firmware accessing
+ * unmapped addresses.
+ *
+ * Since we need to map things this way whether or not
+ * the kernel actually makes use of
+ * EFI_PROPERTIES_TABLE, let's just switch to this
+ * scheme by default for 64-bit.
+ */
+ return efi_map_next_entry_reverse(entry);
}
- BUG_ON(!efi.systab);
+ /* Initial call */
+ if (!entry)
+ return efi.memmap.map;
- status = phys_efi_set_virtual_address_map(
- memmap.desc_size * count,
- memmap.desc_size,
- memmap.desc_version,
- (efi_memory_desc_t *)__pa(new_memmap));
+ entry += efi.memmap.desc_size;
+ if (entry >= efi.memmap.map_end)
+ return NULL;
- if (status != EFI_SUCCESS) {
- pr_alert("Unable to switch EFI into virtual mode "
- "(status=%lx)!\n", status);
- panic("EFI call to SetVirtualAddressMap() failed!");
+ return entry;
+}
+
+static bool should_map_region(efi_memory_desc_t *md)
+{
+ /*
+ * Runtime regions always require runtime mappings (obviously).
+ */
+ if (md->attribute & EFI_MEMORY_RUNTIME)
+ return true;
+
+ /*
+ * 32-bit EFI doesn't suffer from the bug that requires us to
+ * reserve boot services regions, and mixed mode support
+ * doesn't exist for 32-bit kernels.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ return false;
+
+ /*
+ * EFI specific purpose memory may be reserved by default
+ * depending on kernel config and boot options.
+ */
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ return false;
+
+ /*
+ * Map all of RAM so that we can access arguments in the 1:1
+ * mapping when making EFI runtime calls.
+ */
+ if (efi_is_mixed()) {
+ if (md->type == EFI_CONVENTIONAL_MEMORY ||
+ md->type == EFI_LOADER_DATA ||
+ md->type == EFI_LOADER_CODE)
+ return true;
}
/*
- * Now that EFI is in virtual mode, update the function
- * pointers in the runtime service table to the new virtual addresses.
+ * Map boot services regions as a workaround for buggy
+ * firmware that accesses them even when they shouldn't.
*
- * Call EFI services through wrapper functions.
+ * See efi_{reserve,free}_boot_services().
*/
- efi.runtime_version = efi_systab.hdr.revision;
- efi.get_time = virt_efi_get_time;
- efi.set_time = virt_efi_set_time;
- efi.get_wakeup_time = virt_efi_get_wakeup_time;
- efi.set_wakeup_time = virt_efi_set_wakeup_time;
- efi.get_variable = virt_efi_get_variable;
- efi.get_next_variable = virt_efi_get_next_variable;
- efi.set_variable = virt_efi_set_variable;
- efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
- efi.reset_system = virt_efi_reset_system;
- efi.set_virtual_address_map = NULL;
- efi.query_variable_info = virt_efi_query_variable_info;
- efi.update_capsule = virt_efi_update_capsule;
- efi.query_capsule_caps = virt_efi_query_capsule_caps;
- if (__supported_pte_mask & _PAGE_NX)
- runtime_code_page_mkexec();
-
- kfree(new_memmap);
+ if (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA)
+ return true;
- /* clean DUMMY object */
- efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS,
- 0, NULL);
+ return false;
}
/*
- * Convenience functions to obtain memory types and attributes
+ * Map the efi memory ranges of the runtime services and update new_mmap with
+ * virtual addresses.
*/
-u32 efi_mem_type(unsigned long phys_addr)
+static void * __init efi_map_regions(int *count, int *pg_shift)
{
+ void *p, *new_memmap = NULL;
+ unsigned long left = 0;
+ unsigned long desc_size;
efi_memory_desc_t *md;
- void *p;
- if (!efi_enabled(EFI_MEMMAP))
- return 0;
+ desc_size = efi.memmap.desc_size;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ p = NULL;
+ while ((p = efi_map_next_entry(p))) {
md = p;
- if ((md->phys_addr <= phys_addr) &&
- (phys_addr < (md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT))))
- return md->type;
+
+ if (!should_map_region(md))
+ continue;
+
+ efi_map_region(md);
+
+ if (left < desc_size) {
+ new_memmap = realloc_pages(new_memmap, *pg_shift);
+ if (!new_memmap)
+ return NULL;
+
+ left += PAGE_SIZE << *pg_shift;
+ (*pg_shift)++;
+ }
+
+ memcpy(new_memmap + (*count * desc_size), md, desc_size);
+
+ left -= desc_size;
+ (*count)++;
}
- return 0;
+
+ return new_memmap;
}
-u64 efi_mem_attributes(unsigned long phys_addr)
+static void __init kexec_enter_virtual_mode(void)
{
+#ifdef CONFIG_KEXEC_CORE
efi_memory_desc_t *md;
- void *p;
+ unsigned int num_pages;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) &&
- (phys_addr < (md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT))))
- return md->attribute;
+ /*
+ * We don't do virtual mode, since we don't do runtime services, on
+ * non-native EFI.
+ */
+ if (efi_is_mixed()) {
+ efi_memmap_unmap();
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
}
- return 0;
+
+ if (efi_alloc_page_tables()) {
+ pr_err("Failed to allocate EFI page tables\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
+ }
+
+ /*
+ * Map efi regions which were passed via setup_data. The virt_addr is a
+ * fixed addr which was used in first kernel of a kexec boot.
+ */
+ for_each_efi_memory_desc(md)
+ efi_map_region_fixed(md); /* FIXME: add error handling */
+
+ /*
+ * Unregister the early EFI memmap from efi_init() and install
+ * the new EFI memory map.
+ */
+ efi_memmap_unmap();
+
+ if (efi_memmap_init_late(efi.memmap.phys_map,
+ efi.memmap.desc_size * efi.memmap.nr_map)) {
+ pr_err("Failed to remap late EFI memory map\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
+ }
+
+ num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
+ num_pages >>= PAGE_SHIFT;
+
+ if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return;
+ }
+
+ efi_sync_low_kernel_mappings();
+ efi_native_runtime_setup();
+ efi_runtime_update_mappings();
+#endif
}
/*
- * Some firmware has serious problems when using more than 50% of the EFI
- * variable store, i.e. it triggers bugs that can brick machines. Ensure that
- * we never use more than this safe limit.
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, we look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor into the
+ * efi_pgd page table.
+ *
+ * The new method does a pagetable switch in a preemption-safe manner
+ * so that we're in a different address space when calling a runtime
+ * function. For function arguments passing we do copy the PUDs of the
+ * kernel page table into efi_pgd prior to each call.
*
- * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
- * store.
+ * Specially for kexec boot, efi runtime maps in previous kernel should
+ * be passed in via setup_data. In that case runtime ranges will be mapped
+ * to the same virtual addresses as the first kernel, see
+ * kexec_enter_virtual_mode().
*/
-efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
+static void __init __efi_enter_virtual_mode(void)
{
+ int count = 0, pg_shift = 0;
+ void *new_memmap = NULL;
efi_status_t status;
- u64 storage_size, remaining_size, max_size;
+ unsigned long pa;
- if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
- return 0;
+ if (efi_alloc_page_tables()) {
+ pr_err("Failed to allocate EFI page tables\n");
+ goto err;
+ }
- status = efi.query_variable_info(attributes, &storage_size,
- &remaining_size, &max_size);
- if (status != EFI_SUCCESS)
- return status;
+ efi_merge_regions();
+ new_memmap = efi_map_regions(&count, &pg_shift);
+ if (!new_memmap) {
+ pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+ goto err;
+ }
+
+ pa = __pa(new_memmap);
/*
- * Some firmware implementations refuse to boot if there's insufficient
- * space in the variable store. We account for that by refusing the
- * write if permitting it would reduce the available space to under
- * 5KB. This figure was provided by Samsung, so should be safe.
+ * Unregister the early EFI memmap from efi_init() and install
+ * the new EFI memory map that we are about to pass to the
+ * firmware via SetVirtualAddressMap().
*/
- if ((remaining_size - size < EFI_MIN_RESERVE) &&
- !efi_no_storage_paranoia) {
+ efi_memmap_unmap();
- /*
- * Triggering garbage collection may require that the firmware
- * generate a real EFI_OUT_OF_RESOURCES error. We can force
- * that by attempting to use more space than is available.
- */
- unsigned long dummy_size = remaining_size + 1024;
- void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
+ if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
+ pr_err("Failed to remap late EFI memory map\n");
+ goto err;
+ }
- if (!dummy)
- return EFI_OUT_OF_RESOURCES;
+ if (efi_enabled(EFI_DBG)) {
+ pr_info("EFI runtime memory map:\n");
+ efi_print_memmap();
+ }
- status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS,
- dummy_size, dummy);
+ if (efi_setup_page_tables(pa, 1 << pg_shift))
+ goto err;
- if (status == EFI_SUCCESS) {
- /*
- * This should have failed, so if it didn't make sure
- * that we delete it...
- */
- efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS,
- 0, dummy);
- }
+ efi_sync_low_kernel_mappings();
- kfree(dummy);
+ status = efi_set_virtual_address_map(efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
+ (efi_memory_desc_t *)pa,
+ efi_systab_phys);
+ if (status != EFI_SUCCESS) {
+ pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n",
+ status);
+ goto err;
+ }
- /*
- * The runtime code may now have triggered a garbage collection
- * run, so check the variable info again
- */
- status = efi.query_variable_info(attributes, &storage_size,
- &remaining_size, &max_size);
+ efi_check_for_embedded_firmwares();
+ efi_free_boot_services();
- if (status != EFI_SUCCESS)
- return status;
+ if (!efi_is_mixed())
+ efi_native_runtime_setup();
+ else
+ efi_thunk_runtime_setup();
- /*
- * There still isn't enough room, so return an error
- */
- if (remaining_size - size < EFI_MIN_RESERVE)
- return EFI_OUT_OF_RESOURCES;
+ /*
+ * Apply more restrictive page table mapping attributes now that
+ * SVAM() has been called and the firmware has performed all
+ * necessary relocation fixups for the new virtual addresses.
+ */
+ efi_runtime_update_mappings();
+
+ /* clean DUMMY object */
+ efi_delete_dummy_variable();
+ return;
+
+err:
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+}
+
+void __init efi_enter_virtual_mode(void)
+{
+ if (efi_enabled(EFI_PARAVIRT))
+ return;
+
+ efi.runtime = (efi_runtime_services_t *)efi_runtime;
+
+ if (efi_setup)
+ kexec_enter_virtual_mode();
+ else
+ __efi_enter_virtual_mode();
+
+ efi_dump_pagetable();
+}
+
+bool efi_is_table_address(unsigned long phys_addr)
+{
+ unsigned int i;
+
+ if (phys_addr == EFI_INVALID_TABLE_ADDR)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+ if (*(efi_tables[i]) == phys_addr)
+ return true;
+
+ return false;
+}
+
+#define EFI_FIELD(var) efi_ ## var
+
+#define EFI_ATTR_SHOW(name) \
+static ssize_t name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \
+}
+
+EFI_ATTR_SHOW(fw_vendor);
+EFI_ATTR_SHOW(runtime);
+EFI_ATTR_SHOW(config_table);
+
+struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor);
+struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime);
+struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table);
+
+umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n)
+{
+ if (attr == &efi_attr_fw_vendor.attr) {
+ if (efi_enabled(EFI_PARAVIRT) ||
+ efi_fw_vendor == EFI_INVALID_TABLE_ADDR)
+ return 0;
+ } else if (attr == &efi_attr_runtime.attr) {
+ if (efi_runtime == EFI_INVALID_TABLE_ADDR)
+ return 0;
+ } else if (attr == &efi_attr_config_table.attr) {
+ if (efi_config_table == EFI_INVALID_TABLE_ADDR)
+ return 0;
}
+ return attr->mode;
+}
- return EFI_SUCCESS;
+enum efi_secureboot_mode __x86_ima_efi_boot_mode(void)
+{
+ return boot_params.secure_boot;
}
-EXPORT_SYMBOL_GPL(efi_query_variable_store);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e446941dd7..b2cc7b4552a1 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Extensible Firmware Interface
*
@@ -23,47 +24,131 @@
#include <linux/types.h>
#include <linux/ioport.h>
#include <linux/efi.h>
+#include <linux/pgtable.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/efi.h>
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+ u64 start_pfn, end_pfn, end;
+ unsigned long size;
+ void *va;
+
+ start_pfn = PFN_DOWN(md->phys_addr);
+ size = md->num_pages << PAGE_SHIFT;
+ end = md->phys_addr + size;
+ end_pfn = PFN_UP(end);
+
+ if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+ va = __va(md->phys_addr);
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ set_memory_uc((unsigned long)va, md->num_pages);
+ } else {
+ va = ioremap_cache(md->phys_addr, size);
+ }
+
+ md->virt_addr = (unsigned long)va;
+ if (!va)
+ pr_err("ioremap of 0x%llX failed!\n", md->phys_addr);
+}
+
/*
* To make EFI call EFI runtime service in physical addressing mode we need
- * prelog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
*/
-static unsigned long efi_rt_eflags;
+int __init efi_alloc_page_tables(void)
+{
+ return 0;
+}
+
+void efi_sync_low_kernel_mappings(void) {}
-void efi_call_phys_prelog(void)
+void __init efi_dump_pagetable(void)
{
- struct desc_ptr gdt_descr;
+#ifdef CONFIG_EFI_PGT_DUMP
+ ptdump_walk_pgd_level(NULL, &init_mm);
+#endif
+}
+
+int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+ return 0;
+}
+
+void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
+void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
- local_irq_save(efi_rt_eflags);
+efi_status_t efi_call_svam(efi_runtime_services_t * const *,
+ u32, u32, u32, void *, u32);
+efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys)
+{
+ const efi_system_table_t *systab = (efi_system_table_t *)systab_phys;
+ struct desc_ptr gdt_descr;
+ efi_status_t status;
+ unsigned long flags;
+ pgd_t *save_pgd;
+
+ /* Current pgd is swapper_pg_dir, we'll restore it later: */
+ save_pgd = swapper_pg_dir;
load_cr3(initial_page_table);
__flush_tlb_all();
- gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.address = get_cpu_gdt_paddr(0);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
+
+ /* Disable interrupts around EFI calls: */
+ local_irq_save(flags);
+ status = efi_call_svam(&systab->runtime,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map,
+ __pa(&efi.runtime));
+ local_irq_restore(flags);
+
+ load_fixmap_gdt(0);
+ load_cr3(save_pgd);
+ __flush_tlb_all();
+
+ return status;
}
-void efi_call_phys_epilog(void)
+void __init efi_runtime_update_mappings(void)
{
- struct desc_ptr gdt_descr;
+ if (__supported_pte_mask & _PAGE_NX) {
+ efi_memory_desc_t *md;
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
+ /* Make EFI runtime service code area executable */
+ for_each_efi_memory_desc(md) {
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
- load_cr3(swapper_pg_dir);
- __flush_tlb_all();
+ set_memory_x(md->virt_addr, md->num_pages);
+ }
+ }
+}
- local_irq_restore(efi_rt_eflags);
+void arch_efi_call_virt_setup(void)
+{
+ efi_fpu_begin();
+ firmware_restrict_branch_speculation_start();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+ firmware_restrict_branch_speculation_end();
+ efi_fpu_end();
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 39a0e7f1f0a3..b4409df2105a 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* x86_64 specific EFI support functions
* Based on Extensible Firmware Interface Specification version 1.0
@@ -15,101 +16,825 @@
*
*/
+#define pr_fmt(fmt) "efi: " fmt
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/ioport.h>
-#include <linux/module.h>
+#include <linux/mc146818rtc.h>
#include <linux/efi.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/slab.h>
+#include <linux/ucs2_string.h>
+#include <linux/cc_platform.h>
+#include <linux/sched/task.h>
#include <asm/setup.h>
#include <asm/page.h>
-#include <asm/e820.h>
-#include <asm/pgtable.h>
+#include <asm/e820/api.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/efi.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
+#include <asm/realmode.h>
+#include <asm/time.h>
+#include <asm/pgalloc.h>
+#include <asm/sev.h>
+
+/*
+ * We allocate runtime services regions top-down, starting from -4G, i.e.
+ * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
+ */
+static u64 efi_va = EFI_VA_START;
+static struct mm_struct *efi_prev_mm;
+
+/*
+ * We need our own copy of the higher levels of the page tables
+ * because we want to avoid inserting EFI region mappings (EFI_VA_END
+ * to EFI_VA_START) into the standard kernel page tables. Everything
+ * else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
+ */
+int __init efi_alloc_page_tables(void)
+{
+ pgd_t *pgd, *efi_pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ gfp_t gfp_mask;
+
+ gfp_mask = GFP_KERNEL | __GFP_ZERO;
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order());
+ if (!efi_pgd)
+ goto fail;
+
+ pgd = efi_pgd + pgd_index(EFI_VA_END);
+ p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
+ if (!p4d)
+ goto free_pgd;
+
+ pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
+ if (!pud)
+ goto free_p4d;
+
+ efi_mm.pgd = efi_pgd;
+ mm_init_cpumask(&efi_mm);
+ init_new_context(NULL, &efi_mm);
+ set_notrack_mm(&efi_mm);
+
+ return 0;
+
+free_p4d:
+ if (pgtable_l5_enabled())
+ free_page((unsigned long)pgd_page_vaddr(*pgd));
+free_pgd:
+ free_pages((unsigned long)efi_pgd, pgd_allocation_order());
+fail:
+ return -ENOMEM;
+}
+
+/*
+ * Add low kernel mappings for passing arguments to EFI functions.
+ */
+void efi_sync_low_kernel_mappings(void)
+{
+ unsigned num_entries;
+ pgd_t *pgd_k, *pgd_efi;
+ p4d_t *p4d_k, *p4d_efi;
+ pud_t *pud_k, *pud_efi;
+ pgd_t *efi_pgd = efi_mm.pgd;
+
+ pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
+ pgd_k = pgd_offset_k(PAGE_OFFSET);
+
+ num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
+ memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
+
+ pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+ pgd_k = pgd_offset_k(EFI_VA_END);
+ p4d_efi = p4d_offset(pgd_efi, 0);
+ p4d_k = p4d_offset(pgd_k, 0);
+
+ num_entries = p4d_index(EFI_VA_END);
+ memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
+
+ /*
+ * We share all the PUD entries apart from those that map the
+ * EFI regions. Copy around them.
+ */
+ BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
+ BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
+
+ p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
+ p4d_k = p4d_offset(pgd_k, EFI_VA_END);
+ pud_efi = pud_offset(p4d_efi, 0);
+ pud_k = pud_offset(p4d_k, 0);
+
+ num_entries = pud_index(EFI_VA_END);
+ memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
+
+ pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+ pud_k = pud_offset(p4d_k, EFI_VA_START);
+
+ num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
+ memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
+}
+
+/*
+ * Wrapper for slow_virt_to_phys() that handles NULL addresses.
+ */
+static inline phys_addr_t
+virt_to_phys_or_null_size(void *va, unsigned long size)
+{
+ phys_addr_t pa;
+
+ if (!va)
+ return 0;
+
+ if (virt_addr_valid(va))
+ return virt_to_phys(va);
+
+ pa = slow_virt_to_phys(va);
+
+ /* check if the object crosses a page boundary */
+ if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK))
+ return 0;
+
+ return pa;
+}
+
+#define virt_to_phys_or_null(addr) \
+ virt_to_phys_or_null_size((addr), sizeof(*(addr)))
+
+int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+ extern const u8 __efi64_thunk_ret_tramp[];
+ unsigned long pfn, text, pf, rodata, tramp;
+ struct page *page;
+ unsigned npages;
+ pgd_t *pgd = efi_mm.pgd;
+
+ /*
+ * It can happen that the physical address of new_memmap lands in memory
+ * which is not mapped in the EFI page table. Therefore we need to go
+ * and ident-map those pages containing the map before calling
+ * phys_efi_set_virtual_address_map().
+ */
+ pfn = pa_memmap >> PAGE_SHIFT;
+ pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
+ pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
+ return 1;
+ }
+
+ /*
+ * Certain firmware versions are way too sentimental and still believe
+ * they are exclusive and unquestionable owners of the first physical page,
+ * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY
+ * (but then write-access it later during SetVirtualAddressMap()).
+ *
+ * Create a 1:1 mapping for this page, to avoid triple faults during early
+ * boot with such firmware. We are free to hand this page to the BIOS,
+ * as trim_bios_range() will reserve the first page and isolate it away
+ * from memory allocators anyway.
+ */
+ if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) {
+ pr_err("Failed to create 1:1 mapping for the first page!\n");
+ return 1;
+ }
+
+ /*
+ * When SEV-ES is active, the GHCB as set by the kernel will be used
+ * by firmware. Create a 1:1 unencrypted mapping for each GHCB.
+ */
+ if (sev_es_efi_map_ghcbs_cas(pgd)) {
+ pr_err("Failed to create 1:1 mapping for the GHCBs and CAs!\n");
+ return 1;
+ }
+
+ /*
+ * When making calls to the firmware everything needs to be 1:1
+ * mapped and addressable with 32-bit pointers. Map the kernel
+ * text and allocate a new stack because we can't rely on the
+ * stack pointer being < 4GB.
+ */
+ if (!efi_is_mixed())
+ return 0;
+
+ page = alloc_page(GFP_KERNEL|__GFP_DMA32);
+ if (!page) {
+ pr_err("Unable to allocate EFI runtime stack < 4GB\n");
+ return 1;
+ }
+
+ efi_mixed_mode_stack_pa = page_to_phys(page + 1); /* stack grows down */
+
+ npages = (_etext - _text) >> PAGE_SHIFT;
+ text = __pa(_text);
+
+ if (kernel_unmap_pages_in_pgd(pgd, text, npages)) {
+ pr_err("Failed to unmap kernel text 1:1 mapping\n");
+ return 1;
+ }
+
+ npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT;
+ rodata = __pa(__start_rodata);
+ pfn = rodata >> PAGE_SHIFT;
+
+ pf = _PAGE_NX | _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) {
+ pr_err("Failed to map kernel rodata 1:1\n");
+ return 1;
+ }
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
+ tramp = __pa(__efi64_thunk_ret_tramp);
+ pfn = tramp >> PAGE_SHIFT;
-static void __init early_code_mapping_set_exec(int executable)
+ pf = _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) {
+ pr_err("Failed to map mixed mode return trampoline\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
- efi_memory_desc_t *md;
- void *p;
+ unsigned long flags = _PAGE_RW;
+ unsigned long pfn;
+ pgd_t *pgd = efi_mm.pgd;
+
+ /*
+ * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF
+ * executable images in memory that consist of both R-X and
+ * RW- sections, so we cannot apply read-only or non-exec
+ * permissions just yet. However, modern EFI systems provide
+ * a memory attributes table that describes those sections
+ * with the appropriate restricted permissions, which are
+ * applied in efi_runtime_update_mappings() below. All other
+ * regions can be mapped non-executable at this point, with
+ * the exception of boot services code regions, but those will
+ * be unmapped again entirely in efi_free_boot_services().
+ */
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_CODE)
+ flags |= _PAGE_NX;
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ flags |= _PAGE_PCD;
- if (!(__supported_pte_mask & _PAGE_NX))
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+ md->type != EFI_MEMORY_MAPPED_IO)
+ flags |= _PAGE_ENC;
+
+ pfn = md->phys_addr >> PAGE_SHIFT;
+ if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
+ pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+ md->phys_addr, va);
+}
+
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+ unsigned long size = md->num_pages << PAGE_SHIFT;
+ u64 pa = md->phys_addr;
+
+ /*
+ * Make sure the 1:1 mappings are present as a catch-all for b0rked
+ * firmware which doesn't update all internal pointers after switching
+ * to virtual mode and would otherwise crap on us.
+ */
+ __map_region(md, md->phys_addr);
+
+ /*
+ * Enforce the 1:1 mapping as the default virtual address when
+ * booting in EFI mixed mode, because even though we may be
+ * running a 64-bit kernel, the firmware may only be 32-bit.
+ */
+ if (efi_is_mixed()) {
+ md->virt_addr = md->phys_addr;
return;
+ }
+
+ efi_va -= size;
- /* Make EFI service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if (md->type == EFI_RUNTIME_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_CODE)
- efi_set_executable(md, executable);
+ /* Is PA 2M-aligned? */
+ if (!(pa & (PMD_SIZE - 1))) {
+ efi_va &= PMD_MASK;
+ } else {
+ u64 pa_offset = pa & (PMD_SIZE - 1);
+ u64 prev_va = efi_va;
+
+ /* get us the same offset within this 2M page */
+ efi_va = (efi_va & PMD_MASK) + pa_offset;
+
+ if (efi_va > prev_va)
+ efi_va -= PMD_SIZE;
}
+
+ if (efi_va < EFI_VA_END) {
+ pr_warn(FW_WARN "VA address range overflow!\n");
+ return;
+ }
+
+ /* Do the VA map */
+ __map_region(md, efi_va);
+ md->virt_addr = efi_va;
+}
+
+/*
+ * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges.
+ * md->virt_addr is the original virtual address which had been mapped in kexec
+ * 1st kernel.
+ */
+void __init efi_map_region_fixed(efi_memory_desc_t *md)
+{
+ __map_region(md, md->phys_addr);
+ __map_region(md, md->virt_addr);
+}
+
+void __init parse_efi_setup(u64 phys_addr, u32 data_len)
+{
+ efi_setup = phys_addr + sizeof(struct setup_data);
+}
+
+static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf)
+{
+ unsigned long pfn;
+ pgd_t *pgd = efi_mm.pgd;
+ int err1, err2;
+
+ /* Update the 1:1 mapping */
+ pfn = md->phys_addr >> PAGE_SHIFT;
+ err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf);
+ if (err1) {
+ pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n",
+ md->phys_addr, md->virt_addr);
+ }
+
+ err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf);
+ if (err2) {
+ pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n",
+ md->phys_addr, md->virt_addr);
+ }
+
+ return err1 || err2;
}
-void __init efi_call_phys_prelog(void)
+bool efi_disable_ibt_for_runtime __ro_after_init = true;
+
+static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md,
+ bool has_ibt)
{
- unsigned long vaddress;
- int pgd;
- int n_pgds;
+ unsigned long pf = 0;
+
+ efi_disable_ibt_for_runtime |= !has_ibt;
- early_code_mapping_set_exec(1);
- local_irq_save(efi_flags);
+ if (md->attribute & EFI_MEMORY_XP)
+ pf |= _PAGE_NX;
- n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
- save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+ if (!(md->attribute & EFI_MEMORY_RO))
+ pf |= _PAGE_RW;
- for (pgd = 0; pgd < n_pgds; pgd++) {
- save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
- vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
- set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ pf |= _PAGE_ENC;
+
+ return efi_update_mappings(md, pf);
+}
+
+void __init efi_runtime_update_mappings(void)
+{
+ if (efi_enabled(EFI_MEM_ATTR)) {
+ efi_disable_ibt_for_runtime = false;
+ efi_memattr_apply_permissions(NULL, efi_update_mem_attr);
}
- __flush_tlb_all();
}
-void __init efi_call_phys_epilog(void)
+void __init efi_dump_pagetable(void)
+{
+#ifdef CONFIG_EFI_PGT_DUMP
+ ptdump_walk_pgd_level(NULL, &efi_mm);
+#endif
+}
+
+/*
+ * Makes the calling thread switch to/from efi_mm context. Can be used
+ * in a kernel thread and user context. Preemption needs to remain disabled
+ * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm
+ * can not change under us.
+ * It should be ensured that there are no concurrent calls to this function.
+ */
+static void efi_enter_mm(void)
+{
+ efi_prev_mm = use_temporary_mm(&efi_mm);
+}
+
+static void efi_leave_mm(void)
+{
+ unuse_temporary_mm(efi_prev_mm);
+}
+
+void arch_efi_call_virt_setup(void)
+{
+ efi_sync_low_kernel_mappings();
+ efi_fpu_begin();
+ firmware_restrict_branch_speculation_start();
+ efi_enter_mm();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+ efi_leave_mm();
+ firmware_restrict_branch_speculation_end();
+ efi_fpu_end();
+}
+
+static DEFINE_SPINLOCK(efi_runtime_lock);
+
+/*
+ * DS and ES contain user values. We need to save them.
+ * The 32-bit EFI code needs a valid DS, ES, and SS. There's no
+ * need to save the old SS: __KERNEL_DS is always acceptable.
+ */
+#define __efi_thunk(func, ...) \
+({ \
+ unsigned short __ds, __es; \
+ efi_status_t ____s; \
+ \
+ savesegment(ds, __ds); \
+ savesegment(es, __es); \
+ \
+ loadsegment(ss, __KERNEL_DS); \
+ loadsegment(ds, __KERNEL_DS); \
+ loadsegment(es, __KERNEL_DS); \
+ \
+ ____s = efi64_thunk(efi.runtime->mixed_mode.func, __VA_ARGS__); \
+ \
+ loadsegment(ds, __ds); \
+ loadsegment(es, __es); \
+ \
+ ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \
+ ____s; \
+})
+
+/*
+ * Switch to the EFI page tables early so that we can access the 1:1
+ * runtime services mappings which are not mapped in any other page
+ * tables.
+ *
+ * Also, disable interrupts because the IDT points to 64-bit handlers,
+ * which aren't going to function correctly when we switch to 32-bit.
+ */
+#define efi_thunk(func...) \
+({ \
+ efi_status_t __s; \
+ \
+ arch_efi_call_virt_setup(); \
+ \
+ __s = __efi_thunk(func); \
+ \
+ arch_efi_call_virt_teardown(); \
+ \
+ __s; \
+})
+
+static efi_status_t __init __no_sanitize_address
+efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ efi_status_t status;
+ unsigned long flags;
+
+ efi_sync_low_kernel_mappings();
+ local_irq_save(flags);
+
+ efi_enter_mm();
+
+ status = __efi_thunk(set_virtual_address_map, memory_map_size,
+ descriptor_size, descriptor_version, virtual_map);
+
+ efi_leave_mm();
+ local_irq_restore(flags);
+
+ return status;
+}
+
+static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static efi_status_t efi_thunk_set_time(efi_time_t *tm)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static efi_status_t
+efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
+ efi_time_t *tm)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static efi_status_t
+efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static unsigned long efi_name_size(efi_char16_t *name)
+{
+ return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1;
+}
+
+static efi_status_t
+efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
+ u32 *attr, unsigned long *data_size, void *data)
+{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
+ efi_status_t status;
+ u32 phys_name, phys_vendor, phys_attr;
+ u32 phys_data_size, phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ *vnd = *vendor;
+
+ phys_data_size = virt_to_phys_or_null(data_size);
+ phys_vendor = virt_to_phys_or_null(vnd);
+ phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+ phys_attr = virt_to_phys_or_null(attr);
+ phys_data = virt_to_phys_or_null_size(data, *data_size);
+
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_variable, phys_name, phys_vendor,
+ phys_attr, phys_data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size, void *data)
+{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
+ u32 phys_name, phys_vendor, phys_data;
+ efi_status_t status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ *vnd = *vendor;
+
+ phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+ phys_vendor = virt_to_phys_or_null(vnd);
+ phys_data = virt_to_phys_or_null_size(data, data_size);
+
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size,
+ void *data)
+{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
+ u32 phys_name, phys_vendor, phys_data;
+ efi_status_t status;
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
+
+ *vnd = *vendor;
+
+ phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+ phys_vendor = virt_to_phys_or_null(vnd);
+ phys_data = virt_to_phys_or_null_size(data, data_size);
+
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_get_next_variable(unsigned long *name_size,
+ efi_char16_t *name,
+ efi_guid_t *vendor)
+{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
+ efi_status_t status;
+ u32 phys_name_size, phys_name, phys_vendor;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ *vnd = *vendor;
+
+ phys_name_size = virt_to_phys_or_null(name_size);
+ phys_vendor = virt_to_phys_or_null(vnd);
+ phys_name = virt_to_phys_or_null_size(name, *name_size);
+
+ if (!phys_name)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_next_variable, phys_name_size,
+ phys_name, phys_vendor);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ *vendor = *vnd;
+ return status;
+}
+
+static efi_status_t
+efi_thunk_get_next_high_mono_count(u32 *count)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static void
+efi_thunk_reset_system(int reset_type, efi_status_t status,
+ unsigned long data_size, efi_char16_t *data)
+{
+ u32 phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_data = virt_to_phys_or_null_size(data, data_size);
+
+ efi_thunk(reset_system, reset_type, status, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+}
+
+static efi_status_t
+efi_thunk_update_capsule(efi_capsule_header_t **capsules,
+ unsigned long count, unsigned long sg_list)
{
/*
- * After the lock is released, the original page table is restored.
+ * To properly support this function we would need to repackage
+ * 'capsules' because the firmware doesn't understand 64-bit
+ * pointers.
*/
- int pgd;
- int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
- for (pgd = 0; pgd < n_pgds; pgd++)
- set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
- kfree(save_pgd);
- __flush_tlb_all();
- local_irq_restore(efi_flags);
- early_code_mapping_set_exec(0);
+ return EFI_UNSUPPORTED;
}
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
- u32 type, u64 attribute)
+static efi_status_t
+efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
{
- unsigned long last_map_pfn;
+ efi_status_t status;
+ u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
- if (type == EFI_MEMORY_MAPPED_IO)
- return ioremap(phys_addr, size);
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
- last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
- if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
- unsigned long top = last_map_pfn << PAGE_SHIFT;
- efi_ioremap(top, size - (top - phys_addr), type, attribute);
- }
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_storage = virt_to_phys_or_null(storage_space);
+ phys_remaining = virt_to_phys_or_null(remaining_space);
+ phys_max = virt_to_phys_or_null(max_variable_size);
+
+ status = efi_thunk(query_variable_info, attr, phys_storage,
+ phys_remaining, phys_max);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
+{
+ efi_status_t status;
+ u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
+
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
+
+ phys_storage = virt_to_phys_or_null(storage_space);
+ phys_remaining = virt_to_phys_or_null(remaining_space);
+ phys_max = virt_to_phys_or_null(max_variable_size);
+
+ status = efi_thunk(query_variable_info, attr, phys_storage,
+ phys_remaining, phys_max);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules,
+ unsigned long count, u64 *max_size,
+ int *reset_type)
+{
+ /*
+ * To properly support this function we would need to repackage
+ * 'capsules' because the firmware doesn't understand 64-bit
+ * pointers.
+ */
+ return EFI_UNSUPPORTED;
+}
+
+void __init efi_thunk_runtime_setup(void)
+{
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return;
+
+ efi.get_time = efi_thunk_get_time;
+ efi.set_time = efi_thunk_set_time;
+ efi.get_wakeup_time = efi_thunk_get_wakeup_time;
+ efi.set_wakeup_time = efi_thunk_set_wakeup_time;
+ efi.get_variable = efi_thunk_get_variable;
+ efi.get_next_variable = efi_thunk_get_next_variable;
+ efi.set_variable = efi_thunk_set_variable;
+ efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking;
+ efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count;
+ efi.reset_system = efi_thunk_reset_system;
+ efi.query_variable_info = efi_thunk_query_variable_info;
+ efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking;
+ efi.update_capsule = efi_thunk_update_capsule;
+ efi.query_capsule_caps = efi_thunk_query_capsule_caps;
+}
+
+efi_status_t __init __no_sanitize_address
+efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map,
+ unsigned long systab_phys)
+{
+ const efi_system_table_t *systab = (efi_system_table_t *)systab_phys;
+ efi_status_t status;
+ unsigned long flags;
+
+ if (efi_is_mixed())
+ return efi_thunk_set_virtual_address_map(memory_map_size,
+ descriptor_size,
+ descriptor_version,
+ virtual_map);
+ efi_enter_mm();
+
+ efi_fpu_begin();
+
+ /* Disable interrupts around EFI calls: */
+ local_irq_save(flags);
+ status = arch_efi_call_virt(efi.runtime, set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ local_irq_restore(flags);
+
+ efi_fpu_end();
+
+ /* grab the virtually remapped EFI runtime services table pointer */
+ efi.runtime = READ_ONCE(systab->runtime);
- if (!(attribute & EFI_MEMORY_WB))
- efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+ efi_leave_mm();
- return (void __iomem *)__va(phys_addr);
+ return status;
}
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..f3cfdb1c9a35 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* EFI call stub for IA32.
*
@@ -6,118 +7,54 @@
*/
#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
#include <asm/page_types.h>
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
+ __INIT
+SYM_FUNC_START(efi_call_svam)
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
+ push 16(%esp)
+ push 16(%esp)
+ push %ecx
+ push %edx
+ movl %eax, %ebx // &systab_phys->runtime
-.text
-ENTRY(efi_call_phys)
/*
- * 0. The function can only be called in Linux kernel. So CS has been
- * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
- * the values of these registers are the same. And, the corresponding
- * GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prelog and epilog.
- */
-
- /*
- * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
- * But to make it smoothly switch from virtual mode to flat mode.
- * The mapping of lower virtual memory has been created in prelog and
- * epilog.
+ * Switch to the flat mapped alias of this routine, by jumping to the
+ * address of label '1' after subtracting PAGE_OFFSET from it.
*/
movl $1f, %edx
subl $__PAGE_OFFSET, %edx
jmp *%edx
1:
- /*
- * 2. Now on the top of stack is the return
- * address in the caller of efi_call_phys(), then parameter 1,
- * parameter 2, ..., param n. To make things easy, we save the return
- * address of efi_call_phys in a global variable.
- */
- popl %edx
- movl %edx, saved_return_addr
- /* get the function pointer into ECX*/
- popl %ecx
- movl %ecx, efi_rt_function_ptr
- movl $2f, %edx
- subl $__PAGE_OFFSET, %edx
- pushl %edx
-
- /*
- * 3. Clear PG bit in %CR0.
- */
+ /* disable paging */
movl %cr0, %edx
andl $0x7fffffff, %edx
movl %edx, %cr0
- jmp 1f
-1:
- /*
- * 4. Adjust stack pointer.
- */
+ /* convert the stack pointer to a flat mapped address */
subl $__PAGE_OFFSET, %esp
- /*
- * 5. Call the physical function.
- */
- jmp *%ecx
+ /* call the EFI routine */
+ movl (%eax), %eax
+ call *EFI_svam(%eax)
-2:
- /*
- * 6. After EFI runtime service returns, control will return to
- * following instruction. We'd better readjust stack pointer first.
- */
- addl $__PAGE_OFFSET, %esp
+ /* grab the virtually remapped EFI runtime services table pointer */
+ movl (%ebx), %ecx
+ movl 36(%esp), %edx // &efi.runtime
+ movl %ecx, (%edx)
- /*
- * 7. Restore PG bit
- */
+ /* re-enable paging */
movl %cr0, %edx
orl $0x80000000, %edx
movl %edx, %cr0
- jmp 1f
-1:
- /*
- * 8. Now restore the virtual mode from flat mode by
- * adding EIP with PAGE_OFFSET.
- */
- movl $1f, %edx
- jmp *%edx
-1:
-
- /*
- * 9. Balance the stack. And because EAX contain the return value,
- * we'd better not clobber it.
- */
- leal efi_rt_function_ptr, %edx
- movl (%edx), %ecx
- pushl %ecx
-
- /*
- * 10. Push the saved return address onto the stack and return.
- */
- leal saved_return_addr, %edx
- movl (%edx), %ecx
- pushl %ecx
- ret
-ENDPROC(efi_call_phys)
-.previous
-.data
-saved_return_addr:
- .long 0
-efi_rt_function_ptr:
- .long 0
+ movl 16(%esp), %ebx
+ leave
+ RET
+SYM_FUNC_END(efi_call_svam)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..f0a5fba0717e 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Function calling ABI conversion from Linux to EFI for x86_64
*
@@ -7,110 +8,24 @@
*/
#include <linux/linkage.h>
-
-#define SAVE_XMM \
- mov %rsp, %rax; \
- subq $0x70, %rsp; \
- and $~0xf, %rsp; \
- mov %rax, (%rsp); \
- mov %cr0, %rax; \
- clts; \
- mov %rax, 0x8(%rsp); \
- movaps %xmm0, 0x60(%rsp); \
- movaps %xmm1, 0x50(%rsp); \
- movaps %xmm2, 0x40(%rsp); \
- movaps %xmm3, 0x30(%rsp); \
- movaps %xmm4, 0x20(%rsp); \
- movaps %xmm5, 0x10(%rsp)
-
-#define RESTORE_XMM \
- movaps 0x60(%rsp), %xmm0; \
- movaps 0x50(%rsp), %xmm1; \
- movaps 0x40(%rsp), %xmm2; \
- movaps 0x30(%rsp), %xmm3; \
- movaps 0x20(%rsp), %xmm4; \
- movaps 0x10(%rsp), %xmm5; \
- mov 0x8(%rsp), %rsi; \
- mov %rsi, %cr0; \
- mov (%rsp), %rsp
-
-ENTRY(efi_call0)
- SAVE_XMM
- subq $32, %rsp
- call *%rdi
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call0)
-
-ENTRY(efi_call1)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- call *%rdi
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call1)
-
-ENTRY(efi_call2)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- call *%rdi
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call2)
-
-ENTRY(efi_call3)
- SAVE_XMM
- subq $32, %rsp
- mov %rcx, %r8
- mov %rsi, %rcx
- call *%rdi
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call3)
-
-ENTRY(efi_call4)
- SAVE_XMM
- subq $32, %rsp
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- call *%rdi
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call4)
-
-ENTRY(efi_call5)
- SAVE_XMM
- subq $48, %rsp
- mov %r9, 32(%rsp)
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- call *%rdi
- addq $48, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call5)
-
-ENTRY(efi_call6)
- SAVE_XMM
- mov (%rsp), %rax
- mov 8(%rax), %rax
+#include <asm/nospec-branch.h>
+
+SYM_FUNC_START(__efi_call)
+ /*
+ * The EFI code doesn't have any CFI, annotate away the CFI violation.
+ */
+ ANNOTATE_NOCFI_SYM
+ pushq %rbp
+ movq %rsp, %rbp
+ and $~0xf, %rsp
+ mov 16(%rbp), %rax
subq $48, %rsp
mov %r9, 32(%rsp)
mov %rax, 40(%rsp)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
- call *%rdi
- addq $48, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call6)
+ CALL_NOSPEC rdi
+ leave
+ RET
+SYM_FUNC_END(__efi_call)
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
new file mode 100644
index 000000000000..c4b1144f99f6
--- /dev/null
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014 Intel Corporation; author Matt Fleming
+ *
+ * Support for invoking 32-bit EFI runtime services from a 64-bit
+ * kernel.
+ *
+ * The below thunking functions are only used after ExitBootServices()
+ * has been called. This simplifies things considerably as compared with
+ * the early EFI thunking because we can leave all the kernel state
+ * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime
+ * services from __KERNEL32_CS. This means we can continue to service
+ * interrupts across an EFI mixed mode call.
+ *
+ * We do however, need to handle the fact that we're running in a full
+ * 64-bit virtual address space. Things like the stack and instruction
+ * addresses need to be accessible by the 32-bit firmware, so we rely on
+ * using the identity mappings in the EFI page table to access the stack
+ * and kernel text (see efi_setup_page_tables()).
+ */
+
+#include <linux/linkage.h>
+#include <linux/objtool.h>
+#include <asm/page_types.h>
+#include <asm/segment.h>
+
+ .text
+ .code64
+SYM_FUNC_START(__efi64_thunk)
+STACK_FRAME_NON_STANDARD __efi64_thunk
+ push %rbp
+ push %rbx
+
+ /*
+ * Switch to 1:1 mapped 32-bit stack pointer.
+ */
+ movq %rsp, %rax
+ movq efi_mixed_mode_stack_pa(%rip), %rsp
+ push %rax
+
+ /*
+ * Copy args passed via the stack
+ */
+ subq $0x24, %rsp
+ movq 0x18(%rax), %rbp
+ movq 0x20(%rax), %rbx
+ movq 0x28(%rax), %rax
+ movl %ebp, 0x18(%rsp)
+ movl %ebx, 0x1c(%rsp)
+ movl %eax, 0x20(%rsp)
+
+ /*
+ * Calculate the physical address of the kernel text.
+ */
+ movq $__START_KERNEL_map, %rax
+ subq phys_base(%rip), %rax
+
+ leaq 1f(%rip), %rbp
+ leaq 2f(%rip), %rbx
+ subq %rax, %rbp
+ subq %rax, %rbx
+
+ movl %ebx, 0x0(%rsp) /* return address */
+ movl %esi, 0x4(%rsp)
+ movl %edx, 0x8(%rsp)
+ movl %ecx, 0xc(%rsp)
+ movl %r8d, 0x10(%rsp)
+ movl %r9d, 0x14(%rsp)
+
+ /* Switch to 32-bit descriptor */
+ pushq $__KERNEL32_CS
+ pushq %rdi /* EFI runtime service address */
+ lretq
+
+ // This return instruction is not needed for correctness, as it will
+ // never be reached. It only exists to make objtool happy, which will
+ // otherwise complain about unreachable instructions in the callers.
+ RET
+SYM_FUNC_END(__efi64_thunk)
+
+ .section ".rodata", "a", @progbits
+ .balign 16
+SYM_DATA_START(__efi64_thunk_ret_tramp)
+1: movq 0x20(%rsp), %rsp
+ pop %rbx
+ pop %rbp
+ ret
+ int3
+
+ .code32
+2: pushl $__KERNEL_CS
+ pushl %ebp
+ lret
+SYM_DATA_END(__efi64_thunk_ret_tramp)
+
+ .bss
+ .balign 8
+SYM_DATA(efi_mixed_mode_stack_pa, .quad 0)
diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c
new file mode 100644
index 000000000000..023697c88910
--- /dev/null
+++ b/arch/x86/platform/efi/memmap.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common EFI memory map functions.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <asm/early_ioremap.h>
+#include <asm/efi.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
+{
+ return memblock_phys_alloc(size, SMP_CACHE_BYTES);
+}
+
+static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
+{
+ unsigned int order = get_order(size);
+ struct page *p = alloc_pages(GFP_KERNEL, order);
+
+ if (!p)
+ return 0;
+
+ return PFN_PHYS(page_to_pfn(p));
+}
+
+static
+void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
+{
+ if (flags & EFI_MEMMAP_MEMBLOCK) {
+ if (slab_is_available())
+ memblock_free_late(phys, size);
+ else
+ memblock_phys_free(phys, size);
+ } else if (flags & EFI_MEMMAP_SLAB) {
+ struct page *p = pfn_to_page(PHYS_PFN(phys));
+ unsigned int order = get_order(size);
+
+ __free_pages(p, order);
+ }
+}
+
+/**
+ * efi_memmap_alloc - Allocate memory for the EFI memory map
+ * @num_entries: Number of entries in the allocated map.
+ * @data: efi memmap installation parameters
+ *
+ * Depending on whether mm_init() has already been invoked or not,
+ * either memblock or "normal" page allocation is used.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_alloc(unsigned int num_entries,
+ struct efi_memory_map_data *data)
+{
+ /* Expect allocation parameters are zero initialized */
+ WARN_ON(data->phys_map || data->size);
+
+ data->size = num_entries * efi.memmap.desc_size;
+ data->desc_version = efi.memmap.desc_version;
+ data->desc_size = efi.memmap.desc_size;
+ data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK);
+ data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE;
+
+ if (slab_is_available()) {
+ data->flags |= EFI_MEMMAP_SLAB;
+ data->phys_map = __efi_memmap_alloc_late(data->size);
+ } else {
+ data->flags |= EFI_MEMMAP_MEMBLOCK;
+ data->phys_map = __efi_memmap_alloc_early(data->size);
+ }
+
+ if (!data->phys_map)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * efi_memmap_install - Install a new EFI memory map in efi.memmap
+ * @data: efi memmap installation parameters
+ *
+ * Unlike efi_memmap_init_*(), this function does not allow the caller
+ * to switch from early to late mappings. It simply uses the existing
+ * mapping function and installs the new memmap.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_install(struct efi_memory_map_data *data)
+{
+ unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map;
+ unsigned long flags = efi.memmap.flags;
+ u64 phys = efi.memmap.phys_map;
+ int ret;
+
+ efi_memmap_unmap();
+
+ if (efi_enabled(EFI_PARAVIRT))
+ return 0;
+
+ ret = __efi_memmap_init(data);
+ if (ret)
+ return ret;
+
+ __efi_memmap_free(phys, size, flags);
+ return 0;
+}
+
+/**
+ * efi_memmap_split_count - Count number of additional EFI memmap entries
+ * @md: EFI memory descriptor to split
+ * @range: Address range (start, end) to split around
+ *
+ * Returns the number of additional EFI memmap entries required to
+ * accommodate @range.
+ */
+int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
+{
+ u64 m_start, m_end;
+ u64 start, end;
+ int count = 0;
+
+ start = md->phys_addr;
+ end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+ /* modifying range */
+ m_start = range->start;
+ m_end = range->end;
+
+ if (m_start <= start) {
+ /* split into 2 parts */
+ if (start < m_end && m_end < end)
+ count++;
+ }
+
+ if (start < m_start && m_start < end) {
+ /* split into 3 parts */
+ if (m_end < end)
+ count += 2;
+ /* split into 2 parts */
+ if (end <= m_end)
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * efi_memmap_insert - Insert a memory region in an EFI memmap
+ * @old_memmap: The existing EFI memory map structure
+ * @buf: Address of buffer to store new map
+ * @mem: Memory map entry to insert
+ *
+ * It is suggested that you call efi_memmap_split_count() first
+ * to see how large @buf needs to be.
+ */
+void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf,
+ struct efi_mem_range *mem)
+{
+ u64 m_start, m_end, m_attr;
+ efi_memory_desc_t *md;
+ u64 start, end;
+ void *old, *new;
+
+ /* modifying range */
+ m_start = mem->range.start;
+ m_end = mem->range.end;
+ m_attr = mem->attribute;
+
+ /*
+ * The EFI memory map deals with regions in EFI_PAGE_SIZE
+ * units. Ensure that the region described by 'mem' is aligned
+ * correctly.
+ */
+ if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) ||
+ !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) {
+ WARN_ON(1);
+ return;
+ }
+
+ for (old = old_memmap->map, new = buf;
+ old < old_memmap->map_end;
+ old += old_memmap->desc_size, new += old_memmap->desc_size) {
+
+ /* copy original EFI memory descriptor */
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ start = md->phys_addr;
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+ if (m_start <= start && end <= m_end)
+ md->attribute |= m_attr;
+
+ if (m_start <= start &&
+ (start < m_end && m_end < end)) {
+ /* first part */
+ md->attribute |= m_attr;
+ md->num_pages = (m_end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ /* latter part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_end + 1;
+ md->num_pages = (end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ }
+
+ if ((start < m_start && m_start < end) && m_end < end) {
+ /* first part */
+ md->num_pages = (m_start - md->phys_addr) >>
+ EFI_PAGE_SHIFT;
+ /* middle part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->attribute |= m_attr;
+ md->phys_addr = m_start;
+ md->num_pages = (m_end - m_start + 1) >>
+ EFI_PAGE_SHIFT;
+ /* last part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_end + 1;
+ md->num_pages = (end - m_end) >>
+ EFI_PAGE_SHIFT;
+ }
+
+ if ((start < m_start && m_start < end) &&
+ (end <= m_end)) {
+ /* first part */
+ md->num_pages = (m_start - md->phys_addr) >>
+ EFI_PAGE_SHIFT;
+ /* latter part */
+ new += old_memmap->desc_size;
+ memcpy(new, old, old_memmap->desc_size);
+ md = new;
+ md->phys_addr = m_start;
+ md->num_pages = (end - md->phys_addr + 1) >>
+ EFI_PAGE_SHIFT;
+ md->attribute |= m_attr;
+ }
+ }
+}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
new file mode 100644
index 000000000000..553f330198f2
--- /dev/null
+++ b/arch/x86/platform/efi/quirks.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/types.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+
+#include <asm/e820/api.h>
+#include <asm/efi.h>
+#include <asm/uv/uv.h>
+#include <asm/cpu_device_id.h>
+#include <asm/realmode.h>
+#include <asm/reboot.h>
+
+#define EFI_MIN_RESERVE 5120
+
+#define EFI_DUMMY_GUID \
+ EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+
+#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */
+#define QUARK_SECURITY_HEADER_SIZE 0x400
+
+/*
+ * Header prepended to the standard EFI capsule on Quark systems the are based
+ * on Intel firmware BSP.
+ * @csh_signature: Unique identifier to sanity check signed module
+ * presence ("_CSH").
+ * @version: Current version of CSH used. Should be one for Quark A0.
+ * @modulesize: Size of the entire module including the module header
+ * and payload.
+ * @security_version_number_index: Index of SVN to use for validation of signed
+ * module.
+ * @security_version_number: Used to prevent against roll back of modules.
+ * @rsvd_module_id: Currently unused for Clanton (Quark).
+ * @rsvd_module_vendor: Vendor Identifier. For Intel products value is
+ * 0x00008086.
+ * @rsvd_date: BCD representation of build date as yyyymmdd, where
+ * yyyy=4 digit year, mm=1-12, dd=1-31.
+ * @headersize: Total length of the header including including any
+ * padding optionally added by the signing tool.
+ * @hash_algo: What Hash is used in the module signing.
+ * @cryp_algo: What Crypto is used in the module signing.
+ * @keysize: Total length of the key data including including any
+ * padding optionally added by the signing tool.
+ * @signaturesize: Total length of the signature including including any
+ * padding optionally added by the signing tool.
+ * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the
+ * chain, if there is a next header.
+ * @rsvd: Reserved, padding structure to required size.
+ *
+ * See also QuartSecurityHeader_t in
+ * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h
+ * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP
+ */
+struct quark_security_header {
+ u32 csh_signature;
+ u32 version;
+ u32 modulesize;
+ u32 security_version_number_index;
+ u32 security_version_number;
+ u32 rsvd_module_id;
+ u32 rsvd_module_vendor;
+ u32 rsvd_date;
+ u32 headersize;
+ u32 hash_algo;
+ u32 cryp_algo;
+ u32 keysize;
+ u32 signaturesize;
+ u32 rsvd_next_header;
+ u32 rsvd[2];
+};
+
+static const efi_char16_t efi_dummy_name[] = L"DUMMY";
+
+static bool efi_no_storage_paranoia;
+
+/*
+ * Some firmware implementations refuse to boot if there's insufficient
+ * space in the variable store. The implementation of garbage collection
+ * in some FW versions causes stale (deleted) variables to take up space
+ * longer than intended and space is only freed once the store becomes
+ * almost completely full.
+ *
+ * Enabling this option disables the space checks in
+ * efi_query_variable_store() and forces garbage collection.
+ *
+ * Only enable this option if deleting EFI variables does not free up
+ * space in your variable store, e.g. if despite deleting variables
+ * you're unable to create new ones.
+ */
+static int __init setup_storage_paranoia(char *arg)
+{
+ efi_no_storage_paranoia = true;
+ return 0;
+}
+early_param("efi_no_storage_paranoia", setup_storage_paranoia);
+
+/*
+ * Deleting the dummy variable which kicks off garbage collection
+*/
+void efi_delete_dummy_variable(void)
+{
+ efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL);
+}
+
+u64 efivar_reserved_space(void)
+{
+ if (efi_no_storage_paranoia)
+ return 0;
+ return EFI_MIN_RESERVE;
+}
+EXPORT_SYMBOL_GPL(efivar_reserved_space);
+
+/*
+ * In the nonblocking case we do not attempt to perform garbage
+ * collection if we do not have enough free space. Rather, we do the
+ * bare minimum check and give up immediately if the available space
+ * is below EFI_MIN_RESERVE.
+ *
+ * This function is intended to be small and simple because it is
+ * invoked from crash handler paths.
+ */
+static efi_status_t
+query_variable_store_nonblocking(u32 attributes, unsigned long size)
+{
+ efi_status_t status;
+ u64 storage_size, remaining_size, max_size;
+
+ status = efi.query_variable_info_nonblocking(attributes, &storage_size,
+ &remaining_size,
+ &max_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ if (remaining_size - size < EFI_MIN_RESERVE)
+ return EFI_OUT_OF_RESOURCES;
+
+ return EFI_SUCCESS;
+}
+
+/*
+ * Some firmware implementations refuse to boot if there's insufficient space
+ * in the variable store. Ensure that we never use more than a safe limit.
+ *
+ * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
+ * store.
+ */
+efi_status_t efi_query_variable_store(u32 attributes, unsigned long size,
+ bool nonblocking)
+{
+ efi_status_t status;
+ u64 storage_size, remaining_size, max_size;
+
+ if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
+ return 0;
+
+ if (nonblocking)
+ return query_variable_store_nonblocking(attributes, size);
+
+ status = efi.query_variable_info(attributes, &storage_size,
+ &remaining_size, &max_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /*
+ * We account for that by refusing the write if permitting it would
+ * reduce the available space to under 5KB. This figure was provided by
+ * Samsung, so should be safe.
+ */
+ if ((remaining_size - size < EFI_MIN_RESERVE) &&
+ !efi_no_storage_paranoia) {
+
+ /*
+ * Triggering garbage collection may require that the firmware
+ * generate a real EFI_OUT_OF_RESOURCES error. We can force
+ * that by attempting to use more space than is available.
+ */
+ unsigned long dummy_size = remaining_size + 1024;
+ void *dummy = kzalloc(dummy_size, GFP_KERNEL);
+
+ if (!dummy)
+ return EFI_OUT_OF_RESOURCES;
+
+ status = efi.set_variable((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS,
+ dummy_size, dummy);
+
+ if (status == EFI_SUCCESS) {
+ /*
+ * This should have failed, so if it didn't make sure
+ * that we delete it...
+ */
+ efi_delete_dummy_variable();
+ }
+
+ kfree(dummy);
+
+ /*
+ * The runtime code may now have triggered a garbage collection
+ * run, so check the variable info again
+ */
+ status = efi.query_variable_info(attributes, &storage_size,
+ &remaining_size, &max_size);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /*
+ * There still isn't enough room, so return an error
+ */
+ if (remaining_size - size < EFI_MIN_RESERVE)
+ return EFI_OUT_OF_RESOURCES;
+ }
+
+ return EFI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(efi_query_variable_store);
+
+/*
+ * The UEFI specification makes it clear that the operating system is
+ * free to do whatever it wants with boot services code after
+ * ExitBootServices() has been called. Ignoring this recommendation a
+ * significant bunch of EFI implementations continue calling into boot
+ * services code (SetVirtualAddressMap). In order to work around such
+ * buggy implementations we reserve boot services region during EFI
+ * init and make sure it stays executable. Then, after
+ * SetVirtualAddressMap(), it is discarded.
+ *
+ * However, some boot services regions contain data that is required
+ * by drivers, so we need to track which memory ranges can never be
+ * freed. This is done by tagging those regions with the
+ * EFI_MEMORY_RUNTIME attribute.
+ *
+ * Any driver that wants to mark a region as reserved must use
+ * efi_mem_reserve() which will insert a new EFI memory descriptor
+ * into efi.memmap (splitting existing regions if necessary) and tag
+ * it with EFI_MEMORY_RUNTIME.
+ */
+void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+{
+ struct efi_memory_map_data data = { 0 };
+ struct efi_mem_range mr;
+ efi_memory_desc_t md;
+ int num_entries;
+ void *new;
+
+ if (efi_mem_desc_lookup(addr, &md) ||
+ md.type != EFI_BOOT_SERVICES_DATA) {
+ pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
+ return;
+ }
+
+ if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) {
+ pr_err("Region spans EFI memory descriptors, %pa\n", &addr);
+ return;
+ }
+
+ size += addr % EFI_PAGE_SIZE;
+ size = round_up(size, EFI_PAGE_SIZE);
+ addr = round_down(addr, EFI_PAGE_SIZE);
+
+ mr.range.start = addr;
+ mr.range.end = addr + size - 1;
+ mr.attribute = md.attribute | EFI_MEMORY_RUNTIME;
+
+ num_entries = efi_memmap_split_count(&md, &mr.range);
+ num_entries += efi.memmap.nr_map;
+
+ if (efi_memmap_alloc(num_entries, &data) != 0) {
+ pr_err("Could not allocate boot services memmap\n");
+ return;
+ }
+
+ new = early_memremap_prot(data.phys_map, data.size,
+ pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL)));
+ if (!new) {
+ pr_err("Failed to map new boot services memmap\n");
+ return;
+ }
+
+ efi_memmap_insert(&efi.memmap, new, &mr);
+ early_memunmap(new, data.size);
+
+ efi_memmap_install(&data);
+ e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__update_table(e820_table);
+}
+
+/*
+ * Helper function for efi_reserve_boot_services() to figure out if we
+ * can free regions in efi_free_boot_services().
+ *
+ * Use this function to ensure we do not free regions owned by somebody
+ * else. We must only reserve (and then free) regions:
+ *
+ * - Not within any part of the kernel
+ * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc)
+ */
+static __init bool can_free_region(u64 start, u64 size)
+{
+ if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
+ return false;
+
+ if (!e820__mapped_all(start, start+size, E820_TYPE_RAM))
+ return false;
+
+ return true;
+}
+
+void __init efi_reserve_boot_services(void)
+{
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
+ for_each_efi_memory_desc(md) {
+ u64 start = md->phys_addr;
+ u64 size = md->num_pages << EFI_PAGE_SHIFT;
+ bool already_reserved;
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+ continue;
+
+ already_reserved = memblock_is_region_reserved(start, size);
+
+ /*
+ * Because the following memblock_reserve() is paired
+ * with memblock_free_late() for this region in
+ * efi_free_boot_services(), we must be extremely
+ * careful not to reserve, and subsequently free,
+ * critical regions of memory (like the kernel image) or
+ * those regions that somebody else has already
+ * reserved.
+ *
+ * A good example of a critical region that must not be
+ * freed is page zero (first 4Kb of memory), which may
+ * contain boot services code/data but is marked
+ * E820_TYPE_RESERVED by trim_bios_range().
+ */
+ if (!already_reserved) {
+ memblock_reserve(start, size);
+
+ /*
+ * If we are the first to reserve the region, no
+ * one else cares about it. We own it and can
+ * free it later.
+ */
+ if (can_free_region(start, size))
+ continue;
+ }
+
+ /*
+ * We don't own the region. We must not free it.
+ *
+ * Setting this bit for a boot services region really
+ * doesn't make sense as far as the firmware is
+ * concerned, but it does provide us with a way to tag
+ * those regions that must not be paired with
+ * memblock_free_late().
+ */
+ md->attribute |= EFI_MEMORY_RUNTIME;
+ }
+}
+
+/*
+ * Apart from having VA mappings for EFI boot services code/data regions,
+ * (duplicate) 1:1 mappings were also created as a quirk for buggy firmware. So,
+ * unmap both 1:1 and VA mappings.
+ */
+static void __init efi_unmap_pages(efi_memory_desc_t *md)
+{
+ pgd_t *pgd = efi_mm.pgd;
+ u64 pa = md->phys_addr;
+ u64 va = md->virt_addr;
+
+ /*
+ * EFI mixed mode has all RAM mapped to access arguments while making
+ * EFI runtime calls, hence don't unmap EFI boot services code/data
+ * regions.
+ */
+ if (efi_is_mixed())
+ return;
+
+ if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages))
+ pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa);
+
+ if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages))
+ pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
+}
+
+void __init efi_free_boot_services(void)
+{
+ struct efi_memory_map_data data = { 0 };
+ efi_memory_desc_t *md;
+ int num_entries = 0;
+ void *new, *new_md;
+
+ /* Keep all regions for /sys/kernel/debug/efi */
+ if (efi_enabled(EFI_DBG))
+ return;
+
+ for_each_efi_memory_desc(md) {
+ unsigned long long start = md->phys_addr;
+ unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+ size_t rm_size;
+
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA) {
+ num_entries++;
+ continue;
+ }
+
+ /* Do not free, someone else owns it: */
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ num_entries++;
+ continue;
+ }
+
+ /*
+ * Before calling set_virtual_address_map(), EFI boot services
+ * code/data regions were mapped as a quirk for buggy firmware.
+ * Unmap them from efi_pgd before freeing them up.
+ */
+ efi_unmap_pages(md);
+
+ /*
+ * Nasty quirk: if all sub-1MB memory is used for boot
+ * services, we can get here without having allocated the
+ * real mode trampoline. It's too late to hand boot services
+ * memory back to the memblock allocator, so instead
+ * try to manually allocate the trampoline if needed.
+ *
+ * I've seen this on a Dell XPS 13 9350 with firmware
+ * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+ * grub2-efi on a hard disk. (And no, I don't know why
+ * this happened, but Linux should still try to boot rather
+ * panicking early.)
+ */
+ rm_size = real_mode_size_needed();
+ if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+ set_real_mode_mem(start);
+ start += rm_size;
+ size -= rm_size;
+ }
+
+ /*
+ * Don't free memory under 1M for two reasons:
+ * - BIOS might clobber it
+ * - Crash kernel needs it to be reserved
+ */
+ if (start + size < SZ_1M)
+ continue;
+ if (start < SZ_1M) {
+ size -= (SZ_1M - start);
+ start = SZ_1M;
+ }
+
+ memblock_free_late(start, size);
+ }
+
+ if (!num_entries)
+ return;
+
+ if (efi_memmap_alloc(num_entries, &data) != 0) {
+ pr_err("Failed to allocate new EFI memmap\n");
+ return;
+ }
+
+ new = memremap(data.phys_map, data.size, MEMREMAP_WB);
+ if (!new) {
+ pr_err("Failed to map new EFI memmap\n");
+ return;
+ }
+
+ /*
+ * Build a new EFI memmap that excludes any boot services
+ * regions that are not tagged EFI_MEMORY_RUNTIME, since those
+ * regions have now been freed.
+ */
+ new_md = new;
+ for_each_efi_memory_desc(md) {
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA))
+ continue;
+
+ memcpy(new_md, md, efi.memmap.desc_size);
+ new_md += efi.memmap.desc_size;
+ }
+
+ memunmap(new);
+
+ if (efi_memmap_install(&data) != 0) {
+ pr_err("Could not install new EFI memmap\n");
+ return;
+ }
+}
+
+/*
+ * A number of config table entries get remapped to virtual addresses
+ * after entering EFI virtual mode. However, the kexec kernel requires
+ * their physical addresses therefore we pass them via setup_data and
+ * correct those entries to their respective physical addresses here.
+ *
+ * Currently only handles smbios which is necessary for some firmware
+ * implementation.
+ */
+int __init efi_reuse_config(u64 tables, int nr_tables)
+{
+ int i, sz, ret = 0;
+ void *p, *tablep;
+ struct efi_setup_data *data;
+
+ if (nr_tables == 0)
+ return 0;
+
+ if (!efi_setup)
+ return 0;
+
+ if (!efi_enabled(EFI_64BIT))
+ return 0;
+
+ data = early_memremap(efi_setup, sizeof(*data));
+ if (!data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!data->smbios)
+ goto out_memremap;
+
+ sz = sizeof(efi_config_table_64_t);
+
+ p = tablep = early_memremap(tables, nr_tables * sz);
+ if (!p) {
+ pr_err("Could not map Configuration table!\n");
+ ret = -ENOMEM;
+ goto out_memremap;
+ }
+
+ for (i = 0; i < nr_tables; i++) {
+ efi_guid_t guid;
+
+ guid = ((efi_config_table_64_t *)p)->guid;
+
+ if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
+ ((efi_config_table_64_t *)p)->table = data->smbios;
+
+ /* Do not bother to play with mem attr table across kexec */
+ if (!efi_guidcmp(guid, EFI_MEMORY_ATTRIBUTES_TABLE_GUID))
+ ((efi_config_table_64_t *)p)->table = EFI_INVALID_TABLE_ADDR;
+
+ p += sz;
+ }
+ early_memunmap(tablep, nr_tables * sz);
+
+out_memremap:
+ early_memunmap(data, sizeof(*data));
+out:
+ return ret;
+}
+
+void __init efi_apply_memmap_quirks(void)
+{
+ /*
+ * Once setup is done earlier, unmap the EFI memory map on mismatched
+ * firmware/kernel architectures since there is no support for runtime
+ * services.
+ */
+ if (!efi_runtime_supported()) {
+ pr_info("Setup done, disabling due to 32/64-bit mismatch\n");
+ efi_memmap_unmap();
+ }
+}
+
+/*
+ * For most modern platforms the preferred method of powering off is via
+ * ACPI. However, there are some that are known to require the use of
+ * EFI runtime services and for which ACPI does not work at all.
+ *
+ * Using EFI is a last resort, to be used only if no other option
+ * exists.
+ */
+bool efi_reboot_required(void)
+{
+ if (!acpi_gbl_reduced_hardware)
+ return false;
+
+ efi_reboot_quirk_mode = EFI_RESET_WARM;
+ return true;
+}
+
+bool efi_poweroff_required(void)
+{
+ return acpi_gbl_reduced_hardware || acpi_no_s5;
+}
+
+#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH
+
+static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
+ size_t hdr_bytes)
+{
+ struct quark_security_header *csh = *pkbuff;
+
+ /* Only process data block that is larger than the security header */
+ if (hdr_bytes < sizeof(struct quark_security_header))
+ return 0;
+
+ if (csh->csh_signature != QUARK_CSH_SIGNATURE ||
+ csh->headersize != QUARK_SECURITY_HEADER_SIZE)
+ return 1;
+
+ /* Only process data block if EFI header is included */
+ if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE +
+ sizeof(efi_capsule_header_t))
+ return 0;
+
+ pr_debug("Quark security header detected\n");
+
+ if (csh->rsvd_next_header != 0) {
+ pr_err("multiple Quark security headers not supported\n");
+ return -EINVAL;
+ }
+
+ *pkbuff += csh->headersize;
+ cap_info->total_size = csh->headersize;
+
+ /*
+ * Update the first page pointer to skip over the CSH header.
+ */
+ cap_info->phys[0] += csh->headersize;
+
+ /*
+ * cap_info->capsule should point at a virtual mapping of the entire
+ * capsule, starting at the capsule header. Our image has the Quark
+ * security header prepended, so we cannot rely on the default vmap()
+ * mapping created by the generic capsule code.
+ * Given that the Quark firmware does not appear to care about the
+ * virtual mapping, let's just point cap_info->capsule at our copy
+ * of the capsule header.
+ */
+ cap_info->capsule = &cap_info->header;
+
+ return 1;
+}
+
+static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
+ X86_MATCH_VFM(INTEL_QUARK_X1000, &qrk_capsule_setup_info),
+ { }
+};
+
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+ size_t hdr_bytes)
+{
+ int (*quirk_handler)(struct capsule_info *, void **, size_t);
+ const struct x86_cpu_id *id;
+ int ret;
+
+ if (hdr_bytes < sizeof(efi_capsule_header_t))
+ return 0;
+
+ cap_info->total_size = 0;
+
+ id = x86_match_cpu(efi_capsule_quirk_ids);
+ if (id) {
+ /*
+ * The quirk handler is supposed to return
+ * - a value > 0 if the setup should continue, after advancing
+ * kbuff as needed
+ * - 0 if not enough hdr_bytes are available yet
+ * - a negative error code otherwise
+ */
+ quirk_handler = (typeof(quirk_handler))id->driver_data;
+ ret = quirk_handler(cap_info, &kbuff, hdr_bytes);
+ if (ret <= 0)
+ return ret;
+ }
+
+ memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+
+ cap_info->total_size += cap_info->header.imagesize;
+
+ return __efi_capsule_setup_info(cap_info);
+}
+
+#endif
+
+/*
+ * If any access by any efi runtime service causes a page fault, then,
+ * 1. If it's efi_reset_system(), reboot through BIOS.
+ * 2. If any other efi runtime service, then
+ * a. Return error status to the efi caller process.
+ * b. Disable EFI Runtime Services forever and
+ * c. Freeze efi_rts_wq and schedule new process.
+ *
+ * @return: Returns, if the page fault is not handled. This function
+ * will never return if the page fault is handled successfully.
+ */
+void efi_crash_gracefully_on_page_fault(unsigned long phys_addr)
+{
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /*
+ * If we get an interrupt/NMI while processing an EFI runtime service
+ * then this is a regular OOPS, not an EFI failure.
+ */
+ if (in_interrupt())
+ return;
+
+ /*
+ * Make sure that an efi runtime service caused the page fault.
+ * READ_ONCE() because we might be OOPSing in a different thread,
+ * and we don't want to trip KTSAN while trying to OOPS.
+ */
+ if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE ||
+ current_work() != &efi_rts_work.work)
+ return;
+
+ /*
+ * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
+ * page faulting on these addresses isn't expected.
+ */
+ if (phys_addr <= 0x0fff)
+ return;
+
+ /*
+ * Print stack trace as it might be useful to know which EFI Runtime
+ * Service is buggy.
+ */
+ WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n",
+ phys_addr);
+
+ /*
+ * Buggy efi_reset_system() is handled differently from other EFI
+ * Runtime Services as it doesn't use efi_rts_wq. Although,
+ * native_machine_emergency_restart() says that machine_real_restart()
+ * could fail, it's better not to complicate this fault handler
+ * because this case occurs *very* rarely and hence could be improved
+ * on a need by basis.
+ */
+ if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) {
+ pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
+ machine_real_restart(MRR_BIOS);
+ return;
+ }
+
+ /*
+ * Before calling EFI Runtime Service, the kernel has switched the
+ * calling process to efi_mm. Hence, switch back to task_mm.
+ */
+ arch_efi_call_virt_teardown();
+
+ /* Signal error status to the efi caller process */
+ efi_rts_work.status = EFI_ABORTED;
+ complete(&efi_rts_work.efi_rts_comp);
+
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n");
+
+ /*
+ * Call schedule() in an infinite loop, so that any spurious wake ups
+ * will never run efi_rts_wq again.
+ */
+ for (;;) {
+ set_current_state(TASK_IDLE);
+ schedule();
+ }
+}
diff --git a/arch/x86/platform/efi/runtime-map.c b/arch/x86/platform/efi/runtime-map.c
new file mode 100644
index 000000000000..a6f02cef3ca2
--- /dev/null
+++ b/arch/x86/platform/efi/runtime-map.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com>
+ */
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+
+#include <asm/efi.h>
+#include <asm/setup.h>
+
+struct efi_runtime_map_entry {
+ efi_memory_desc_t md;
+ struct kobject kobj; /* kobject for each entry */
+};
+
+static struct efi_runtime_map_entry **map_entries;
+
+struct map_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf);
+};
+
+static inline struct map_attribute *to_map_attr(struct attribute *attr)
+{
+ return container_of(attr, struct map_attribute, attr);
+}
+
+static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type);
+}
+
+#define EFI_RUNTIME_FIELD(var) entry->md.var
+
+#define EFI_RUNTIME_U64_ATTR_SHOW(name) \
+static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \
+{ \
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \
+}
+
+EFI_RUNTIME_U64_ATTR_SHOW(phys_addr);
+EFI_RUNTIME_U64_ATTR_SHOW(virt_addr);
+EFI_RUNTIME_U64_ATTR_SHOW(num_pages);
+EFI_RUNTIME_U64_ATTR_SHOW(attribute);
+
+static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj)
+{
+ return container_of(kobj, struct efi_runtime_map_entry, kobj);
+}
+
+static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct efi_runtime_map_entry *entry = to_map_entry(kobj);
+ struct map_attribute *map_attr = to_map_attr(attr);
+
+ return map_attr->show(entry, buf);
+}
+
+static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400);
+static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400);
+static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400);
+static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400);
+static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400);
+
+/*
+ * These are default attributes that are added for every memmap entry.
+ */
+static struct attribute *def_attrs[] = {
+ &map_type_attr.attr,
+ &map_phys_addr_attr.attr,
+ &map_virt_addr_attr.attr,
+ &map_num_pages_attr.attr,
+ &map_attribute_attr.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(def);
+
+static const struct sysfs_ops map_attr_ops = {
+ .show = map_attr_show,
+};
+
+static void map_release(struct kobject *kobj)
+{
+ struct efi_runtime_map_entry *entry;
+
+ entry = to_map_entry(kobj);
+ kfree(entry);
+}
+
+static const struct kobj_type __refconst map_ktype = {
+ .sysfs_ops = &map_attr_ops,
+ .default_groups = def_groups,
+ .release = map_release,
+};
+
+static struct kset *map_kset;
+
+static struct efi_runtime_map_entry *
+add_sysfs_runtime_map_entry(struct kobject *kobj, int nr,
+ efi_memory_desc_t *md)
+{
+ int ret;
+ struct efi_runtime_map_entry *entry;
+
+ if (!map_kset) {
+ map_kset = kset_create_and_add("runtime-map", NULL, kobj);
+ if (!map_kset)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ kset_unregister(map_kset);
+ map_kset = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(&entry->md, md, sizeof(efi_memory_desc_t));
+
+ kobject_init(&entry->kobj, &map_ktype);
+ entry->kobj.kset = map_kset;
+ ret = kobject_add(&entry->kobj, NULL, "%d", nr);
+ if (ret) {
+ kobject_put(&entry->kobj);
+ kset_unregister(map_kset);
+ map_kset = NULL;
+ return ERR_PTR(ret);
+ }
+
+ return entry;
+}
+
+int efi_get_runtime_map_size(void)
+{
+ return efi.memmap.nr_map * efi.memmap.desc_size;
+}
+
+int efi_get_runtime_map_desc_size(void)
+{
+ return efi.memmap.desc_size;
+}
+
+int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ size_t sz = efi_get_runtime_map_size();
+
+ if (sz > bufsz)
+ sz = bufsz;
+
+ memcpy(buf, efi.memmap.map, sz);
+ return 0;
+}
+
+static int __init efi_runtime_map_init(void)
+{
+ int i, j, ret = 0;
+ struct efi_runtime_map_entry *entry;
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP) || !efi_kobj)
+ return 0;
+
+ map_entries = kcalloc(efi.memmap.nr_map, sizeof(entry), GFP_KERNEL);
+ if (!map_entries) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ i = 0;
+ for_each_efi_memory_desc(md) {
+ entry = add_sysfs_runtime_map_entry(efi_kobj, i, md);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry);
+ goto out_add_entry;
+ }
+ *(map_entries + i++) = entry;
+ }
+
+ return 0;
+out_add_entry:
+ for (j = i - 1; j >= 0; j--) {
+ entry = *(map_entries + j);
+ kobject_put(&entry->kobj);
+ }
+out:
+ return ret;
+}
+subsys_initcall_sync(efi_runtime_map_init);
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 5b51194f4c8d..34b53e97a0ad 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_GEODE_COMMON) += geode-common.o
obj-$(CONFIG_ALIX) += alix.o
obj-$(CONFIG_NET5501) += net5501.o
obj-$(CONFIG_GEOS) += geos.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 90e23e7679a5..be65cd704e21 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for PCEngines ALIX.
* At the moment this means setup of GPIO control of LEDs
* on Alix.2/3/6 boards.
*
- *
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
* and Philip Prindeville <philipp@redfish-solutions.com>
@@ -11,102 +11,43 @@
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
* In the future leds-net5501.c should be migrated over to platform
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/leds.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/input.h>
-#include <linux/gpio_keys.h>
+#include <linux/moduleparam.h>
#include <linux/dmi.h>
#include <asm/geode.h>
+#include "geode-common.h"
+
#define BIOS_SIGNATURE_TINYBIOS 0xf0000
#define BIOS_SIGNATURE_COREBOOT 0x500
#define BIOS_REGION_SIZE 0x10000
+/*
+ * This driver is not modular, but to keep back compatibility
+ * with existing use cases, continuing with module_param is
+ * the easiest way forward.
+ */
static bool force = 0;
module_param(force, bool, 0444);
/* FIXME: Award bios is not automatically detected as Alix platform */
MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
-static struct gpio_keys_button alix_gpio_buttons[] = {
- {
- .code = KEY_RESTART,
- .gpio = 24,
- .active_low = 1,
- .desc = "Reset button",
- .type = EV_KEY,
- .wakeup = 0,
- .debounce_interval = 100,
- .can_disable = 0,
- }
-};
-static struct gpio_keys_platform_data alix_buttons_data = {
- .buttons = alix_gpio_buttons,
- .nbuttons = ARRAY_SIZE(alix_gpio_buttons),
- .poll_interval = 20,
-};
-
-static struct platform_device alix_buttons_dev = {
- .name = "gpio-keys-polled",
- .id = 1,
- .dev = {
- .platform_data = &alix_buttons_data,
- }
-};
-
-static struct gpio_led alix_leds[] = {
- {
- .name = "alix:1",
- .gpio = 6,
- .default_trigger = "default-on",
- .active_low = 1,
- },
- {
- .name = "alix:2",
- .gpio = 25,
- .default_trigger = "default-off",
- .active_low = 1,
- },
- {
- .name = "alix:3",
- .gpio = 27,
- .default_trigger = "default-off",
- .active_low = 1,
- },
-};
-
-static struct gpio_led_platform_data alix_leds_data = {
- .num_leds = ARRAY_SIZE(alix_leds),
- .leds = alix_leds,
-};
-
-static struct platform_device alix_leds_dev = {
- .name = "leds-gpio",
- .id = -1,
- .dev.platform_data = &alix_leds_data,
-};
-
-static struct __initdata platform_device *alix_devs[] = {
- &alix_buttons_dev,
- &alix_leds_dev,
+static const struct geode_led alix_leds[] __initconst = {
+ { 6, true },
+ { 25, false },
+ { 27, false },
};
static void __init register_alix(void)
{
- /* Setup LED control through leds-gpio driver */
- platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs));
+ geode_create_restart_key(24);
+ geode_create_leds("alix", alix_leds, ARRAY_SIZE(alix_leds));
}
static bool __init alix_present(unsigned long bios_phys,
@@ -192,9 +133,4 @@ static int __init alix_init(void)
return 0;
}
-
-module_init(alix_init);
-
-MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
-MODULE_DESCRIPTION("PCEngines ALIX System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(alix_init);
diff --git a/arch/x86/platform/geode/geode-common.c b/arch/x86/platform/geode/geode-common.c
new file mode 100644
index 000000000000..8fd78e60bf15
--- /dev/null
+++ b/arch/x86/platform/geode/geode-common.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared helpers to register GPIO-connected buttons and LEDs
+ * on AMD Geode boards.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio/machine.h>
+#include <linux/gpio/property.h>
+#include <linux/input.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "geode-common.h"
+
+static const struct software_node geode_gpiochip_node = {
+ .name = "cs5535-gpio",
+};
+
+static const struct property_entry geode_gpio_keys_props[] = {
+ PROPERTY_ENTRY_U32("poll-interval", 20),
+ { }
+};
+
+static const struct software_node geode_gpio_keys_node = {
+ .name = "geode-gpio-keys",
+ .properties = geode_gpio_keys_props,
+};
+
+static struct property_entry geode_restart_key_props[] = {
+ { /* Placeholder for GPIO property */ },
+ PROPERTY_ENTRY_U32("linux,code", KEY_RESTART),
+ PROPERTY_ENTRY_STRING("label", "Reset button"),
+ PROPERTY_ENTRY_U32("debounce-interval", 100),
+ { }
+};
+
+static const struct software_node geode_restart_key_node = {
+ .parent = &geode_gpio_keys_node,
+ .properties = geode_restart_key_props,
+};
+
+static const struct software_node *geode_gpio_keys_swnodes[] __initconst = {
+ &geode_gpiochip_node,
+ &geode_gpio_keys_node,
+ &geode_restart_key_node,
+ NULL
+};
+
+/*
+ * Creates gpio-keys-polled device for the restart key.
+ *
+ * Note that it needs to be called first, before geode_create_leds(),
+ * because it registers gpiochip software node used by both gpio-keys and
+ * leds-gpio devices.
+ */
+int __init geode_create_restart_key(unsigned int pin)
+{
+ struct platform_device_info keys_info = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ };
+ struct platform_device *pd;
+ int err;
+
+ geode_restart_key_props[0] = PROPERTY_ENTRY_GPIO("gpios",
+ &geode_gpiochip_node,
+ pin, GPIO_ACTIVE_LOW);
+
+ err = software_node_register_node_group(geode_gpio_keys_swnodes);
+ if (err) {
+ pr_err("failed to register gpio-keys software nodes: %d\n", err);
+ return err;
+ }
+
+ keys_info.fwnode = software_node_fwnode(&geode_gpio_keys_node);
+
+ pd = platform_device_register_full(&keys_info);
+ err = PTR_ERR_OR_ZERO(pd);
+ if (err) {
+ pr_err("failed to create gpio-keys device: %d\n", err);
+ software_node_unregister_node_group(geode_gpio_keys_swnodes);
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct software_node geode_gpio_leds_node = {
+ .name = "geode-leds",
+};
+
+#define MAX_LEDS 3
+
+int __init geode_create_leds(const char *label, const struct geode_led *leds,
+ unsigned int n_leds)
+{
+ const struct software_node *group[MAX_LEDS + 2] = { 0 };
+ struct software_node *swnodes;
+ struct property_entry *props;
+ struct platform_device_info led_info = {
+ .name = "leds-gpio",
+ .id = PLATFORM_DEVID_NONE,
+ };
+ struct platform_device *led_dev;
+ const char *node_name;
+ int err;
+ int i;
+
+ if (n_leds > MAX_LEDS) {
+ pr_err("%s: too many LEDs\n", __func__);
+ return -EINVAL;
+ }
+
+ swnodes = kcalloc(n_leds, sizeof(*swnodes), GFP_KERNEL);
+ if (!swnodes)
+ return -ENOMEM;
+
+ /*
+ * Each LED is represented by 3 properties: "gpios",
+ * "linux,default-trigger", and am empty terminator.
+ */
+ props = kcalloc(n_leds * 3, sizeof(*props), GFP_KERNEL);
+ if (!props) {
+ err = -ENOMEM;
+ goto err_free_swnodes;
+ }
+
+ group[0] = &geode_gpio_leds_node;
+ for (i = 0; i < n_leds; i++) {
+ node_name = kasprintf(GFP_KERNEL, "%s:%d", label, i);
+ if (!node_name) {
+ err = -ENOMEM;
+ goto err_free_names;
+ }
+
+ props[i * 3 + 0] =
+ PROPERTY_ENTRY_GPIO("gpios", &geode_gpiochip_node,
+ leds[i].pin, GPIO_ACTIVE_LOW);
+ props[i * 3 + 1] =
+ PROPERTY_ENTRY_STRING("linux,default-trigger",
+ leds[i].default_on ?
+ "default-on" : "default-off");
+ /* props[i * 3 + 2] is an empty terminator */
+
+ swnodes[i] = SOFTWARE_NODE(node_name, &props[i * 3],
+ &geode_gpio_leds_node);
+ group[i + 1] = &swnodes[i];
+ }
+
+ err = software_node_register_node_group(group);
+ if (err) {
+ pr_err("failed to register LED software nodes: %d\n", err);
+ goto err_free_names;
+ }
+
+ led_info.fwnode = software_node_fwnode(&geode_gpio_leds_node);
+
+ led_dev = platform_device_register_full(&led_info);
+ err = PTR_ERR_OR_ZERO(led_dev);
+ if (err) {
+ pr_err("failed to create LED device: %d\n", err);
+ goto err_unregister_group;
+ }
+
+ return 0;
+
+err_unregister_group:
+ software_node_unregister_node_group(group);
+err_free_names:
+ while (--i >= 0)
+ kfree(swnodes[i].name);
+ kfree(props);
+err_free_swnodes:
+ kfree(swnodes);
+ return err;
+}
diff --git a/arch/x86/platform/geode/geode-common.h b/arch/x86/platform/geode/geode-common.h
new file mode 100644
index 000000000000..9e0afd34bfad
--- /dev/null
+++ b/arch/x86/platform/geode/geode-common.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared helpers to register GPIO-connected buttons and LEDs
+ * on AMD Geode boards.
+ */
+
+#ifndef __PLATFORM_GEODE_COMMON_H
+#define __PLATFORM_GEODE_COMMON_H
+
+#include <linux/property.h>
+
+struct geode_led {
+ unsigned int pin;
+ bool default_on;
+};
+
+int geode_create_restart_key(unsigned int pin);
+int geode_create_leds(const char *label, const struct geode_led *leds,
+ unsigned int n_leds);
+
+#endif /* __PLATFORM_GEODE_COMMON_H */
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index c2e6d53558be..98027fb1ec32 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for Traverse Technologies GEOS.
* At the moment this means setup of GPIO control of LEDs.
@@ -9,93 +10,28 @@
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
* In the future leds-net5501.c should be migrated over to platform
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/leds.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/input.h>
-#include <linux/gpio_keys.h>
#include <linux/dmi.h>
#include <asm/geode.h>
-static struct gpio_keys_button geos_gpio_buttons[] = {
- {
- .code = KEY_RESTART,
- .gpio = 3,
- .active_low = 1,
- .desc = "Reset button",
- .type = EV_KEY,
- .wakeup = 0,
- .debounce_interval = 100,
- .can_disable = 0,
- }
-};
-static struct gpio_keys_platform_data geos_buttons_data = {
- .buttons = geos_gpio_buttons,
- .nbuttons = ARRAY_SIZE(geos_gpio_buttons),
- .poll_interval = 20,
-};
-
-static struct platform_device geos_buttons_dev = {
- .name = "gpio-keys-polled",
- .id = 1,
- .dev = {
- .platform_data = &geos_buttons_data,
- }
-};
-
-static struct gpio_led geos_leds[] = {
- {
- .name = "geos:1",
- .gpio = 6,
- .default_trigger = "default-on",
- .active_low = 1,
- },
- {
- .name = "geos:2",
- .gpio = 25,
- .default_trigger = "default-off",
- .active_low = 1,
- },
- {
- .name = "geos:3",
- .gpio = 27,
- .default_trigger = "default-off",
- .active_low = 1,
- },
-};
+#include "geode-common.h"
-static struct gpio_led_platform_data geos_leds_data = {
- .num_leds = ARRAY_SIZE(geos_leds),
- .leds = geos_leds,
-};
-
-static struct platform_device geos_leds_dev = {
- .name = "leds-gpio",
- .id = -1,
- .dev.platform_data = &geos_leds_data,
-};
-
-static struct __initdata platform_device *geos_devs[] = {
- &geos_buttons_dev,
- &geos_leds_dev,
+static const struct geode_led geos_leds[] __initconst = {
+ { 6, true },
+ { 25, false },
+ { 27, false },
};
static void __init register_geos(void)
{
- /* Setup LED control through leds-gpio driver */
- platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs));
+ geode_create_restart_key(3);
+ geode_create_leds("geos", geos_leds, ARRAY_SIZE(geos_leds));
}
static int __init geos_init(void)
@@ -120,9 +56,4 @@ static int __init geos_init(void)
return 0;
}
-
-module_init(geos_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(geos_init);
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 646e3b5b4bb6..c9cee7dea99b 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -1,92 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for Soekris net5501
* At the moment this means setup of GPIO control of LEDs and buttons
* on net5501 boards.
*
- *
* Copyright (C) 2008-2009 Tower Technologies
* Written by Alessandro Zummo <a.zummo@towertech.it>
*
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
* and Philip Prindeville <philipp@redfish-solutions.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/leds.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
-#include <linux/gpio_keys.h>
+#include <linux/gpio/machine.h>
+#include <linux/gpio/property.h>
#include <asm/geode.h>
+#include "geode-common.h"
+
#define BIOS_REGION_BASE 0xffff0000
#define BIOS_REGION_SIZE 0x00010000
-static struct gpio_keys_button net5501_gpio_buttons[] = {
- {
- .code = KEY_RESTART,
- .gpio = 24,
- .active_low = 1,
- .desc = "Reset button",
- .type = EV_KEY,
- .wakeup = 0,
- .debounce_interval = 100,
- .can_disable = 0,
- }
-};
-static struct gpio_keys_platform_data net5501_buttons_data = {
- .buttons = net5501_gpio_buttons,
- .nbuttons = ARRAY_SIZE(net5501_gpio_buttons),
- .poll_interval = 20,
-};
-
-static struct platform_device net5501_buttons_dev = {
- .name = "gpio-keys-polled",
- .id = 1,
- .dev = {
- .platform_data = &net5501_buttons_data,
- }
-};
-
-static struct gpio_led net5501_leds[] = {
- {
- .name = "net5501:1",
- .gpio = 6,
- .default_trigger = "default-on",
- .active_low = 0,
- },
-};
-
-static struct gpio_led_platform_data net5501_leds_data = {
- .num_leds = ARRAY_SIZE(net5501_leds),
- .leds = net5501_leds,
-};
-
-static struct platform_device net5501_leds_dev = {
- .name = "leds-gpio",
- .id = -1,
- .dev.platform_data = &net5501_leds_data,
-};
-
-static struct __initdata platform_device *net5501_devs[] = {
- &net5501_buttons_dev,
- &net5501_leds_dev,
+static const struct geode_led net5501_leds[] __initconst = {
+ { 6, true },
};
static void __init register_net5501(void)
{
- /* Setup LED control through leds-gpio driver */
- platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs));
+ geode_create_restart_key(24);
+ geode_create_leds("net5501", net5501_leds, ARRAY_SIZE(net5501_leds));
}
struct net5501_board {
@@ -146,9 +94,4 @@ static int __init net5501_init(void)
return 0;
}
-
-module_init(net5501_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Soekris net5501 System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(net5501_init);
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
deleted file mode 100644
index f030b532fdf3..000000000000
--- a/arch/x86/platform/goldfish/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
deleted file mode 100644
index 1693107a518e..000000000000
--- a/arch/x86/platform/goldfish/goldfish.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2007 Google, Inc.
- * Copyright (C) 2011 Intel, Inc.
- * Copyright (C) 2013 Intel, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/platform_device.h>
-
-/*
- * Where in virtual device memory the IO devices (timers, system controllers
- * and so on)
- */
-
-#define GOLDFISH_PDEV_BUS_BASE (0xff001000)
-#define GOLDFISH_PDEV_BUS_END (0xff7fffff)
-#define GOLDFISH_PDEV_BUS_IRQ (4)
-
-#define GOLDFISH_TTY_BASE (0x2000)
-
-static struct resource goldfish_pdev_bus_resources[] = {
- {
- .start = GOLDFISH_PDEV_BUS_BASE,
- .end = GOLDFISH_PDEV_BUS_END,
- .flags = IORESOURCE_MEM,
- },
- {
- .start = GOLDFISH_PDEV_BUS_IRQ,
- .end = GOLDFISH_PDEV_BUS_IRQ,
- .flags = IORESOURCE_IRQ,
- }
-};
-
-static int __init goldfish_init(void)
-{
- platform_device_register_simple("goldfish_pdev_bus", -1,
- goldfish_pdev_bus_resources, 2);
- return 0;
-}
-device_initcall(goldfish_init);
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
new file mode 100644
index 000000000000..ddfc08783fb8
--- /dev/null
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o pwr.o
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
new file mode 100644
index 000000000000..a8e75f8c14fd
--- /dev/null
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel MID platform setup code
+ *
+ * (C) Copyright 2008, 2012, 2021 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ */
+
+#define pr_fmt(fmt) "intel_mid: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/regulator/machine.h>
+#include <linux/scatterlist.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/notifier.h>
+
+#include <asm/setup.h>
+#include <asm/mpspec_def.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/cpu_device_id.h>
+#include <asm/io_apic.h>
+#include <asm/intel-mid.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+#include <asm/reboot.h>
+
+#include <linux/platform_data/x86/intel_scu_ipc.h>
+
+#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
+#define IPCMSG_COLD_RESET 0xF1
+
+static void intel_mid_power_off(void)
+{
+ /* Shut down South Complex via PWRMU */
+ intel_mid_pwr_power_off();
+
+ /* Only for Tangier, the rest will ignore this command */
+ intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_OFF, 1);
+};
+
+static void intel_mid_reboot(void)
+{
+ intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_RESET, 0);
+}
+
+static void __init intel_mid_time_init(void)
+{
+ /* Lapic only, no apbt */
+ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
+}
+
+static void intel_mid_arch_setup(void)
+{
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_ATOM_SILVERMONT_MID:
+ x86_platform.legacy.rtc = 1;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Intel MID platforms are using explicitly defined regulators.
+ *
+ * Let the regulator core know that we do not have any additional
+ * regulators left. This lets it substitute unprovided regulators with
+ * dummy ones:
+ */
+ regulator_has_full_constraints();
+}
+
+/*
+ * Moorestown does not have external NMI source nor port 0x61 to report
+ * NMI status. The possible NMI sources are from pmu as a result of NMI
+ * watchdog or lock debug. Reading io port 0x61 results in 0xff which
+ * misled NMI handler.
+ */
+static unsigned char intel_mid_get_nmi_reason(void)
+{
+ return 0;
+}
+
+/*
+ * Moorestown specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_intel_mid_early_setup(void)
+{
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.resources.reserve_resources = x86_init_noop;
+
+ x86_init.timers.timer_init = intel_mid_time_init;
+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
+
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+
+ x86_init.oem.arch_setup = intel_mid_arch_setup;
+
+ x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
+
+ x86_init.pci.arch_init = intel_mid_pci_init;
+ x86_init.pci.fixup_irqs = x86_init_noop;
+
+ legacy_pic = &null_legacy_pic;
+
+ /*
+ * Do nothing for now as everything needed done in
+ * x86_intel_mid_early_setup() below.
+ */
+ x86_init.acpi.reduced_hw_early_init = x86_init_noop;
+
+ pm_power_off = intel_mid_power_off;
+ machine_ops.emergency_restart = intel_mid_reboot;
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.mpparse.early_parse_smp_cfg = x86_init_noop;
+ x86_init.mpparse.parse_smp_cfg = x86_init_noop;
+ set_bit(MP_BUS_ISA, mp_bus_not_pci);
+}
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
new file mode 100644
index 000000000000..cd7e0c71adde
--- /dev/null
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel MID Power Management Unit (PWRMU) device driver
+ *
+ * Copyright (C) 2016, Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * Intel MID Power Management Unit device driver handles the South Complex PCI
+ * devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core
+ * modifies bits in PMCSR register in the PCI configuration space. This is not
+ * enough on some SoCs like Intel Tangier. In such case PCI core sets a new
+ * power state of the device in question through a PM hook registered in struct
+ * pci_platform_pm_ops (see drivers/pci/pci-mid.c).
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+
+#include <asm/intel-mid.h>
+
+/* Registers */
+#define PM_STS 0x00
+#define PM_CMD 0x04
+#define PM_ICS 0x08
+#define PM_WKC(x) (0x10 + (x) * 4)
+#define PM_WKS(x) (0x18 + (x) * 4)
+#define PM_SSC(x) (0x20 + (x) * 4)
+#define PM_SSS(x) (0x30 + (x) * 4)
+
+/* Bits in PM_STS */
+#define PM_STS_BUSY (1 << 8)
+
+/* Bits in PM_CMD */
+#define PM_CMD_CMD(x) ((x) << 0)
+#define PM_CMD_IOC (1 << 8)
+#define PM_CMD_CM_NOP (0 << 9)
+#define PM_CMD_CM_IMMEDIATE (1 << 9)
+#define PM_CMD_CM_DELAY (2 << 9)
+#define PM_CMD_CM_TRIGGER (3 << 9)
+
+/* System states */
+#define PM_CMD_SYS_STATE_S5 (5 << 16)
+
+/* Trigger variants */
+#define PM_CMD_CFG_TRIGGER_NC (3 << 19)
+
+/* Message to wait for TRIGGER_NC case */
+#define TRIGGER_NC_MSG_2 (2 << 22)
+
+/* List of commands */
+#define CMD_SET_CFG 0x01
+
+/* Bits in PM_ICS */
+#define PM_ICS_INT_STATUS(x) ((x) & 0xff)
+#define PM_ICS_IE (1 << 8)
+#define PM_ICS_IP (1 << 9)
+#define PM_ICS_SW_INT_STS (1 << 10)
+
+/* List of interrupts */
+#define INT_INVALID 0
+#define INT_CMD_COMPLETE 1
+#define INT_CMD_ERR 2
+#define INT_WAKE_EVENT 3
+#define INT_LSS_POWER_ERR 4
+#define INT_S0iX_MSG_ERR 5
+#define INT_NO_C6 6
+#define INT_TRIGGER_ERR 7
+#define INT_INACTIVITY 8
+
+/* South Complex devices */
+#define LSS_MAX_SHARED_DEVS 4
+#define LSS_MAX_DEVS 64
+
+#define LSS_WS_BITS 1 /* wake state width */
+#define LSS_PWS_BITS 2 /* power state width */
+
+/* Supported device IDs */
+#define PCI_DEVICE_ID_PENWELL 0x0828
+#define PCI_DEVICE_ID_TANGIER 0x11a1
+
+struct mid_pwr_dev {
+ struct pci_dev *pdev;
+ pci_power_t state;
+};
+
+struct mid_pwr {
+ struct device *dev;
+ void __iomem *regs;
+ int irq;
+ bool available;
+
+ struct mutex lock;
+ struct mid_pwr_dev lss[LSS_MAX_DEVS][LSS_MAX_SHARED_DEVS];
+};
+
+static struct mid_pwr *midpwr;
+
+static u32 mid_pwr_get_state(struct mid_pwr *pwr, int reg)
+{
+ return readl(pwr->regs + PM_SSS(reg));
+}
+
+static void mid_pwr_set_state(struct mid_pwr *pwr, int reg, u32 value)
+{
+ writel(value, pwr->regs + PM_SSC(reg));
+}
+
+static void mid_pwr_set_wake(struct mid_pwr *pwr, int reg, u32 value)
+{
+ writel(value, pwr->regs + PM_WKC(reg));
+}
+
+static void mid_pwr_interrupt_disable(struct mid_pwr *pwr)
+{
+ writel(~PM_ICS_IE, pwr->regs + PM_ICS);
+}
+
+static bool mid_pwr_is_busy(struct mid_pwr *pwr)
+{
+ return !!(readl(pwr->regs + PM_STS) & PM_STS_BUSY);
+}
+
+/* Wait 500ms that the latest PWRMU command finished */
+static int mid_pwr_wait(struct mid_pwr *pwr)
+{
+ unsigned int count = 500000;
+ bool busy;
+
+ do {
+ busy = mid_pwr_is_busy(pwr);
+ if (!busy)
+ return 0;
+ udelay(1);
+ } while (--count);
+
+ return -EBUSY;
+}
+
+static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd)
+{
+ writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs + PM_CMD);
+ return mid_pwr_wait(pwr);
+}
+
+static int __update_power_state(struct mid_pwr *pwr, int reg, int bit, int new)
+{
+ int curstate;
+ u32 power;
+ int ret;
+
+ /* Check if the device is already in desired state */
+ power = mid_pwr_get_state(pwr, reg);
+ curstate = (power >> bit) & 3;
+ if (curstate == new)
+ return 0;
+
+ /* Update the power state */
+ mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new << bit));
+
+ /* Send command to SCU */
+ ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG);
+ if (ret)
+ return ret;
+
+ /* Check if the device is already in desired state */
+ power = mid_pwr_get_state(pwr, reg);
+ curstate = (power >> bit) & 3;
+ if (curstate != new)
+ return -EAGAIN;
+
+ return 0;
+}
+
+static pci_power_t __find_weakest_power_state(struct mid_pwr_dev *lss,
+ struct pci_dev *pdev,
+ pci_power_t state)
+{
+ pci_power_t weakest = PCI_D3hot;
+ unsigned int j;
+
+ /* Find device in cache or first free cell */
+ for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) {
+ if (lss[j].pdev == pdev || !lss[j].pdev)
+ break;
+ }
+
+ /* Store the desired state in cache */
+ if (j < LSS_MAX_SHARED_DEVS) {
+ lss[j].pdev = pdev;
+ lss[j].state = state;
+ } else {
+ dev_WARN(&pdev->dev, "No room for device in PWRMU LSS cache\n");
+ weakest = state;
+ }
+
+ /* Find the power state we may use */
+ for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) {
+ if (lss[j].state < weakest)
+ weakest = lss[j].state;
+ }
+
+ return weakest;
+}
+
+static int __set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev,
+ pci_power_t state, int id, int reg, int bit)
+{
+ const char *name;
+ int ret;
+
+ state = __find_weakest_power_state(pwr->lss[id], pdev, state);
+ name = pci_power_name(state);
+
+ ret = __update_power_state(pwr, reg, bit, (__force int)state);
+ if (ret) {
+ dev_warn(&pdev->dev, "Can't set power state %s: %d\n", name, ret);
+ return ret;
+ }
+
+ dev_vdbg(&pdev->dev, "Set power state %s\n", name);
+ return 0;
+}
+
+static int mid_pwr_set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev,
+ pci_power_t state)
+{
+ int id, reg, bit;
+ int ret;
+
+ id = intel_mid_pwr_get_lss_id(pdev);
+ if (id < 0)
+ return id;
+
+ reg = (id * LSS_PWS_BITS) / 32;
+ bit = (id * LSS_PWS_BITS) % 32;
+
+ /* We support states between PCI_D0 and PCI_D3hot */
+ if (state < PCI_D0)
+ state = PCI_D0;
+ if (state > PCI_D3hot)
+ state = PCI_D3hot;
+
+ mutex_lock(&pwr->lock);
+ ret = __set_power_state(pwr, pdev, state, id, reg, bit);
+ mutex_unlock(&pwr->lock);
+ return ret;
+}
+
+int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
+{
+ struct mid_pwr *pwr = midpwr;
+ int ret = 0;
+
+ might_sleep();
+
+ if (pwr && pwr->available)
+ ret = mid_pwr_set_power_state(pwr, pdev, state);
+ dev_vdbg(&pdev->dev, "set_power_state() returns %d\n", ret);
+
+ return 0;
+}
+
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+ struct mid_pwr *pwr = midpwr;
+ int id, reg, bit;
+ u32 power;
+
+ if (!pwr || !pwr->available)
+ return PCI_UNKNOWN;
+
+ id = intel_mid_pwr_get_lss_id(pdev);
+ if (id < 0)
+ return PCI_UNKNOWN;
+
+ reg = (id * LSS_PWS_BITS) / 32;
+ bit = (id * LSS_PWS_BITS) % 32;
+ power = mid_pwr_get_state(pwr, reg);
+ return (__force pci_power_t)((power >> bit) & 3);
+}
+
+void intel_mid_pwr_power_off(void)
+{
+ struct mid_pwr *pwr = midpwr;
+ u32 cmd = PM_CMD_SYS_STATE_S5 |
+ PM_CMD_CMD(CMD_SET_CFG) |
+ PM_CMD_CM_TRIGGER |
+ PM_CMD_CFG_TRIGGER_NC |
+ TRIGGER_NC_MSG_2;
+
+ /* Send command to SCU */
+ writel(cmd, pwr->regs + PM_CMD);
+ mid_pwr_wait(pwr);
+}
+
+int intel_mid_pwr_get_lss_id(struct pci_dev *pdev)
+{
+ int vndr;
+ u8 id;
+
+ /*
+ * Mapping to PWRMU index is kept in the Logical SubSystem ID byte of
+ * Vendor capability.
+ */
+ vndr = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
+ if (!vndr)
+ return -EINVAL;
+
+ /* Read the Logical SubSystem ID byte */
+ pci_read_config_byte(pdev, vndr + INTEL_MID_PWR_LSS_OFFSET, &id);
+ if (!(id & INTEL_MID_PWR_LSS_TYPE))
+ return -ENODEV;
+
+ id &= ~INTEL_MID_PWR_LSS_TYPE;
+ if (id >= LSS_MAX_DEVS)
+ return -ERANGE;
+
+ return id;
+}
+
+static irqreturn_t mid_pwr_irq_handler(int irq, void *dev_id)
+{
+ struct mid_pwr *pwr = dev_id;
+ u32 ics;
+
+ ics = readl(pwr->regs + PM_ICS);
+ if (!(ics & PM_ICS_IP))
+ return IRQ_NONE;
+
+ writel(ics | PM_ICS_IP, pwr->regs + PM_ICS);
+
+ dev_warn(pwr->dev, "Unexpected IRQ: %#x\n", PM_ICS_INT_STATUS(ics));
+ return IRQ_HANDLED;
+}
+
+struct mid_pwr_device_info {
+ int (*set_initial_state)(struct mid_pwr *pwr);
+};
+
+static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mid_pwr_device_info *info = (void *)id->driver_data;
+ struct device *dev = &pdev->dev;
+ struct mid_pwr *pwr;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "error: could not enable device\n");
+ return ret;
+ }
+
+ pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL);
+ if (!pwr)
+ return -ENOMEM;
+
+ pwr->regs = pcim_iomap_region(pdev, 0, "intel_mid_pwr");
+ ret = PTR_ERR_OR_ZERO(pwr->regs);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not request / ioremap I/O-Mem: %d\n", ret);
+ return ret;
+ }
+
+ pwr->dev = dev;
+ pwr->irq = pdev->irq;
+
+ mutex_init(&pwr->lock);
+
+ /* Disable interrupts */
+ mid_pwr_interrupt_disable(pwr);
+
+ if (info && info->set_initial_state) {
+ ret = info->set_initial_state(pwr);
+ if (ret)
+ dev_warn(dev, "Can't set initial state: %d\n", ret);
+ }
+
+ ret = devm_request_irq(dev, pdev->irq, mid_pwr_irq_handler,
+ IRQF_NO_SUSPEND, pci_name(pdev), pwr);
+ if (ret)
+ return ret;
+
+ pwr->available = true;
+ midpwr = pwr;
+
+ pci_set_drvdata(pdev, pwr);
+ return 0;
+}
+
+static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states)
+{
+ unsigned int i, j;
+ int ret;
+
+ /*
+ * Enable wake events.
+ *
+ * PWRMU supports up to 32 sources for wake up the system. Ungate them
+ * all here.
+ */
+ mid_pwr_set_wake(pwr, 0, 0xffffffff);
+ mid_pwr_set_wake(pwr, 1, 0xffffffff);
+
+ /*
+ * Power off South Complex devices.
+ *
+ * There is a map (see a note below) of 64 devices with 2 bits per each
+ * on 32-bit HW registers. The following calls set all devices to one
+ * known initial state, i.e. PCI_D3hot. This is done in conjunction
+ * with PMCSR setting in arch/x86/pci/intel_mid_pci.c.
+ *
+ * NOTE: The actual device mapping is provided by a platform at run
+ * time using vendor capability of PCI configuration space.
+ */
+ mid_pwr_set_state(pwr, 0, states[0]);
+ mid_pwr_set_state(pwr, 1, states[1]);
+ mid_pwr_set_state(pwr, 2, states[2]);
+ mid_pwr_set_state(pwr, 3, states[3]);
+
+ /* Send command to SCU */
+ ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < LSS_MAX_DEVS; i++) {
+ for (j = 0; j < LSS_MAX_SHARED_DEVS; j++)
+ pwr->lss[i][j].state = PCI_D3hot;
+ }
+
+ return 0;
+}
+
+static int pnw_set_initial_state(struct mid_pwr *pwr)
+{
+ /* On Penwell SRAM must stay powered on */
+ static const u32 states[] = {
+ 0xf00fffff, /* PM_SSC(0) */
+ 0xffffffff, /* PM_SSC(1) */
+ 0xffffffff, /* PM_SSC(2) */
+ 0xffffffff, /* PM_SSC(3) */
+ };
+ return mid_set_initial_state(pwr, states);
+}
+
+static int tng_set_initial_state(struct mid_pwr *pwr)
+{
+ static const u32 states[] = {
+ 0xffffffff, /* PM_SSC(0) */
+ 0xffffffff, /* PM_SSC(1) */
+ 0xffffffff, /* PM_SSC(2) */
+ 0xffffffff, /* PM_SSC(3) */
+ };
+ return mid_set_initial_state(pwr, states);
+}
+
+static const struct mid_pwr_device_info pnw_info = {
+ .set_initial_state = pnw_set_initial_state,
+};
+
+static const struct mid_pwr_device_info tng_info = {
+ .set_initial_state = tng_set_initial_state,
+};
+
+/* This table should be in sync with the one in drivers/pci/pci-mid.c */
+static const struct pci_device_id mid_pwr_pci_ids[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&pnw_info },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info },
+ {}
+};
+
+static struct pci_driver mid_pwr_pci_driver = {
+ .name = "intel_mid_pwr",
+ .probe = mid_pwr_probe,
+ .id_table = mid_pwr_pci_ids,
+};
+
+builtin_pci_driver(mid_pwr_pci_driver);
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
new file mode 100644
index 000000000000..ed77cb9529ce
--- /dev/null
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_INTEL_IMR) += imr.o
+obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
new file mode 100644
index 000000000000..ee25b032c0b3
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * imr.c -- Intel Isolated Memory Region driver
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR registers define an isolated region of memory that can
+ * be masked to prohibit certain system agents from accessing memory.
+ * When a device behind a masked port performs an access - snooped or
+ * not, an IMR may optionally prevent that transaction from changing
+ * the state of memory or from getting correct data in response to the
+ * operation.
+ *
+ * Write data will be dropped and reads will return 0xFFFFFFFF, the
+ * system will reset and system BIOS will print out an error message to
+ * inform the user that an IMR has been violated.
+ *
+ * This code is based on the Linux MTRR code and reference code from
+ * Intel's Quark BSP EFI, Linux and grub code.
+ *
+ * See quark-x1000-datasheet.pdf for register definitions.
+ * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
+#include <asm/imr.h>
+#include <asm/iosf_mbi.h>
+#include <asm/io.h>
+
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+struct imr_device {
+ bool init;
+ struct mutex lock;
+ int max_imr;
+ int reg_base;
+};
+
+static struct imr_device imr_dev;
+
+/*
+ * IMR read/write mask control registers.
+ * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for
+ * bit definitions.
+ *
+ * addr_hi
+ * 31 Lock bit
+ * 30:24 Reserved
+ * 23:2 1 KiB aligned lo address
+ * 1:0 Reserved
+ *
+ * addr_hi
+ * 31:24 Reserved
+ * 23:2 1 KiB aligned hi address
+ * 1:0 Reserved
+ */
+#define IMR_LOCK BIT(31)
+
+struct imr_regs {
+ u32 addr_lo;
+ u32 addr_hi;
+ u32 rmask;
+ u32 wmask;
+};
+
+#define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32))
+#define IMR_SHIFT 8
+#define imr_to_phys(x) ((x) << IMR_SHIFT)
+#define phys_to_imr(x) ((x) >> IMR_SHIFT)
+
+/**
+ * imr_is_enabled - true if an IMR is enabled false otherwise.
+ *
+ * Determines if an IMR is enabled based on address range and read/write
+ * mask. An IMR set with an address range set to zero and a read/write
+ * access mask set to all is considered to be disabled. An IMR in any
+ * other state - for example set to zero but without read/write access
+ * all is considered to be enabled. This definition of disabled is how
+ * firmware switches off an IMR and is maintained in kernel for
+ * consistency.
+ *
+ * @imr: pointer to IMR descriptor.
+ * @return: true if IMR enabled false if disabled.
+ */
+static inline int imr_is_enabled(struct imr_regs *imr)
+{
+ return !(imr->rmask == IMR_READ_ACCESS_ALL &&
+ imr->wmask == IMR_WRITE_ACCESS_ALL &&
+ imr_to_phys(imr->addr_lo) == 0 &&
+ imr_to_phys(imr->addr_hi) == 0);
+}
+
+/**
+ * imr_read - read an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ *
+ * @idev: pointer to imr_device structure.
+ * @imr_id: IMR entry to read.
+ * @imr: IMR structure representing address and access masks.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
+{
+ u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+ int ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_lo);
+ if (ret)
+ return ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_hi);
+ if (ret)
+ return ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->rmask);
+ if (ret)
+ return ret;
+
+ return iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->wmask);
+}
+
+/**
+ * imr_write - write an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ * Note lock bits need to be written independently of address bits.
+ *
+ * @idev: pointer to imr_device structure.
+ * @imr_id: IMR entry to write.
+ * @imr: IMR structure representing address and access masks.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
+{
+ unsigned long flags;
+ u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+ int ret;
+
+ local_irq_save(flags);
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_lo);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_hi);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->rmask);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->wmask);
+ if (ret)
+ goto failed;
+
+ local_irq_restore(flags);
+ return 0;
+failed:
+ /*
+ * If writing to the IOSF failed then we're in an unknown state,
+ * likely a very bad state. An IMR in an invalid state will almost
+ * certainly lead to a memory access violation.
+ */
+ local_irq_restore(flags);
+ WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n",
+ imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK);
+
+ return ret;
+}
+
+/**
+ * imr_dbgfs_state_show - print state of IMR registers.
+ *
+ * @s: pointer to seq_file for output.
+ * @unused: unused parameter.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
+{
+ phys_addr_t base;
+ phys_addr_t end;
+ int i;
+ struct imr_device *idev = s->private;
+ struct imr_regs imr;
+ size_t size;
+ int ret = -ENODEV;
+
+ mutex_lock(&idev->lock);
+
+ for (i = 0; i < idev->max_imr; i++) {
+
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ break;
+
+ /*
+ * Remember to add IMR_ALIGN bytes to size to indicate the
+ * inherent IMR_ALIGN size bytes contained in the masked away
+ * lower ten bits.
+ */
+ if (imr_is_enabled(&imr)) {
+ base = imr_to_phys(imr.addr_lo);
+ end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+ size = end - base + 1;
+ } else {
+ base = 0;
+ end = 0;
+ size = 0;
+ }
+ seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
+ "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
+ &base, &end, size, imr.rmask, imr.wmask,
+ imr_is_enabled(&imr) ? "enabled " : "disabled",
+ imr.addr_lo & IMR_LOCK ? "locked" : "unlocked");
+ }
+
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state);
+
+/**
+ * imr_debugfs_register - register debugfs hooks.
+ *
+ * @idev: pointer to imr_device structure.
+ */
+static void imr_debugfs_register(struct imr_device *idev)
+{
+ debugfs_create_file("imr_state", 0444, NULL, idev,
+ &imr_dbgfs_state_fops);
+}
+
+/**
+ * imr_check_params - check passed address range IMR alignment and non-zero size
+ *
+ * @base: base address of intended IMR.
+ * @size: size of intended IMR.
+ * @return: zero on valid range -EINVAL on unaligned base/size.
+ */
+static int imr_check_params(phys_addr_t base, size_t size)
+{
+ if ((base & IMR_MASK) || (size & IMR_MASK)) {
+ pr_err("base %pa size 0x%08zx must align to 1KiB\n",
+ &base, size);
+ return -EINVAL;
+ }
+ if (size == 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends.
+ *
+ * IMR addr_hi has a built in offset of plus IMR_ALIGN (0x400) bytes from the
+ * value in the register. We need to subtract IMR_ALIGN bytes from input sizes
+ * as a result.
+ *
+ * @size: input size bytes.
+ * @return: reduced size.
+ */
+static inline size_t imr_raw_size(size_t size)
+{
+ return size - IMR_ALIGN;
+}
+
+/**
+ * imr_address_overlap - detects an address overlap.
+ *
+ * @addr: address to check against an existing IMR.
+ * @imr: imr being checked.
+ * @return: true for overlap false for no overlap.
+ */
+static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
+{
+ return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi);
+}
+
+/**
+ * imr_add_range - add an Isolated Memory Region.
+ *
+ * @base: physical base address of region aligned to 1KiB.
+ * @size: physical size of region in bytes must be aligned to 1KiB.
+ * @read_mask: read access mask.
+ * @write_mask: write access mask.
+ * @return: zero on success or negative value indicating error.
+ */
+int imr_add_range(phys_addr_t base, size_t size,
+ unsigned int rmask, unsigned int wmask)
+{
+ phys_addr_t end;
+ unsigned int i;
+ struct imr_device *idev = &imr_dev;
+ struct imr_regs imr;
+ size_t raw_size;
+ int reg;
+ int ret;
+
+ if (WARN_ONCE(idev->init == false, "driver not initialized"))
+ return -ENODEV;
+
+ ret = imr_check_params(base, size);
+ if (ret)
+ return ret;
+
+ /* Tweak the size value. */
+ raw_size = imr_raw_size(size);
+ end = base + raw_size;
+
+ /*
+ * Check for reserved IMR value common to firmware, kernel and grub
+ * indicating a disabled IMR.
+ */
+ imr.addr_lo = phys_to_imr(base);
+ imr.addr_hi = phys_to_imr(end);
+ imr.rmask = rmask;
+ imr.wmask = wmask;
+ if (!imr_is_enabled(&imr))
+ return -ENOTSUPP;
+
+ mutex_lock(&idev->lock);
+
+ /*
+ * Find a free IMR while checking for an existing overlapping range.
+ * Note there's no restriction in silicon to prevent IMR overlaps.
+ * For the sake of simplicity and ease in defining/debugging an IMR
+ * memory map we exclude IMR overlaps.
+ */
+ reg = -1;
+ for (i = 0; i < idev->max_imr; i++) {
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ goto failed;
+
+ /* Find overlap @ base or end of requested range. */
+ ret = -EINVAL;
+ if (imr_is_enabled(&imr)) {
+ if (imr_address_overlap(base, &imr))
+ goto failed;
+ if (imr_address_overlap(end, &imr))
+ goto failed;
+ } else {
+ reg = i;
+ }
+ }
+
+ /* Error out if we have no free IMR entries. */
+ if (reg == -1) {
+ ret = -ENOMEM;
+ goto failed;
+ }
+
+ pr_debug("add %d phys %pa-%pa size %zx mask 0x%08x wmask 0x%08x\n",
+ reg, &base, &end, raw_size, rmask, wmask);
+
+ /* Enable IMR at specified range and access mask. */
+ imr.addr_lo = phys_to_imr(base);
+ imr.addr_hi = phys_to_imr(end);
+ imr.rmask = rmask;
+ imr.wmask = wmask;
+
+ ret = imr_write(idev, reg, &imr);
+ if (ret < 0) {
+ /*
+ * In the highly unlikely event iosf_mbi_write failed
+ * attempt to rollback the IMR setup skipping the trapping
+ * of further IOSF write failures.
+ */
+ imr.addr_lo = 0;
+ imr.addr_hi = 0;
+ imr.rmask = IMR_READ_ACCESS_ALL;
+ imr.wmask = IMR_WRITE_ACCESS_ALL;
+ imr_write(idev, reg, &imr);
+ }
+failed:
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(imr_add_range);
+
+/**
+ * __imr_remove_range - delete an Isolated Memory Region.
+ *
+ * This function allows you to delete an IMR by its index specified by reg or
+ * by address range specified by base and size respectively. If you specify an
+ * index on its own the base and size parameters are ignored.
+ * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored.
+ * imr_remove_range(-1, base, size); delete IMR from base to base+size.
+ *
+ * @reg: imr index to remove.
+ * @base: physical base address of region aligned to 1 KiB.
+ * @size: physical size of region in bytes aligned to 1 KiB.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
+{
+ phys_addr_t end;
+ bool found = false;
+ unsigned int i;
+ struct imr_device *idev = &imr_dev;
+ struct imr_regs imr;
+ size_t raw_size;
+ int ret = 0;
+
+ if (WARN_ONCE(idev->init == false, "driver not initialized"))
+ return -ENODEV;
+
+ /*
+ * Validate address range if deleting by address, else we are
+ * deleting by index where base and size will be ignored.
+ */
+ if (reg == -1) {
+ ret = imr_check_params(base, size);
+ if (ret)
+ return ret;
+ }
+
+ /* Tweak the size value. */
+ raw_size = imr_raw_size(size);
+ end = base + raw_size;
+
+ mutex_lock(&idev->lock);
+
+ if (reg >= 0) {
+ /* If a specific IMR is given try to use it. */
+ ret = imr_read(idev, reg, &imr);
+ if (ret)
+ goto failed;
+
+ if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) {
+ ret = -ENODEV;
+ goto failed;
+ }
+ found = true;
+ } else {
+ /* Search for match based on address range. */
+ for (i = 0; i < idev->max_imr; i++) {
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ goto failed;
+
+ if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK)
+ continue;
+
+ if ((imr_to_phys(imr.addr_lo) == base) &&
+ (imr_to_phys(imr.addr_hi) == end)) {
+ found = true;
+ reg = i;
+ break;
+ }
+ }
+ }
+
+ if (!found) {
+ ret = -ENODEV;
+ goto failed;
+ }
+
+ pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size);
+
+ /* Tear down the IMR. */
+ imr.addr_lo = 0;
+ imr.addr_hi = 0;
+ imr.rmask = IMR_READ_ACCESS_ALL;
+ imr.wmask = IMR_WRITE_ACCESS_ALL;
+
+ ret = imr_write(idev, reg, &imr);
+
+failed:
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+
+/**
+ * imr_remove_range - delete an Isolated Memory Region by address
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by base and size respectively.
+ * imr_remove_range(base, size); delete IMR from base to base+size.
+ *
+ * @base: physical base address of region aligned to 1 KiB.
+ * @size: physical size of region in bytes aligned to 1 KiB.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+int imr_remove_range(phys_addr_t base, size_t size)
+{
+ return __imr_remove_range(-1, base, size);
+}
+EXPORT_SYMBOL_GPL(imr_remove_range);
+
+/**
+ * imr_clear - delete an Isolated Memory Region by index
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by the index of the IMR. Useful for initial sanitization of the IMR
+ * address map.
+ * imr_ge(base, size); delete IMR from base to base+size.
+ *
+ * @reg: imr index to remove.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+static inline int imr_clear(int reg)
+{
+ return __imr_remove_range(reg, 0, 0);
+}
+
+/**
+ * imr_fixup_memmap - Tear down IMRs used during bootup.
+ *
+ * BIOS and Grub both setup IMRs around compressed kernel, initrd memory
+ * that need to be removed before the kernel hands out one of the IMR
+ * encased addresses to a downstream DMA agent such as the SD or Ethernet.
+ * IMRs on Galileo are setup to immediately reset the system on violation.
+ * As a result if you're running a root filesystem from SD - you'll need
+ * the boot-time IMRs torn down or you'll find seemingly random resets when
+ * using your filesystem.
+ *
+ * @idev: pointer to imr_device structure.
+ * @return:
+ */
+static void __init imr_fixup_memmap(struct imr_device *idev)
+{
+ phys_addr_t base = virt_to_phys(&_text);
+ size_t size = virt_to_phys(&__end_rodata) - base;
+ unsigned long start, end;
+ int i;
+ int ret;
+
+ /* Tear down all existing unlocked IMRs. */
+ for (i = 0; i < idev->max_imr; i++)
+ imr_clear(i);
+
+ start = (unsigned long)_text;
+ end = (unsigned long)__end_rodata - 1;
+
+ /*
+ * Setup an unlocked IMR around the physical extent of the kernel
+ * from the beginning of the .text section to the end of the
+ * .rodata section as one physically contiguous block.
+ *
+ * We don't round up @size since it is already PAGE_SIZE aligned.
+ * See vmlinux.lds.S for details.
+ */
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
+ if (ret < 0) {
+ pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+ size / 1024, start, end);
+ } else {
+ pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+ size / 1024, start, end);
+ }
+
+}
+
+static const struct x86_cpu_id imr_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_QUARK_X1000, NULL),
+ {}
+};
+
+/**
+ * imr_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_init(void)
+{
+ struct imr_device *idev = &imr_dev;
+
+ if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
+ return -ENODEV;
+
+ idev->max_imr = QUARK_X1000_IMR_MAX;
+ idev->reg_base = QUARK_X1000_IMR_REGBASE;
+ idev->init = true;
+
+ mutex_init(&idev->lock);
+ imr_debugfs_register(idev);
+ imr_fixup_memmap(idev);
+ return 0;
+}
+device_initcall(imr_init);
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
new file mode 100644
index 000000000000..657925b0f428
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR self test. The purpose of this module is to run a set of tests on the
+ * IMR API to validate its sanity. We check for overlapping, reserved
+ * addresses and setup/teardown sanity.
+ *
+ */
+
+#include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
+#include <asm/imr.h>
+#include <asm/io.h>
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#define SELFTEST KBUILD_MODNAME ": "
+/**
+ * imr_self_test_result - Print result string for self test.
+ *
+ * @res: result code - true if test passed false otherwise.
+ * @fmt: format string.
+ * ... variadic argument list.
+ */
+static __printf(2, 3)
+void __init imr_self_test_result(int res, const char *fmt, ...)
+{
+ va_list vlist;
+
+ /* Print pass/fail. */
+ if (res)
+ pr_info(SELFTEST "pass ");
+ else
+ pr_info(SELFTEST "fail ");
+
+ /* Print variable string. */
+ va_start(vlist, fmt);
+ vprintk(fmt, vlist);
+ va_end(vlist);
+
+ /* Optional warning. */
+ WARN(res == 0, "test failed");
+}
+#undef SELFTEST
+
+/**
+ * imr_self_test
+ *
+ * Verify IMR self_test with some simple tests to verify overlap,
+ * zero sized allocations and 1 KiB sized areas.
+ *
+ */
+static void __init imr_self_test(void)
+{
+ phys_addr_t base = virt_to_phys(&_text);
+ size_t size = virt_to_phys(&__end_rodata) - base;
+ const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
+ int ret;
+
+ /* Test zero zero. */
+ ret = imr_add_range(0, 0, 0, 0);
+ imr_self_test_result(ret < 0, "zero sized IMR\n");
+
+ /* Test exact overlap. */
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test overlap with base inside of existing. */
+ base += size - IMR_ALIGN;
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test overlap with end inside of existing. */
+ base -= size + IMR_ALIGN * 2;
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
+ ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
+ IMR_WRITE_ACCESS_ALL);
+ imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
+
+ /* Test that a 1 KiB IMR @ zero with CPU only will work. */
+ ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
+ imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
+ if (ret >= 0) {
+ ret = imr_remove_range(0, IMR_ALIGN);
+ imr_self_test_result(ret == 0, "teardown - cpu-access\n");
+ }
+
+ /* Test 2 KiB works. */
+ size = IMR_ALIGN * 2;
+ ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
+ imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
+ if (ret >= 0) {
+ ret = imr_remove_range(0, size);
+ imr_self_test_result(ret == 0, "teardown 2KiB\n");
+ }
+}
+
+static const struct x86_cpu_id imr_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_QUARK_X1000, NULL),
+ {}
+};
+
+/**
+ * imr_self_test_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_self_test_init(void)
+{
+ if (x86_match_cpu(imr_ids))
+ imr_self_test();
+ return 0;
+}
+
+/**
+ * imr_self_test_exit - exit point for IMR code.
+ *
+ * return:
+ */
+device_initcall(imr_self_test_init);
diff --git a/arch/x86/platform/intel/Makefile b/arch/x86/platform/intel/Makefile
new file mode 100644
index 000000000000..dbee3b00f9d0
--- /dev/null
+++ b/arch/x86/platform/intel/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
new file mode 100644
index 000000000000..40ae94db20d8
--- /dev/null
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOSF-SB MailBox Interface Driver
+ * Copyright (c) 2013, Intel Corporation.
+ *
+ * The IOSF-SB is a fabric bus available on Atom based SOC's that uses a
+ * mailbox interface (MBI) to communicate with multiple devices. This
+ * driver implements access to this interface for those platforms that can
+ * enumerate the device using PCI.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/capability.h>
+#include <linux/pm_qos.h>
+#include <linux/wait.h>
+
+#include <asm/iosf_mbi.h>
+
+#define PCI_DEVICE_ID_INTEL_BAYTRAIL 0x0F00
+#define PCI_DEVICE_ID_INTEL_BRASWELL 0x2280
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000 0x0958
+#define PCI_DEVICE_ID_INTEL_TANGIER 0x1170
+
+static struct pci_dev *mbi_pdev;
+static DEFINE_SPINLOCK(iosf_mbi_lock);
+
+/**************** Generic iosf_mbi access helpers ****************/
+
+static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
+{
+ return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE;
+}
+
+static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr)
+{
+ int result;
+
+ if (!mbi_pdev)
+ return -ENODEV;
+
+ if (mcrx) {
+ result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
+ mcrx);
+ if (result < 0)
+ goto fail_read;
+ }
+
+ result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
+ if (result < 0)
+ goto fail_read;
+
+ result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
+ if (result < 0)
+ goto fail_read;
+
+ return 0;
+
+fail_read:
+ dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
+ return pcibios_err_to_errno(result);
+}
+
+static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
+{
+ int result;
+
+ if (!mbi_pdev)
+ return -ENODEV;
+
+ result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
+ if (result < 0)
+ goto fail_write;
+
+ if (mcrx) {
+ result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
+ mcrx);
+ if (result < 0)
+ goto fail_write;
+ }
+
+ result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
+ if (result < 0)
+ goto fail_write;
+
+ return 0;
+
+fail_write:
+ dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
+ return pcibios_err_to_errno(result);
+}
+
+int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
+{
+ u32 mcr, mcrx;
+ unsigned long flags;
+ int ret;
+
+ /* Access to the GFX unit is handled by GPU code */
+ if (port == BT_MBI_UNIT_GFX) {
+ WARN_ON(1);
+ return -EPERM;
+ }
+
+ mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+ mcrx = offset & MBI_MASK_HI;
+
+ spin_lock_irqsave(&iosf_mbi_lock, flags);
+ ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr);
+ spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_read);
+
+int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
+{
+ u32 mcr, mcrx;
+ unsigned long flags;
+ int ret;
+
+ /* Access to the GFX unit is handled by GPU code */
+ if (port == BT_MBI_UNIT_GFX) {
+ WARN_ON(1);
+ return -EPERM;
+ }
+
+ mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+ mcrx = offset & MBI_MASK_HI;
+
+ spin_lock_irqsave(&iosf_mbi_lock, flags);
+ ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr);
+ spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_write);
+
+int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
+{
+ u32 mcr, mcrx;
+ u32 value;
+ unsigned long flags;
+ int ret;
+
+ /* Access to the GFX unit is handled by GPU code */
+ if (port == BT_MBI_UNIT_GFX) {
+ WARN_ON(1);
+ return -EPERM;
+ }
+
+ mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
+ mcrx = offset & MBI_MASK_HI;
+
+ spin_lock_irqsave(&iosf_mbi_lock, flags);
+
+ /* Read current mdr value */
+ ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value);
+ if (ret < 0) {
+ spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+ return ret;
+ }
+
+ /* Apply mask */
+ value &= ~mask;
+ mdr &= mask;
+ value |= mdr;
+
+ /* Write back */
+ ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value);
+
+ spin_unlock_irqrestore(&iosf_mbi_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_modify);
+
+bool iosf_mbi_available(void)
+{
+ /* Mbi isn't hot-pluggable. No remove routine is provided */
+ return mbi_pdev;
+}
+EXPORT_SYMBOL(iosf_mbi_available);
+
+/*
+ **************** P-Unit/kernel shared I2C bus arbitration ****************
+ *
+ * Some Bay Trail and Cherry Trail devices have the P-Unit and us (the kernel)
+ * share a single I2C bus to the PMIC. Below are helpers to arbitrate the
+ * accesses between the kernel and the P-Unit.
+ *
+ * See arch/x86/include/asm/iosf_mbi.h for kernel-doc text for each function.
+ */
+
+#define SEMAPHORE_TIMEOUT 500
+#define PUNIT_SEMAPHORE_BYT 0x7
+#define PUNIT_SEMAPHORE_CHT 0x10e
+#define PUNIT_SEMAPHORE_BIT BIT(0)
+#define PUNIT_SEMAPHORE_ACQUIRE BIT(1)
+
+static DEFINE_MUTEX(iosf_mbi_pmic_access_mutex);
+static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier);
+static DECLARE_WAIT_QUEUE_HEAD(iosf_mbi_pmic_access_waitq);
+static u32 iosf_mbi_pmic_punit_access_count;
+static u32 iosf_mbi_pmic_i2c_access_count;
+static u32 iosf_mbi_sem_address;
+static unsigned long iosf_mbi_sem_acquired;
+static struct pm_qos_request iosf_mbi_pm_qos;
+
+void iosf_mbi_punit_acquire(void)
+{
+ /* Wait for any I2C PMIC accesses from in kernel drivers to finish. */
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ while (iosf_mbi_pmic_i2c_access_count != 0) {
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+ wait_event(iosf_mbi_pmic_access_waitq,
+ iosf_mbi_pmic_i2c_access_count == 0);
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ }
+ /*
+ * We do not need to do anything to allow the PUNIT to safely access
+ * the PMIC, other then block in kernel accesses to the PMIC.
+ */
+ iosf_mbi_pmic_punit_access_count++;
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_acquire);
+
+void iosf_mbi_punit_release(void)
+{
+ bool do_wakeup;
+
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ iosf_mbi_pmic_punit_access_count--;
+ do_wakeup = iosf_mbi_pmic_punit_access_count == 0;
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+
+ if (do_wakeup)
+ wake_up(&iosf_mbi_pmic_access_waitq);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_release);
+
+static int iosf_mbi_get_sem(u32 *sem)
+{
+ int ret;
+
+ ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ iosf_mbi_sem_address, sem);
+ if (ret) {
+ dev_err(&mbi_pdev->dev, "Error P-Unit semaphore read failed\n");
+ return ret;
+ }
+
+ *sem &= PUNIT_SEMAPHORE_BIT;
+ return 0;
+}
+
+static void iosf_mbi_reset_semaphore(void)
+{
+ if (iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ iosf_mbi_sem_address, 0, PUNIT_SEMAPHORE_BIT))
+ dev_err(&mbi_pdev->dev, "Error P-Unit semaphore reset failed\n");
+
+ cpu_latency_qos_update_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE);
+
+ blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier,
+ MBI_PMIC_BUS_ACCESS_END, NULL);
+}
+
+/*
+ * This function blocks P-Unit accesses to the PMIC I2C bus, so that kernel
+ * I2C code, such as e.g. a fuel-gauge driver, can access it safely.
+ *
+ * This function may be called by I2C controller code while an I2C driver has
+ * already blocked P-Unit accesses because it wants them blocked over multiple
+ * i2c-transfers, for e.g. read-modify-write of an I2C client register.
+ *
+ * To allow safe PMIC i2c bus accesses this function takes the following steps:
+ *
+ * 1) Some code sends request to the P-Unit which make it access the PMIC
+ * I2C bus. Testing has shown that the P-Unit does not check its internal
+ * PMIC bus semaphore for these requests. Callers of these requests call
+ * iosf_mbi_punit_acquire()/_release() around their P-Unit accesses, these
+ * functions increase/decrease iosf_mbi_pmic_punit_access_count, so first
+ * we wait for iosf_mbi_pmic_punit_access_count to become 0.
+ *
+ * 2) Check iosf_mbi_pmic_i2c_access_count, if access has already
+ * been blocked by another caller, we only need to increment
+ * iosf_mbi_pmic_i2c_access_count and we can skip the other steps.
+ *
+ * 3) Some code makes such P-Unit requests from atomic contexts where it
+ * cannot call iosf_mbi_punit_acquire() as that may sleep.
+ * As the second step we call a notifier chain which allows any code
+ * needing P-Unit resources from atomic context to acquire them before
+ * we take control over the PMIC I2C bus.
+ *
+ * 4) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC
+ * if this happens while the kernel itself is accessing the PMIC I2C bus
+ * the SoC hangs.
+ * As the third step we call cpu_latency_qos_update_request() to disallow the
+ * CPU to enter C6 or C7.
+ *
+ * 5) The P-Unit has a PMIC bus semaphore which we can request to stop
+ * autonomous P-Unit tasks from accessing the PMIC I2C bus while we hold it.
+ * As the fourth and final step we request this semaphore and wait for our
+ * request to be acknowledged.
+ */
+int iosf_mbi_block_punit_i2c_access(void)
+{
+ unsigned long start, end;
+ int ret = 0;
+ u32 sem;
+
+ if (WARN_ON(!mbi_pdev || !iosf_mbi_sem_address))
+ return -ENXIO;
+
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+
+ while (iosf_mbi_pmic_punit_access_count != 0) {
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+ wait_event(iosf_mbi_pmic_access_waitq,
+ iosf_mbi_pmic_punit_access_count == 0);
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ }
+
+ if (iosf_mbi_pmic_i2c_access_count > 0)
+ goto success;
+
+ blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier,
+ MBI_PMIC_BUS_ACCESS_BEGIN, NULL);
+
+ /*
+ * Disallow the CPU to enter C6 or C7 state, entering these states
+ * requires the P-Unit to talk to the PMIC and if this happens while
+ * we're holding the semaphore, the SoC hangs.
+ */
+ cpu_latency_qos_update_request(&iosf_mbi_pm_qos, 0);
+
+ /* host driver writes to side band semaphore register */
+ ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ iosf_mbi_sem_address, PUNIT_SEMAPHORE_ACQUIRE);
+ if (ret) {
+ dev_err(&mbi_pdev->dev, "Error P-Unit semaphore request failed\n");
+ goto error;
+ }
+
+ /* host driver waits for bit 0 to be set in semaphore register */
+ start = jiffies;
+ end = start + msecs_to_jiffies(SEMAPHORE_TIMEOUT);
+ do {
+ ret = iosf_mbi_get_sem(&sem);
+ if (!ret && sem) {
+ iosf_mbi_sem_acquired = jiffies;
+ dev_dbg(&mbi_pdev->dev, "P-Unit semaphore acquired after %ums\n",
+ jiffies_to_msecs(jiffies - start));
+ goto success;
+ }
+
+ usleep_range(1000, 2000);
+ } while (time_before(jiffies, end));
+
+ ret = -ETIMEDOUT;
+ dev_err(&mbi_pdev->dev, "Error P-Unit semaphore timed out, resetting\n");
+error:
+ iosf_mbi_reset_semaphore();
+ if (!iosf_mbi_get_sem(&sem))
+ dev_err(&mbi_pdev->dev, "P-Unit semaphore: %d\n", sem);
+success:
+ if (!WARN_ON(ret))
+ iosf_mbi_pmic_i2c_access_count++;
+
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_block_punit_i2c_access);
+
+void iosf_mbi_unblock_punit_i2c_access(void)
+{
+ bool do_wakeup = false;
+
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ iosf_mbi_pmic_i2c_access_count--;
+ if (iosf_mbi_pmic_i2c_access_count == 0) {
+ iosf_mbi_reset_semaphore();
+ dev_dbg(&mbi_pdev->dev, "punit semaphore held for %ums\n",
+ jiffies_to_msecs(jiffies - iosf_mbi_sem_acquired));
+ do_wakeup = true;
+ }
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+
+ if (do_wakeup)
+ wake_up(&iosf_mbi_pmic_access_waitq);
+}
+EXPORT_SYMBOL(iosf_mbi_unblock_punit_i2c_access);
+
+int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb)
+{
+ int ret;
+
+ /* Wait for the bus to go inactive before registering */
+ iosf_mbi_punit_acquire();
+ ret = blocking_notifier_chain_register(
+ &iosf_mbi_pmic_bus_access_notifier, nb);
+ iosf_mbi_punit_release();
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier);
+
+int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(
+ struct notifier_block *nb)
+{
+ iosf_mbi_assert_punit_acquired();
+
+ return blocking_notifier_chain_unregister(
+ &iosf_mbi_pmic_bus_access_notifier, nb);
+}
+EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked);
+
+void iosf_mbi_assert_punit_acquired(void)
+{
+ WARN_ON(iosf_mbi_pmic_punit_access_count == 0);
+}
+EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired);
+
+/**************** iosf_mbi debug code ****************/
+
+#ifdef CONFIG_IOSF_MBI_DEBUG
+static u32 dbg_mdr;
+static u32 dbg_mcr;
+static u32 dbg_mcrx;
+
+static int mcr_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+static int mcr_set(void *data, u64 val)
+{
+ u8 command = ((u32)val & 0xFF000000) >> 24,
+ port = ((u32)val & 0x00FF0000) >> 16,
+ offset = ((u32)val & 0x0000FF00) >> 8;
+ int err;
+
+ *(u32 *)data = val;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EACCES;
+
+ if (command & 1u)
+ err = iosf_mbi_write(port,
+ command,
+ dbg_mcrx | offset,
+ dbg_mdr);
+ else
+ err = iosf_mbi_read(port,
+ command,
+ dbg_mcrx | offset,
+ &dbg_mdr);
+
+ return err;
+}
+DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n");
+
+static struct dentry *iosf_dbg;
+
+static void iosf_sideband_debug_init(void)
+{
+ iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
+
+ /* mdr */
+ debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
+
+ /* mcrx */
+ debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
+
+ /* mcr - initiates mailbox transaction */
+ debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
+}
+
+static void iosf_debugfs_init(void)
+{
+ iosf_sideband_debug_init();
+}
+
+static void iosf_debugfs_remove(void)
+{
+ debugfs_remove_recursive(iosf_dbg);
+}
+#else
+static inline void iosf_debugfs_init(void) { }
+static inline void iosf_debugfs_remove(void) { }
+#endif /* CONFIG_IOSF_MBI_DEBUG */
+
+static int iosf_mbi_probe(struct pci_dev *pdev,
+ const struct pci_device_id *dev_id)
+{
+ int ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "error: could not enable device\n");
+ return ret;
+ }
+
+ mbi_pdev = pci_dev_get(pdev);
+ iosf_mbi_sem_address = dev_id->driver_data;
+
+ return 0;
+}
+
+static const struct pci_device_id iosf_mbi_pci_ids[] = {
+ { PCI_DEVICE_DATA(INTEL, BAYTRAIL, PUNIT_SEMAPHORE_BYT) },
+ { PCI_DEVICE_DATA(INTEL, BRASWELL, PUNIT_SEMAPHORE_CHT) },
+ { PCI_DEVICE_DATA(INTEL, QUARK_X1000, 0) },
+ { PCI_DEVICE_DATA(INTEL, TANGIER, 0) },
+ { 0, },
+};
+MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids);
+
+static struct pci_driver iosf_mbi_pci_driver = {
+ .name = "iosf_mbi_pci",
+ .probe = iosf_mbi_probe,
+ .id_table = iosf_mbi_pci_ids,
+};
+
+static int __init iosf_mbi_init(void)
+{
+ iosf_debugfs_init();
+
+ cpu_latency_qos_add_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE);
+
+ return pci_register_driver(&iosf_mbi_pci_driver);
+}
+
+static void __exit iosf_mbi_exit(void)
+{
+ iosf_debugfs_remove();
+
+ pci_unregister_driver(&iosf_mbi_pci_driver);
+ pci_dev_put(mbi_pdev);
+ mbi_pdev = NULL;
+
+ cpu_latency_qos_remove_request(&iosf_mbi_pm_qos);
+}
+
+module_init(iosf_mbi_init);
+module_exit(iosf_mbi_exit);
+
+MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
+MODULE_DESCRIPTION("IOSF Mailbox Interface accessor");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
index db921983a102..354352748428 100644
--- a/arch/x86/platform/iris/Makefile
+++ b/arch/x86/platform/iris/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index e6cb80f620af..5591a2d9cfe8 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Eurobraille/Iris power off support.
*
@@ -5,20 +6,6 @@
* It is shutdown by a special I/O sequence which this module provides.
*
* Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
- *
- * This program is free software ; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation ; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY ; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with the program ; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/moduleparam.h>
@@ -27,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>
-#include <linux/init.h>
#include <linux/pm.h>
#include <asm/io.h>
@@ -41,7 +27,6 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
-MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
static bool force;
@@ -77,17 +62,15 @@ static int iris_probe(struct platform_device *pdev)
return 0;
}
-static int iris_remove(struct platform_device *pdev)
+static void iris_remove(struct platform_device *pdev)
{
pm_power_off = old_pm_power_off;
printk(KERN_INFO "Iris power_off handler uninstalled.\n");
- return 0;
}
static struct platform_driver iris_driver = {
.driver = {
.name = "iris",
- .owner = THIS_MODULE,
},
.probe = iris_probe,
.remove = iris_remove,
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
deleted file mode 100644
index af1da7e623f9..000000000000
--- a/arch/x86/platform/mrst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_X86_INTEL_MID) += mrst.o
-obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
deleted file mode 100644
index 028454f0c3a5..000000000000
--- a/arch/x86/platform/mrst/early_printk_mrst.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * early_printk_mrst.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements two early consoles named mrst and hsu.
- * mrst is based on Maxim3110 spi-uart device, it exists in both
- * Moorestown and Medfield platforms, while hsu is based on a High
- * Speed UART device which only exists in the Medfield platform
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/kmsg_dump.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/mrst.h>
-
-#define MRST_SPI_TIMEOUT 0x200000
-#define MRST_REGBASE_SPI0 0xff128000
-#define MRST_REGBASE_SPI1 0xff128400
-#define MRST_CLK_SPI0_REG 0xff11d86c
-
-/* Bit fields in CTRLR0 */
-#define SPI_DFS_OFFSET 0
-
-#define SPI_FRF_OFFSET 4
-#define SPI_FRF_SPI 0x0
-#define SPI_FRF_SSP 0x1
-#define SPI_FRF_MICROWIRE 0x2
-#define SPI_FRF_RESV 0x3
-
-#define SPI_MODE_OFFSET 6
-#define SPI_SCPH_OFFSET 6
-#define SPI_SCOL_OFFSET 7
-#define SPI_TMOD_OFFSET 8
-#define SPI_TMOD_TR 0x0 /* xmit & recv */
-#define SPI_TMOD_TO 0x1 /* xmit only */
-#define SPI_TMOD_RO 0x2 /* recv only */
-#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
-
-#define SPI_SLVOE_OFFSET 10
-#define SPI_SRL_OFFSET 11
-#define SPI_CFS_OFFSET 12
-
-/* Bit fields in SR, 7 bits */
-#define SR_MASK 0x7f /* cover 7 bits */
-#define SR_BUSY (1 << 0)
-#define SR_TF_NOT_FULL (1 << 1)
-#define SR_TF_EMPT (1 << 2)
-#define SR_RF_NOT_EMPT (1 << 3)
-#define SR_RF_FULL (1 << 4)
-#define SR_TX_ERR (1 << 5)
-#define SR_DCOL (1 << 6)
-
-struct dw_spi_reg {
- u32 ctrl0;
- u32 ctrl1;
- u32 ssienr;
- u32 mwcr;
- u32 ser;
- u32 baudr;
- u32 txfltr;
- u32 rxfltr;
- u32 txflr;
- u32 rxflr;
- u32 sr;
- u32 imr;
- u32 isr;
- u32 risr;
- u32 txoicr;
- u32 rxoicr;
- u32 rxuicr;
- u32 msticr;
- u32 icr;
- u32 dmacr;
- u32 dmatdlr;
- u32 dmardlr;
- u32 idr;
- u32 version;
-
- /* Currently operates as 32 bits, though only the low 16 bits matter */
- u32 dr;
-} __packed;
-
-#define dw_readl(dw, name) __raw_readl(&(dw)->name)
-#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
-
-/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */
-static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
-
-static u32 *pclk_spi0;
-/* Always contains an accessible address, start with 0 */
-static struct dw_spi_reg *pspi;
-
-static struct kmsg_dumper dw_dumper;
-static int dumper_registered;
-
-static void dw_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
-{
- static char line[1024];
- size_t len;
-
- /* When run to this, we'd better re-init the HW */
- mrst_early_console_init();
-
- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len))
- early_mrst_console.write(&early_mrst_console, line, len);
-}
-
-/* Set the ratio rate to 115200, 8n1, IRQ disabled */
-static void max3110_write_config(void)
-{
- u16 config;
-
- config = 0xc001;
- dw_writel(pspi, dr, config);
-}
-
-/* Translate char to a eligible word and send to max3110 */
-static void max3110_write_data(char c)
-{
- u16 data;
-
- data = 0x8000 | c;
- dw_writel(pspi, dr, data);
-}
-
-void mrst_early_console_init(void)
-{
- u32 ctrlr0 = 0;
- u32 spi0_cdiv;
- u32 freq; /* Freqency info only need be searched once */
-
- /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
- pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
- MRST_CLK_SPI0_REG);
- spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
- freq = 100000000 / (spi0_cdiv + 1);
-
- if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
- mrst_spi_paddr = MRST_REGBASE_SPI1;
-
- pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
- mrst_spi_paddr);
-
- /* Disable SPI controller */
- dw_writel(pspi, ssienr, 0);
-
- /* Set control param, 8 bits, transmit only mode */
- ctrlr0 = dw_readl(pspi, ctrl0);
-
- ctrlr0 &= 0xfcc0;
- ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
- | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
- dw_writel(pspi, ctrl0, ctrlr0);
-
- /*
- * Change the spi0 clk to comply with 115200 bps, use 100000 to
- * calculate the clk dividor to make the clock a little slower
- * than real baud rate.
- */
- dw_writel(pspi, baudr, freq/100000);
-
- /* Disable all INT for early phase */
- dw_writel(pspi, imr, 0x0);
-
- /* Set the cs to spi-uart */
- dw_writel(pspi, ser, 0x2);
-
- /* Enable the HW, the last step for HW init */
- dw_writel(pspi, ssienr, 0x1);
-
- /* Set the default configuration */
- max3110_write_config();
-
- /* Register the kmsg dumper */
- if (!dumper_registered) {
- dw_dumper.dump = dw_kmsg_dump;
- kmsg_dump_register(&dw_dumper);
- dumper_registered = 1;
- }
-}
-
-/* Slave select should be called in the read/write function */
-static void early_mrst_spi_putc(char c)
-{
- unsigned int timeout;
- u32 sr;
-
- timeout = MRST_SPI_TIMEOUT;
- /* Early putc needs to make sure the TX FIFO is not full */
- while (--timeout) {
- sr = dw_readl(pspi, sr);
- if (!(sr & SR_TF_NOT_FULL))
- cpu_relax();
- else
- break;
- }
-
- if (!timeout)
- pr_warning("MRST earlycon: timed out\n");
- else
- max3110_write_data(c);
-}
-
-/* Early SPI only uses polling mode */
-static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
-{
- int i;
-
- for (i = 0; i < n && *str; i++) {
- if (*str == '\n')
- early_mrst_spi_putc('\r');
- early_mrst_spi_putc(*str);
- str++;
- }
-}
-
-struct console early_mrst_console = {
- .name = "earlymrst",
- .write = early_mrst_spi_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT_BASE 0xffa28080
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(const char *s)
-{
- unsigned long paddr, port = 0;
- u8 lcr;
-
- /*
- * Select the early HSU console port if specified by user in the
- * kernel command line.
- */
- if (*s && !kstrtoul(s, 10, &port))
- port = clamp_val(port, 0, 2);
-
- paddr = HSU_PORT_BASE + port * 0x80;
- phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
-
- /* Disable FIFO */
- writeb(0x0, phsu + UART_FCR);
-
- /* Set to default 115200 bps, 8n1 */
- lcr = readb(phsu + UART_LCR);
- writeb((0x80 | lcr), phsu + UART_LCR);
- writeb(0x18, phsu + UART_DLL);
- writeb(lcr, phsu + UART_LCR);
- writel(0x3600, phsu + UART_MUL*4);
-
- writeb(0x8, phsu + UART_MCR);
- writeb(0x7, phsu + UART_FCR);
- writeb(0x3, phsu + UART_LCR);
-
- /* Clear IRQ status */
- readb(phsu + UART_LSR);
- readb(phsu + UART_RX);
- readb(phsu + UART_IIR);
- readb(phsu + UART_MSR);
-
- /* Enable FIFO */
- writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
- unsigned int timeout = 10000; /* 10ms */
- u8 status;
-
- while (--timeout) {
- status = readb(phsu + UART_LSR);
- if (status & BOTH_EMPTY)
- break;
- udelay(1);
- }
-
- /* Only write the char when there was no timeout */
- if (timeout)
- writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
- int i;
-
- for (i = 0; i < n && *str; i++) {
- if (*str == '\n')
- early_hsu_putc('\r');
- early_hsu_putc(*str);
- str++;
- }
-}
-
-struct console early_hsu_console = {
- .name = "earlyhsu",
- .write = early_hsu_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
deleted file mode 100644
index a0a0a4389bbd..000000000000
--- a/arch/x86/platform/mrst/mrst.c
+++ /dev/null
@@ -1,1052 +0,0 @@
-/*
- * mrst.c: Intel Moorestown platform specific setup code
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Jacob Pan (jacob.jun.pan@intel.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#define pr_fmt(fmt) "mrst: " fmt
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/intel_pmic_gpio.h>
-#include <linux/spi/spi.h>
-#include <linux/i2c.h>
-#include <linux/i2c/pca953x.h>
-#include <linux/gpio_keys.h>
-#include <linux/input.h>
-#include <linux/platform_device.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/mfd/intel_msic.h>
-#include <linux/gpio.h>
-#include <linux/i2c/tc35876x.h>
-
-#include <asm/setup.h>
-#include <asm/mpspec_def.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/io_apic.h>
-#include <asm/mrst.h>
-#include <asm/mrst-vrtc.h>
-#include <asm/io.h>
-#include <asm/i8259.h>
-#include <asm/intel_scu_ipc.h>
-#include <asm/apb_timer.h>
-#include <asm/reboot.h>
-
-/*
- * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
- * cmdline option x86_mrst_timer can be used to override the configuration
- * to prefer one or the other.
- * at runtime, there are basically three timer configurations:
- * 1. per cpu apbt clock only
- * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
- * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
- *
- * by default (without cmdline option), platform code first detects cpu type
- * to see if we are on lincroft or penwell, then set up both lapic or apbt
- * clocks accordingly.
- * i.e. by default, medfield uses configuration #2, moorestown uses #1.
- * config #3 is supported but not recommended on medfield.
- *
- * rating and feature summary:
- * lapic (with C3STOP) --------- 100
- * apbt (always-on) ------------ 110
- * lapic (always-on,ARAT) ------ 150
- */
-
-__cpuinitdata enum mrst_timer_options mrst_timer_options;
-
-static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
-static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
-enum mrst_cpu_type __mrst_cpu_chip;
-EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
-
-int sfi_mtimer_num;
-
-struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
-EXPORT_SYMBOL_GPL(sfi_mrtc_array);
-int sfi_mrtc_num;
-
-static void mrst_power_off(void)
-{
-}
-
-static void mrst_reboot(void)
-{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
-}
-
-/* parse all the mtimer info to a static mtimer array */
-static int __init sfi_parse_mtmr(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_timer_table_entry *pentry;
- struct mpc_intsrc mp_irq;
- int totallen;
-
- sb = (struct sfi_table_simple *)table;
- if (!sfi_mtimer_num) {
- sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
- struct sfi_timer_table_entry);
- pentry = (struct sfi_timer_table_entry *) sb->pentry;
- totallen = sfi_mtimer_num * sizeof(*pentry);
- memcpy(sfi_mtimer_array, pentry, totallen);
- }
-
- pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
- pentry = sfi_mtimer_array;
- for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
- pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
- " irq = %d\n", totallen, (u32)pentry->phys_addr,
- pentry->freq_hz, pentry->irq);
- if (!pentry->irq)
- continue;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
-/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
- mp_irq.irqflag = 5;
- mp_irq.srcbus = MP_BUS_ISA;
- mp_irq.srcbusirq = pentry->irq; /* IRQ */
- mp_irq.dstapic = MP_APIC_ALL;
- mp_irq.dstirq = pentry->irq;
- mp_save_irq(&mp_irq);
- }
-
- return 0;
-}
-
-struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
-{
- int i;
- if (hint < sfi_mtimer_num) {
- if (!sfi_mtimer_usage[hint]) {
- pr_debug("hint taken for timer %d irq %d\n",\
- hint, sfi_mtimer_array[hint].irq);
- sfi_mtimer_usage[hint] = 1;
- return &sfi_mtimer_array[hint];
- }
- }
- /* take the first timer available */
- for (i = 0; i < sfi_mtimer_num;) {
- if (!sfi_mtimer_usage[i]) {
- sfi_mtimer_usage[i] = 1;
- return &sfi_mtimer_array[i];
- }
- i++;
- }
- return NULL;
-}
-
-void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
-{
- int i;
- for (i = 0; i < sfi_mtimer_num;) {
- if (mtmr->irq == sfi_mtimer_array[i].irq) {
- sfi_mtimer_usage[i] = 0;
- return;
- }
- i++;
- }
-}
-
-/* parse all the mrtc info to a global mrtc array */
-int __init sfi_parse_mrtc(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_rtc_table_entry *pentry;
- struct mpc_intsrc mp_irq;
-
- int totallen;
-
- sb = (struct sfi_table_simple *)table;
- if (!sfi_mrtc_num) {
- sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
- struct sfi_rtc_table_entry);
- pentry = (struct sfi_rtc_table_entry *)sb->pentry;
- totallen = sfi_mrtc_num * sizeof(*pentry);
- memcpy(sfi_mrtc_array, pentry, totallen);
- }
-
- pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
- pentry = sfi_mrtc_array;
- for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
- pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
- totallen, (u32)pentry->phys_addr, pentry->irq);
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = 0xf; /* level trigger and active low */
- mp_irq.srcbus = MP_BUS_ISA;
- mp_irq.srcbusirq = pentry->irq; /* IRQ */
- mp_irq.dstapic = MP_APIC_ALL;
- mp_irq.dstirq = pentry->irq;
- mp_save_irq(&mp_irq);
- }
- return 0;
-}
-
-static unsigned long __init mrst_calibrate_tsc(void)
-{
- unsigned long fast_calibrate;
- u32 lo, hi, ratio, fsb;
-
- rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
- pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
- ratio = (hi >> 8) & 0x1f;
- pr_debug("ratio is %d\n", ratio);
- if (!ratio) {
- pr_err("read a zero ratio, should be incorrect!\n");
- pr_err("force tsc ratio to 16 ...\n");
- ratio = 16;
- }
- rdmsr(MSR_FSB_FREQ, lo, hi);
- if ((lo & 0x7) == 0x7)
- fsb = PENWELL_FSB_FREQ_83SKU;
- else
- fsb = PENWELL_FSB_FREQ_100SKU;
- fast_calibrate = ratio * fsb;
- pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
- lapic_timer_frequency = fsb * 1000 / HZ;
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
-
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
-}
-
-static void __init mrst_time_init(void)
-{
- sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
- switch (mrst_timer_options) {
- case MRST_TIMER_APBT_ONLY:
- break;
- case MRST_TIMER_LAPIC_APBT:
- x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
- x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
- break;
- default:
- if (!boot_cpu_has(X86_FEATURE_ARAT))
- break;
- x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
- x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
- return;
- }
- /* we need at least one APB timer */
- pre_init_apic_IRQ0();
- apbt_time_init();
-}
-
-static void __cpuinit mrst_arch_setup(void)
-{
- if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
- __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- else {
- pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
- boot_cpu_data.x86, boot_cpu_data.x86_model);
- __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- }
-}
-
-/* MID systems don't have i8042 controller */
-static int mrst_i8042_detect(void)
-{
- return 0;
-}
-
-/*
- * Moorestown does not have external NMI source nor port 0x61 to report
- * NMI status. The possible NMI sources are from pmu as a result of NMI
- * watchdog or lock debug. Reading io port 0x61 results in 0xff which
- * misled NMI handler.
- */
-static unsigned char mrst_get_nmi_reason(void)
-{
- return 0;
-}
-
-/*
- * Moorestown specific x86_init function overrides and early setup
- * calls.
- */
-void __init x86_mrst_early_setup(void)
-{
- x86_init.resources.probe_roms = x86_init_noop;
- x86_init.resources.reserve_resources = x86_init_noop;
-
- x86_init.timers.timer_init = mrst_time_init;
- x86_init.timers.setup_percpu_clockev = x86_init_noop;
-
- x86_init.irqs.pre_vector_init = x86_init_noop;
-
- x86_init.oem.arch_setup = mrst_arch_setup;
-
- x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
-
- x86_platform.calibrate_tsc = mrst_calibrate_tsc;
- x86_platform.i8042_detect = mrst_i8042_detect;
- x86_init.timers.wallclock_init = mrst_rtc_init;
- x86_platform.get_nmi_reason = mrst_get_nmi_reason;
-
- x86_init.pci.init = pci_mrst_init;
- x86_init.pci.fixup_irqs = x86_init_noop;
-
- legacy_pic = &null_legacy_pic;
-
- /* Moorestown specific power_off/restart method */
- pm_power_off = mrst_power_off;
- machine_ops.emergency_restart = mrst_reboot;
-
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- set_bit(MP_BUS_ISA, mp_bus_not_pci);
-}
-
-/*
- * if user does not want to use per CPU apb timer, just give it a lower rating
- * than local apic timer and skip the late per cpu timer init.
- */
-static inline int __init setup_x86_mrst_timer(char *arg)
-{
- if (!arg)
- return -EINVAL;
-
- if (strcmp("apbt_only", arg) == 0)
- mrst_timer_options = MRST_TIMER_APBT_ONLY;
- else if (strcmp("lapic_and_apbt", arg) == 0)
- mrst_timer_options = MRST_TIMER_LAPIC_APBT;
- else {
- pr_warning("X86 MRST timer option %s not recognised"
- " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
- arg);
- return -EINVAL;
- }
- return 0;
-}
-__setup("x86_mrst_timer=", setup_x86_mrst_timer);
-
-/*
- * Parsing GPIO table first, since the DEVS table will need this table
- * to map the pin name to the actual pin.
- */
-static struct sfi_gpio_table_entry *gpio_table;
-static int gpio_num_entry;
-
-static int __init sfi_parse_gpio(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_gpio_table_entry *pentry;
- int num, i;
-
- if (gpio_table)
- return 0;
- sb = (struct sfi_table_simple *)table;
- num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
- pentry = (struct sfi_gpio_table_entry *)sb->pentry;
-
- gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL);
- if (!gpio_table)
- return -1;
- memcpy(gpio_table, pentry, num * sizeof(*pentry));
- gpio_num_entry = num;
-
- pr_debug("GPIO pin info:\n");
- for (i = 0; i < num; i++, pentry++)
- pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
- " pin = %d\n", i,
- pentry->controller_name,
- pentry->pin_name,
- pentry->pin_no);
- return 0;
-}
-
-static int get_gpio_by_name(const char *name)
-{
- struct sfi_gpio_table_entry *pentry = gpio_table;
- int i;
-
- if (!pentry)
- return -1;
- for (i = 0; i < gpio_num_entry; i++, pentry++) {
- if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
- return pentry->pin_no;
- }
- return -1;
-}
-
-/*
- * Here defines the array of devices platform data that IAFW would export
- * through SFI "DEVS" table, we use name and type to match the device and
- * its platform data.
- */
-struct devs_id {
- char name[SFI_NAME_LEN + 1];
- u8 type;
- u8 delay;
- void *(*get_platform_data)(void *info);
-};
-
-/* the offset for the mapping of global gpio pin to irq */
-#define MRST_IRQ_OFFSET 0x100
-
-static void __init *pmic_gpio_platform_data(void *info)
-{
- static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
- int gpio_base = get_gpio_by_name("pmic_gpio_base");
-
- if (gpio_base == -1)
- gpio_base = 64;
- pmic_gpio_pdata.gpio_base = gpio_base;
- pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
- pmic_gpio_pdata.gpiointr = 0xffffeff8;
-
- return &pmic_gpio_pdata;
-}
-
-static void __init *max3111_platform_data(void *info)
-{
- struct spi_board_info *spi_info = info;
- int intr = get_gpio_by_name("max3111_int");
-
- spi_info->mode = SPI_MODE_0;
- if (intr == -1)
- return NULL;
- spi_info->irq = intr + MRST_IRQ_OFFSET;
- return NULL;
-}
-
-/* we have multiple max7315 on the board ... */
-#define MAX7315_NUM 2
-static void __init *max7315_platform_data(void *info)
-{
- static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
- static int nr;
- struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
- struct i2c_board_info *i2c_info = info;
- int gpio_base, intr;
- char base_pin_name[SFI_NAME_LEN + 1];
- char intr_pin_name[SFI_NAME_LEN + 1];
-
- if (nr == MAX7315_NUM) {
- pr_err("too many max7315s, we only support %d\n",
- MAX7315_NUM);
- return NULL;
- }
- /* we have several max7315 on the board, we only need load several
- * instances of the same pca953x driver to cover them
- */
- strcpy(i2c_info->type, "max7315");
- if (nr++) {
- sprintf(base_pin_name, "max7315_%d_base", nr);
- sprintf(intr_pin_name, "max7315_%d_int", nr);
- } else {
- strcpy(base_pin_name, "max7315_base");
- strcpy(intr_pin_name, "max7315_int");
- }
-
- gpio_base = get_gpio_by_name(base_pin_name);
- intr = get_gpio_by_name(intr_pin_name);
-
- if (gpio_base == -1)
- return NULL;
- max7315->gpio_base = gpio_base;
- if (intr != -1) {
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
- } else {
- i2c_info->irq = -1;
- max7315->irq_base = -1;
- }
- return max7315;
-}
-
-static void *tca6416_platform_data(void *info)
-{
- static struct pca953x_platform_data tca6416;
- struct i2c_board_info *i2c_info = info;
- int gpio_base, intr;
- char base_pin_name[SFI_NAME_LEN + 1];
- char intr_pin_name[SFI_NAME_LEN + 1];
-
- strcpy(i2c_info->type, "tca6416");
- strcpy(base_pin_name, "tca6416_base");
- strcpy(intr_pin_name, "tca6416_int");
-
- gpio_base = get_gpio_by_name(base_pin_name);
- intr = get_gpio_by_name(intr_pin_name);
-
- if (gpio_base == -1)
- return NULL;
- tca6416.gpio_base = gpio_base;
- if (intr != -1) {
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET;
- } else {
- i2c_info->irq = -1;
- tca6416.irq_base = -1;
- }
- return &tca6416;
-}
-
-static void *mpu3050_platform_data(void *info)
-{
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("mpu3050_int");
-
- if (intr == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- return NULL;
-}
-
-static void __init *emc1403_platform_data(void *info)
-{
- static short intr2nd_pdata;
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("thermal_int");
- int intr2nd = get_gpio_by_name("thermal_alert");
-
- if (intr == -1 || intr2nd == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
-
- return &intr2nd_pdata;
-}
-
-static void __init *lis331dl_platform_data(void *info)
-{
- static short intr2nd_pdata;
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("accel_int");
- int intr2nd = get_gpio_by_name("accel_2");
-
- if (intr == -1 || intr2nd == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
-
- return &intr2nd_pdata;
-}
-
-static void __init *no_platform_data(void *info)
-{
- return NULL;
-}
-
-static struct resource msic_resources[] = {
- {
- .start = INTEL_MSIC_IRQ_PHYS_BASE,
- .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static struct intel_msic_platform_data msic_pdata;
-
-static struct platform_device msic_device = {
- .name = "intel_msic",
- .id = -1,
- .dev = {
- .platform_data = &msic_pdata,
- },
- .num_resources = ARRAY_SIZE(msic_resources),
- .resource = msic_resources,
-};
-
-static inline bool mrst_has_msic(void)
-{
- return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL;
-}
-
-static int msic_scu_status_change(struct notifier_block *nb,
- unsigned long code, void *data)
-{
- if (code == SCU_DOWN) {
- platform_device_unregister(&msic_device);
- return 0;
- }
-
- return platform_device_register(&msic_device);
-}
-
-static int __init msic_init(void)
-{
- static struct notifier_block msic_scu_notifier = {
- .notifier_call = msic_scu_status_change,
- };
-
- /*
- * We need to be sure that the SCU IPC is ready before MSIC device
- * can be registered.
- */
- if (mrst_has_msic())
- intel_scu_notifier_add(&msic_scu_notifier);
-
- return 0;
-}
-arch_initcall(msic_init);
-
-/*
- * msic_generic_platform_data - sets generic platform data for the block
- * @info: pointer to the SFI device table entry for this block
- * @block: MSIC block
- *
- * Function sets IRQ number from the SFI table entry for given device to
- * the MSIC platform data.
- */
-static void *msic_generic_platform_data(void *info, enum intel_msic_block block)
-{
- struct sfi_device_table_entry *entry = info;
-
- BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
- msic_pdata.irq[block] = entry->irq;
-
- return no_platform_data(info);
-}
-
-static void *msic_battery_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
-}
-
-static void *msic_gpio_platform_data(void *info)
-{
- static struct intel_msic_gpio_pdata pdata;
- int gpio = get_gpio_by_name("msic_gpio_base");
-
- if (gpio < 0)
- return NULL;
-
- pdata.gpio_base = gpio;
- msic_pdata.gpio = &pdata;
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
-}
-
-static void *msic_audio_platform_data(void *info)
-{
- struct platform_device *pdev;
-
- pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- pr_err("failed to create audio platform device\n");
- return NULL;
- }
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
-}
-
-static void *msic_power_btn_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
-}
-
-static void *msic_ocd_platform_data(void *info)
-{
- static struct intel_msic_ocd_pdata pdata;
- int gpio = get_gpio_by_name("ocd_gpio");
-
- if (gpio < 0)
- return NULL;
-
- pdata.gpio = gpio;
- msic_pdata.ocd = &pdata;
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
-}
-
-static void *msic_thermal_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
-}
-
-/* tc35876x DSI-LVDS bridge chip and panel platform data */
-static void *tc35876x_platform_data(void *data)
-{
- static struct tc35876x_platform_data pdata;
-
- /* gpio pins set to -1 will not be used by the driver */
- pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
- pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
- pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
-
- return &pdata;
-}
-
-static const struct devs_id __initconst device_ids[] = {
- {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
- {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
- {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data},
- {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
- {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
- {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
- {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data},
- {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
- {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
- {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
- {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
- {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
-
- /* MSIC subdevices */
- {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
- {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data},
- {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
- {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
- {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
- {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
-
- {},
-};
-
-#define MAX_IPCDEVS 24
-static struct platform_device *ipc_devs[MAX_IPCDEVS];
-static int ipc_next_dev;
-
-#define MAX_SCU_SPI 24
-static struct spi_board_info *spi_devs[MAX_SCU_SPI];
-static int spi_next_dev;
-
-#define MAX_SCU_I2C 24
-static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
-static int i2c_bus[MAX_SCU_I2C];
-static int i2c_next_dev;
-
-static void __init intel_scu_device_register(struct platform_device *pdev)
-{
- if(ipc_next_dev == MAX_IPCDEVS)
- pr_err("too many SCU IPC devices");
- else
- ipc_devs[ipc_next_dev++] = pdev;
-}
-
-static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
-{
- struct spi_board_info *new_dev;
-
- if (spi_next_dev == MAX_SCU_SPI) {
- pr_err("too many SCU SPI devices");
- return;
- }
-
- new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
- if (!new_dev) {
- pr_err("failed to alloc mem for delayed spi dev %s\n",
- sdev->modalias);
- return;
- }
- memcpy(new_dev, sdev, sizeof(*sdev));
-
- spi_devs[spi_next_dev++] = new_dev;
-}
-
-static void __init intel_scu_i2c_device_register(int bus,
- struct i2c_board_info *idev)
-{
- struct i2c_board_info *new_dev;
-
- if (i2c_next_dev == MAX_SCU_I2C) {
- pr_err("too many SCU I2C devices");
- return;
- }
-
- new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
- if (!new_dev) {
- pr_err("failed to alloc mem for delayed i2c dev %s\n",
- idev->type);
- return;
- }
- memcpy(new_dev, idev, sizeof(*idev));
-
- i2c_bus[i2c_next_dev] = bus;
- i2c_devs[i2c_next_dev++] = new_dev;
-}
-
-BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
-EXPORT_SYMBOL_GPL(intel_scu_notifier);
-
-/* Called by IPC driver */
-void intel_scu_devices_create(void)
-{
- int i;
-
- for (i = 0; i < ipc_next_dev; i++)
- platform_device_add(ipc_devs[i]);
-
- for (i = 0; i < spi_next_dev; i++)
- spi_register_board_info(spi_devs[i], 1);
-
- for (i = 0; i < i2c_next_dev; i++) {
- struct i2c_adapter *adapter;
- struct i2c_client *client;
-
- adapter = i2c_get_adapter(i2c_bus[i]);
- if (adapter) {
- client = i2c_new_device(adapter, i2c_devs[i]);
- if (!client)
- pr_err("can't create i2c device %s\n",
- i2c_devs[i]->type);
- } else
- i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
- }
- intel_scu_notifier_post(SCU_AVAILABLE, NULL);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_create);
-
-/* Called by IPC driver */
-void intel_scu_devices_destroy(void)
-{
- int i;
-
- intel_scu_notifier_post(SCU_DOWN, NULL);
-
- for (i = 0; i < ipc_next_dev; i++)
- platform_device_del(ipc_devs[i]);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
-
-static void __init install_irq_resource(struct platform_device *pdev, int irq)
-{
- /* Single threaded */
- static struct resource __initdata res = {
- .name = "IRQ",
- .flags = IORESOURCE_IRQ,
- };
- res.start = irq;
- platform_device_add_resources(pdev, &res, 1);
-}
-
-static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
-{
- const struct devs_id *dev = device_ids;
- struct platform_device *pdev;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_IPC &&
- !strncmp(dev->name, entry->name, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(entry);
- break;
- }
- dev++;
- }
-
- /*
- * On Medfield the platform device creation is handled by the MSIC
- * MFD driver so we don't need to do it here.
- */
- if (mrst_has_msic())
- return;
-
- pdev = platform_device_alloc(entry->name, 0);
- if (pdev == NULL) {
- pr_err("out of memory for SFI platform device '%s'.\n",
- entry->name);
- return;
- }
- install_irq_resource(pdev, entry->irq);
-
- pdev->dev.platform_data = pdata;
- intel_scu_device_register(pdev);
-}
-
-static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
-{
- const struct devs_id *dev = device_ids;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_SPI &&
- !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(spi_info);
- break;
- }
- dev++;
- }
- spi_info->platform_data = pdata;
- if (dev->delay)
- intel_scu_spi_device_register(spi_info);
- else
- spi_register_board_info(spi_info, 1);
-}
-
-static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
-{
- const struct devs_id *dev = device_ids;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_I2C &&
- !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(i2c_info);
- break;
- }
- dev++;
- }
- i2c_info->platform_data = pdata;
-
- if (dev->delay)
- intel_scu_i2c_device_register(bus, i2c_info);
- else
- i2c_register_board_info(bus, i2c_info, 1);
- }
-
-
-static int __init sfi_parse_devs(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_device_table_entry *pentry;
- struct spi_board_info spi_info;
- struct i2c_board_info i2c_info;
- int num, i, bus;
- int ioapic;
- struct io_apic_irq_attr irq_attr;
-
- sb = (struct sfi_table_simple *)table;
- num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
- pentry = (struct sfi_device_table_entry *)sb->pentry;
-
- for (i = 0; i < num; i++, pentry++) {
- int irq = pentry->irq;
-
- if (irq != (u8)0xff) { /* native RTE case */
- /* these SPI2 devices are not exposed to system as PCI
- * devices, but they have separate RTE entry in IOAPIC
- * so we have to enable them one by one here
- */
- ioapic = mp_find_ioapic(irq);
- irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = irq;
- irq_attr.trigger = 1;
- irq_attr.polarity = 1;
- io_apic_set_pci_routing(NULL, irq, &irq_attr);
- } else
- irq = 0; /* No irq */
-
- switch (pentry->type) {
- case SFI_DEV_TYPE_IPC:
- pr_debug("info[%2d]: IPC bus, name = %16.16s, "
- "irq = 0x%2x\n", i, pentry->name, pentry->irq);
- sfi_handle_ipc_dev(pentry);
- break;
- case SFI_DEV_TYPE_SPI:
- memset(&spi_info, 0, sizeof(spi_info));
- strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
- spi_info.irq = irq;
- spi_info.bus_num = pentry->host_num;
- spi_info.chip_select = pentry->addr;
- spi_info.max_speed_hz = pentry->max_freq;
- pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
- "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
- spi_info.bus_num,
- spi_info.modalias,
- spi_info.irq,
- spi_info.max_speed_hz,
- spi_info.chip_select);
- sfi_handle_spi_dev(&spi_info);
- break;
- case SFI_DEV_TYPE_I2C:
- memset(&i2c_info, 0, sizeof(i2c_info));
- bus = pentry->host_num;
- strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
- i2c_info.irq = irq;
- i2c_info.addr = pentry->addr;
- pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
- "irq = 0x%2x, addr = 0x%x\n", i, bus,
- i2c_info.type,
- i2c_info.irq,
- i2c_info.addr);
- sfi_handle_i2c_dev(bus, &i2c_info);
- break;
- case SFI_DEV_TYPE_UART:
- case SFI_DEV_TYPE_HSI:
- default:
- ;
- }
- }
- return 0;
-}
-
-static int __init mrst_platform_init(void)
-{
- sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
- sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
- return 0;
-}
-arch_initcall(mrst_platform_init);
-
-/*
- * we will search these buttons in SFI GPIO table (by name)
- * and register them dynamically. Please add all possible
- * buttons here, we will shrink them if no GPIO found.
- */
-static struct gpio_keys_button gpio_button[] = {
- {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
- {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
- {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
- {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
- {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
- {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
- {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
- {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
- {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
- {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
-};
-
-static struct gpio_keys_platform_data mrst_gpio_keys = {
- .buttons = gpio_button,
- .rep = 1,
- .nbuttons = -1, /* will fill it after search */
-};
-
-static struct platform_device pb_device = {
- .name = "gpio-keys",
- .id = -1,
- .dev = {
- .platform_data = &mrst_gpio_keys,
- },
-};
-
-/*
- * Shrink the non-existent buttons, register the gpio button
- * device if there is some
- */
-static int __init pb_keys_init(void)
-{
- struct gpio_keys_button *gb = gpio_button;
- int i, num, good = 0;
-
- num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
- for (i = 0; i < num; i++) {
- gb[i].gpio = get_gpio_by_name(gb[i].desc);
- pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio);
- if (gb[i].gpio == -1)
- continue;
-
- if (i != good)
- gb[good] = gb[i];
- good++;
- }
-
- if (good) {
- mrst_gpio_keys.nbuttons = good;
- return platform_device_register(&pb_device);
- }
- return 0;
-}
-late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
deleted file mode 100644
index 5e355b134ba4..000000000000
--- a/arch/x86/platform/mrst/vrtc.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * vrtc.c: Driver for virtual RTC device on Intel MID platform
- *
- * (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * Note:
- * VRTC is emulated by system controller firmware, the real HW
- * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
- * in a memory mapped IO space that is visible to the host IA
- * processor.
- *
- * This driver is based on RTC CMOS driver.
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/platform_device.h>
-
-#include <asm/mrst.h>
-#include <asm/mrst-vrtc.h>
-#include <asm/time.h>
-#include <asm/fixmap.h>
-
-static unsigned char __iomem *vrtc_virt_base;
-
-unsigned char vrtc_cmos_read(unsigned char reg)
-{
- unsigned char retval;
-
- /* vRTC's registers range from 0x0 to 0xD */
- if (reg > 0xd || !vrtc_virt_base)
- return 0xff;
-
- lock_cmos_prefix(reg);
- retval = __raw_readb(vrtc_virt_base + (reg << 2));
- lock_cmos_suffix(reg);
- return retval;
-}
-EXPORT_SYMBOL_GPL(vrtc_cmos_read);
-
-void vrtc_cmos_write(unsigned char val, unsigned char reg)
-{
- if (reg > 0xd || !vrtc_virt_base)
- return;
-
- lock_cmos_prefix(reg);
- __raw_writeb(val, vrtc_virt_base + (reg << 2));
- lock_cmos_suffix(reg);
-}
-EXPORT_SYMBOL_GPL(vrtc_cmos_write);
-
-void vrtc_get_time(struct timespec *now)
-{
- u8 sec, min, hour, mday, mon;
- unsigned long flags;
- u32 year;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
- cpu_relax();
-
- sec = vrtc_cmos_read(RTC_SECONDS);
- min = vrtc_cmos_read(RTC_MINUTES);
- hour = vrtc_cmos_read(RTC_HOURS);
- mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
- mon = vrtc_cmos_read(RTC_MONTH);
- year = vrtc_cmos_read(RTC_YEAR);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- /* vRTC YEAR reg contains the offset to 1972 */
- year += 1972;
-
- printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
- "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
-
- now->tv_sec = mktime(year, mon, mday, hour, min, sec);
- now->tv_nsec = 0;
-}
-
-int vrtc_set_mmss(const struct timespec *now)
-{
- unsigned long flags;
- struct rtc_time tm;
- int year;
- int retval = 0;
-
- rtc_time_to_tm(now->tv_sec, &tm);
- if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
- /*
- * tm.year is the number of years since 1900, and the
- * vrtc need the years since 1972.
- */
- year = tm.tm_year - 72;
- spin_lock_irqsave(&rtc_lock, flags);
- vrtc_cmos_write(year, RTC_YEAR);
- vrtc_cmos_write(tm.tm_mon, RTC_MONTH);
- vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH);
- vrtc_cmos_write(tm.tm_hour, RTC_HOURS);
- vrtc_cmos_write(tm.tm_min, RTC_MINUTES);
- vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
- spin_unlock_irqrestore(&rtc_lock, flags);
- } else {
- printk(KERN_ERR
- "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
- __FUNCTION__, now->tv_sec);
- retval = -EINVAL;
- }
- return retval;
-}
-
-void __init mrst_rtc_init(void)
-{
- unsigned long vrtc_paddr;
-
- sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
-
- vrtc_paddr = sfi_mrtc_array[0].phys_addr;
- if (!sfi_mrtc_num || !vrtc_paddr)
- return;
-
- vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
- vrtc_paddr);
- x86_platform.get_wallclock = vrtc_get_time;
- x86_platform.set_wallclock = vrtc_set_mmss;
-}
-
-/*
- * The Moorestown platform has a memory mapped virtual RTC device that emulates
- * the programming interface of the RTC.
- */
-
-static struct resource vrtc_resources[] = {
- [0] = {
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .flags = IORESOURCE_IRQ,
- }
-};
-
-static struct platform_device vrtc_device = {
- .name = "rtc_mrst",
- .id = -1,
- .resource = vrtc_resources,
- .num_resources = ARRAY_SIZE(vrtc_resources),
-};
-
-/* Register the RTC device if appropriate */
-static int __init mrst_device_create(void)
-{
- /* No Moorestown, no device */
- if (!mrst_identify_cpu())
- return -ENODEV;
- /* No timer, no device */
- if (!sfi_mrtc_num)
- return -ENODEV;
-
- /* iomem resource */
- vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
- vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
- MRST_VRTC_MAP_SZ;
- /* irq resource */
- vrtc_resources[1].start = sfi_mrtc_array[0].irq;
- vrtc_resources[1].end = sfi_mrtc_array[0].irq;
-
- return platform_device_register(&vrtc_device);
-}
-
-module_init(mrst_device_create);
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index fd332c533947..049f92a9379d 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o
obj-$(CONFIG_OLPC_XO1_PM) += olpc-xo1-pm.o xo1-wakeup.o
obj-$(CONFIG_OLPC_XO1_RTC) += olpc-xo1-rtc.o
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index ff0174dda810..424eeae12759 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for power management features of the OLPC XO-1 laptop
*
@@ -5,18 +6,12 @@
* Copyright (C) 2010 One Laptop per Child
* Copyright (C) 2006 Red Hat, Inc.
* Copyright (C) 2006 Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/cs5535.h>
#include <linux/platform_device.h>
#include <linux/export.h>
#include <linux/pm.h>
-#include <linux/mfd/core.h>
#include <linux/suspend.h>
#include <linux/olpc-ec.h>
@@ -75,9 +70,9 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
return 0;
}
-asmlinkage int xo1_do_sleep(u8 sleep_state)
+asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
- void *pgd_addr = __va(read_cr3());
+ void *pgd_addr = __va(read_cr3_pa());
/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
@@ -124,16 +119,11 @@ static const struct platform_suspend_ops xo1_suspend_ops = {
static int xo1_pm_probe(struct platform_device *pdev)
{
struct resource *res;
- int err;
/* don't run on non-XOs */
if (!machine_is_olpc())
return -ENODEV;
- err = mfd_cell_enable(pdev);
- if (err)
- return err;
-
res = platform_get_resource(pdev, IORESOURCE_IO, 0);
if (!res) {
dev_err(&pdev->dev, "can't fetch device resource info\n");
@@ -154,23 +144,19 @@ static int xo1_pm_probe(struct platform_device *pdev)
return 0;
}
-static int xo1_pm_remove(struct platform_device *pdev)
+static void xo1_pm_remove(struct platform_device *pdev)
{
- mfd_cell_disable(pdev);
-
if (strcmp(pdev->name, "cs5535-pms") == 0)
pms_base = 0;
else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
acpi_base = 0;
pm_power_off = NULL;
- return 0;
}
static struct platform_driver cs5535_pms_driver = {
.driver = {
.name = "cs5535-pms",
- .owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
.remove = xo1_pm_remove,
@@ -179,7 +165,6 @@ static struct platform_driver cs5535_pms_driver = {
static struct platform_driver cs5535_acpi_driver = {
.driver = {
.name = "olpc-xo1-pm-acpi",
- .owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
.remove = xo1_pm_remove,
diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c
index a2b4efddd61a..ee77d57bcab7 100644
--- a/arch/x86/platform/olpc/olpc-xo1-rtc.c
+++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1 Real Time Clock (RTC)
*
* Copyright (C) 2011 One Laptop per Child
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/mc146818rtc.h>
@@ -16,6 +12,7 @@
#include <asm/msr.h>
#include <asm/olpc.h>
+#include <asm/x86_init.h>
static void rtc_wake_on(struct device *dev)
{
@@ -67,14 +64,16 @@ static int __init xo1_rtc_init(void)
of_node_put(node);
pr_info("olpc-xo1-rtc: Initializing OLPC XO-1 RTC\n");
- rdmsrl(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm);
- rdmsrl(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm);
- rdmsrl(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century);
+ rdmsrq(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm);
+ rdmsrq(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm);
+ rdmsrq(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century);
r = platform_device_register(&xo1_rtc_device);
if (r)
return r;
+ x86_platform.legacy.rtc = 0;
+
device_init_wakeup(&xo1_rtc_device.dev, 1);
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 9a2e590dd202..30751b42d54e 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1 System Control Interrupts (SCI)
*
* Copyright (C) 2010 One Laptop per Child
* Copyright (C) 2006 Red Hat, Inc.
* Copyright (C) 2006 Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/cs5535.h>
@@ -18,8 +14,6 @@
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
-#include <linux/pm_wakeup.h>
-#include <linux/mfd/core.h>
#include <linux/power_supply.h>
#include <linux/suspend.h>
#include <linux/workqueue.h>
@@ -57,21 +51,21 @@ static const char * const lid_wake_mode_names[] = {
static void battery_status_changed(void)
{
- struct power_supply *psy = power_supply_get_by_name("olpc-battery");
+ struct power_supply *psy = power_supply_get_by_name("olpc_battery");
if (psy) {
power_supply_changed(psy);
- put_device(psy->dev);
+ power_supply_put(psy);
}
}
static void ac_status_changed(void)
{
- struct power_supply *psy = power_supply_get_by_name("olpc-ac");
+ struct power_supply *psy = power_supply_get_by_name("olpc_ac");
if (psy) {
power_supply_changed(psy);
- put_device(psy->dev);
+ power_supply_put(psy);
}
}
@@ -85,7 +79,7 @@ static void send_ebook_state(void)
return;
}
- if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
+ if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state)
return; /* Nothing new to report. */
input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
@@ -109,7 +103,7 @@ static void detect_lid_state(void)
* the edge detector hookup on the gpio inputs on the geode is
* odd, to say the least. See http://dev.laptop.org/ticket/5703
* for details, but in a nutshell: we don't use the edge
- * detectors. instead, we make use of an anomoly: with the both
+ * detectors. instead, we make use of an anomaly: with the both
* edge detectors turned off, we still get an edge event on a
* positive edge transition. to take advantage of this, we use the
* front-end inverter to ensure that that's the edge we're always
@@ -161,6 +155,12 @@ static ssize_t lid_wake_mode_set(struct device *dev,
static DEVICE_ATTR(lid_wake_mode, S_IWUSR | S_IRUGO, lid_wake_mode_show,
lid_wake_mode_set);
+static struct attribute *lid_attrs[] = {
+ &dev_attr_lid_wake_mode.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(lid);
+
/*
* Process all items in the EC's SCI queue.
*
@@ -325,7 +325,7 @@ static int setup_sci_interrupt(struct platform_device *pdev)
dev_info(&pdev->dev, "SCI unmapped. Mapping to IRQ 3\n");
sci_irq = 3;
lo |= 0x00300000;
- wrmsrl(0x51400020, lo);
+ wrmsrq(0x51400020, lo);
}
/* Select level triggered in PIC */
@@ -514,17 +514,8 @@ static int setup_lid_switch(struct platform_device *pdev)
goto err_register;
}
- r = device_create_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode);
- if (r) {
- dev_err(&pdev->dev, "failed to create wake mode attr: %d\n", r);
- goto err_create_attr;
- }
-
return 0;
-err_create_attr:
- input_unregister_device(lid_switch_idev);
- lid_switch_idev = NULL;
err_register:
input_free_device(lid_switch_idev);
return r;
@@ -532,7 +523,6 @@ err_register:
static void free_lid_switch(void)
{
- device_remove_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode);
input_unregister_device(lid_switch_idev);
}
@@ -545,10 +535,6 @@ static int xo1_sci_probe(struct platform_device *pdev)
if (!machine_is_olpc())
return -ENODEV;
- r = mfd_cell_enable(pdev);
- if (r)
- return r;
-
res = platform_get_resource(pdev, IORESOURCE_IO, 0);
if (!res) {
dev_err(&pdev->dev, "can't fetch device resource info\n");
@@ -611,9 +597,8 @@ err_ebook:
return r;
}
-static int xo1_sci_remove(struct platform_device *pdev)
+static void xo1_sci_remove(struct platform_device *pdev)
{
- mfd_cell_disable(pdev);
free_irq(sci_irq, pdev);
cancel_work_sync(&sci_work);
free_ec_sci();
@@ -622,12 +607,12 @@ static int xo1_sci_remove(struct platform_device *pdev)
free_ebook_switch();
free_power_button();
acpi_base = 0;
- return 0;
}
static struct platform_driver xo1_sci_driver = {
.driver = {
.name = "olpc-xo1-sci-acpi",
+ .dev_groups = lid_groups,
},
.probe = xo1_sci_probe,
.remove = xo1_sci_remove,
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index fef7d0ba7e3a..68244a3422d1 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1.5 System Control Interrupts (SCI)
*
* Copyright (C) 2009-2010 One Laptop per Child
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/device.h>
@@ -15,8 +11,7 @@
#include <linux/power_supply.h>
#include <linux/olpc-ec.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
+#include <linux/acpi.h>
#include <asm/olpc.h>
#define DRV_NAME "olpc-xo15-sci"
@@ -32,7 +27,7 @@ static bool lid_wake_on_close;
* wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
*
* We provide here a sysfs attribute that will additionally enable
- * wake-on-close behavior. This is useful (e.g.) when we oportunistically
+ * wake-on-close behavior. This is useful (e.g.) when we opportunistically
* suspend with the display running; if the lid is then closed, we want to
* wake up to turn the display off.
*
@@ -40,18 +35,11 @@ static bool lid_wake_on_close;
*/
static int set_lid_wake_behavior(bool wake_on_close)
{
- struct acpi_object_list arg_list;
- union acpi_object arg;
acpi_status status;
- arg_list.count = 1;
- arg_list.pointer = &arg;
- arg.type = ACPI_TYPE_INTEGER;
- arg.integer.value = wake_on_close;
-
- status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
+ status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
if (ACPI_FAILURE(status)) {
- pr_warning(PFX "failed to set lid behavior\n");
+ pr_warn(PFX "failed to set lid behavior\n");
return 1;
}
@@ -87,21 +75,21 @@ static struct kobj_attribute lid_wake_on_close_attr =
static void battery_status_changed(void)
{
- struct power_supply *psy = power_supply_get_by_name("olpc-battery");
+ struct power_supply *psy = power_supply_get_by_name("olpc_battery");
if (psy) {
power_supply_changed(psy);
- put_device(psy->dev);
+ power_supply_put(psy);
}
}
static void ac_status_changed(void)
{
- struct power_supply *psy = power_supply_get_by_name("olpc-ac");
+ struct power_supply *psy = power_supply_get_by_name("olpc_ac");
if (psy) {
power_supply_changed(psy);
- put_device(psy->dev);
+ power_supply_put(psy);
}
}
@@ -195,15 +183,15 @@ err_sysfs:
return r;
}
-static int xo15_sci_remove(struct acpi_device *device)
+static void xo15_sci_remove(struct acpi_device *device)
{
acpi_disable_gpe(NULL, xo15_sci_gpe);
acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
cancel_work_sync(&sci_work);
sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
- return 0;
}
+#ifdef CONFIG_PM_SLEEP
static int xo15_sci_resume(struct device *dev)
{
/* Enable all EC events */
@@ -215,6 +203,7 @@ static int xo15_sci_resume(struct device *dev)
return 0;
}
+#endif
static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume);
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 27376081ddec..1d4a00e767ec 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -1,18 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for the OLPC DCON and OLPC EC access
*
* Copyright © 2006 Advanced Micro Devices, Inc.
* Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/string.h>
@@ -30,9 +26,6 @@
struct olpc_platform_t olpc_platform_info;
EXPORT_SYMBOL_GPL(olpc_platform_info);
-/* EC event mask to be applied during suspend (defining wakeup sources). */
-static u16 ec_wakeup_mask;
-
/* what the timeout *should* be (in ms) */
#define EC_BASE_TIMEOUT 20
@@ -186,83 +179,6 @@ err:
return ret;
}
-void olpc_ec_wakeup_set(u16 value)
-{
- ec_wakeup_mask |= value;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set);
-
-void olpc_ec_wakeup_clear(u16 value)
-{
- ec_wakeup_mask &= ~value;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear);
-
-/*
- * Returns true if the compile and runtime configurations allow for EC events
- * to wake the system.
- */
-bool olpc_ec_wakeup_available(void)
-{
- if (!machine_is_olpc())
- return false;
-
- /*
- * XO-1 EC wakeups are available when olpc-xo1-sci driver is
- * compiled in
- */
-#ifdef CONFIG_OLPC_XO1_SCI
- if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */
- return true;
-#endif
-
- /*
- * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is
- * compiled in
- */
-#ifdef CONFIG_OLPC_XO15_SCI
- if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */
- return true;
-#endif
-
- return false;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available);
-
-int olpc_ec_mask_write(u16 bits)
-{
- if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
- __be16 ec_word = cpu_to_be16(bits);
- return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2,
- NULL, 0);
- } else {
- unsigned char ec_byte = bits & 0xff;
- return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0);
- }
-}
-EXPORT_SYMBOL_GPL(olpc_ec_mask_write);
-
-int olpc_ec_sci_query(u16 *sci_value)
-{
- int ret;
-
- if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
- __be16 ec_word;
- ret = olpc_ec_cmd(EC_EXT_SCI_QUERY,
- NULL, 0, (void *) &ec_word, 2);
- if (ret == 0)
- *sci_value = be16_to_cpu(ec_word);
- } else {
- unsigned char ec_byte;
- ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1);
- if (ret == 0)
- *sci_value = ec_byte;
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
-
static bool __init check_ofw_architecture(struct device_node *root)
{
const char *olpc_arch;
@@ -296,6 +212,10 @@ static bool __init platform_detect(void)
if (success) {
olpc_platform_info.boardrev = get_board_revision(root);
olpc_platform_info.flags |= OLPC_F_PRESENT;
+
+ pr_info("OLPC board revision %s%X\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4);
}
of_node_put(root);
@@ -311,33 +231,12 @@ static int __init add_xo1_platform_devices(void)
return PTR_ERR(pdev);
pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
- return 0;
+ return PTR_ERR_OR_ZERO(pdev);
}
-static int olpc_xo1_ec_probe(struct platform_device *pdev)
-{
- /* get the EC revision */
- olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
- (unsigned char *) &olpc_platform_info.ecver, 1);
-
- /* EC version 0x5f adds support for wide SCI mask */
- if (olpc_platform_info.ecver >= 0x5f)
- olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
-
- pr_info("OLPC board revision %s%X (EC=%x)\n",
- ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
- olpc_platform_info.boardrev >> 4,
- olpc_platform_info.ecver);
-
- return 0;
-}
static int olpc_xo1_ec_suspend(struct platform_device *pdev)
{
- olpc_ec_mask_write(ec_wakeup_mask);
-
/*
* Squelch SCIs while suspended. This is a fix for
* <http://dev.laptop.org/ticket/1835>.
@@ -361,15 +260,27 @@ static int olpc_xo1_ec_resume(struct platform_device *pdev)
}
static struct olpc_ec_driver ec_xo1_driver = {
- .probe = olpc_xo1_ec_probe,
.suspend = olpc_xo1_ec_suspend,
.resume = olpc_xo1_ec_resume,
.ec_cmd = olpc_xo1_ec_cmd,
+#ifdef CONFIG_OLPC_XO1_SCI
+ /*
+ * XO-1 EC wakeups are available when olpc-xo1-sci driver is
+ * compiled in
+ */
+ .wakeup_available = true,
+#endif
};
static struct olpc_ec_driver ec_xo1_5_driver = {
- .probe = olpc_xo1_ec_probe,
.ec_cmd = olpc_xo1_ec_cmd,
+#ifdef CONFIG_OLPC_XO15_SCI
+ /*
+ * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is
+ * compiled in
+ */
+ .wakeup_available = true,
+#endif
};
static int __init olpc_init(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index d6ee92986920..e108ce7dad6a 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* OLPC-specific OFW device tree support code.
*
@@ -9,17 +10,11 @@
*
* Adapted for sparc by David S. Miller davem@davemloft.net
* Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/of_pdt.h>
#include <asm/olpc.h>
#include <asm/olpc_ofw.h>
@@ -136,13 +131,12 @@ void * __init prom_early_alloc(unsigned long size)
const size_t chunk_size = max(PAGE_SIZE, size);
/*
- * To mimimize the number of allocations, grab at least
+ * To minimize the number of allocations, grab at least
* PAGE_SIZE of memory (that's an arbitrary choice that's
* fast enough on the platforms we care about while minimizing
* wasted bootmem) and hand off chunks of it to callers.
*/
- res = alloc_bootmem(chunk_size);
- BUG_ON(!res);
+ res = memblock_alloc_or_panic(chunk_size, SMP_CACHE_BYTES);
prom_early_allocated += chunk_size;
memset(res, 0, chunk_size);
free_mem = chunk_size;
@@ -218,10 +212,25 @@ static u32 __init olpc_dt_get_board_revision(void)
return be32_to_cpu(rev);
}
-void __init olpc_dt_fixup(void)
+static int __init olpc_dt_compatible_match(phandle node, const char *compat)
+{
+ char buf[64], *p;
+ int plen;
+
+ plen = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf));
+ if (plen <= 0)
+ return 0;
+
+ for (p = buf; p < buf + plen; p += strlen(p) + 1) {
+ if (strcmp(p, compat) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static void __init olpc_dt_fixup(void)
{
- int r;
- char buf[64];
phandle node;
u32 board_rev;
@@ -229,41 +238,66 @@ void __init olpc_dt_fixup(void)
if (!node)
return;
- /*
- * If the battery node has a compatible property, we are running a new
- * enough firmware and don't have fixups to make.
- */
- r = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf));
- if (r > 0)
- return;
-
- pr_info("PROM DT: Old firmware detected, applying fixes\n");
-
- /* Add olpc,xo1-battery compatible marker to battery node */
- olpc_dt_interpret("\" /battery@0\" find-device"
- " \" olpc,xo1-battery\" +compatible"
- " device-end");
-
board_rev = olpc_dt_get_board_revision();
if (!board_rev)
return;
if (board_rev >= olpc_board_pre(0xd0)) {
- /* XO-1.5: add dcon device */
- olpc_dt_interpret("\" /pci/display@1\" find-device"
- " new-device"
- " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible"
- " finish-device device-end");
+ /* XO-1.5 */
+
+ if (olpc_dt_compatible_match(node, "olpc,xo1.5-battery"))
+ return;
+
+ /* Add olpc,xo1.5-battery compatible marker to battery node */
+ olpc_dt_interpret("\" /battery@0\" find-device");
+ olpc_dt_interpret(" \" olpc,xo1.5-battery\" +compatible");
+ olpc_dt_interpret("device-end");
+
+ if (olpc_dt_compatible_match(node, "olpc,xo1-battery")) {
+ /*
+ * If we have a olpc,xo1-battery compatible, then we're
+ * running a new enough firmware that already has
+ * the dcon node.
+ */
+ return;
+ }
+
+ /* Add dcon device */
+ olpc_dt_interpret("\" /pci/display@1\" find-device");
+ olpc_dt_interpret(" new-device");
+ olpc_dt_interpret(" \" dcon\" device-name");
+ olpc_dt_interpret(" \" olpc,xo1-dcon\" +compatible");
+ olpc_dt_interpret(" finish-device");
+ olpc_dt_interpret("device-end");
} else {
- /* XO-1: add dcon device, mark RTC as olpc,xo1-rtc */
- olpc_dt_interpret("\" /pci/display@1,1\" find-device"
- " new-device"
- " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible"
- " finish-device device-end"
- " \" /rtc\" find-device"
- " \" olpc,xo1-rtc\" +compatible"
- " device-end");
+ /* XO-1 */
+
+ if (olpc_dt_compatible_match(node, "olpc,xo1-battery")) {
+ /*
+ * If we have a olpc,xo1-battery compatible, then we're
+ * running a new enough firmware that already has
+ * the dcon and RTC nodes.
+ */
+ return;
+ }
+
+ /* Add dcon device, mark RTC as olpc,xo1-rtc */
+ olpc_dt_interpret("\" /pci/display@1,1\" find-device");
+ olpc_dt_interpret(" new-device");
+ olpc_dt_interpret(" \" dcon\" device-name");
+ olpc_dt_interpret(" \" olpc,xo1-dcon\" +compatible");
+ olpc_dt_interpret(" finish-device");
+ olpc_dt_interpret("device-end");
+
+ olpc_dt_interpret("\" /rtc\" find-device");
+ olpc_dt_interpret(" \" olpc,xo1-rtc\" +compatible");
+ olpc_dt_interpret("device-end");
}
+
+ /* Add olpc,xo1-battery compatible marker to battery node */
+ olpc_dt_interpret("\" /battery@0\" find-device");
+ olpc_dt_interpret(" \" olpc,xo1-battery\" +compatible");
+ olpc_dt_interpret("device-end");
}
void __init olpc_dt_build_devicetree(void)
@@ -285,20 +319,3 @@ void __init olpc_dt_build_devicetree(void)
pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
prom_early_allocated);
}
-
-/* A list of DT node/bus matches that we want to expose as platform devices */
-static struct of_device_id __initdata of_ids[] = {
- { .compatible = "olpc,xo1-battery" },
- { .compatible = "olpc,xo1-dcon" },
- { .compatible = "olpc,xo1-rtc" },
- {},
-};
-
-static int __init olpc_create_platform_devices(void)
-{
- if (machine_is_olpc())
- return of_platform_bus_probe(NULL, of_ids, NULL);
- else
- return 0;
-}
-device_initcall(olpc_create_platform_devices);
diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c
index e7604f62870d..6bab0f0aa8f3 100644
--- a/arch/x86/platform/olpc/olpc_ofw.c
+++ b/arch/x86/platform/olpc/olpc_ofw.c
@@ -1,10 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/spinlock_types.h>
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/page.h>
#include <asm/setup.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
#include <asm/olpc_ofw.h>
/* address of OFW callback interface; will be NULL if OFW isn't found */
diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S
index 948deb289753..3a5abffe5660 100644
--- a/arch/x86/platform/olpc/xo1-wakeup.S
+++ b/arch/x86/platform/olpc/xo1-wakeup.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
.text
#include <linux/linkage.h>
#include <asm/segment.h>
@@ -76,7 +77,7 @@ save_registers:
pushfl
popl saved_context_eflags
- ret
+ RET
restore_registers:
movl saved_context_ebp, %ebp
@@ -87,9 +88,9 @@ restore_registers:
pushl saved_context_eflags
popfl
- ret
+ RET
-ENTRY(do_olpc_suspend_lowlevel)
+SYM_CODE_START(do_olpc_suspend_lowlevel)
call save_processor_state
call save_registers
@@ -108,7 +109,8 @@ ret_point:
call restore_registers
call restore_processor_state
- ret
+ RET
+SYM_CODE_END(do_olpc_suspend_lowlevel)
.data
saved_gdt: .long 0,0
diff --git a/arch/x86/platform/pvh/Makefile b/arch/x86/platform/pvh/Makefile
new file mode 100644
index 000000000000..c43fb7964dc4
--- /dev/null
+++ b/arch/x86/platform/pvh/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+OBJECT_FILES_NON_STANDARD_head.o := y
+KASAN_SANITIZE := n
+
+obj-$(CONFIG_PVH) += enlighten.o
+obj-$(CONFIG_PVH) += head.o
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
new file mode 100644
index 000000000000..2263885d16ba
--- /dev/null
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/acpi.h>
+
+#include <xen/hvc-console.h>
+
+#include <asm/bootparam.h>
+#include <asm/io_apic.h>
+#include <asm/hypervisor.h>
+#include <asm/e820/api.h>
+#include <asm/x86_init.h>
+
+#include <asm/xen/interface.h>
+
+#include <xen/xen.h>
+#include <xen/interface/hvm/start_info.h>
+
+/*
+ * PVH variables.
+ *
+ * pvh_bootparams and pvh_start_info need to live in a data segment since
+ * they are used after startup_{32|64}, which clear .bss, are invoked.
+ */
+struct boot_params __initdata pvh_bootparams;
+struct hvm_start_info __initdata pvh_start_info;
+
+const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info);
+
+static u64 __init pvh_get_root_pointer(void)
+{
+ return pvh_start_info.rsdp_paddr;
+}
+
+/*
+ * Xen guests are able to obtain the memory map from the hypervisor via the
+ * HYPERVISOR_memory_op hypercall.
+ * If we are trying to boot a Xen PVH guest, it is expected that the kernel
+ * will have been configured to provide an override for this routine to do
+ * just that.
+ */
+void __init __weak mem_map_via_hcall(struct boot_params *ptr __maybe_unused)
+{
+ xen_raw_printk("Error: Could not find memory map\n");
+ BUG();
+}
+
+static void __init init_pvh_bootparams(bool xen_guest)
+{
+ if ((pvh_start_info.version > 0) && (pvh_start_info.memmap_entries)) {
+ struct hvm_memmap_table_entry *ep;
+ int i;
+
+ ep = __va(pvh_start_info.memmap_paddr);
+ pvh_bootparams.e820_entries = pvh_start_info.memmap_entries;
+
+ for (i = 0; i < pvh_bootparams.e820_entries ; i++, ep++) {
+ pvh_bootparams.e820_table[i].addr = ep->addr;
+ pvh_bootparams.e820_table[i].size = ep->size;
+ pvh_bootparams.e820_table[i].type = ep->type;
+ }
+ } else if (xen_guest) {
+ mem_map_via_hcall(&pvh_bootparams);
+ } else {
+ /* Non-xen guests are not supported by version 0 */
+ BUG();
+ }
+
+ if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
+ pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
+ ISA_START_ADDRESS;
+ pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size =
+ ISA_END_ADDRESS - ISA_START_ADDRESS;
+ pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
+ E820_TYPE_RESERVED;
+ pvh_bootparams.e820_entries++;
+ } else
+ xen_raw_printk("Warning: Can fit ISA range into e820\n");
+
+ pvh_bootparams.hdr.cmd_line_ptr =
+ pvh_start_info.cmdline_paddr;
+
+ /* The first module is always ramdisk. */
+ if (pvh_start_info.nr_modules) {
+ struct hvm_modlist_entry *modaddr =
+ __va(pvh_start_info.modlist_paddr);
+ pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
+ pvh_bootparams.hdr.ramdisk_size = modaddr->size;
+ }
+
+ /*
+ * See Documentation/arch/x86/boot.rst.
+ *
+ * Version 2.12 supports Xen entry point but we will use default x86/PC
+ * environment (i.e. hardware_subarch 0).
+ */
+ pvh_bootparams.hdr.version = (2 << 8) | 12;
+ pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
+
+ x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
+}
+
+/*
+ * If we are trying to boot a Xen PVH guest, it is expected that the kernel
+ * will have been configured to provide the required override for this routine.
+ */
+void __init __weak xen_pvh_init(struct boot_params *boot_params)
+{
+ xen_raw_printk("Error: Missing xen PVH initialization\n");
+ BUG();
+}
+
+static void __init hypervisor_specific_init(bool xen_guest)
+{
+ if (xen_guest)
+ xen_pvh_init(&pvh_bootparams);
+}
+
+/*
+ * This routine (and those that it might call) should not use
+ * anything that lives in .bss since that segment will be cleared later.
+ */
+void __init xen_prepare_pvh(void)
+{
+
+ u32 msr = xen_cpuid_base();
+ bool xen_guest = !!msr;
+
+ if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
+ xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
+ pvh_start_info.magic);
+ BUG();
+ }
+
+ /*
+ * This must not compile to "call memset" because memset() may be
+ * instrumented.
+ */
+ __builtin_memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+
+ hypervisor_specific_init(xen_guest);
+
+ init_pvh_bootparams(xen_guest);
+}
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
new file mode 100644
index 000000000000..344030c1a81d
--- /dev/null
+++ b/arch/x86/platform/pvh/head.S
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
+ */
+
+ .code32
+ .text
+#ifdef CONFIG_X86_32
+#define _pa(x) ((x) - __START_KERNEL_map)
+#endif
+#define rva(x) ((x) - pvh_start_xen)
+
+#include <linux/elfnote.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/desc_defs.h>
+#include <asm/segment.h>
+#include <asm/asm.h>
+#include <asm/boot.h>
+#include <asm/pgtable.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
+#include <xen/interface/elfnote.h>
+
+ __INIT
+
+/*
+ * Entry point for PVH guests.
+ *
+ * Xen ABI specifies the following register state when we come here:
+ *
+ * - `ebx`: contains the physical memory address where the loader has placed
+ * the boot start info structure.
+ * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
+ * - `cr4`: all bits are cleared.
+ * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
+ * and a limit of `0xFFFFFFFF`. The selector value is unspecified.
+ * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
+ * `0` and a limit of `0xFFFFFFFF`. The selector values are all
+ * unspecified.
+ * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
+ * of '0x67'.
+ * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
+ * Bit 8 (TF) must be cleared. Other bits are all unspecified.
+ *
+ * All other processor registers and flag bits are unspecified. The OS is in
+ * charge of setting up its own stack, GDT and IDT.
+ */
+
+#define PVH_GDT_ENTRY_CS 1
+#define PVH_GDT_ENTRY_DS 2
+#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8)
+#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
+
+SYM_CODE_START(pvh_start_xen)
+ UNWIND_HINT_END_OF_STACK
+ cld
+
+ /*
+ * See the comment for startup_32 for more details. We need to
+ * execute a call to get the execution address to be position
+ * independent, but we don't have a stack. Save and restore the
+ * magic field of start_info in ebx, and use that as the stack.
+ */
+ mov (%ebx), %eax
+ leal 4(%ebx), %esp
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+1: popl %ebp
+ mov %eax, (%ebx)
+ subl $rva(1b), %ebp
+ movl $0, %esp
+
+ leal rva(gdt)(%ebp), %eax
+ addl %eax, 2(%eax)
+ lgdt (%eax)
+
+ mov $PVH_DS_SEL,%eax
+ mov %eax,%ds
+ mov %eax,%es
+ mov %eax,%ss
+
+ /* Stash hvm_start_info. */
+ leal rva(pvh_start_info)(%ebp), %edi
+ mov %ebx, %esi
+ movl rva(pvh_start_info_sz)(%ebp), %ecx
+ shr $2,%ecx
+ rep movsl
+
+ leal rva(early_stack_end)(%ebp), %esp
+
+ /* Enable PAE mode. */
+ mov %cr4, %eax
+ orl $X86_CR4_PAE, %eax
+ mov %eax, %cr4
+
+#ifdef CONFIG_X86_64
+ /* Enable Long mode. */
+ mov $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /*
+ * Reuse the non-relocatable symbol emitted for the ELF note to
+ * subtract the build time physical address of pvh_start_xen() from
+ * its actual runtime address, without relying on absolute 32-bit ELF
+ * relocations, as these are not supported by the linker when running
+ * in -pie mode, and should be avoided in .head.text in general.
+ */
+ mov %ebp, %ebx
+ subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
+ jz .Lpagetable_done
+
+ /*
+ * Store the resulting load offset in phys_base. __pa() needs
+ * phys_base set to calculate the hypercall page in xen_pvh_init().
+ */
+ movl %ebx, rva(phys_base)(%ebp)
+
+ /* Fixup page-tables for relocation. */
+ leal rva(pvh_init_top_pgt)(%ebp), %edi
+ movl $PTRS_PER_PGD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+ /* L3 ident has a single entry. */
+ leal rva(pvh_level3_ident_pgt)(%ebp), %edi
+ addl %ebx, 0x00(%edi)
+
+ leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
+ addl %ebx, (PAGE_SIZE - 16)(%edi)
+ addl %ebx, (PAGE_SIZE - 8)(%edi)
+
+ /* pvh_level2_ident_pgt is fine - large pages */
+
+ /* pvh_level2_kernel_pgt needs adjustment - large pages */
+ leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
+ movl $PTRS_PER_PMD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+.Lpagetable_done:
+ /* Enable pre-constructed page tables. */
+ leal rva(pvh_init_top_pgt)(%ebp), %eax
+ mov %eax, %cr3
+ mov $(X86_CR0_PG | X86_CR0_PE), %eax
+ mov %eax, %cr0
+
+ /* Jump to 64-bit mode. */
+ pushl $PVH_CS_SEL
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
+
+ /* 64-bit entry point. */
+ .code64
+1:
+ UNWIND_HINT_END_OF_STACK
+
+ /*
+ * Set up GSBASE.
+ * Note that on SMP the boot CPU uses the init data section until
+ * the per-CPU areas are set up.
+ */
+ movl $MSR_GS_BASE,%ecx
+ xorl %eax, %eax
+ xorl %edx, %edx
+ wrmsr
+
+ /* Call xen_prepare_pvh() via the kernel virtual mapping */
+ leaq xen_prepare_pvh(%rip), %rax
+ subq phys_base(%rip), %rax
+ addq $__START_KERNEL_map, %rax
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rax
+
+ /* startup_64 expects boot_params in %rsi. */
+ lea pvh_bootparams(%rip), %rsi
+ jmp startup_64
+
+#else /* CONFIG_X86_64 */
+
+ call mk_early_pgtbl_32
+
+ mov $_pa(initial_page_table), %eax
+ mov %eax, %cr3
+
+ mov %cr0, %eax
+ or $(X86_CR0_PG | X86_CR0_PE), %eax
+ mov %eax, %cr0
+
+ ljmp $PVH_CS_SEL, $1f
+1:
+ call xen_prepare_pvh
+ mov $_pa(pvh_bootparams), %esi
+
+ /* startup_32 doesn't expect paging and PAE to be on. */
+ ljmp $PVH_CS_SEL, $_pa(2f)
+2:
+ mov %cr0, %eax
+ and $~X86_CR0_PG, %eax
+ mov %eax, %cr0
+ mov %cr4, %eax
+ and $~X86_CR4_PAE, %eax
+ mov %eax, %cr4
+
+ ljmp $PVH_CS_SEL, $_pa(startup_32)
+#endif
+SYM_CODE_END(pvh_start_xen)
+
+ .section ".init.data","aw"
+ .balign 8
+SYM_DATA_START_LOCAL(gdt)
+ .word gdt_end - gdt_start - 1
+ .long gdt_start - gdt
+ .word 0
+SYM_DATA_END(gdt)
+SYM_DATA_START_LOCAL(gdt_start)
+ .quad 0x0000000000000000 /* NULL descriptor */
+#ifdef CONFIG_X86_64
+ .quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
+#else
+ .quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
+#endif
+ .quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
+SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
+
+ .balign 16
+SYM_DATA_START_LOCAL(early_stack)
+ .fill BOOT_STACK_SIZE, 1, 0
+SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
+
+#ifdef CONFIG_X86_64
+/*
+ * Xen PVH needs a set of identity mapped and kernel high mapping
+ * page tables. pvh_start_xen starts running on the identity mapped
+ * page tables, but xen_prepare_pvh calls into the high mapping.
+ * These page tables need to be relocatable and are only used until
+ * startup_64 transitions to init_top_pgt.
+ */
+SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(pvh_init_top_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
+ .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .fill 511, 8, 0
+SYM_DATA_END(pvh_level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
+ /*
+ * Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ *
+ * Note: This sets _PAGE_GLOBAL despite whether
+ * the CPU supports it or it is enabled. But,
+ * the CPU should ignore the bit.
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(pvh_level2_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
+ .fill L3_START_KERNEL, 8, 0
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+ .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .quad 0 /* no fixmap */
+SYM_DATA_END(pvh_level3_kernel_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
+ /*
+ * Kernel high mapping.
+ *
+ * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
+ * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
+ * 512 MiB otherwise.
+ *
+ * (NOTE: after that starts the module area, see MODULES_VADDR.)
+ *
+ * This table is eventually used by the kernel during normal runtime.
+ * Care must be taken to clear out undesired bits later, like _PAGE_RW
+ * or _PAGE_GLOBAL in some cases.
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
+SYM_DATA_END(pvh_level2_kernel_pgt)
+
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
+ .long CONFIG_PHYSICAL_ALIGN;
+ .long LOAD_PHYSICAL_ADDR;
+ .long KERNEL_IMAGE_SIZE - 1)
+#endif
+
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
+ xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)
diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile
index 762b4c7f4314..981b3e4302e6 100644
--- a/arch/x86/platform/scx200/Makefile
+++ b/arch/x86/platform/scx200/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 3dc9aee41d91..80662b72035d 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
*
diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile
deleted file mode 100644
index cc5db1168a5e..000000000000
--- a/arch/x86/platform/sfi/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_SFI) += sfi.o
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
deleted file mode 100644
index bcd1a703e3e6..000000000000
--- a/arch/x86/platform/sfi/sfi.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * sfi.c - x86 architecture SFI support.
- *
- * Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#define KMSG_COMPONENT "SFI"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/io.h>
-
-#include <asm/io_apic.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-
-/* All CPUs enumerated by SFI must be present and enabled */
-static void __init mp_sfi_register_lapic(u8 id)
-{
- if (MAX_LOCAL_APIC - id <= 0) {
- pr_warning("Processor #%d invalid (max %d)\n",
- id, MAX_LOCAL_APIC);
- return;
- }
-
- pr_info("registering lapic[%d]\n", id);
-
- generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
-}
-
-static int __init sfi_parse_cpus(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_cpu_table_entry *pentry;
- int i;
- int cpu_num;
-
- sb = (struct sfi_table_simple *)table;
- cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
- pentry = (struct sfi_cpu_table_entry *)sb->pentry;
-
- for (i = 0; i < cpu_num; i++) {
- mp_sfi_register_lapic(pentry->apic_id);
- pentry++;
- }
-
- smp_found_config = 1;
- return 0;
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#ifdef CONFIG_X86_IO_APIC
-
-static int __init sfi_parse_ioapic(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_apic_table_entry *pentry;
- int i, num;
-
- sb = (struct sfi_table_simple *)table;
- num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
- pentry = (struct sfi_apic_table_entry *)sb->pentry;
-
- for (i = 0; i < num; i++) {
- mp_register_ioapic(i, pentry->phys_addr, gsi_top);
- pentry++;
- }
-
- WARN(pic_mode, KERN_WARNING
- "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
- pic_mode = 0;
- return 0;
-}
-#endif /* CONFIG_X86_IO_APIC */
-
-/*
- * sfi_platform_init(): register lapics & io-apics
- */
-int __init sfi_platform_init(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- register_lapic_address(sfi_lapic_addr);
- sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
-#endif
-#ifdef CONFIG_X86_IO_APIC
- sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
-#endif
- return 0;
-}
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
index c54e348c96a7..910fe9e3ffb4 100644
--- a/arch/x86/platform/ts5500/Makefile
+++ b/arch/x86/platform/ts5500/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index 39febb214e8c..0b67da056fd9 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -1,30 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Technologic Systems TS-5500 Single Board Computer support
*
- * Copyright (C) 2013 Savoir-faire Linux Inc.
+ * Copyright (C) 2013-2014 Savoir-faire Linux Inc.
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option) any later
- * version.
- *
- *
* This driver registers the Technologic Systems TS-5500 Single Board Computer
* (SBC) and its devices, and exposes information to userspace such as jumpers'
* state or available options. For further information about sysfs entries, see
* Documentation/ABI/testing/sysfs-platform-ts5500.
*
- * This code actually supports the TS-5500 platform, but it may be extended to
- * support similar Technologic Systems x86-based platforms, such as the TS-5600.
+ * This code may be extended to support similar x86-based platforms.
+ * Actually, the TS-5500 and TS-5400 are supported.
*/
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/leds.h>
-#include <linux/module.h>
-#include <linux/platform_data/gpio-ts5500.h>
+#include <linux/init.h>
#include <linux/platform_data/max197.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -32,6 +26,7 @@
/* Product code register */
#define TS5500_PRODUCT_CODE_ADDR 0x74
#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
+#define TS5400_PRODUCT_CODE 0x40 /* TS-5400 product code */
/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
#define TS5500_SRAM_RS485_ADC_ADDR 0x75
@@ -66,6 +61,7 @@
/**
* struct ts5500_sbc - TS-5500 board description
+ * @name: Board model name.
* @id: Board product ID.
* @sram: Flag for SRAM option.
* @rs485: Flag for RS-485 option.
@@ -75,6 +71,7 @@
* @jumpers: Bitfield for jumpers' state.
*/
struct ts5500_sbc {
+ const char *name;
int id;
bool sram;
bool rs485;
@@ -88,7 +85,7 @@ struct ts5500_sbc {
static const struct {
const char * const string;
const ssize_t offset;
-} ts5500_signatures[] __initdata = {
+} ts5500_signatures[] __initconst = {
{ "TS-5x00 AMD Elan", 0xb14 },
};
@@ -122,13 +119,16 @@ static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
return -EBUSY;
- tmp = inb(TS5500_PRODUCT_CODE_ADDR);
- if (tmp != TS5500_PRODUCT_CODE) {
- pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp);
+ sbc->id = inb(TS5500_PRODUCT_CODE_ADDR);
+ if (sbc->id == TS5500_PRODUCT_CODE) {
+ sbc->name = "TS-5500";
+ } else if (sbc->id == TS5400_PRODUCT_CODE) {
+ sbc->name = "TS-5400";
+ } else {
+ pr_err("ts5500: unknown product code 0x%x\n", sbc->id);
ret = -ENODEV;
goto cleanup;
}
- sbc->id = tmp;
tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
sbc->sram = tmp & TS5500_SRAM;
@@ -147,48 +147,52 @@ cleanup:
return ret;
}
-static ssize_t ts5500_show_id(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t name_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct ts5500_sbc *sbc = dev_get_drvdata(dev);
- return sprintf(buf, "0x%.2x\n", sbc->id);
+ return sprintf(buf, "%s\n", sbc->name);
}
+static DEVICE_ATTR_RO(name);
-static ssize_t ts5500_show_jumpers(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct ts5500_sbc *sbc = dev_get_drvdata(dev);
- return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
+ return sprintf(buf, "0x%.2x\n", sbc->id);
}
+static DEVICE_ATTR_RO(id);
-#define TS5500_SHOW(field) \
- static ssize_t ts5500_show_##field(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
- { \
- struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
- return sprintf(buf, "%d\n", sbc->field); \
- }
-
-TS5500_SHOW(sram)
-TS5500_SHOW(rs485)
-TS5500_SHOW(adc)
-TS5500_SHOW(ereset)
-TS5500_SHOW(itr)
+static ssize_t jumpers_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct ts5500_sbc *sbc = dev_get_drvdata(dev);
-static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL);
-static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL);
-static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL);
-static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL);
-static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL);
-static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL);
-static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL);
+ return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
+}
+static DEVICE_ATTR_RO(jumpers);
+
+#define TS5500_ATTR_BOOL(_field) \
+ static ssize_t _field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+ { \
+ struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
+ \
+ return sprintf(buf, "%d\n", sbc->_field); \
+ } \
+ static DEVICE_ATTR_RO(_field)
+
+TS5500_ATTR_BOOL(sram);
+TS5500_ATTR_BOOL(rs485);
+TS5500_ATTR_BOOL(adc);
+TS5500_ATTR_BOOL(ereset);
+TS5500_ATTR_BOOL(itr);
static struct attribute *ts5500_attributes[] = {
&dev_attr_id.attr,
+ &dev_attr_name.attr,
&dev_attr_jumpers.attr,
&dev_attr_sram.attr,
&dev_attr_rs485.attr,
@@ -311,12 +315,14 @@ static int __init ts5500_init(void)
if (err)
goto error;
- ts5500_dio1_pdev.dev.parent = &pdev->dev;
- if (platform_device_register(&ts5500_dio1_pdev))
- dev_warn(&pdev->dev, "DIO1 block registration failed\n");
- ts5500_dio2_pdev.dev.parent = &pdev->dev;
- if (platform_device_register(&ts5500_dio2_pdev))
- dev_warn(&pdev->dev, "DIO2 block registration failed\n");
+ if (sbc->id == TS5500_PRODUCT_CODE) {
+ ts5500_dio1_pdev.dev.parent = &pdev->dev;
+ if (platform_device_register(&ts5500_dio1_pdev))
+ dev_warn(&pdev->dev, "DIO1 block registration failed\n");
+ ts5500_dio2_pdev.dev.parent = &pdev->dev;
+ if (platform_device_register(&ts5500_dio2_pdev))
+ dev_warn(&pdev->dev, "DIO2 block registration failed\n");
+ }
if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
dev_warn(&pdev->dev, "LED registration failed\n");
@@ -333,7 +339,3 @@ error:
return err;
}
device_initcall(ts5500_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
-MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
index 6c40995fefb8..1441dda8edf7 100644
--- a/arch/x86/platform/uv/Makefile
+++ b/arch/x86/platform/uv/Makefile
@@ -1 +1,2 @@
-obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_time.o uv_nmi.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 766612137a62..bf31af3d32d6 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -1,76 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BIOS run time interface routines.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson <rja@sgi.com>
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/slab.h>
#include <asm/efi.h>
#include <linux/io.h>
+#include <asm/pgalloc.h>
#include <asm/uv/bios.h>
#include <asm/uv/uv_hub.h>
-static struct uv_systab uv_systab;
+unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR;
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+struct uv_systab *uv_systab;
+
+static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
- struct uv_systab *tab = &uv_systab;
+ struct uv_systab *tab = uv_systab;
s64 ret;
- if (!tab->function)
+ if (!tab || !tab->function)
/*
* BIOS does not support UV systab
*/
return BIOS_STATUS_UNIMPLEMENTED;
- ret = efi_call6((void *)__va(tab->function), (u64)which,
- a1, a2, a3, a4, a5);
+ ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_call);
-s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
+static s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4,
+ u64 a5)
{
- unsigned long bios_flags;
s64 ret;
- local_irq_save(bios_flags);
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
- local_irq_restore(bios_flags);
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+ up(&__efi_uv_runtime_lock);
return ret;
}
-s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
+static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
+ unsigned long bios_flags;
s64 ret;
- preempt_disable();
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
- preempt_enable();
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
+ local_irq_save(bios_flags);
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+ local_irq_restore(bios_flags);
+
+ up(&__efi_uv_runtime_lock);
return ret;
}
-
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
long sn_coherency_id;
@@ -78,10 +76,7 @@ EXPORT_SYMBOL_GPL(sn_coherency_id);
long sn_region_size;
EXPORT_SYMBOL_GPL(sn_region_size);
long system_serial_number;
-EXPORT_SYMBOL_GPL(system_serial_number);
int uv_type;
-EXPORT_SYMBOL_GPL(uv_type);
-
s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
long *region, long *ssn)
@@ -108,7 +103,6 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
*ssn = v1;
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
int
uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -149,11 +143,8 @@ EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
s64
uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
{
- s64 ret;
-
- ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
- (u64)addr, buf, (u64)len, 0);
- return ret;
+ return uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+ (u64)addr, buf, (u64)len, 0);
}
EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
@@ -162,7 +153,6 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
(u64)ticks_per_second, 0, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_freq_base);
/*
* uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
@@ -181,36 +171,99 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
(u64)decode, (u64)domain, (u64)bus, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
+extern s64 uv_bios_get_master_nasid(u64 size, u64 *master_nasid)
+{
+ return uv_bios_call(UV_BIOS_EXTRA, 0, UV_BIOS_EXTRA_MASTER_NASID, 0,
+ size, (u64)master_nasid);
+}
+EXPORT_SYMBOL_GPL(uv_bios_get_master_nasid);
-#ifdef CONFIG_EFI
-void uv_bios_init(void)
+extern s64 uv_bios_get_heapsize(u64 nasid, u64 size, u64 *heap_size)
{
- struct uv_systab *tab;
+ return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_GET_HEAPSIZE,
+ 0, size, (u64)heap_size);
+}
+EXPORT_SYMBOL_GPL(uv_bios_get_heapsize);
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
- (efi.uv_systab == (unsigned long)NULL)) {
- printk(KERN_CRIT "No EFI UV System Table.\n");
- uv_systab.function = (unsigned long)NULL;
- return;
- }
+extern s64 uv_bios_install_heap(u64 nasid, u64 heap_size, u64 *bios_heap)
+{
+ return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_INSTALL_HEAP,
+ 0, heap_size, (u64)bios_heap);
+}
+EXPORT_SYMBOL_GPL(uv_bios_install_heap);
- tab = (struct uv_systab *)ioremap(efi.uv_systab,
- sizeof(struct uv_systab));
- if (strncmp(tab->signature, "UVST", 4) != 0)
- printk(KERN_ERR "bad signature in UV system table!");
+extern s64 uv_bios_obj_count(u64 nasid, u64 size, u64 *objcnt)
+{
+ return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_OBJECT_COUNT,
+ 0, size, (u64)objcnt);
+}
+EXPORT_SYMBOL_GPL(uv_bios_obj_count);
- /*
- * Copy table to permanent spot for later use.
- */
- memcpy(&uv_systab, tab, sizeof(struct uv_systab));
- iounmap(tab);
+extern s64 uv_bios_enum_objs(u64 nasid, u64 size, u64 *objbuf)
+{
+ return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_OBJECTS,
+ 0, size, (u64)objbuf);
+}
+EXPORT_SYMBOL_GPL(uv_bios_enum_objs);
+
+extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 size, u64 *portbuf)
+{
+ return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_PORTS,
+ obj_id, size, (u64)portbuf);
+}
+EXPORT_SYMBOL_GPL(uv_bios_enum_ports);
+
+extern s64 uv_bios_get_geoinfo(u64 nasid, u64 size, u64 *buf)
+{
+ return uv_bios_call(UV_BIOS_GET_GEOINFO, nasid, (u64)buf, size, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_get_geoinfo);
+
+extern s64 uv_bios_get_pci_topology(u64 size, u64 *buf)
+{
+ return uv_bios_call(UV_BIOS_GET_PCI_TOPOLOGY, (u64)buf, size, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_get_pci_topology);
- printk(KERN_INFO "EFI UV System Table Revision %d\n",
- uv_systab.revision);
+unsigned long get_uv_systab_phys(bool msg)
+{
+ if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
+ !uv_systab_phys || efi_runtime_disabled()) {
+ if (msg)
+ pr_crit("UV: UVsystab: missing\n");
+ return 0;
+ }
+ return uv_systab_phys;
}
-#else /* !CONFIG_EFI */
-void uv_bios_init(void) { }
-#endif
+int uv_bios_init(void)
+{
+ unsigned long uv_systab_phys_addr;
+
+ uv_systab = NULL;
+ uv_systab_phys_addr = get_uv_systab_phys(1);
+ if (!uv_systab_phys_addr)
+ return -EEXIST;
+
+ uv_systab = ioremap(uv_systab_phys_addr, sizeof(struct uv_systab));
+ if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
+ pr_err("UV: UVsystab: bad signature!\n");
+ iounmap(uv_systab);
+ return -EINVAL;
+ }
+
+ /* Starting with UV4 the UV systab size is variable */
+ if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+ int size = uv_systab->size;
+
+ iounmap(uv_systab);
+ uv_systab = ioremap(uv_systab_phys_addr, size);
+ if (!uv_systab) {
+ pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
+ return -EFAULT;
+ }
+ }
+ pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
+ return 0;
+}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
deleted file mode 100644
index 0f92173a12b6..000000000000
--- a/arch/x86/platform/uv/tlb_uv.c
+++ /dev/null
@@ -1,2149 +0,0 @@
-/*
- * SGI UltraViolet TLB flush routines.
- *
- * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-
-#include <asm/mmu_context.h>
-#include <asm/uv/uv.h>
-#include <asm/uv/uv_mmrs.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
-#include <asm/apic.h>
-#include <asm/idle.h>
-#include <asm/tsc.h>
-#include <asm/irq_vectors.h>
-#include <asm/timer.h>
-
-/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
-static int timeout_base_ns[] = {
- 20,
- 160,
- 1280,
- 10240,
- 81920,
- 655360,
- 5242880,
- 167772160
-};
-
-static int timeout_us;
-static int nobau;
-static int nobau_perm;
-static cycles_t congested_cycles;
-
-/* tunables: */
-static int max_concurr = MAX_BAU_CONCURRENT;
-static int max_concurr_const = MAX_BAU_CONCURRENT;
-static int plugged_delay = PLUGGED_DELAY;
-static int plugsb4reset = PLUGSB4RESET;
-static int giveup_limit = GIVEUP_LIMIT;
-static int timeoutsb4reset = TIMEOUTSB4RESET;
-static int ipi_reset_limit = IPI_RESET_LIMIT;
-static int complete_threshold = COMPLETE_THRESHOLD;
-static int congested_respns_us = CONGESTED_RESPONSE_US;
-static int congested_reps = CONGESTED_REPS;
-static int disabled_period = DISABLED_PERIOD;
-
-static struct tunables tunables[] = {
- {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
- {&plugged_delay, PLUGGED_DELAY},
- {&plugsb4reset, PLUGSB4RESET},
- {&timeoutsb4reset, TIMEOUTSB4RESET},
- {&ipi_reset_limit, IPI_RESET_LIMIT},
- {&complete_threshold, COMPLETE_THRESHOLD},
- {&congested_respns_us, CONGESTED_RESPONSE_US},
- {&congested_reps, CONGESTED_REPS},
- {&disabled_period, DISABLED_PERIOD},
- {&giveup_limit, GIVEUP_LIMIT}
-};
-
-static struct dentry *tunables_dir;
-static struct dentry *tunables_file;
-
-/* these correspond to the statistics printed by ptc_seq_show() */
-static char *stat_description[] = {
- "sent: number of shootdown messages sent",
- "stime: time spent sending messages",
- "numuvhubs: number of hubs targeted with shootdown",
- "numuvhubs16: number times 16 or more hubs targeted",
- "numuvhubs8: number times 8 or more hubs targeted",
- "numuvhubs4: number times 4 or more hubs targeted",
- "numuvhubs2: number times 2 or more hubs targeted",
- "numuvhubs1: number times 1 hub targeted",
- "numcpus: number of cpus targeted with shootdown",
- "dto: number of destination timeouts",
- "retries: destination timeout retries sent",
- "rok: : destination timeouts successfully retried",
- "resetp: ipi-style resource resets for plugs",
- "resett: ipi-style resource resets for timeouts",
- "giveup: fall-backs to ipi-style shootdowns",
- "sto: number of source timeouts",
- "bz: number of stay-busy's",
- "throt: number times spun in throttle",
- "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
- "recv: shootdown messages received",
- "rtime: time spent processing messages",
- "all: shootdown all-tlb messages",
- "one: shootdown one-tlb messages",
- "mult: interrupts that found multiple messages",
- "none: interrupts that found no messages",
- "retry: number of retry messages processed",
- "canc: number messages canceled by retries",
- "nocan: number retries that found nothing to cancel",
- "reset: number of ipi-style reset requests processed",
- "rcan: number messages canceled by reset requests",
- "disable: number times use of the BAU was disabled",
- "enable: number times use of the BAU was re-enabled"
-};
-
-static int __init
-setup_nobau(char *arg)
-{
- nobau = 1;
- return 0;
-}
-early_param("nobau", setup_nobau);
-
-/* base pnode in this partition */
-static int uv_base_pnode __read_mostly;
-
-static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
-static DEFINE_PER_CPU(struct bau_control, bau_control);
-static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
-
-static void
-set_bau_on(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- if (nobau_perm) {
- pr_info("BAU not initialized; cannot be turned on\n");
- return;
- }
- nobau = 0;
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = 0;
- }
- pr_info("BAU turned on\n");
- return;
-}
-
-static void
-set_bau_off(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- nobau = 1;
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = 1;
- }
- pr_info("BAU turned off\n");
- return;
-}
-
-/*
- * Determine the first node on a uvhub. 'Nodes' are used for kernel
- * memory allocation.
- */
-static int __init uvhub_to_first_node(int uvhub)
-{
- int node, b;
-
- for_each_online_node(node) {
- b = uv_node_to_blade_id(node);
- if (uvhub == b)
- return node;
- }
- return -1;
-}
-
-/*
- * Determine the apicid of the first cpu on a uvhub.
- */
-static int __init uvhub_to_first_apicid(int uvhub)
-{
- int cpu;
-
- for_each_present_cpu(cpu)
- if (uvhub == uv_cpu_to_blade_id(cpu))
- return per_cpu(x86_cpu_to_apicid, cpu);
- return -1;
-}
-
-/*
- * Free a software acknowledge hardware resource by clearing its Pending
- * bit. This will return a reply to the sender.
- * If the message has timed out, a reply has already been sent by the
- * hardware but the resource has not been released. In that case our
- * clear of the Timeout bit (as well) will free the resource. No reply will
- * be sent (the hardware will only do one reply per message).
- */
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
- int do_acknowledge)
-{
- unsigned long dw;
- struct bau_pq_entry *msg;
-
- msg = mdp->msg;
- if (!msg->canceled && do_acknowledge) {
- dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
- write_mmr_sw_ack(dw);
- }
- msg->replied_to = 1;
- msg->swack_vec = 0;
-}
-
-/*
- * Process the receipt of a RETRY message
- */
-static void bau_process_retry_msg(struct msg_desc *mdp,
- struct bau_control *bcp)
-{
- int i;
- int cancel_count = 0;
- unsigned long msg_res;
- unsigned long mmr = 0;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_pq_entry *msg2;
- struct ptc_stats *stat = bcp->statp;
-
- stat->d_retries++;
- /*
- * cancel any message from msg+1 to the retry itself
- */
- for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
- if (msg2 > mdp->queue_last)
- msg2 = mdp->queue_first;
- if (msg2 == msg)
- break;
-
- /* same conditions for cancellation as do_reset */
- if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
- (msg2->swack_vec) && ((msg2->swack_vec &
- msg->swack_vec) == 0) &&
- (msg2->sending_cpu == msg->sending_cpu) &&
- (msg2->msg_type != MSG_NOOP)) {
- mmr = read_mmr_sw_ack();
- msg_res = msg2->swack_vec;
- /*
- * This is a message retry; clear the resources held
- * by the previous message only if they timed out.
- * If it has not timed out we have an unexpected
- * situation to report.
- */
- if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
- unsigned long mr;
- /*
- * Is the resource timed out?
- * Make everyone ignore the cancelled message.
- */
- msg2->canceled = 1;
- stat->d_canceled++;
- cancel_count++;
- mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
- write_mmr_sw_ack(mr);
- }
- }
- }
- if (!cancel_count)
- stat->d_nocanceled++;
-}
-
-/*
- * Do all the things a cpu should do for a TLB shootdown message.
- * Other cpu's may come here at the same time for this message.
- */
-static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
- int do_acknowledge)
-{
- short socket_ack_count = 0;
- short *sp;
- struct atomic_short *asp;
- struct ptc_stats *stat = bcp->statp;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_control *smaster = bcp->socket_master;
-
- /*
- * This must be a normal message, or retry of a normal message
- */
- if (msg->address == TLB_FLUSH_ALL) {
- local_flush_tlb();
- stat->d_alltlb++;
- } else {
- __flush_tlb_one(msg->address);
- stat->d_onetlb++;
- }
- stat->d_requestee++;
-
- /*
- * One cpu on each uvhub has the additional job on a RETRY
- * of releasing the resource held by the message that is
- * being retried. That message is identified by sending
- * cpu number.
- */
- if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
- bau_process_retry_msg(mdp, bcp);
-
- /*
- * This is a swack message, so we have to reply to it.
- * Count each responding cpu on the socket. This avoids
- * pinging the count's cache line back and forth between
- * the sockets.
- */
- sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
- asp = (struct atomic_short *)sp;
- socket_ack_count = atom_asr(1, asp);
- if (socket_ack_count == bcp->cpus_in_socket) {
- int msg_ack_count;
- /*
- * Both sockets dump their completed count total into
- * the message's count.
- */
- *sp = 0;
- asp = (struct atomic_short *)&msg->acknowledge_count;
- msg_ack_count = atom_asr(socket_ack_count, asp);
-
- if (msg_ack_count == bcp->cpus_in_uvhub) {
- /*
- * All cpus in uvhub saw it; reply
- * (unless we are in the UV2 workaround)
- */
- reply_to_message(mdp, bcp, do_acknowledge);
- }
- }
-
- return;
-}
-
-/*
- * Determine the first cpu on a pnode.
- */
-static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
-{
- int cpu;
- struct hub_and_pnode *hpp;
-
- for_each_present_cpu(cpu) {
- hpp = &smaster->thp[cpu];
- if (pnode == hpp->pnode)
- return cpu;
- }
- return -1;
-}
-
-/*
- * Last resort when we get a large number of destination timeouts is
- * to clear resources held by a given cpu.
- * Do this with IPI so that all messages in the BAU message queue
- * can be identified by their nonzero swack_vec field.
- *
- * This is entered for a single cpu on the uvhub.
- * The sender want's this uvhub to free a specific message's
- * swack resources.
- */
-static void do_reset(void *ptr)
-{
- int i;
- struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
- struct reset_args *rap = (struct reset_args *)ptr;
- struct bau_pq_entry *msg;
- struct ptc_stats *stat = bcp->statp;
-
- stat->d_resets++;
- /*
- * We're looking for the given sender, and
- * will free its swack resource.
- * If all cpu's finally responded after the timeout, its
- * message 'replied_to' was set.
- */
- for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
- unsigned long msg_res;
- /* do_reset: same conditions for cancellation as
- bau_process_retry_msg() */
- if ((msg->replied_to == 0) &&
- (msg->canceled == 0) &&
- (msg->sending_cpu == rap->sender) &&
- (msg->swack_vec) &&
- (msg->msg_type != MSG_NOOP)) {
- unsigned long mmr;
- unsigned long mr;
- /*
- * make everyone else ignore this message
- */
- msg->canceled = 1;
- /*
- * only reset the resource if it is still pending
- */
- mmr = read_mmr_sw_ack();
- msg_res = msg->swack_vec;
- mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
- if (mmr & msg_res) {
- stat->d_rcanceled++;
- write_mmr_sw_ack(mr);
- }
- }
- }
- return;
-}
-
-/*
- * Use IPI to get all target uvhubs to release resources held by
- * a given sending cpu number.
- */
-static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
-{
- int pnode;
- int apnode;
- int maskbits;
- int sender = bcp->cpu;
- cpumask_t *mask = bcp->uvhub_master->cpumask;
- struct bau_control *smaster = bcp->socket_master;
- struct reset_args reset_args;
-
- reset_args.sender = sender;
- cpus_clear(*mask);
- /* find a single cpu for each uvhub in this distribution mask */
- maskbits = sizeof(struct pnmask) * BITSPERBYTE;
- /* each bit is a pnode relative to the partition base pnode */
- for (pnode = 0; pnode < maskbits; pnode++) {
- int cpu;
- if (!bau_uvhub_isset(pnode, distribution))
- continue;
- apnode = pnode + bcp->partition_base_pnode;
- cpu = pnode_to_first_cpu(apnode, smaster);
- cpu_set(cpu, *mask);
- }
-
- /* IPI all cpus; preemption is already disabled */
- smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
- return;
-}
-
-static inline unsigned long cycles_2_us(unsigned long long cyc)
-{
- unsigned long long ns;
- unsigned long us;
- int cpu = smp_processor_id();
-
- ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
- us = ns / 1000;
- return us;
-}
-
-/*
- * wait for all cpus on this hub to finish their sends and go quiet
- * leaves uvhub_quiesce set so that no new broadcasts are started by
- * bau_flush_send_and_wait()
- */
-static inline void quiesce_local_uvhub(struct bau_control *hmaster)
-{
- atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-/*
- * mark this quiet-requestor as done
- */
-static inline void end_uvhub_quiesce(struct bau_control *hmaster)
-{
- atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
-{
- unsigned long descriptor_status;
-
- descriptor_status = uv_read_local_mmr(mmr_offset);
- descriptor_status >>= right_shift;
- descriptor_status &= UV_ACT_STATUS_MASK;
- return descriptor_status;
-}
-
-/*
- * Wait for completion of a broadcast software ack message
- * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
- */
-static int uv1_wait_completion(struct bau_desc *bau_desc,
- unsigned long mmr_offset, int right_shift,
- struct bau_control *bcp, long try)
-{
- unsigned long descriptor_status;
- cycles_t ttm;
- struct ptc_stats *stat = bcp->statp;
-
- descriptor_status = uv1_read_status(mmr_offset, right_shift);
- /* spin on the status MMR, waiting for it to go idle */
- while ((descriptor_status != DS_IDLE)) {
- /*
- * Our software ack messages may be blocked because
- * there are no swack resources available. As long
- * as none of them has timed out hardware will NACK
- * our message and its state will stay IDLE.
- */
- if (descriptor_status == DS_SOURCE_TIMEOUT) {
- stat->s_stimeout++;
- return FLUSH_GIVEUP;
- } else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
- stat->s_dtimeout++;
- ttm = get_cycles();
-
- /*
- * Our retries may be blocked by all destination
- * swack resources being consumed, and a timeout
- * pending. In that case hardware returns the
- * ERROR that looks like a destination timeout.
- */
- if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
- bcp->conseccompletes = 0;
- return FLUSH_RETRY_PLUGGED;
- }
-
- bcp->conseccompletes = 0;
- return FLUSH_RETRY_TIMEOUT;
- } else {
- /*
- * descriptor_status is still BUSY
- */
- cpu_relax();
- }
- descriptor_status = uv1_read_status(mmr_offset, right_shift);
- }
- bcp->conseccompletes++;
- return FLUSH_COMPLETE;
-}
-
-/*
- * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
- * But not currently used.
- */
-static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
-{
- unsigned long descriptor_status;
-
- descriptor_status =
- ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
- return descriptor_status;
-}
-
-/*
- * Return whether the status of the descriptor that is normally used for this
- * cpu (the one indexed by its hub-relative cpu number) is busy.
- * The status of the original 32 descriptors is always reflected in the 64
- * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
- * The bit provided by the activation_status_2 register is irrelevant to
- * the status if it is only being tested for busy or not busy.
- */
-int normal_busy(struct bau_control *bcp)
-{
- int cpu = bcp->uvhub_cpu;
- int mmr_offset;
- int right_shift;
-
- mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
- right_shift = cpu * UV_ACT_STATUS_SIZE;
- return (((((read_lmmr(mmr_offset) >> right_shift) &
- UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
-}
-
-/*
- * Entered when a bau descriptor has gone into a permanent busy wait because
- * of a hardware bug.
- * Workaround the bug.
- */
-int handle_uv2_busy(struct bau_control *bcp)
-{
- struct ptc_stats *stat = bcp->statp;
-
- stat->s_uv2_wars++;
- bcp->busy = 1;
- return FLUSH_GIVEUP;
-}
-
-static int uv2_wait_completion(struct bau_desc *bau_desc,
- unsigned long mmr_offset, int right_shift,
- struct bau_control *bcp, long try)
-{
- unsigned long descriptor_stat;
- cycles_t ttm;
- int desc = bcp->uvhub_cpu;
- long busy_reps = 0;
- struct ptc_stats *stat = bcp->statp;
-
- descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
-
- /* spin on the status MMR, waiting for it to go idle */
- while (descriptor_stat != UV2H_DESC_IDLE) {
- if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
- /*
- * A h/w bug on the destination side may
- * have prevented the message being marked
- * pending, thus it doesn't get replied to
- * and gets continually nacked until it times
- * out with a SOURCE_TIMEOUT.
- */
- stat->s_stimeout++;
- return FLUSH_GIVEUP;
- } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
- ttm = get_cycles();
-
- /*
- * Our retries may be blocked by all destination
- * swack resources being consumed, and a timeout
- * pending. In that case hardware returns the
- * ERROR that looks like a destination timeout.
- * Without using the extended status we have to
- * deduce from the short time that this was a
- * strong nack.
- */
- if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
- bcp->conseccompletes = 0;
- stat->s_plugged++;
- /* FLUSH_RETRY_PLUGGED causes hang on boot */
- return FLUSH_GIVEUP;
- }
- stat->s_dtimeout++;
- bcp->conseccompletes = 0;
- /* FLUSH_RETRY_TIMEOUT causes hang on boot */
- return FLUSH_GIVEUP;
- } else {
- busy_reps++;
- if (busy_reps > 1000000) {
- /* not to hammer on the clock */
- busy_reps = 0;
- ttm = get_cycles();
- if ((ttm - bcp->send_message) >
- bcp->timeout_interval)
- return handle_uv2_busy(bcp);
- }
- /*
- * descriptor_stat is still BUSY
- */
- cpu_relax();
- }
- descriptor_stat = uv2_read_status(mmr_offset, right_shift,
- desc);
- }
- bcp->conseccompletes++;
- return FLUSH_COMPLETE;
-}
-
-/*
- * There are 2 status registers; each and array[32] of 2 bits. Set up for
- * which register to read and position in that register based on cpu in
- * current hub.
- */
-static int wait_completion(struct bau_desc *bau_desc,
- struct bau_control *bcp, long try)
-{
- int right_shift;
- unsigned long mmr_offset;
- int desc = bcp->uvhub_cpu;
-
- if (desc < UV_CPUS_PER_AS) {
- mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
- right_shift = desc * UV_ACT_STATUS_SIZE;
- } else {
- mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
- right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
- }
-
- if (bcp->uvhub_version == 1)
- return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
- bcp, try);
- else
- return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
- bcp, try);
-}
-
-static inline cycles_t sec_2_cycles(unsigned long sec)
-{
- unsigned long ns;
- cycles_t cyc;
-
- ns = sec * 1000000000;
- cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
- return cyc;
-}
-
-/*
- * Our retries are blocked by all destination sw ack resources being
- * in use, and a timeout is pending. In that case hardware immediately
- * returns the ERROR that looks like a destination timeout.
- */
-static void destination_plugged(struct bau_desc *bau_desc,
- struct bau_control *bcp,
- struct bau_control *hmaster, struct ptc_stats *stat)
-{
- udelay(bcp->plugged_delay);
- bcp->plugged_tries++;
-
- if (bcp->plugged_tries >= bcp->plugsb4reset) {
- bcp->plugged_tries = 0;
-
- quiesce_local_uvhub(hmaster);
-
- spin_lock(&hmaster->queue_lock);
- reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
-
- end_uvhub_quiesce(hmaster);
-
- bcp->ipi_attempts++;
- stat->s_resets_plug++;
- }
-}
-
-static void destination_timeout(struct bau_desc *bau_desc,
- struct bau_control *bcp, struct bau_control *hmaster,
- struct ptc_stats *stat)
-{
- hmaster->max_concurr = 1;
- bcp->timeout_tries++;
- if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
- bcp->timeout_tries = 0;
-
- quiesce_local_uvhub(hmaster);
-
- spin_lock(&hmaster->queue_lock);
- reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
-
- end_uvhub_quiesce(hmaster);
-
- bcp->ipi_attempts++;
- stat->s_resets_timeout++;
- }
-}
-
-/*
- * Stop all cpus on a uvhub from using the BAU for a period of time.
- * This is reversed by check_enable.
- */
-static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
-{
- int tcpu;
- struct bau_control *tbcp;
- struct bau_control *hmaster;
- cycles_t tm1;
-
- hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
- if (!bcp->baudisabled) {
- stat->s_bau_disabled++;
- tm1 = get_cycles();
- for_each_present_cpu(tcpu) {
- tbcp = &per_cpu(bau_control, tcpu);
- if (tbcp->uvhub_master == hmaster) {
- tbcp->baudisabled = 1;
- tbcp->set_bau_on_time =
- tm1 + bcp->disabled_period;
- }
- }
- }
- spin_unlock(&hmaster->disable_lock);
-}
-
-static void count_max_concurr(int stat, struct bau_control *bcp,
- struct bau_control *hmaster)
-{
- bcp->plugged_tries = 0;
- bcp->timeout_tries = 0;
- if (stat != FLUSH_COMPLETE)
- return;
- if (bcp->conseccompletes <= bcp->complete_threshold)
- return;
- if (hmaster->max_concurr >= hmaster->max_concurr_const)
- return;
- hmaster->max_concurr++;
-}
-
-static void record_send_stats(cycles_t time1, cycles_t time2,
- struct bau_control *bcp, struct ptc_stats *stat,
- int completion_status, int try)
-{
- cycles_t elapsed;
-
- if (time2 > time1) {
- elapsed = time2 - time1;
- stat->s_time += elapsed;
-
- if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
- bcp->period_requests++;
- bcp->period_time += elapsed;
- if ((elapsed > congested_cycles) &&
- (bcp->period_requests > bcp->cong_reps) &&
- ((bcp->period_time / bcp->period_requests) >
- congested_cycles)) {
- stat->s_congested++;
- disable_for_period(bcp, stat);
- }
- }
- } else
- stat->s_requestor--;
-
- if (completion_status == FLUSH_COMPLETE && try > 1)
- stat->s_retriesok++;
- else if (completion_status == FLUSH_GIVEUP) {
- stat->s_giveup++;
- if (get_cycles() > bcp->period_end)
- bcp->period_giveups = 0;
- bcp->period_giveups++;
- if (bcp->period_giveups == 1)
- bcp->period_end = get_cycles() + bcp->disabled_period;
- if (bcp->period_giveups > bcp->giveup_limit) {
- disable_for_period(bcp, stat);
- stat->s_giveuplimit++;
- }
- }
-}
-
-/*
- * Because of a uv1 hardware bug only a limited number of concurrent
- * requests can be made.
- */
-static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
-{
- spinlock_t *lock = &hmaster->uvhub_lock;
- atomic_t *v;
-
- v = &hmaster->active_descriptor_count;
- if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
- stat->s_throttles++;
- do {
- cpu_relax();
- } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
- }
-}
-
-/*
- * Handle the completion status of a message send.
- */
-static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
- struct bau_control *bcp, struct bau_control *hmaster,
- struct ptc_stats *stat)
-{
- if (completion_status == FLUSH_RETRY_PLUGGED)
- destination_plugged(bau_desc, bcp, hmaster, stat);
- else if (completion_status == FLUSH_RETRY_TIMEOUT)
- destination_timeout(bau_desc, bcp, hmaster, stat);
-}
-
-/*
- * Send a broadcast and wait for it to complete.
- *
- * The flush_mask contains the cpus the broadcast is to be sent to including
- * cpus that are on the local uvhub.
- *
- * Returns 0 if all flushing represented in the mask was done.
- * Returns 1 if it gives up entirely and the original cpu mask is to be
- * returned to the kernel.
- */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
- struct bau_desc *bau_desc)
-{
- int seq_number = 0;
- int completion_stat = 0;
- int uv1 = 0;
- long try = 0;
- unsigned long index;
- cycles_t time1;
- cycles_t time2;
- struct ptc_stats *stat = bcp->statp;
- struct bau_control *hmaster = bcp->uvhub_master;
- struct uv1_bau_msg_header *uv1_hdr = NULL;
- struct uv2_bau_msg_header *uv2_hdr = NULL;
-
- if (bcp->uvhub_version == 1) {
- uv1 = 1;
- uv1_throttle(hmaster, stat);
- }
-
- while (hmaster->uvhub_quiesce)
- cpu_relax();
-
- time1 = get_cycles();
- if (uv1)
- uv1_hdr = &bau_desc->header.uv1_hdr;
- else
- uv2_hdr = &bau_desc->header.uv2_hdr;
-
- do {
- if (try == 0) {
- if (uv1)
- uv1_hdr->msg_type = MSG_REGULAR;
- else
- uv2_hdr->msg_type = MSG_REGULAR;
- seq_number = bcp->message_number++;
- } else {
- if (uv1)
- uv1_hdr->msg_type = MSG_RETRY;
- else
- uv2_hdr->msg_type = MSG_RETRY;
- stat->s_retry_messages++;
- }
-
- if (uv1)
- uv1_hdr->sequence = seq_number;
- else
- uv2_hdr->sequence = seq_number;
- index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
- bcp->send_message = get_cycles();
-
- write_mmr_activation(index);
-
- try++;
- completion_stat = wait_completion(bau_desc, bcp, try);
-
- handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
-
- if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
- bcp->ipi_attempts = 0;
- stat->s_overipilimit++;
- completion_stat = FLUSH_GIVEUP;
- break;
- }
- cpu_relax();
- } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
- (completion_stat == FLUSH_RETRY_TIMEOUT));
-
- time2 = get_cycles();
-
- count_max_concurr(completion_stat, bcp, hmaster);
-
- while (hmaster->uvhub_quiesce)
- cpu_relax();
-
- atomic_dec(&hmaster->active_descriptor_count);
-
- record_send_stats(time1, time2, bcp, stat, completion_stat, try);
-
- if (completion_stat == FLUSH_GIVEUP)
- /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
- return 1;
- return 0;
-}
-
-/*
- * The BAU is disabled for this uvhub. When the disabled time period has
- * expired re-enable it.
- * Return 0 if it is re-enabled for all cpus on this uvhub.
- */
-static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
-{
- int tcpu;
- struct bau_control *tbcp;
- struct bau_control *hmaster;
-
- hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
- if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
- stat->s_bau_reenabled++;
- for_each_present_cpu(tcpu) {
- tbcp = &per_cpu(bau_control, tcpu);
- if (tbcp->uvhub_master == hmaster) {
- tbcp->baudisabled = 0;
- tbcp->period_requests = 0;
- tbcp->period_time = 0;
- tbcp->period_giveups = 0;
- }
- }
- spin_unlock(&hmaster->disable_lock);
- return 0;
- }
- spin_unlock(&hmaster->disable_lock);
- return -1;
-}
-
-static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
- int remotes, struct bau_desc *bau_desc)
-{
- stat->s_requestor++;
- stat->s_ntargcpu += remotes + locals;
- stat->s_ntargremotes += remotes;
- stat->s_ntarglocals += locals;
-
- /* uvhub statistics */
- hubs = bau_uvhub_weight(&bau_desc->distribution);
- if (locals) {
- stat->s_ntarglocaluvhub++;
- stat->s_ntargremoteuvhub += (hubs - 1);
- } else
- stat->s_ntargremoteuvhub += hubs;
-
- stat->s_ntarguvhub += hubs;
-
- if (hubs >= 16)
- stat->s_ntarguvhub16++;
- else if (hubs >= 8)
- stat->s_ntarguvhub8++;
- else if (hubs >= 4)
- stat->s_ntarguvhub4++;
- else if (hubs >= 2)
- stat->s_ntarguvhub2++;
- else
- stat->s_ntarguvhub1++;
-}
-
-/*
- * Translate a cpu mask to the uvhub distribution mask in the BAU
- * activation descriptor.
- */
-static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
- struct bau_desc *bau_desc, int *localsp, int *remotesp)
-{
- int cpu;
- int pnode;
- int cnt = 0;
- struct hub_and_pnode *hpp;
-
- for_each_cpu(cpu, flush_mask) {
- /*
- * The distribution vector is a bit map of pnodes, relative
- * to the partition base pnode (and the partition base nasid
- * in the header).
- * Translate cpu to pnode and hub using a local memory array.
- */
- hpp = &bcp->socket_master->thp[cpu];
- pnode = hpp->pnode - bcp->partition_base_pnode;
- bau_uvhub_set(pnode, &bau_desc->distribution);
- cnt++;
- if (hpp->uvhub == bcp->uvhub)
- (*localsp)++;
- else
- (*remotesp)++;
- }
- if (!cnt)
- return 1;
- return 0;
-}
-
-/*
- * globally purge translation cache of a virtual address or all TLB's
- * @cpumask: mask of all cpu's in which the address is to be removed
- * @mm: mm_struct containing virtual address range
- * @start: start virtual address to be removed from TLB
- * @end: end virtual address to be remove from TLB
- * @cpu: the current cpu
- *
- * This is the entry point for initiating any UV global TLB shootdown.
- *
- * Purges the translation caches of all specified processors of the given
- * virtual address, or purges all TLB's on specified processors.
- *
- * The caller has derived the cpumask from the mm_struct. This function
- * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
- *
- * The cpumask is converted into a uvhubmask of the uvhubs containing
- * those cpus.
- *
- * Note that this function should be called with preemption disabled.
- *
- * Returns NULL if all remote flushing was done.
- * Returns pointer to cpumask if some remote flushing remains to be
- * done. The returned pointer is valid till preemption is re-enabled.
- */
-const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm, unsigned long start,
- unsigned long end, unsigned int cpu)
-{
- int locals = 0;
- int remotes = 0;
- int hubs = 0;
- struct bau_desc *bau_desc;
- struct cpumask *flush_mask;
- struct ptc_stats *stat;
- struct bau_control *bcp;
- unsigned long descriptor_status;
- unsigned long status;
-
- bcp = &per_cpu(bau_control, cpu);
- stat = bcp->statp;
- stat->s_enters++;
-
- if (bcp->nobau)
- return cpumask;
-
- if (bcp->busy) {
- descriptor_status =
- read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
- status = ((descriptor_status >> (bcp->uvhub_cpu *
- UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
- if (status == UV2H_DESC_BUSY)
- return cpumask;
- bcp->busy = 0;
- }
-
- /* bau was disabled due to slow response */
- if (bcp->baudisabled) {
- if (check_enable(bcp, stat)) {
- stat->s_ipifordisabled++;
- return cpumask;
- }
- }
-
- /*
- * Each sending cpu has a per-cpu mask which it fills from the caller's
- * cpu mask. All cpus are converted to uvhubs and copied to the
- * activation descriptor.
- */
- flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
- /* don't actually do a shootdown of the local cpu */
- cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
-
- if (cpu_isset(cpu, *cpumask))
- stat->s_ntargself++;
-
- bau_desc = bcp->descriptor_base;
- bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
- bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
- if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
- return NULL;
-
- record_send_statistics(stat, locals, hubs, remotes, bau_desc);
-
- if (!end || (end - start) <= PAGE_SIZE)
- bau_desc->payload.address = start;
- else
- bau_desc->payload.address = TLB_FLUSH_ALL;
- bau_desc->payload.sending_cpu = cpu;
- /*
- * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
- * or 1 if it gave up and the original cpumask should be returned.
- */
- if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
- return NULL;
- else
- return cpumask;
-}
-
-/*
- * Search the message queue for any 'other' unprocessed message with the
- * same software acknowledge resource bit vector as the 'msg' message.
- */
-struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
- struct bau_control *bcp)
-{
- struct bau_pq_entry *msg_next = msg + 1;
- unsigned char swack_vec = msg->swack_vec;
-
- if (msg_next > bcp->queue_last)
- msg_next = bcp->queue_first;
- while (msg_next != msg) {
- if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
- (msg_next->swack_vec == swack_vec))
- return msg_next;
- msg_next++;
- if (msg_next > bcp->queue_last)
- msg_next = bcp->queue_first;
- }
- return NULL;
-}
-
-/*
- * UV2 needs to work around a bug in which an arriving message has not
- * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
- * Such a message must be ignored.
- */
-void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
-{
- unsigned long mmr_image;
- unsigned char swack_vec;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_pq_entry *other_msg;
-
- mmr_image = read_mmr_sw_ack();
- swack_vec = msg->swack_vec;
-
- if ((swack_vec & mmr_image) == 0) {
- /*
- * This message was assigned a swack resource, but no
- * reserved acknowlegment is pending.
- * The bug has prevented this message from setting the MMR.
- */
- /*
- * Some message has set the MMR 'pending' bit; it might have
- * been another message. Look for that message.
- */
- other_msg = find_another_by_swack(msg, bcp);
- if (other_msg) {
- /*
- * There is another. Process this one but do not
- * ack it.
- */
- bau_process_message(mdp, bcp, 0);
- /*
- * Let the natural processing of that other message
- * acknowledge it. Don't get the processing of sw_ack's
- * out of order.
- */
- return;
- }
- }
-
- /*
- * Either the MMR shows this one pending a reply or there is no
- * other message using this sw_ack, so it is safe to acknowledge it.
- */
- bau_process_message(mdp, bcp, 1);
-
- return;
-}
-
-/*
- * The BAU message interrupt comes here. (registered by set_intr_gate)
- * See entry_64.S
- *
- * We received a broadcast assist message.
- *
- * Interrupts are disabled; this interrupt could represent
- * the receipt of several messages.
- *
- * All cores/threads on this hub get this interrupt.
- * The last one to see it does the software ack.
- * (the resource will not be freed until noninterruptable cpus see this
- * interrupt; hardware may timeout the s/w ack and reply ERROR)
- */
-void uv_bau_message_interrupt(struct pt_regs *regs)
-{
- int count = 0;
- cycles_t time_start;
- struct bau_pq_entry *msg;
- struct bau_control *bcp;
- struct ptc_stats *stat;
- struct msg_desc msgdesc;
-
- ack_APIC_irq();
- time_start = get_cycles();
-
- bcp = &per_cpu(bau_control, smp_processor_id());
- stat = bcp->statp;
-
- msgdesc.queue_first = bcp->queue_first;
- msgdesc.queue_last = bcp->queue_last;
-
- msg = bcp->bau_msg_head;
- while (msg->swack_vec) {
- count++;
-
- msgdesc.msg_slot = msg - msgdesc.queue_first;
- msgdesc.msg = msg;
- if (bcp->uvhub_version == 2)
- process_uv2_message(&msgdesc, bcp);
- else
- bau_process_message(&msgdesc, bcp, 1);
-
- msg++;
- if (msg > msgdesc.queue_last)
- msg = msgdesc.queue_first;
- bcp->bau_msg_head = msg;
- }
- stat->d_time += (get_cycles() - time_start);
- if (!count)
- stat->d_nomsg++;
- else if (count > 1)
- stat->d_multmsg++;
-}
-
-/*
- * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
- * shootdown message timeouts enabled. The timeout does not cause
- * an interrupt, but causes an error message to be returned to
- * the sender.
- */
-static void __init enable_timeouts(void)
-{
- int uvhub;
- int nuvhubs;
- int pnode;
- unsigned long mmr_image;
-
- nuvhubs = uv_num_possible_blades();
-
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- if (!uv_blade_nr_possible_cpus(uvhub))
- continue;
-
- pnode = uv_blade_to_pnode(uvhub);
- mmr_image = read_mmr_misc_control(pnode);
- /*
- * Set the timeout period and then lock it in, in three
- * steps; captures and locks in the period.
- *
- * To program the period, the SOFT_ACK_MODE must be off.
- */
- mmr_image &= ~(1L << SOFTACK_MSHIFT);
- write_mmr_misc_control(pnode, mmr_image);
- /*
- * Set the 4-bit period.
- */
- mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
- mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
- write_mmr_misc_control(pnode, mmr_image);
- /*
- * UV1:
- * Subsequent reversals of the timebase bit (3) cause an
- * immediate timeout of one or all INTD resources as
- * indicated in bits 2:0 (7 causes all of them to timeout).
- */
- mmr_image |= (1L << SOFTACK_MSHIFT);
- if (is_uv2_hub()) {
- /* hw bug workaround; do not use extended status */
- mmr_image &= ~(1L << UV2_EXT_SHFT);
- }
- write_mmr_misc_control(pnode, mmr_image);
- }
-}
-
-static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
-{
- if (*offset < num_possible_cpus())
- return offset;
- return NULL;
-}
-
-static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
-{
- (*offset)++;
- if (*offset < num_possible_cpus())
- return offset;
- return NULL;
-}
-
-static void ptc_seq_stop(struct seq_file *file, void *data)
-{
-}
-
-static inline unsigned long long usec_2_cycles(unsigned long microsec)
-{
- unsigned long ns;
- unsigned long long cyc;
-
- ns = microsec * 1000;
- cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
- return cyc;
-}
-
-/*
- * Display the statistics thru /proc/sgi_uv/ptc_statistics
- * 'data' points to the cpu number
- * Note: see the descriptions in stat_description[].
- */
-static int ptc_seq_show(struct seq_file *file, void *data)
-{
- struct ptc_stats *stat;
- struct bau_control *bcp;
- int cpu;
-
- cpu = *(loff_t *)data;
- if (!cpu) {
- seq_printf(file,
- "# cpu bauoff sent stime self locals remotes ncpus localhub ");
- seq_printf(file,
- "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
- seq_printf(file,
- "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
- seq_printf(file,
- "rok resetp resett giveup sto bz throt disable ");
- seq_printf(file,
- "enable wars warshw warwaits enters ipidis plugged ");
- seq_printf(file,
- "ipiover glim cong swack recv rtime all one mult ");
- seq_printf(file,
- "none retry canc nocan reset rcan\n");
- }
- if (cpu < num_possible_cpus() && cpu_online(cpu)) {
- bcp = &per_cpu(bau_control, cpu);
- stat = bcp->statp;
- /* source side statistics */
- seq_printf(file,
- "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
- cpu, bcp->nobau, stat->s_requestor,
- cycles_2_us(stat->s_time),
- stat->s_ntargself, stat->s_ntarglocals,
- stat->s_ntargremotes, stat->s_ntargcpu,
- stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
- stat->s_ntarguvhub, stat->s_ntarguvhub16);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
- stat->s_ntarguvhub8, stat->s_ntarguvhub4,
- stat->s_ntarguvhub2, stat->s_ntarguvhub1,
- stat->s_dtimeout, stat->s_strongnacks);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
- stat->s_retry_messages, stat->s_retriesok,
- stat->s_resets_plug, stat->s_resets_timeout,
- stat->s_giveup, stat->s_stimeout,
- stat->s_busy, stat->s_throttles);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
- stat->s_bau_disabled, stat->s_bau_reenabled,
- stat->s_uv2_wars, stat->s_uv2_wars_hw,
- stat->s_uv2_war_waits, stat->s_enters,
- stat->s_ipifordisabled, stat->s_plugged,
- stat->s_overipilimit, stat->s_giveuplimit,
- stat->s_congested);
-
- /* destination side statistics */
- seq_printf(file,
- "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
- read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
- stat->d_requestee, cycles_2_us(stat->d_time),
- stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
- stat->d_nomsg, stat->d_retries, stat->d_canceled,
- stat->d_nocanceled, stat->d_resets,
- stat->d_rcanceled);
- }
- return 0;
-}
-
-/*
- * Display the tunables thru debugfs
- */
-static ssize_t tunables_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- char *buf;
- int ret;
-
- buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
- "max_concur plugged_delay plugsb4reset timeoutsb4reset",
- "ipi_reset_limit complete_threshold congested_response_us",
- "congested_reps disabled_period giveup_limit",
- max_concurr, plugged_delay, plugsb4reset,
- timeoutsb4reset, ipi_reset_limit, complete_threshold,
- congested_respns_us, congested_reps, disabled_period,
- giveup_limit);
-
- if (!buf)
- return -ENOMEM;
-
- ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
- kfree(buf);
- return ret;
-}
-
-/*
- * handle a write to /proc/sgi_uv/ptc_statistics
- * -1: reset the statistics
- * 0: display meaning of the statistics
- */
-static ssize_t ptc_proc_write(struct file *file, const char __user *user,
- size_t count, loff_t *data)
-{
- int cpu;
- int i;
- int elements;
- long input_arg;
- char optstr[64];
- struct ptc_stats *stat;
-
- if (count == 0 || count > sizeof(optstr))
- return -EINVAL;
- if (copy_from_user(optstr, user, count))
- return -EFAULT;
- optstr[count - 1] = '\0';
-
- if (!strcmp(optstr, "on")) {
- set_bau_on();
- return count;
- } else if (!strcmp(optstr, "off")) {
- set_bau_off();
- return count;
- }
-
- if (strict_strtol(optstr, 10, &input_arg) < 0) {
- printk(KERN_DEBUG "%s is invalid\n", optstr);
- return -EINVAL;
- }
-
- if (input_arg == 0) {
- elements = ARRAY_SIZE(stat_description);
- printk(KERN_DEBUG "# cpu: cpu number\n");
- printk(KERN_DEBUG "Sender statistics:\n");
- for (i = 0; i < elements; i++)
- printk(KERN_DEBUG "%s\n", stat_description[i]);
- } else if (input_arg == -1) {
- for_each_present_cpu(cpu) {
- stat = &per_cpu(ptcstats, cpu);
- memset(stat, 0, sizeof(struct ptc_stats));
- }
- }
-
- return count;
-}
-
-static int local_atoi(const char *name)
-{
- int val = 0;
-
- for (;; name++) {
- switch (*name) {
- case '0' ... '9':
- val = 10*val+(*name-'0');
- break;
- default:
- return val;
- }
- }
-}
-
-/*
- * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
- * Zero values reset them to defaults.
- */
-static int parse_tunables_write(struct bau_control *bcp, char *instr,
- int count)
-{
- char *p;
- char *q;
- int cnt = 0;
- int val;
- int e = ARRAY_SIZE(tunables);
-
- p = instr + strspn(instr, WHITESPACE);
- q = p;
- for (; *p; p = q + strspn(q, WHITESPACE)) {
- q = p + strcspn(p, WHITESPACE);
- cnt++;
- if (q == p)
- break;
- }
- if (cnt != e) {
- printk(KERN_INFO "bau tunable error: should be %d values\n", e);
- return -EINVAL;
- }
-
- p = instr + strspn(instr, WHITESPACE);
- q = p;
- for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
- q = p + strcspn(p, WHITESPACE);
- val = local_atoi(p);
- switch (cnt) {
- case 0:
- if (val == 0) {
- max_concurr = MAX_BAU_CONCURRENT;
- max_concurr_const = MAX_BAU_CONCURRENT;
- continue;
- }
- if (val < 1 || val > bcp->cpus_in_uvhub) {
- printk(KERN_DEBUG
- "Error: BAU max concurrent %d is invalid\n",
- val);
- return -EINVAL;
- }
- max_concurr = val;
- max_concurr_const = val;
- continue;
- default:
- if (val == 0)
- *tunables[cnt].tunp = tunables[cnt].deflt;
- else
- *tunables[cnt].tunp = val;
- continue;
- }
- if (q == p)
- break;
- }
- return 0;
-}
-
-/*
- * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
- */
-static ssize_t tunables_write(struct file *file, const char __user *user,
- size_t count, loff_t *data)
-{
- int cpu;
- int ret;
- char instr[100];
- struct bau_control *bcp;
-
- if (count == 0 || count > sizeof(instr)-1)
- return -EINVAL;
- if (copy_from_user(instr, user, count))
- return -EFAULT;
-
- instr[count] = '\0';
-
- cpu = get_cpu();
- bcp = &per_cpu(bau_control, cpu);
- ret = parse_tunables_write(bcp, instr, count);
- put_cpu();
- if (ret)
- return ret;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->max_concurr = max_concurr;
- bcp->max_concurr_const = max_concurr;
- bcp->plugged_delay = plugged_delay;
- bcp->plugsb4reset = plugsb4reset;
- bcp->timeoutsb4reset = timeoutsb4reset;
- bcp->ipi_reset_limit = ipi_reset_limit;
- bcp->complete_threshold = complete_threshold;
- bcp->cong_response_us = congested_respns_us;
- bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
- }
- return count;
-}
-
-static const struct seq_operations uv_ptc_seq_ops = {
- .start = ptc_seq_start,
- .next = ptc_seq_next,
- .stop = ptc_seq_stop,
- .show = ptc_seq_show
-};
-
-static int ptc_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &uv_ptc_seq_ops);
-}
-
-static int tunables_open(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-static const struct file_operations proc_uv_ptc_operations = {
- .open = ptc_proc_open,
- .read = seq_read,
- .write = ptc_proc_write,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations tunables_fops = {
- .open = tunables_open,
- .read = tunables_read,
- .write = tunables_write,
- .llseek = default_llseek,
-};
-
-static int __init uv_ptc_init(void)
-{
- struct proc_dir_entry *proc_uv_ptc;
-
- if (!is_uv_system())
- return 0;
-
- proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
- &proc_uv_ptc_operations);
- if (!proc_uv_ptc) {
- printk(KERN_ERR "unable to create %s proc entry\n",
- UV_PTC_BASENAME);
- return -EINVAL;
- }
-
- tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
- if (!tunables_dir) {
- printk(KERN_ERR "unable to create debugfs directory %s\n",
- UV_BAU_TUNABLES_DIR);
- return -EINVAL;
- }
- tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
- tunables_dir, NULL, &tunables_fops);
- if (!tunables_file) {
- printk(KERN_ERR "unable to create debugfs file %s\n",
- UV_BAU_TUNABLES_FILE);
- return -EINVAL;
- }
- return 0;
-}
-
-/*
- * Initialize the sending side's sending buffers.
- */
-static void activation_descriptor_init(int node, int pnode, int base_pnode)
-{
- int i;
- int cpu;
- int uv1 = 0;
- unsigned long gpa;
- unsigned long m;
- unsigned long n;
- size_t dsize;
- struct bau_desc *bau_desc;
- struct bau_desc *bd2;
- struct uv1_bau_msg_header *uv1_hdr;
- struct uv2_bau_msg_header *uv2_hdr;
- struct bau_control *bcp;
-
- /*
- * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
- * per cpu; and one per cpu on the uvhub (ADP_SZ)
- */
- dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
- bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
- BUG_ON(!bau_desc);
-
- gpa = uv_gpa(bau_desc);
- n = uv_gpa_to_gnode(gpa);
- m = uv_gpa_to_offset(gpa);
- if (is_uv1_hub())
- uv1 = 1;
-
- /* the 14-bit pnode */
- write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
- /*
- * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
- * cpu even though we only use the first one; one descriptor can
- * describe a broadcast to 256 uv hubs.
- */
- for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
- memset(bd2, 0, sizeof(struct bau_desc));
- if (uv1) {
- uv1_hdr = &bd2->header.uv1_hdr;
- uv1_hdr->swack_flag = 1;
- /*
- * The base_dest_nasid set in the message header
- * is the nasid of the first uvhub in the partition.
- * The bit map will indicate destination pnode numbers
- * relative to that base. They may not be consecutive
- * if nasid striding is being used.
- */
- uv1_hdr->base_dest_nasid =
- UV_PNODE_TO_NASID(base_pnode);
- uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv1_hdr->command = UV_NET_ENDPOINT_INTD;
- uv1_hdr->int_both = 1;
- /*
- * all others need to be set to zero:
- * fairness chaining multilevel count replied_to
- */
- } else {
- /*
- * BIOS uses legacy mode, but UV2 hardware always
- * uses native mode for selective broadcasts.
- */
- uv2_hdr = &bd2->header.uv2_hdr;
- uv2_hdr->swack_flag = 1;
- uv2_hdr->base_dest_nasid =
- UV_PNODE_TO_NASID(base_pnode);
- uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv2_hdr->command = UV_NET_ENDPOINT_INTD;
- }
- }
- for_each_present_cpu(cpu) {
- if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
- continue;
- bcp = &per_cpu(bau_control, cpu);
- bcp->descriptor_base = bau_desc;
- }
-}
-
-/*
- * initialize the destination side's receiving buffers
- * entered for each uvhub in the partition
- * - node is first node (kernel memory notion) on the uvhub
- * - pnode is the uvhub's physical identifier
- */
-static void pq_init(int node, int pnode)
-{
- int cpu;
- size_t plsize;
- char *cp;
- void *vp;
- unsigned long pn;
- unsigned long first;
- unsigned long pn_first;
- unsigned long last;
- struct bau_pq_entry *pqp;
- struct bau_control *bcp;
-
- plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
- vp = kmalloc_node(plsize, GFP_KERNEL, node);
- pqp = (struct bau_pq_entry *)vp;
- BUG_ON(!pqp);
-
- cp = (char *)pqp + 31;
- pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
-
- for_each_present_cpu(cpu) {
- if (pnode != uv_cpu_to_pnode(cpu))
- continue;
- /* for every cpu on this pnode: */
- bcp = &per_cpu(bau_control, cpu);
- bcp->queue_first = pqp;
- bcp->bau_msg_head = pqp;
- bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
- }
- /*
- * need the gnode of where the memory was really allocated
- */
- pn = uv_gpa_to_gnode(uv_gpa(pqp));
- first = uv_physnodeaddr(pqp);
- pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
- last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
- write_mmr_payload_first(pnode, pn_first);
- write_mmr_payload_tail(pnode, first);
- write_mmr_payload_last(pnode, last);
- write_gmmr_sw_ack(pnode, 0xffffUL);
-
- /* in effect, all msg_type's are set to MSG_NOOP */
- memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
-}
-
-/*
- * Initialization of each UV hub's structures
- */
-static void __init init_uvhub(int uvhub, int vector, int base_pnode)
-{
- int node;
- int pnode;
- unsigned long apicid;
-
- node = uvhub_to_first_node(uvhub);
- pnode = uv_blade_to_pnode(uvhub);
-
- activation_descriptor_init(node, pnode, base_pnode);
-
- pq_init(node, pnode);
- /*
- * The below initialization can't be in firmware because the
- * messaging IRQ will be determined by the OS.
- */
- apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
- write_mmr_data_config(pnode, ((apicid << 32) | vector));
-}
-
-/*
- * We will set BAU_MISC_CONTROL with a timeout period.
- * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
- * So the destination timeout period has to be calculated from them.
- */
-static int calculate_destination_timeout(void)
-{
- unsigned long mmr_image;
- int mult1;
- int mult2;
- int index;
- int base;
- int ret;
- unsigned long ts_ns;
-
- if (is_uv1_hub()) {
- mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
- mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
- index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
- mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
- mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
- ts_ns = timeout_base_ns[index];
- ts_ns *= (mult1 * mult2);
- ret = ts_ns / 1000;
- } else {
- /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
- mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
- mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
- if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
- base = 80;
- else
- base = 10;
- mult1 = mmr_image & UV2_ACK_MASK;
- ret = mult1 * base;
- }
- return ret;
-}
-
-static void __init init_per_cpu_tunables(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->baudisabled = 0;
- if (nobau)
- bcp->nobau = 1;
- bcp->statp = &per_cpu(ptcstats, cpu);
- /* time interval to catch a hardware stay-busy bug */
- bcp->timeout_interval = usec_2_cycles(2*timeout_us);
- bcp->max_concurr = max_concurr;
- bcp->max_concurr_const = max_concurr;
- bcp->plugged_delay = plugged_delay;
- bcp->plugsb4reset = plugsb4reset;
- bcp->timeoutsb4reset = timeoutsb4reset;
- bcp->ipi_reset_limit = ipi_reset_limit;
- bcp->complete_threshold = complete_threshold;
- bcp->cong_response_us = congested_respns_us;
- bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
- spin_lock_init(&bcp->queue_lock);
- spin_lock_init(&bcp->uvhub_lock);
- spin_lock_init(&bcp->disable_lock);
- }
-}
-
-/*
- * Scan all cpus to collect blade and socket summaries.
- */
-static int __init get_cpu_topology(int base_pnode,
- struct uvhub_desc *uvhub_descs,
- unsigned char *uvhub_mask)
-{
- int cpu;
- int pnode;
- int uvhub;
- int socket;
- struct bau_control *bcp;
- struct uvhub_desc *bdp;
- struct socket_desc *sdp;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
-
- memset(bcp, 0, sizeof(struct bau_control));
-
- pnode = uv_cpu_hub_info(cpu)->pnode;
- if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
- printk(KERN_EMERG
- "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
- cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
- return 1;
- }
-
- bcp->osnode = cpu_to_node(cpu);
- bcp->partition_base_pnode = base_pnode;
-
- uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
- *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
- bdp = &uvhub_descs[uvhub];
-
- bdp->num_cpus++;
- bdp->uvhub = uvhub;
- bdp->pnode = pnode;
-
- /* kludge: 'assuming' one node per socket, and assuming that
- disabling a socket just leaves a gap in node numbers */
- socket = bcp->osnode & 1;
- bdp->socket_mask |= (1 << socket);
- sdp = &bdp->socket[socket];
- sdp->cpu_number[sdp->num_cpus] = cpu;
- sdp->num_cpus++;
- if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
- printk(KERN_EMERG "%d cpus per socket invalid\n",
- sdp->num_cpus);
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * Each socket is to get a local array of pnodes/hubs.
- */
-static void make_per_cpu_thp(struct bau_control *smaster)
-{
- int cpu;
- size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
-
- smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
- memset(smaster->thp, 0, hpsz);
- for_each_present_cpu(cpu) {
- smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
- smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
- }
-}
-
-/*
- * Each uvhub is to get a local cpumask.
- */
-static void make_per_hub_cpumask(struct bau_control *hmaster)
-{
- int sz = sizeof(cpumask_t);
-
- hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode);
-}
-
-/*
- * Initialize all the per_cpu information for the cpu's on a given socket,
- * given what has been gathered into the socket_desc struct.
- * And reports the chosen hub and socket masters back to the caller.
- */
-static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
- struct bau_control **smasterp,
- struct bau_control **hmasterp)
-{
- int i;
- int cpu;
- struct bau_control *bcp;
-
- for (i = 0; i < sdp->num_cpus; i++) {
- cpu = sdp->cpu_number[i];
- bcp = &per_cpu(bau_control, cpu);
- bcp->cpu = cpu;
- if (i == 0) {
- *smasterp = bcp;
- if (!(*hmasterp))
- *hmasterp = bcp;
- }
- bcp->cpus_in_uvhub = bdp->num_cpus;
- bcp->cpus_in_socket = sdp->num_cpus;
- bcp->socket_master = *smasterp;
- bcp->uvhub = bdp->uvhub;
- if (is_uv1_hub())
- bcp->uvhub_version = 1;
- else if (is_uv2_hub())
- bcp->uvhub_version = 2;
- else {
- printk(KERN_EMERG "uvhub version not 1 or 2\n");
- return 1;
- }
- bcp->uvhub_master = *hmasterp;
- bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
- if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
- printk(KERN_EMERG "%d cpus per uvhub invalid\n",
- bcp->uvhub_cpu);
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * Summarize the blade and socket topology into the per_cpu structures.
- */
-static int __init summarize_uvhub_sockets(int nuvhubs,
- struct uvhub_desc *uvhub_descs,
- unsigned char *uvhub_mask)
-{
- int socket;
- int uvhub;
- unsigned short socket_mask;
-
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- struct uvhub_desc *bdp;
- struct bau_control *smaster = NULL;
- struct bau_control *hmaster = NULL;
-
- if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
- continue;
-
- bdp = &uvhub_descs[uvhub];
- socket_mask = bdp->socket_mask;
- socket = 0;
- while (socket_mask) {
- struct socket_desc *sdp;
- if ((socket_mask & 1)) {
- sdp = &bdp->socket[socket];
- if (scan_sock(sdp, bdp, &smaster, &hmaster))
- return 1;
- make_per_cpu_thp(smaster);
- }
- socket++;
- socket_mask = (socket_mask >> 1);
- }
- make_per_hub_cpumask(hmaster);
- }
- return 0;
-}
-
-/*
- * initialize the bau_control structure for each cpu
- */
-static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
-{
- unsigned char *uvhub_mask;
- void *vp;
- struct uvhub_desc *uvhub_descs;
-
- timeout_us = calculate_destination_timeout();
-
- vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
- uvhub_descs = (struct uvhub_desc *)vp;
- memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
- uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
-
- if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
- goto fail;
-
- if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
- goto fail;
-
- kfree(uvhub_descs);
- kfree(uvhub_mask);
- init_per_cpu_tunables();
- return 0;
-
-fail:
- kfree(uvhub_descs);
- kfree(uvhub_mask);
- return 1;
-}
-
-/*
- * Initialization of BAU-related structures
- */
-static int __init uv_bau_init(void)
-{
- int uvhub;
- int pnode;
- int nuvhubs;
- int cur_cpu;
- int cpus;
- int vector;
- cpumask_var_t *mask;
-
- if (!is_uv_system())
- return 0;
-
- for_each_possible_cpu(cur_cpu) {
- mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
- zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
- }
-
- nuvhubs = uv_num_possible_blades();
- congested_cycles = usec_2_cycles(congested_respns_us);
-
- uv_base_pnode = 0x7fffffff;
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- cpus = uv_blade_nr_possible_cpus(uvhub);
- if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
- uv_base_pnode = uv_blade_to_pnode(uvhub);
- }
-
- enable_timeouts();
-
- if (init_per_cpu(nuvhubs, uv_base_pnode)) {
- set_bau_off();
- nobau_perm = 1;
- return 0;
- }
-
- vector = UV_BAU_MESSAGE;
- for_each_possible_blade(uvhub)
- if (uv_blade_nr_possible_cpus(uvhub))
- init_uvhub(uvhub, vector, uv_base_pnode);
-
- alloc_intr_gate(vector, uv_bau_message_intr1);
-
- for_each_possible_blade(uvhub) {
- if (uv_blade_nr_possible_cpus(uvhub)) {
- unsigned long val;
- unsigned long mmr;
- pnode = uv_blade_to_pnode(uvhub);
- /* INIT the bau */
- val = 1L << 63;
- write_gmmr_activation(pnode, val);
- mmr = 1; /* should be 1 to broadcast to both sockets */
- if (!is_uv1_hub())
- write_mmr_data_broadcast(pnode, mmr);
- }
- }
-
- return 0;
-}
-core_initcall(uv_bau_init);
-fs_initcall(uv_ptc_init);
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index acf7752da952..4f200ac96ce0 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -8,226 +8,169 @@
* Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/irq.h>
+#include <asm/irqdomain.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>
/* MMR offset and pnode of hub sourcing interrupts for a given irq */
-struct uv_irq_2_mmr_pnode{
- struct rb_node list;
+struct uv_irq_2_mmr_pnode {
unsigned long offset;
int pnode;
- int irq;
};
-static DEFINE_SPINLOCK(uv_irq_lock);
-static struct rb_root uv_irq_root;
+static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
-static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ entry->dest_mode = apic->dest_mode_logical;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = cfg->dest_apicid;
+
+ uv_write_global_mmr64(info->pnode, info->offset, mmr_value);
+}
static void uv_noop(struct irq_data *data) { }
-static void uv_ack_apic(struct irq_data *data)
+static int
+uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- ack_APIC_irq();
+ struct irq_data *parent = data->parent_data;
+ struct irq_cfg *cfg = irqd_cfg(data);
+ int ret;
+
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret >= 0) {
+ uv_program_mmr(cfg, data->chip_data);
+ vector_schedule_cleanup(cfg);
+ }
+
+ return ret;
}
static struct irq_chip uv_irq_chip = {
.name = "UV-CORE",
.irq_mask = uv_noop,
.irq_unmask = uv_noop,
- .irq_eoi = uv_ack_apic,
+ .irq_eoi = apic_ack_irq,
.irq_set_affinity = uv_set_irq_affinity,
};
-/*
- * Add offset and pnode information of the hub sourcing interrupts to the
- * rb tree for a specific irq.
- */
-static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
- struct rb_node **link = &uv_irq_root.rb_node;
- struct rb_node *parent = NULL;
- struct uv_irq_2_mmr_pnode *n;
- struct uv_irq_2_mmr_pnode *e;
- unsigned long irqflags;
-
- n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
- uv_blade_to_memory_nid(blade));
- if (!n)
+ struct uv_irq_2_mmr_pnode *chip_data;
+ struct irq_alloc_info *info = arg;
+ struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
+ int ret;
+
+ if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV)
+ return -EINVAL;
+
+ chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL,
+ irq_data_get_node(irq_data));
+ if (!chip_data)
return -ENOMEM;
- n->irq = irq;
- n->offset = offset;
- n->pnode = uv_blade_to_pnode(blade);
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- /* Find the right place in the rbtree: */
- while (*link) {
- parent = *link;
- e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
-
- if (unlikely(irq == e->irq)) {
- /* irq entry exists */
- e->pnode = uv_blade_to_pnode(blade);
- e->offset = offset;
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- kfree(n);
- return 0;
- }
-
- if (irq < e->irq)
- link = &(*link)->rb_left;
- else
- link = &(*link)->rb_right;
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret >= 0) {
+ if (info->uv.limit == UV_AFFINITY_CPU)
+ irq_set_status_flags(virq, IRQ_NO_BALANCING);
+
+ chip_data->pnode = uv_blade_to_pnode(info->uv.blade);
+ chip_data->offset = info->uv.offset;
+ irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data,
+ handle_percpu_irq, NULL, info->uv.name);
+ } else {
+ kfree(chip_data);
}
- /* Insert the node into the rbtree. */
- rb_link_node(&n->list, parent, link);
- rb_insert_color(&n->list, &uv_irq_root);
-
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return 0;
+ return ret;
}
-/* Retrieve offset and pnode information from the rb tree for a specific irq */
-int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- struct uv_irq_2_mmr_pnode *e;
- struct rb_node *n;
- unsigned long irqflags;
-
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- n = uv_irq_root.rb_node;
- while (n) {
- e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
-
- if (e->irq == irq) {
- *offset = e->offset;
- *pnode = e->pnode;
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return 0;
- }
-
- if (irq < e->irq)
- n = n->rb_left;
- else
- n = n->rb_right;
- }
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- return -1;
+ struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
+
+ BUG_ON(nr_irqs != 1);
+ kfree(irq_data->chip_data);
+ irq_clear_status_flags(virq, IRQ_NO_BALANCING);
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
-static int
-arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset, int limit)
+static int uv_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool reserve)
{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg = irq_get_chip_data(irq);
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode, err;
- unsigned int dest;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
- sizeof(unsigned long));
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
- if (err != 0)
- return err;
-
- if (limit == UV_AFFINITY_CPU)
- irq_set_status_flags(irq, IRQ_NO_BALANCING);
- else
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-
- irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
+ uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
+ return 0;
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
-static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+static void uv_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
- sizeof(unsigned long));
-
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+ uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
}
-static int
-uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest;
- unsigned long mmr_value, mmr_offset;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;
-
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- /* Get previously stored MMR and pnode of hub sourcing interrupts */
- if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
- return -1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+static const struct irq_domain_ops uv_domain_ops = {
+ .alloc = uv_domain_alloc,
+ .free = uv_domain_free,
+ .activate = uv_domain_activate,
+ .deactivate = uv_domain_deactivate,
+};
- return IRQ_SET_MASK_OK_NOCOPY;
+static struct irq_domain *uv_get_irq_domain(void)
+{
+ static struct irq_domain *uv_domain;
+ static DEFINE_MUTEX(uv_lock);
+ struct fwnode_handle *fn;
+
+ mutex_lock(&uv_lock);
+ if (uv_domain)
+ goto out;
+
+ fn = irq_domain_alloc_named_fwnode("UV-CORE");
+ if (!fn)
+ goto out;
+
+ uv_domain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, fn,
+ &uv_domain_ops, NULL);
+ if (!uv_domain)
+ irq_domain_free_fwnode(fn);
+out:
+ mutex_unlock(&uv_lock);
+
+ return uv_domain;
}
/*
@@ -238,21 +181,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
- int irq, ret;
+ struct irq_alloc_info info;
+ struct irq_domain *domain = uv_get_irq_domain();
- irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
-
- if (irq <= 0)
- return -EBUSY;
+ if (!domain)
+ return -ENOMEM;
- ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
- limit);
- if (ret == irq)
- uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
- else
- destroy_irq(irq);
+ init_irq_alloc_info(&info, cpumask_of(cpu));
+ info.type = X86_IRQ_ALLOC_TYPE_UV;
+ info.uv.limit = limit;
+ info.uv.blade = mmr_blade;
+ info.uv.offset = mmr_offset;
+ info.uv.name = irq_name;
- return ret;
+ return irq_domain_alloc_irqs(domain, 1,
+ uv_blade_to_memory_nid(mmr_blade), &info);
}
EXPORT_SYMBOL_GPL(uv_setup_irq);
@@ -265,26 +208,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*/
void uv_teardown_irq(unsigned int irq)
{
- struct uv_irq_2_mmr_pnode *e;
- struct rb_node *n;
- unsigned long irqflags;
-
- spin_lock_irqsave(&uv_irq_lock, irqflags);
- n = uv_irq_root.rb_node;
- while (n) {
- e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
- if (e->irq == irq) {
- arch_disable_uv_irq(e->pnode, e->offset);
- rb_erase(n, &uv_irq_root);
- kfree(e);
- break;
- }
- if (irq < e->irq)
- n = n->rb_left;
- else
- n = n->rb_right;
- }
- spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- destroy_irq(irq);
+ irq_domain_free_irqs(irq, 1);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
new file mode 100644
index 000000000000..5c50e550ab63
--- /dev/null
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -0,0 +1,1102 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SGI NMI support routines
+ *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Mike Travis
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/kdb.h>
+#include <linux/kexec.h>
+#include <linux/kgdb.h>
+#include <linux/moduleparam.h>
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/clocksource.h>
+
+#include <asm/apic.h>
+#include <asm/current.h>
+#include <asm/kdebug.h>
+#include <asm/local64.h>
+#include <asm/nmi.h>
+#include <asm/reboot.h>
+#include <asm/traps.h>
+#include <asm/uv/uv.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+/*
+ * UV handler for NMI
+ *
+ * Handle system-wide NMI events generated by the global 'power nmi' command.
+ *
+ * Basic operation is to field the NMI interrupt on each CPU and wait
+ * until all CPU's have arrived into the nmi handler. If some CPU's do not
+ * make it into the handler, try and force them in with the IPI(NMI) signal.
+ *
+ * We also have to lessen UV Hub MMR accesses as much as possible as this
+ * disrupts the UV Hub's primary mission of directing NumaLink traffic and
+ * can cause system problems to occur.
+ *
+ * To do this we register our primary NMI notifier on the NMI_UNKNOWN
+ * chain. This reduces the number of false NMI calls when the perf
+ * tools are running which generate an enormous number of NMIs per
+ * second (~4M/s for 1024 CPU threads). Our secondary NMI handler is
+ * very short as it only checks that if it has been "pinged" with the
+ * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR.
+ *
+ */
+
+static struct uv_hub_nmi_s **uv_hub_nmi_list;
+
+DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
+
+/* Newer SMM NMI handler, not present in all systems */
+static unsigned long uvh_nmi_mmrx; /* UVH_EVENT_OCCURRED0/1 */
+static unsigned long uvh_nmi_mmrx_clear; /* UVH_EVENT_OCCURRED0/1_ALIAS */
+static int uvh_nmi_mmrx_shift; /* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */
+static char *uvh_nmi_mmrx_type; /* "EXTIO_INT0" */
+
+/* Non-zero indicates newer SMM NMI handler present */
+static unsigned long uvh_nmi_mmrx_supported; /* UVH_EXTIO_INT0_BROADCAST */
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+static unsigned long uvh_nmi_mmrx_req; /* UVH_BIOS_KERNEL_MMR_ALIAS_2 */
+static int uvh_nmi_mmrx_req_shift; /* 62 */
+
+/* UV hubless values */
+#define NMI_CONTROL_PORT 0x70
+#define NMI_DUMMY_PORT 0x71
+#define PAD_OWN_GPP_D_0 0x2c
+#define GPI_NMI_STS_GPP_D_0 0x164
+#define GPI_NMI_ENA_GPP_D_0 0x174
+#define STS_GPP_D_0_MASK 0x1
+#define PAD_CFG_DW0_GPP_D_0 0x4c0
+#define GPIROUTNMI (1ul << 17)
+#define PCH_PCR_GPIO_1_BASE 0xfdae0000ul
+#define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset))
+
+static u64 *pch_base;
+static unsigned long nmi_mmr;
+static unsigned long nmi_mmr_clear;
+static unsigned long nmi_mmr_pending;
+
+static atomic_t uv_in_nmi;
+static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
+static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
+static atomic_t uv_nmi_slave_continue;
+static cpumask_var_t uv_nmi_cpu_mask;
+
+static atomic_t uv_nmi_kexec_failed;
+
+/* Values for uv_nmi_slave_continue */
+#define SLAVE_CLEAR 0
+#define SLAVE_CONTINUE 1
+#define SLAVE_EXIT 2
+
+/*
+ * Default is all stack dumps go to the console and buffer.
+ * Lower level to send to log buffer only.
+ */
+static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
+module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
+
+/*
+ * The following values show statistics on how perf events are affecting
+ * this system.
+ */
+static int param_get_local64(char *buffer, const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
+}
+
+static int param_set_local64(const char *val, const struct kernel_param *kp)
+{
+ /* Clear on any write */
+ local64_set((local64_t *)kp->arg, 0);
+ return 0;
+}
+
+static const struct kernel_param_ops param_ops_local64 = {
+ .get = param_get_local64,
+ .set = param_set_local64,
+};
+#define param_check_local64(name, p) __param_check(name, p, local64_t)
+
+static local64_t uv_nmi_count;
+module_param_named(nmi_count, uv_nmi_count, local64, 0644);
+
+static local64_t uv_nmi_misses;
+module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
+
+static local64_t uv_nmi_ping_count;
+module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
+
+static local64_t uv_nmi_ping_misses;
+module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
+
+/*
+ * Following values allow tuning for large systems under heavy loading
+ */
+static int uv_nmi_initial_delay = 100;
+module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
+
+static int uv_nmi_slave_delay = 100;
+module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
+
+static int uv_nmi_loop_delay = 100;
+module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
+
+static int uv_nmi_trigger_delay = 10000;
+module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
+
+static int uv_nmi_wait_count = 100;
+module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
+
+static int uv_nmi_retry_count = 500;
+module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
+
+static bool uv_pch_intr_enable = true;
+static bool uv_pch_intr_now_enabled;
+module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644);
+
+static bool uv_pch_init_enable = true;
+module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644);
+
+static int uv_nmi_debug;
+module_param_named(debug, uv_nmi_debug, int, 0644);
+
+#define nmi_debug(fmt, ...) \
+ do { \
+ if (uv_nmi_debug) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+/* Valid NMI Actions */
+enum action_t {
+ nmi_act_kdump,
+ nmi_act_dump,
+ nmi_act_ips,
+ nmi_act_kdb,
+ nmi_act_kgdb,
+ nmi_act_health,
+ nmi_act_max
+};
+
+static const char * const actions[nmi_act_max] = {
+ [nmi_act_kdump] = "kdump",
+ [nmi_act_dump] = "dump",
+ [nmi_act_ips] = "ips",
+ [nmi_act_kdb] = "kdb",
+ [nmi_act_kgdb] = "kgdb",
+ [nmi_act_health] = "health",
+};
+
+static const char * const actions_desc[nmi_act_max] = {
+ [nmi_act_kdump] = "do kernel crash dump",
+ [nmi_act_dump] = "dump process stack for each cpu",
+ [nmi_act_ips] = "dump Inst Ptr info for each cpu",
+ [nmi_act_kdb] = "enter KDB (needs kgdboc= assignment)",
+ [nmi_act_kgdb] = "enter KGDB (needs gdb target remote)",
+ [nmi_act_health] = "check if CPUs respond to NMI",
+};
+
+static enum action_t uv_nmi_action = nmi_act_dump;
+
+static int param_get_action(char *buffer, const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%s\n", actions[uv_nmi_action]);
+}
+
+static int param_set_action(const char *val, const struct kernel_param *kp)
+{
+ int i, n = ARRAY_SIZE(actions);
+
+ i = sysfs_match_string(actions, val);
+ if (i >= 0) {
+ uv_nmi_action = i;
+ pr_info("UV: New NMI action:%s\n", actions[i]);
+ return 0;
+ }
+
+ pr_err("UV: Invalid NMI action. Valid actions are:\n");
+ for (i = 0; i < n; i++)
+ pr_err("UV: %-8s - %s\n", actions[i], actions_desc[i]);
+
+ return -EINVAL;
+}
+
+static const struct kernel_param_ops param_ops_action = {
+ .get = param_get_action,
+ .set = param_set_action,
+};
+#define param_check_action(name, p) __param_check(name, p, enum action_t)
+
+module_param_named(action, uv_nmi_action, action, 0644);
+
+/* Setup which NMI support is present in system */
+static void uv_nmi_setup_mmrs(void)
+{
+ bool new_nmi_method_only = false;
+
+ /* First determine arch specific MMRs to handshake with BIOS */
+ if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { /* UV2,3,4 setup */
+ uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
+ uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
+ uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
+ uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0";
+
+ uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+ uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+ uvh_nmi_mmrx_req_shift = 62;
+
+ } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */
+ uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
+ uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
+ uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
+ uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
+
+ new_nmi_method_only = true; /* Newer nmi always valid on UV5+ */
+ uvh_nmi_mmrx_req = 0; /* no request bit to clear */
+
+ } else {
+ pr_err("UV:%s:NMI support not available on this system\n", __func__);
+ return;
+ }
+
+ /* Then find out if new NMI is supported */
+ if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) {
+ if (uvh_nmi_mmrx_req)
+ uv_write_local_mmr(uvh_nmi_mmrx_req,
+ 1UL << uvh_nmi_mmrx_req_shift);
+ nmi_mmr = uvh_nmi_mmrx;
+ nmi_mmr_clear = uvh_nmi_mmrx_clear;
+ nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
+ pr_info("UV: SMI NMI support: %s\n", uvh_nmi_mmrx_type);
+ } else {
+ nmi_mmr = UVH_NMI_MMR;
+ nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
+ nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
+ pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
+ }
+}
+
+/* Read NMI MMR and check if NMI flag was set by BMC. */
+static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
+{
+ hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
+ atomic_inc(&hub_nmi->read_mmr_count);
+ return !!(hub_nmi->nmi_value & nmi_mmr_pending);
+}
+
+static inline void uv_local_mmr_clear_nmi(void)
+{
+ uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
+}
+
+/*
+ * UV hubless NMI handler functions
+ */
+static inline void uv_reassert_nmi(void)
+{
+ /* (from arch/x86/include/asm/mach_traps.h) */
+ outb(0x8f, NMI_CONTROL_PORT);
+ inb(NMI_DUMMY_PORT); /* dummy read */
+ outb(0x0f, NMI_CONTROL_PORT);
+ inb(NMI_DUMMY_PORT); /* dummy read */
+}
+
+static void uv_init_hubless_pch_io(int offset, int mask, int data)
+{
+ int *addr = PCH_PCR_GPIO_ADDRESS(offset);
+ int readd = readl(addr);
+
+ if (mask) { /* OR in new data */
+ int writed = (readd & ~mask) | data;
+
+ nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n",
+ addr, readd, ~mask, data, writed);
+ writel(writed, addr);
+ } else if (readd & data) { /* clear status bit */
+ nmi_debug("UV:PCH: %p = %x\n", addr, data);
+ writel(data, addr);
+ }
+
+ (void)readl(addr); /* flush write data */
+}
+
+static void uv_nmi_setup_hubless_intr(void)
+{
+ uv_pch_intr_now_enabled = uv_pch_intr_enable;
+
+ uv_init_hubless_pch_io(
+ PAD_CFG_DW0_GPP_D_0, GPIROUTNMI,
+ uv_pch_intr_now_enabled ? GPIROUTNMI : 0);
+
+ nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n",
+ uv_pch_intr_now_enabled ? "enabled" : "disabled");
+}
+
+static struct init_nmi {
+ unsigned int offset;
+ unsigned int mask;
+ unsigned int data;
+} init_nmi[] = {
+ { /* HOSTSW_OWN_GPP_D_0 */
+ .offset = 0x84,
+ .mask = 0x1,
+ .data = 0x0, /* ACPI Mode */
+ },
+
+/* Clear status: */
+ { /* GPI_INT_STS_GPP_D_0 */
+ .offset = 0x104,
+ .mask = 0x0,
+ .data = 0x1, /* Clear Status */
+ },
+ { /* GPI_GPE_STS_GPP_D_0 */
+ .offset = 0x124,
+ .mask = 0x0,
+ .data = 0x1, /* Clear Status */
+ },
+ { /* GPI_SMI_STS_GPP_D_0 */
+ .offset = 0x144,
+ .mask = 0x0,
+ .data = 0x1, /* Clear Status */
+ },
+ { /* GPI_NMI_STS_GPP_D_0 */
+ .offset = 0x164,
+ .mask = 0x0,
+ .data = 0x1, /* Clear Status */
+ },
+
+/* Disable interrupts: */
+ { /* GPI_INT_EN_GPP_D_0 */
+ .offset = 0x114,
+ .mask = 0x1,
+ .data = 0x0, /* Disable interrupt generation */
+ },
+ { /* GPI_GPE_EN_GPP_D_0 */
+ .offset = 0x134,
+ .mask = 0x1,
+ .data = 0x0, /* Disable interrupt generation */
+ },
+ { /* GPI_SMI_EN_GPP_D_0 */
+ .offset = 0x154,
+ .mask = 0x1,
+ .data = 0x0, /* Disable interrupt generation */
+ },
+ { /* GPI_NMI_EN_GPP_D_0 */
+ .offset = 0x174,
+ .mask = 0x1,
+ .data = 0x0, /* Disable interrupt generation */
+ },
+
+/* Setup GPP_D_0 Pad Config: */
+ { /* PAD_CFG_DW0_GPP_D_0 */
+ .offset = 0x4c0,
+ .mask = 0xffffffff,
+ .data = 0x82020100,
+/*
+ * 31:30 Pad Reset Config (PADRSTCFG): = 2h # PLTRST# (default)
+ *
+ * 29 RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly
+ * from RX buffer (default)
+ *
+ * 28 RX Raw Override to '1' (RXRAW1): = 0 # No Override
+ *
+ * 26:25 RX Level/Edge Configuration (RXEVCFG):
+ * = 0h # Level
+ * = 1h # Edge
+ *
+ * 23 RX Invert (RXINV): = 0 # No Inversion (signal active high)
+ *
+ * 20 GPIO Input Route IOxAPIC (GPIROUTIOXAPIC):
+ * = 0 # Routing does not cause peripheral IRQ...
+ * # (we want an NMI not an IRQ)
+ *
+ * 19 GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI.
+ * 18 GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI.
+ * 17 GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI.
+ *
+ * 11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad.
+ * 9 GPIO RX Disable (GPIORXDIS):
+ * = 0 # Enable the input buffer (active low enable)
+ *
+ * 8 GPIO TX Disable (GPIOTXDIS):
+ * = 1 # Disable the output buffer; i.e. Hi-Z
+ *
+ * 1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state..
+ * 0 GPIO TX State (GPIOTXSTATE):
+ * = 0 # (Leave at default)
+ */
+ },
+
+/* Pad Config DW1 */
+ { /* PAD_CFG_DW1_GPP_D_0 */
+ .offset = 0x4c4,
+ .mask = 0x3c00,
+ .data = 0, /* Termination = none (default) */
+ },
+};
+
+static void uv_init_hubless_pch_d0(void)
+{
+ int i, read;
+
+ read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0);
+ if (read != 0) {
+ pr_info("UV: Hubless NMI already configured\n");
+ return;
+ }
+
+ nmi_debug("UV: Initializing UV Hubless NMI on PCH\n");
+ for (i = 0; i < ARRAY_SIZE(init_nmi); i++) {
+ uv_init_hubless_pch_io(init_nmi[i].offset,
+ init_nmi[i].mask,
+ init_nmi[i].data);
+ }
+}
+
+static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi)
+{
+ int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0);
+ int status = *pstat;
+
+ hub_nmi->nmi_value = status;
+ atomic_inc(&hub_nmi->read_mmr_count);
+
+ if (!(status & STS_GPP_D_0_MASK)) /* Not a UV external NMI */
+ return 0;
+
+ *pstat = STS_GPP_D_0_MASK; /* Is a UV NMI: clear GPP_D_0 status */
+ (void)*pstat; /* Flush write */
+
+ return 1;
+}
+
+static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi)
+{
+ if (hub_nmi->hub_present)
+ return uv_nmi_test_mmr(hub_nmi);
+
+ if (hub_nmi->pch_owner) /* Only PCH owner can check status */
+ return uv_nmi_test_hubless(hub_nmi);
+
+ return -1;
+}
+
+/*
+ * If first CPU in on this hub, set hub_nmi "in_nmi" and "owner" values and
+ * return true. If first CPU in on the system, set global "in_nmi" flag.
+ */
+static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
+{
+ int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
+
+ if (first) {
+ atomic_set(&hub_nmi->cpu_owner, cpu);
+ if (atomic_add_unless(&uv_in_nmi, 1, 1))
+ atomic_set(&uv_nmi_cpu, cpu);
+
+ atomic_inc(&hub_nmi->nmi_count);
+ }
+ return first;
+}
+
+/* Check if this is a system NMI event */
+static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
+{
+ int cpu = smp_processor_id();
+ int nmi = 0;
+ int nmi_detected = 0;
+
+ local64_inc(&uv_nmi_count);
+ this_cpu_inc(uv_cpu_nmi.queries);
+
+ do {
+ nmi = atomic_read(&hub_nmi->in_nmi);
+ if (nmi)
+ break;
+
+ if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
+ nmi_detected = uv_test_nmi(hub_nmi);
+
+ /* Check flag for UV external NMI */
+ if (nmi_detected > 0) {
+ uv_set_in_nmi(cpu, hub_nmi);
+ nmi = 1;
+ break;
+ }
+
+ /* A non-PCH node in a hubless system waits for NMI */
+ else if (nmi_detected < 0)
+ goto slave_wait;
+
+ /* MMR/PCH NMI flag is clear */
+ raw_spin_unlock(&hub_nmi->nmi_lock);
+
+ } else {
+
+ /* Wait a moment for the HUB NMI locker to set flag */
+slave_wait: cpu_relax();
+ udelay(uv_nmi_slave_delay);
+
+ /* Re-check hub in_nmi flag */
+ nmi = atomic_read(&hub_nmi->in_nmi);
+ if (nmi)
+ break;
+ }
+
+ /*
+ * Check if this BMC missed setting the MMR NMI flag (or)
+ * UV hubless system where only PCH owner can check flag
+ */
+ if (!nmi) {
+ nmi = atomic_read(&uv_in_nmi);
+ if (nmi)
+ uv_set_in_nmi(cpu, hub_nmi);
+ }
+
+ /* If we're holding the hub lock, release it now */
+ if (nmi_detected < 0)
+ raw_spin_unlock(&hub_nmi->nmi_lock);
+
+ } while (0);
+
+ if (!nmi)
+ local64_inc(&uv_nmi_misses);
+
+ return nmi;
+}
+
+/* Need to reset the NMI MMR register, but only once per hub. */
+static inline void uv_clear_nmi(int cpu)
+{
+ struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
+
+ if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
+ atomic_set(&hub_nmi->cpu_owner, -1);
+ atomic_set(&hub_nmi->in_nmi, 0);
+ if (hub_nmi->hub_present)
+ uv_local_mmr_clear_nmi();
+ else
+ uv_reassert_nmi();
+ raw_spin_unlock(&hub_nmi->nmi_lock);
+ }
+}
+
+/* Ping non-responding CPU's attempting to force them into the NMI handler */
+static void uv_nmi_nr_cpus_ping(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, uv_nmi_cpu_mask)
+ uv_cpu_nmi_per(cpu).pinging = 1;
+
+ __apic_send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
+}
+
+/* Clean up flags for CPU's that ignored both NMI and ping */
+static void uv_nmi_cleanup_mask(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, uv_nmi_cpu_mask) {
+ uv_cpu_nmi_per(cpu).pinging = 0;
+ uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT;
+ cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
+ }
+}
+
+/* Loop waiting as CPU's enter NMI handler */
+static int uv_nmi_wait_cpus(int first)
+{
+ int i, j, k, n = num_online_cpus();
+ int last_k = 0, waiting = 0;
+ int cpu = smp_processor_id();
+
+ if (first) {
+ cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
+ k = 0;
+ } else {
+ k = n - cpumask_weight(uv_nmi_cpu_mask);
+ }
+
+ /* PCH NMI causes only one CPU to respond */
+ if (first && uv_pch_intr_now_enabled) {
+ cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
+ return n - k - 1;
+ }
+
+ udelay(uv_nmi_initial_delay);
+ for (i = 0; i < uv_nmi_retry_count; i++) {
+ int loop_delay = uv_nmi_loop_delay;
+
+ for_each_cpu(j, uv_nmi_cpu_mask) {
+ if (uv_cpu_nmi_per(j).state) {
+ cpumask_clear_cpu(j, uv_nmi_cpu_mask);
+ if (++k >= n)
+ break;
+ }
+ }
+ if (k >= n) { /* all in? */
+ k = n;
+ break;
+ }
+ if (last_k != k) { /* abort if no new CPU's coming in */
+ last_k = k;
+ waiting = 0;
+ } else if (++waiting > uv_nmi_wait_count)
+ break;
+
+ /* Extend delay if waiting only for CPU 0: */
+ if (waiting && (n - k) == 1 &&
+ cpumask_test_cpu(0, uv_nmi_cpu_mask))
+ loop_delay *= 100;
+
+ udelay(loop_delay);
+ }
+ atomic_set(&uv_nmi_cpus_in_nmi, k);
+ return n - k;
+}
+
+/* Wait until all slave CPU's have entered UV NMI handler */
+static void uv_nmi_wait(int master)
+{
+ /* Indicate this CPU is in: */
+ this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN);
+
+ /* If not the first CPU in (the master), then we are a slave CPU */
+ if (!master)
+ return;
+
+ do {
+ /* Wait for all other CPU's to gather here */
+ if (!uv_nmi_wait_cpus(1))
+ break;
+
+ /* If not all made it in, send IPI NMI to them */
+ pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n",
+ cpumask_weight(uv_nmi_cpu_mask),
+ cpumask_pr_args(uv_nmi_cpu_mask));
+
+ uv_nmi_nr_cpus_ping();
+
+ /* If all CPU's are in, then done */
+ if (!uv_nmi_wait_cpus(0))
+ break;
+
+ pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n",
+ cpumask_weight(uv_nmi_cpu_mask),
+ cpumask_pr_args(uv_nmi_cpu_mask));
+ } while (0);
+
+ pr_alert("UV: %d of %d CPUs in NMI\n",
+ atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
+}
+
+/* Dump Instruction Pointer header */
+static void uv_nmi_dump_cpu_ip_hdr(void)
+{
+ pr_info("\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
+ "CPU", "PID", "COMMAND", "IP");
+}
+
+/* Dump Instruction Pointer info */
+static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
+{
+ pr_info("UV: %4d %6d %-32.32s %pS",
+ cpu, current->pid, current->comm, (void *)regs->ip);
+}
+
+/*
+ * Dump this CPU's state. If action was set to "kdump" and the crash_kexec
+ * failed, then we provide "dump" as an alternate action. Action "dump" now
+ * also includes the show "ips" (instruction pointers) action whereas the
+ * action "ips" only displays instruction pointers for the non-idle CPU's.
+ * This is an abbreviated form of the "ps" command.
+ */
+static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
+{
+ const char *dots = " ................................. ";
+
+ if (cpu == 0)
+ uv_nmi_dump_cpu_ip_hdr();
+
+ if (current->pid != 0 || uv_nmi_action != nmi_act_ips)
+ uv_nmi_dump_cpu_ip(cpu, regs);
+
+ if (uv_nmi_action == nmi_act_dump) {
+ pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
+ show_regs(regs);
+ }
+
+ this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
+}
+
+/* Trigger a slave CPU to dump its state */
+static void uv_nmi_trigger_dump(int cpu)
+{
+ int retry = uv_nmi_trigger_delay;
+
+ if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN)
+ return;
+
+ uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP;
+ do {
+ cpu_relax();
+ udelay(10);
+ if (uv_cpu_nmi_per(cpu).state
+ != UV_NMI_STATE_DUMP)
+ return;
+ } while (--retry > 0);
+
+ pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
+ uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE;
+}
+
+/* Wait until all CPU's ready to exit */
+static void uv_nmi_sync_exit(int master)
+{
+ atomic_dec(&uv_nmi_cpus_in_nmi);
+ if (master) {
+ while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
+ cpu_relax();
+ atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
+ } else {
+ while (atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ }
+}
+
+/* Current "health" check is to check which CPU's are responsive */
+static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master)
+{
+ if (master) {
+ int in = atomic_read(&uv_nmi_cpus_in_nmi);
+ int out = num_online_cpus() - in;
+
+ pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out);
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
+ } else {
+ while (!atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ }
+ uv_nmi_sync_exit(master);
+}
+
+/* Walk through CPU list and dump state of each */
+static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
+{
+ if (master) {
+ int tcpu;
+ int ignored = 0;
+ int saved_console_loglevel = console_loglevel;
+
+ pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
+ uv_nmi_action == nmi_act_ips ? "IPs" : "processes",
+ atomic_read(&uv_nmi_cpus_in_nmi), cpu);
+
+ console_loglevel = uv_nmi_loglevel;
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
+ for_each_online_cpu(tcpu) {
+ if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
+ ignored++;
+ else if (tcpu == cpu)
+ uv_nmi_dump_state_cpu(tcpu, regs);
+ else
+ uv_nmi_trigger_dump(tcpu);
+ }
+ if (ignored)
+ pr_alert("UV: %d CPUs ignored NMI\n", ignored);
+
+ console_loglevel = saved_console_loglevel;
+ pr_alert("UV: process trace complete\n");
+ } else {
+ while (!atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
+ cpu_relax();
+ uv_nmi_dump_state_cpu(cpu, regs);
+ }
+ uv_nmi_sync_exit(master);
+}
+
+static void uv_nmi_touch_watchdogs(void)
+{
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ rcu_cpu_stall_reset();
+ touch_nmi_watchdog();
+}
+
+static void uv_nmi_kdump(int cpu, int main, struct pt_regs *regs)
+{
+ /* Check if kdump kernel loaded for both main and secondary CPUs */
+ if (!kexec_crash_image) {
+ if (main)
+ pr_err("UV: NMI error: kdump kernel not loaded\n");
+ return;
+ }
+
+ /* Call crash to dump system state */
+ if (main) {
+ pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
+ crash_kexec(regs);
+
+ pr_emerg("UV: crash_kexec unexpectedly returned\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
+
+ } else { /* secondary */
+
+ /* If kdump kernel fails, secondaries will exit this loop */
+ while (atomic_read(&uv_nmi_kexec_failed) == 0) {
+
+ /* Once shootdown cpus starts, they do not return */
+ run_crash_ipi_callback(regs);
+
+ mdelay(10);
+ }
+ }
+}
+
+#ifdef CONFIG_KGDB
+#ifdef CONFIG_KGDB_KDB
+static inline int uv_nmi_kdb_reason(void)
+{
+ return KDB_REASON_SYSTEM_NMI;
+}
+#else /* !CONFIG_KGDB_KDB */
+static inline int uv_nmi_kdb_reason(void)
+{
+ /* Ensure user is expecting to attach gdb remote */
+ if (uv_nmi_action == nmi_act_kgdb)
+ return 0;
+
+ pr_err("UV: NMI error: KDB is not enabled in this kernel\n");
+ return -1;
+}
+#endif /* CONFIG_KGDB_KDB */
+
+/*
+ * Call KGDB/KDB from NMI handler
+ *
+ * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
+ * 'kdb' has no affect on which is used. See the KGDB documentation for further
+ * information.
+ */
+static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
+{
+ if (master) {
+ int reason = uv_nmi_kdb_reason();
+ int ret;
+
+ if (reason < 0)
+ return;
+
+ /* Call KGDB NMI handler as MASTER */
+ ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason,
+ &uv_nmi_slave_continue);
+ if (ret) {
+ pr_alert("KGDB returned error, is kgdboc set?\n");
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
+ }
+ } else {
+ /* Wait for KGDB signal that it's ready for slaves to enter */
+ int sig;
+
+ do {
+ cpu_relax();
+ sig = atomic_read(&uv_nmi_slave_continue);
+ } while (!sig);
+
+ /* Call KGDB as slave */
+ if (sig == SLAVE_CONTINUE)
+ kgdb_nmicallback(cpu, regs);
+ }
+ uv_nmi_sync_exit(master);
+}
+
+#else /* !CONFIG_KGDB */
+static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
+{
+ pr_err("UV: NMI error: KGDB is not enabled in this kernel\n");
+}
+#endif /* !CONFIG_KGDB */
+
+/*
+ * UV NMI handler
+ */
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+{
+ struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
+ int cpu = smp_processor_id();
+ int master = 0;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ /* If not a UV System NMI, ignore */
+ if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
+ local_irq_restore(flags);
+ return NMI_DONE;
+ }
+
+ /* Indicate we are the first CPU into the NMI handler */
+ master = (atomic_read(&uv_nmi_cpu) == cpu);
+
+ /* If NMI action is "kdump", then attempt to do it */
+ if (uv_nmi_action == nmi_act_kdump) {
+ uv_nmi_kdump(cpu, master, regs);
+
+ /* Unexpected return, revert action to "dump" */
+ if (master)
+ uv_nmi_action = nmi_act_dump;
+ }
+
+ /* Pause as all CPU's enter the NMI handler */
+ uv_nmi_wait(master);
+
+ /* Process actions other than "kdump": */
+ switch (uv_nmi_action) {
+ case nmi_act_health:
+ uv_nmi_action_health(cpu, regs, master);
+ break;
+ case nmi_act_ips:
+ case nmi_act_dump:
+ uv_nmi_dump_state(cpu, regs, master);
+ break;
+ case nmi_act_kdb:
+ case nmi_act_kgdb:
+ uv_call_kgdb_kdb(cpu, regs, master);
+ break;
+ default:
+ if (master)
+ pr_alert("UV: unknown NMI action: %d\n", uv_nmi_action);
+ uv_nmi_sync_exit(master);
+ break;
+ }
+
+ /* Clear per_cpu "in_nmi" flag */
+ this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT);
+
+ /* Clear MMR NMI flag on each hub */
+ uv_clear_nmi(cpu);
+
+ /* Clear global flags */
+ if (master) {
+ if (!cpumask_empty(uv_nmi_cpu_mask))
+ uv_nmi_cleanup_mask();
+ atomic_set(&uv_nmi_cpus_in_nmi, -1);
+ atomic_set(&uv_nmi_cpu, -1);
+ atomic_set(&uv_in_nmi, 0);
+ atomic_set(&uv_nmi_kexec_failed, 0);
+ atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
+ }
+
+ uv_nmi_touch_watchdogs();
+ local_irq_restore(flags);
+
+ return NMI_HANDLED;
+}
+
+/*
+ * NMI handler for pulling in CPU's when perf events are grabbing our NMI
+ */
+static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
+{
+ int ret;
+
+ this_cpu_inc(uv_cpu_nmi.queries);
+ if (!this_cpu_read(uv_cpu_nmi.pinging)) {
+ local64_inc(&uv_nmi_ping_misses);
+ return NMI_DONE;
+ }
+
+ this_cpu_inc(uv_cpu_nmi.pings);
+ local64_inc(&uv_nmi_ping_count);
+ ret = uv_handle_nmi(reason, regs);
+ this_cpu_write(uv_cpu_nmi.pinging, 0);
+ return ret;
+}
+
+static void uv_register_nmi_notifier(void)
+{
+ if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
+ pr_warn("UV: NMI handler failed to register\n");
+
+ if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
+ pr_warn("UV: PING NMI handler failed to register\n");
+}
+
+void uv_nmi_init(void)
+{
+ unsigned int value;
+
+ /*
+ * Unmask NMI on all CPU's
+ */
+ value = apic_read(APIC_LVT1) | APIC_DM_NMI;
+ value &= ~APIC_LVT_MASKED;
+ apic_write(APIC_LVT1, value);
+}
+
+/* Setup HUB NMI info */
+static void __init uv_nmi_setup_common(bool hubbed)
+{
+ int size = sizeof(void *) * (1 << NODES_SHIFT);
+ int cpu;
+
+ uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
+ nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
+ BUG_ON(!uv_hub_nmi_list);
+ size = sizeof(struct uv_hub_nmi_s);
+ for_each_present_cpu(cpu) {
+ int nid = cpu_to_node(cpu);
+ if (uv_hub_nmi_list[nid] == NULL) {
+ uv_hub_nmi_list[nid] = kzalloc_node(size,
+ GFP_KERNEL, nid);
+ BUG_ON(!uv_hub_nmi_list[nid]);
+ raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
+ atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
+ uv_hub_nmi_list[nid]->hub_present = hubbed;
+ uv_hub_nmi_list[nid]->pch_owner = (nid == 0);
+ }
+ uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
+ }
+ BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
+}
+
+/* Setup for UV Hub systems */
+void __init uv_nmi_setup(void)
+{
+ uv_nmi_setup_mmrs();
+ uv_nmi_setup_common(true);
+ uv_register_nmi_notifier();
+ pr_info("UV: Hub NMI enabled\n");
+}
+
+/* Setup for UV Hubless systems */
+void __init uv_nmi_setup_hubless(void)
+{
+ uv_nmi_setup_common(false);
+ pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE);
+ nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n",
+ pch_base, PCH_PCR_GPIO_1_BASE);
+ if (uv_pch_init_enable)
+ uv_init_hubless_pch_d0();
+ uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0,
+ STS_GPP_D_0_MASK, STS_GPP_D_0_MASK);
+ uv_nmi_setup_hubless_intr();
+ /* Ensure NMI enabled in Processor Interface Reg: */
+ uv_reassert_nmi();
+ uv_register_nmi_notifier();
+ pr_info("UV: PCH NMI enabled\n");
+}
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
deleted file mode 100644
index 5d4ba301e776..000000000000
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson
- */
-
-#include <linux/device.h>
-#include <asm/uv/bios.h>
-#include <asm/uv/uv.h>
-
-struct kobject *sgi_uv_kobj;
-
-static ssize_t partition_id_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
-}
-
-static ssize_t coherence_id_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
-}
-
-static struct kobj_attribute partition_id_attr =
- __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
-
-static struct kobj_attribute coherence_id_attr =
- __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
-
-
-static int __init sgi_uv_sysfs_init(void)
-{
- unsigned long ret;
-
- if (!is_uv_system())
- return -ENODEV;
-
- if (!sgi_uv_kobj)
- sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
- if (!sgi_uv_kobj) {
- printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
- return -EINVAL;
- }
-
- ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
- if (ret) {
- printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
- return ret;
- }
-
- ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
- if (ret) {
- printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
- return ret;
- }
-
- return 0;
-}
-
-device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 5c86786bbfd2..3712afc3534d 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SGI RTC clock/timer routines.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Dimitri Sivanich
*/
@@ -30,28 +18,27 @@
#define RTC_NAME "sgi_rtc"
-static cycle_t uv_read_rtc(struct clocksource *cs);
+static u64 uv_read_rtc(struct clocksource *cs);
static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
-static void uv_rtc_timer_setup(enum clock_event_mode,
- struct clock_event_device *);
+static int uv_rtc_shutdown(struct clock_event_device *evt);
static struct clocksource clocksource_uv = {
.name = RTC_NAME,
.rating = 299,
.read = uv_read_rtc,
- .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+ .mask = (u64)UVH_RTC_REAL_TIME_CLOCK_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static struct clock_event_device clock_event_device_uv = {
- .name = RTC_NAME,
- .features = CLOCK_EVT_FEAT_ONESHOT,
- .shift = 20,
- .rating = 400,
- .irq = -1,
- .set_next_event = uv_rtc_next_event,
- .set_mode = uv_rtc_timer_setup,
- .event_handler = NULL,
+ .name = RTC_NAME,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 20,
+ .rating = 400,
+ .irq = -1,
+ .set_next_event = uv_rtc_next_event,
+ .set_state_shutdown = uv_rtc_shutdown,
+ .event_handler = NULL,
};
static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
@@ -66,7 +53,7 @@ struct uv_rtc_timer_head {
struct {
int lcpu; /* systemwide logical cpu number */
u64 expires; /* next timer expiration for this cpu */
- } cpu[1];
+ } cpu[] __counted_by(ncpus);
};
/*
@@ -88,7 +75,6 @@ static void uv_rtc_send_IPI(int cpu)
apicid = cpu_physical_id(cpu);
pnode = uv_apicid_to_pnode(apicid);
- apicid |= uv_apicid_hibits;
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
(X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -99,32 +85,23 @@ static void uv_rtc_send_IPI(int cpu)
/* Check for an RTC interrupt pending */
static int uv_intr_pending(int pnode)
{
- if (is_uv1_hub())
- return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
- UV1H_EVENT_OCCURRED0_RTC1_MASK;
- else if (is_uvx_hub())
- return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
- UVXH_EVENT_OCCURRED2_RTC_1_MASK;
- return 0;
+ return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED2) &
+ UVH_EVENT_OCCURRED2_RTC_1_MASK;
}
/* Setup interrupt and return non-zero if early expiration occurred. */
static int uv_setup_intr(int cpu, u64 expires)
{
u64 val;
- unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
+ unsigned long apicid = cpu_physical_id(cpu);
int pnode = uv_cpu_to_pnode(cpu);
uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
UVH_RTC1_INT_CONFIG_M_MASK);
uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
- if (is_uv1_hub())
- uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
- UV1H_EVENT_OCCURRED0_RTC1_MASK);
- else
- uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
- UVXH_EVENT_OCCURRED2_RTC_1_MASK);
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED2_ALIAS,
+ UVH_EVENT_OCCURRED2_RTC_1_MASK);
val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
@@ -159,20 +136,19 @@ static __init int uv_rtc_allocate_timers(void)
{
int cpu;
- blade_info = kzalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
+ blade_info = kcalloc(uv_possible_blades, sizeof(void *), GFP_KERNEL);
if (!blade_info)
return -ENOMEM;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
int bid = uv_cpu_to_blade_id(cpu);
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
if (!head) {
- head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
- (uv_blade_nr_possible_cpus(bid) *
- 2 * sizeof(u64)),
+ head = kmalloc_node(struct_size(head, cpu,
+ uv_blade_nr_possible_cpus(bid)),
GFP_KERNEL, nid);
if (!head) {
uv_rtc_deallocate_timers();
@@ -227,7 +203,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
u64 *t = &head->cpu[bcpu].expires;
unsigned long flags;
int next_cpu;
@@ -263,7 +239,7 @@ static int uv_rtc_unset_timer(int cpu, int force)
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
u64 *t = &head->cpu[bcpu].expires;
unsigned long flags;
int rc = 0;
@@ -294,10 +270,10 @@ static int uv_rtc_unset_timer(int cpu, int force)
* Read the RTC.
*
* Starting with HUB rev 2.0, the UV RTC register is replicated across all
- * cachelines of it's own page. This allows faster simultaneous reads
+ * cachelines of its own page. This allows faster simultaneous reads
* from a given socket.
*/
-static cycle_t uv_read_rtc(struct clocksource *cs)
+static u64 uv_read_rtc(struct clocksource *cs)
{
unsigned long offset;
@@ -306,7 +282,7 @@ static cycle_t uv_read_rtc(struct clocksource *cs)
else
offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
- return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+ return (u64)uv_read_local_mmr(UVH_RTC | offset);
}
/*
@@ -321,24 +297,14 @@ static int uv_rtc_next_event(unsigned long delta,
}
/*
- * Setup the RTC timer in oneshot mode
+ * Shutdown the RTC timer
*/
-static void uv_rtc_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt)
+static int uv_rtc_shutdown(struct clock_event_device *evt)
{
int ced_cpu = cpumask_first(evt->cpumask);
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- case CLOCK_EVT_MODE_ONESHOT:
- case CLOCK_EVT_MODE_RESUME:
- /* Nothing to do here yet */
- break;
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- uv_rtc_unset_timer(ced_cpu, 1);
- break;
- }
+ uv_rtc_unset_timer(ced_cpu, 1);
+ return 0;
}
static void uv_rtc_interrupt(void)
@@ -365,7 +331,7 @@ __setup("uvrtcevt", uv_enable_evt_rtc);
static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
{
- struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+ struct clock_event_device *ced = this_cpu_ptr(&cpu_ced);
*ced = clock_event_device_uv;
ced->cpumask = cpumask_of(smp_processor_id());
@@ -401,9 +367,11 @@ static __init int uv_rtc_setup_clock(void)
clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
sn_rtc_cycles_per_second;
+ clock_event_device_uv.min_delta_ticks = 1;
clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
(NSEC_PER_SEC / sn_rtc_cycles_per_second);
+ clock_event_device_uv.max_delta_ticks = clocksource_uv.mask;
rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
if (rc) {
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile
deleted file mode 100644
index 91bc17ab2fd5..000000000000
--- a/arch/x86/platform/visws/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
deleted file mode 100644
index 94d8a39332ec..000000000000
--- a/arch/x86/platform/visws/visws_quirks.c
+++ /dev/null
@@ -1,608 +0,0 @@
-/*
- * SGI Visual Workstation support and quirks, unmaintained.
- *
- * Split out from setup.c by davej@suse.de
- *
- * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
- *
- * SGI Visual Workstation interrupt controller
- *
- * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
- * which serves as the main interrupt controller in the system. Non-legacy
- * hardware in the system uses this controller directly. Legacy devices
- * are connected to the PIIX4 which in turn has its 8259(s) connected to
- * a of the Cobalt APIC entry.
- *
- * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
- *
- * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
- */
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/visws/cobalt.h>
-#include <asm/visws/piix4.h>
-#include <asm/io_apic.h>
-#include <asm/fixmap.h>
-#include <asm/reboot.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/e820.h>
-#include <asm/time.h>
-#include <asm/io.h>
-
-#include <linux/kernel_stat.h>
-
-#include <asm/i8259.h>
-#include <asm/irq_vectors.h>
-#include <asm/visws/lithium.h>
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-
-extern int no_broadcast;
-
-char visws_board_type = -1;
-char visws_board_rev = -1;
-
-static void __init visws_time_init(void)
-{
- printk(KERN_INFO "Starting Cobalt Timer system clock\n");
-
- /* Set the countdown value */
- co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
-
- /* Start the timer */
- co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
-
- /* Enable (unmask) the timer interrupt */
- co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
-
- setup_default_timer_irq();
-}
-
-/* Replaces the default init_ISA_irqs in the generic setup */
-static void __init visws_pre_intr_init(void);
-
-/* Quirk for machine specific memory setup. */
-
-#define MB (1024 * 1024)
-
-unsigned long sgivwfb_mem_phys;
-unsigned long sgivwfb_mem_size;
-EXPORT_SYMBOL(sgivwfb_mem_phys);
-EXPORT_SYMBOL(sgivwfb_mem_size);
-
-long long mem_size __initdata = 0;
-
-static char * __init visws_memory_setup(void)
-{
- long long gfx_mem_size = 8 * MB;
-
- mem_size = boot_params.alt_mem_k;
-
- if (!mem_size) {
- printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
- mem_size = 128 * MB;
- }
-
- /*
- * this hardcodes the graphics memory to 8 MB
- * it really should be sized dynamically (or at least
- * set as a boot param)
- */
- if (!sgivwfb_mem_size) {
- printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
- sgivwfb_mem_size = 8 * MB;
- }
-
- /*
- * Trim to nearest MB
- */
- sgivwfb_mem_size &= ~((1 << 20) - 1);
- sgivwfb_mem_phys = mem_size - gfx_mem_size;
-
- e820_add_region(0, LOWMEMSIZE(), E820_RAM);
- e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
- e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
-
- return "PROM";
-}
-
-static void visws_machine_emergency_restart(void)
-{
- /*
- * Visual Workstations restart after this
- * register is poked on the PIIX4
- */
- outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
-}
-
-static void visws_machine_power_off(void)
-{
- unsigned short pm_status;
-/* extern unsigned int pci_bus0; */
-
- while ((pm_status = inw(PMSTS_PORT)) & 0x100)
- outw(pm_status, PMSTS_PORT);
-
- outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
-
- mdelay(10);
-
-#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
- (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
-
-/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
- outl(PIIX_SPECIAL_STOP, 0xCFC);
-}
-
-static void __init visws_get_smp_config(unsigned int early)
-{
-}
-
-/*
- * The Visual Workstation is Intel MP compliant in the hardware
- * sense, but it doesn't have a BIOS(-configuration table).
- * No problem for Linux.
- */
-
-static void __init MP_processor_info(struct mpc_cpu *m)
-{
- int ver, logical_apicid;
- physid_mask_t apic_cpus;
-
- if (!(m->cpuflag & CPU_ENABLED))
- return;
-
- logical_apicid = m->apicid;
- printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
- m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
-
- if (m->cpuflag & CPU_BOOTPROCESSOR)
- boot_cpu_physical_apicid = m->apicid;
-
- ver = m->apicver;
- if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
- m->apicid, MAX_LOCAL_APIC);
- return;
- }
-
- apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
- physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- m->apicid);
- ver = 0x10;
- }
- apic_version[m->apicid] = ver;
-}
-
-static void __init visws_find_smp_config(void)
-{
- struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
- unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
-
- if (ncpus > CO_CPU_MAX) {
- printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
- ncpus, mp);
-
- ncpus = CO_CPU_MAX;
- }
-
- if (ncpus > setup_max_cpus)
- ncpus = setup_max_cpus;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- smp_found_config = 1;
-#endif
- while (ncpus--)
- MP_processor_info(mp++);
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-}
-
-static void visws_trap_init(void);
-
-void __init visws_early_detect(void)
-{
- int raw;
-
- visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
- >> PIIX_GPI_BD_SHIFT;
-
- if (visws_board_type < 0)
- return;
-
- /*
- * Override the default platform setup functions
- */
- x86_init.resources.memory_setup = visws_memory_setup;
- x86_init.mpparse.get_smp_config = visws_get_smp_config;
- x86_init.mpparse.find_smp_config = visws_find_smp_config;
- x86_init.irqs.pre_vector_init = visws_pre_intr_init;
- x86_init.irqs.trap_init = visws_trap_init;
- x86_init.timers.timer_init = visws_time_init;
- x86_init.pci.init = pci_visws_init;
- x86_init.pci.init_irq = x86_init_noop;
-
- /*
- * Install reboot quirks:
- */
- pm_power_off = visws_machine_power_off;
- machine_ops.emergency_restart = visws_machine_emergency_restart;
-
- /*
- * Do not use broadcast IPIs:
- */
- no_broadcast = 0;
-
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Turn off IO-APIC detection and initialization:
- */
- skip_ioapic_setup = 1;
-#endif
-
- /*
- * Get Board rev.
- * First, we have to initialize the 307 part to allow us access
- * to the GPIO registers. Let's map them at 0x0fc0 which is right
- * after the PIIX4 PM section.
- */
- outb_p(SIO_DEV_SEL, SIO_INDEX);
- outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
-
- outb_p(SIO_DEV_MSB, SIO_INDEX);
- outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
-
- outb_p(SIO_DEV_LSB, SIO_INDEX);
- outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
-
- outb_p(SIO_DEV_ENB, SIO_INDEX);
- outb_p(1, SIO_DATA); /* Enable GPIO registers. */
-
- /*
- * Now, we have to map the power management section to write
- * a bit which enables access to the GPIO registers.
- * What lunatic came up with this shit?
- */
- outb_p(SIO_DEV_SEL, SIO_INDEX);
- outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
-
- outb_p(SIO_DEV_MSB, SIO_INDEX);
- outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
-
- outb_p(SIO_DEV_LSB, SIO_INDEX);
- outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
-
- outb_p(SIO_DEV_ENB, SIO_INDEX);
- outb_p(1, SIO_DATA); /* Enable PM registers. */
-
- /*
- * Now, write the PM register which enables the GPIO registers.
- */
- outb_p(SIO_PM_FER2, SIO_PM_INDEX);
- outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
-
- /*
- * Now, initialize the GPIO registers.
- * We want them all to be inputs which is the
- * power on default, so let's leave them alone.
- * So, let's just read the board rev!
- */
- raw = inb_p(SIO_GP_DATA1);
- raw &= 0x7f; /* 7 bits of valid board revision ID. */
-
- if (visws_board_type == VISWS_320) {
- if (raw < 0x6) {
- visws_board_rev = 4;
- } else if (raw < 0xc) {
- visws_board_rev = 5;
- } else {
- visws_board_rev = 6;
- }
- } else if (visws_board_type == VISWS_540) {
- visws_board_rev = 2;
- } else {
- visws_board_rev = raw;
- }
-
- printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
- (visws_board_type == VISWS_320 ? "320" :
- (visws_board_type == VISWS_540 ? "540" :
- "unknown")), visws_board_rev);
-}
-
-#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
-#define BCD (LI_INTB | LI_INTC | LI_INTD)
-#define ALLDEVS (A01234 | BCD)
-
-static __init void lithium_init(void)
-{
- set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
- set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
-
- if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
- (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
- printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
-/* panic("This machine is not SGI Visual Workstation 320/540"); */
- }
-
- if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
- (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
- printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
-/* panic("This machine is not SGI Visual Workstation 320/540"); */
- }
-
- li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
- li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
-}
-
-static __init void cobalt_init(void)
-{
- /*
- * On normal SMP PC this is used only with SMP, but we have to
- * use it and set it up here to start the Cobalt clock
- */
- set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
- setup_local_APIC();
- printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
- (unsigned int)apic_read(APIC_LVR),
- (unsigned int)apic_read(APIC_ID));
-
- set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
- set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
- printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
- co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
-
- /* Enable Cobalt APIC being careful to NOT change the ID! */
- co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
-
- printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
- co_apic_read(CO_APIC_ID));
-}
-
-static void __init visws_trap_init(void)
-{
- lithium_init();
- cobalt_init();
-}
-
-/*
- * IRQ controller / APIC support:
- */
-
-static DEFINE_SPINLOCK(cobalt_lock);
-
-/*
- * Set the given Cobalt APIC Redirection Table entry to point
- * to the given IDT vector/index.
- */
-static inline void co_apic_set(int entry, int irq)
-{
- co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
- co_apic_write(CO_APIC_HI(entry), 0);
-}
-
-/*
- * Cobalt (IO)-APIC functions to handle PCI devices.
- */
-static inline int co_apic_ide0_hack(void)
-{
- extern char visws_board_type;
- extern char visws_board_rev;
-
- if (visws_board_type == VISWS_320 && visws_board_rev == 5)
- return 5;
- return CO_APIC_IDE0;
-}
-
-static int is_co_apic(unsigned int irq)
-{
- if (IS_CO_APIC(irq))
- return CO_APIC(irq);
-
- switch (irq) {
- case 0: return CO_APIC_CPU;
- case CO_IRQ_IDE0: return co_apic_ide0_hack();
- case CO_IRQ_IDE1: return CO_APIC_IDE1;
- default: return -1;
- }
-}
-
-
-/*
- * This is the SGI Cobalt (IO-)APIC:
- */
-static void enable_cobalt_irq(struct irq_data *data)
-{
- co_apic_set(is_co_apic(data->irq), data->irq);
-}
-
-static void disable_cobalt_irq(struct irq_data *data)
-{
- int entry = is_co_apic(data->irq);
-
- co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
- co_apic_read(CO_APIC_LO(entry));
-}
-
-static void ack_cobalt_irq(struct irq_data *data)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cobalt_lock, flags);
- disable_cobalt_irq(data);
- apic_write(APIC_EOI, APIC_EOI_ACK);
- spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
-static struct irq_chip cobalt_irq_type = {
- .name = "Cobalt-APIC",
- .irq_enable = enable_cobalt_irq,
- .irq_disable = disable_cobalt_irq,
- .irq_ack = ack_cobalt_irq,
-};
-
-
-/*
- * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
- * -- not the manner expected by the code in i8259.c.
- *
- * there is a 'master' physical interrupt source that gets sent to
- * the CPU. But in the chipset there are various 'virtual' interrupts
- * waiting to be handled. We represent this to Linux through a 'master'
- * interrupt controller type, and through a special virtual interrupt-
- * controller. Device drivers only see the virtual interrupt sources.
- */
-static unsigned int startup_piix4_master_irq(struct irq_data *data)
-{
- legacy_pic->init(0);
- enable_cobalt_irq(data);
- return 0;
-}
-
-static struct irq_chip piix4_master_irq_type = {
- .name = "PIIX4-master",
- .irq_startup = startup_piix4_master_irq,
- .irq_ack = ack_cobalt_irq,
-};
-
-static void pii4_mask(struct irq_data *data) { }
-
-static struct irq_chip piix4_virtual_irq_type = {
- .name = "PIIX4-virtual",
- .irq_mask = pii4_mask,
-};
-
-/*
- * PIIX4-8259 master/virtual functions to handle interrupt requests
- * from legacy devices: floppy, parallel, serial, rtc.
- *
- * None of these get Cobalt APIC entries, neither do they have IDT
- * entries. These interrupts are purely virtual and distributed from
- * the 'master' interrupt source: CO_IRQ_8259.
- *
- * When the 8259 interrupts its handler figures out which of these
- * devices is interrupting and dispatches to its handler.
- *
- * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
- * enable_irq gets the right irq. This 'master' irq is never directly
- * manipulated by any driver.
- */
-static irqreturn_t piix4_master_intr(int irq, void *dev_id)
-{
- unsigned long flags;
- int realirq;
-
- raw_spin_lock_irqsave(&i8259A_lock, flags);
-
- /* Find out what's interrupting in the PIIX4 master 8259 */
- outb(0x0c, 0x20); /* OCW3 Poll command */
- realirq = inb(0x20);
-
- /*
- * Bit 7 == 0 means invalid/spurious
- */
- if (unlikely(!(realirq & 0x80)))
- goto out_unlock;
-
- realirq &= 7;
-
- if (unlikely(realirq == 2)) {
- outb(0x0c, 0xa0);
- realirq = inb(0xa0);
-
- if (unlikely(!(realirq & 0x80)))
- goto out_unlock;
-
- realirq = (realirq & 7) + 8;
- }
-
- /* mask and ack interrupt */
- cached_irq_mask |= 1 << realirq;
- if (unlikely(realirq > 7)) {
- inb(0xa1);
- outb(cached_slave_mask, 0xa1);
- outb(0x60 + (realirq & 7), 0xa0);
- outb(0x60 + 2, 0x20);
- } else {
- inb(0x21);
- outb(cached_master_mask, 0x21);
- outb(0x60 + realirq, 0x20);
- }
-
- raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
- /*
- * handle this 'virtual interrupt' as a Cobalt one now.
- */
- generic_handle_irq(realirq);
-
- return IRQ_HANDLED;
-
-out_unlock:
- raw_spin_unlock_irqrestore(&i8259A_lock, flags);
- return IRQ_NONE;
-}
-
-static struct irqaction master_action = {
- .handler = piix4_master_intr,
- .name = "PIIX4-8259",
- .flags = IRQF_NO_THREAD,
-};
-
-static struct irqaction cascade_action = {
- .handler = no_action,
- .name = "cascade",
- .flags = IRQF_NO_THREAD,
-};
-
-static inline void set_piix4_virtual_irq_type(void)
-{
- piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask;
- piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask;
- piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask;
-}
-
-static void __init visws_pre_intr_init(void)
-{
- int i;
-
- set_piix4_virtual_irq_type();
-
- for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
- struct irq_chip *chip = NULL;
-
- if (i == 0)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_IDE0)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_IDE1)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_8259)
- chip = &piix4_master_irq_type;
- else if (i < CO_IRQ_APIC0)
- chip = &piix4_virtual_irq_type;
- else if (IS_CO_APIC(i))
- chip = &cobalt_irq_type;
-
- if (chip)
- irq_set_chip(i, chip);
- }
-
- setup_irq(CO_IRQ_8259, &master_action);
- setup_irq(2, &cascade_action);
-}