diff options
Diffstat (limited to 'arch/x86/platform')
66 files changed, 7327 insertions, 6766 deletions
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 01e0231a113e..3ed03a2552d0 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,13 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 # Platform specific code goes here +obj-y += atom/ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ -obj-y += goldfish/ obj-y += iris/ -obj-y += mrst/ +obj-y += intel/ +obj-y += intel-mid/ +obj-y += intel-quark/ obj-y += olpc/ obj-y += scx200/ -obj-y += sfi/ obj-y += ts5500/ -obj-y += visws/ obj-y += uv/ diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile new file mode 100644 index 000000000000..e06bbecd6358 --- /dev/null +++ b/arch/x86/platform/atom/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c new file mode 100644 index 000000000000..44c30ce6360a --- /dev/null +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel SOC Punit device state debug driver + * Punit controls power management for North Complex devices (Graphics + * blocks, Image Signal Processing, video processing, display, DSP etc.) + * + * Copyright (c) 2015, Intel Corporation. + */ + +#define pr_fmt(fmt) "punit_atom: " fmt + +#include <linux/acpi.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/iosf_mbi.h> + +/* Subsystem config/status Video processor */ +#define VED_SS_PM0 0x32 +/* Subsystem config/status ISP (Image Signal Processor) */ +#define ISP_SS_PM0 0x39 +/* Subsystem config/status Input/output controller */ +#define MIO_SS_PM 0x3B +/* Shift bits for getting status for video, isp and i/o */ +#define SSS_SHIFT 24 + +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 +/* Shift bits for getting status for graphics rendering */ +#define RENDER_POS 0 +/* Shift bits for getting status for media control */ +#define MEDIA_POS 2 +/* Shift bits for getting status for Valley View/Baytrail display */ +#define VLV_DISPLAY_POS 6 + +/* Subsystem config/status display for Cherry Trail SOC */ +#define CHT_DSP_SSS 0x36 +/* Shift bits for getting status for display */ +#define CHT_DSP_SSS_POS 16 + +struct punit_device { + char *name; + int reg; + int sss_pos; +}; + +static const struct punit_device punit_device_tng[] = { + { "DISPLAY", CHT_DSP_SSS, SSS_SHIFT }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const struct punit_device punit_device_byt[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const struct punit_device punit_device_cht[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static int punit_dev_state_show(struct seq_file *seq_file, void *unused) +{ + u32 punit_pwr_status; + struct punit_device *punit_devp = seq_file->private; + int index; + int status; + + seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n"); + while (punit_devp->name) { + status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, + punit_devp->reg, &punit_pwr_status); + if (status) { + seq_printf(seq_file, "%9s : Read Failed\n", + punit_devp->name); + } else { + index = (punit_pwr_status >> punit_devp->sss_pos) & 3; + seq_printf(seq_file, "%9s : %s\n", punit_devp->name, + dstates[index]); + } + punit_devp++; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(punit_dev_state); + +static struct dentry *punit_dbg_file; + +static void punit_dbgfs_register(struct punit_device *punit_device) +{ + punit_dbg_file = debugfs_create_dir("punit_atom", NULL); + + debugfs_create_file("dev_power_state", 0444, punit_dbg_file, + punit_device, &punit_dev_state_fops); +} + +static void punit_dbgfs_unregister(void) +{ + debugfs_remove_recursive(punit_dbg_file); +} + +#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) +static const struct punit_device *punit_dev; + +static void punit_s2idle_check(void) +{ + const struct punit_device *punit_devp; + u32 punit_pwr_status, dstate; + int status; + + for (punit_devp = punit_dev; punit_devp->name; punit_devp++) { + /* Skip MIO, it is on till the very last moment */ + if (punit_devp->reg == MIO_SS_PM) + continue; + + status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, + punit_devp->reg, &punit_pwr_status); + if (status) { + pr_err("%s read failed\n", punit_devp->name); + } else { + dstate = (punit_pwr_status >> punit_devp->sss_pos) & 3; + if (!dstate) + pr_err("%s is in D0 prior to s2idle\n", punit_devp->name); + } + } +} + +static struct acpi_s2idle_dev_ops punit_s2idle_ops = { + .check = punit_s2idle_check, +}; + +static void punit_s2idle_check_register(struct punit_device *punit_device) +{ + punit_dev = punit_device; + acpi_register_lps0_dev(&punit_s2idle_ops); +} + +static void punit_s2idle_check_unregister(void) +{ + acpi_unregister_lps0_dev(&punit_s2idle_ops); +} +#else +static void punit_s2idle_check_register(struct punit_device *punit_device) {} +static void punit_s2idle_check_unregister(void) {} +#endif + +#define X86_MATCH(vfm, data) \ + X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_MWAIT, data) + +static const struct x86_cpu_id intel_punit_cpu_ids[] = { + X86_MATCH(INTEL_ATOM_SILVERMONT, &punit_device_byt), + X86_MATCH(INTEL_ATOM_SILVERMONT_MID, &punit_device_tng), + X86_MATCH(INTEL_ATOM_AIRMONT, &punit_device_cht), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); + +static int __init punit_atom_debug_init(void) +{ + struct punit_device *punit_device; + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_punit_cpu_ids); + if (!id) + return -ENODEV; + + punit_device = (struct punit_device *)id->driver_data; + punit_dbgfs_register(punit_device); + punit_s2idle_check_register(punit_device); + + return 0; +} + +static void __exit punit_atom_debug_exit(void) +{ + punit_s2idle_check_unregister(); + punit_dbgfs_unregister(); +} + +module_init(punit_atom_debug_init); +module_exit(punit_atom_debug_exit); + +MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>"); +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); +MODULE_DESCRIPTION("Driver for Punit devices states debugging"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile index 91fc92971d94..7b7f37dc80b1 100644 --- a/arch/x86/platform/ce4100/Makefile +++ b/arch/x86/platform/ce4100/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_X86_INTEL_CE) += ce4100.o diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index f8ab4945892e..aaa7017416f7 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -1,32 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Intel CE4100 platform specific setup code * * (C) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #include <linux/init.h> -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/module.h> -#include <linux/serial_reg.h> -#include <linux/serial_8250.h> +#include <linux/reboot.h> #include <asm/ce4100.h> #include <asm/prom.h> #include <asm/setup.h> -#include <asm/i8259.h> #include <asm/io.h> -#include <asm/io_apic.h> -#include <asm/emergency-restart.h> - -static int ce4100_i8042_detect(void) -{ - return 0; -} /* * The CE4100 platform has an internal 8051 Microcontroller which is @@ -40,107 +24,15 @@ static void ce4100_power_off(void) outb(0x4, 0xcf9); } -#ifdef CONFIG_SERIAL_8250 - -static unsigned int mem_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readl(p->membase + offset); -} - -/* - * The UART Tx interrupts are not set under some conditions and therefore serial - * transmission hangs. This is a silicon issue and has not been root caused. The - * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT - * bit of LSR register in interrupt handler to see whether at least one of these - * two bits is set, if so then process the transmit request. If this workaround - * is not applied, then the serial transmission may hang. This workaround is for - * errata number 9 in Errata - B step. -*/ - -static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset) -{ - unsigned int ret, ier, lsr; - - if (offset == UART_IIR) { - offset = offset << p->regshift; - ret = readl(p->membase + offset); - if (ret & UART_IIR_NO_INT) { - /* see if the TX interrupt should have really set */ - ier = mem_serial_in(p, UART_IER); - /* see if the UART's XMIT interrupt is enabled */ - if (ier & UART_IER_THRI) { - lsr = mem_serial_in(p, UART_LSR); - /* now check to see if the UART should be - generating an interrupt (but isn't) */ - if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) - ret &= ~UART_IIR_NO_INT; - } - } - } else - ret = mem_serial_in(p, offset); - return ret; -} - -static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writel(value, p->membase + offset); -} - -static void ce4100_serial_fixup(int port, struct uart_port *up, - unsigned short *capabilites) -{ -#ifdef CONFIG_EARLY_PRINTK - /* - * Over ride the legacy port configuration that comes from - * asm/serial.h. Using the ioport driver then switching to the - * PCI memmaped driver hangs the IOAPIC - */ - if (up->iotype != UPIO_MEM32) { - up->uartclk = 14745600; - up->mapbase = 0xdffe0200; - set_fixmap_nocache(FIX_EARLYCON_MEM_BASE, - up->mapbase & PAGE_MASK); - up->membase = - (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); - up->membase += up->mapbase & ~PAGE_MASK; - up->mapbase += port * 0x100; - up->membase += port * 0x100; - up->iotype = UPIO_MEM32; - up->regshift = 2; - up->irq = 4; - } -#endif - up->iobase = 0; - up->serial_in = ce4100_mem_serial_in; - up->serial_out = ce4100_mem_serial_out; - - *capabilites |= (1 << 12); -} - -static __init void sdv_serial_fixup(void) -{ - serial8250_set_isa_configurator(ce4100_serial_fixup); -} - -#else -static inline void sdv_serial_fixup(void) {}; -#endif - static void __init sdv_arch_setup(void) { sdv_serial_fixup(); } -#ifdef CONFIG_X86_IO_APIC -static void __cpuinit sdv_pci_init(void) +static void sdv_pci_init(void) { x86_of_pci_init(); - /* We can't set this earlier, because we need to calibrate the timer */ - legacy_pic = &null_legacy_pic; } -#endif /* * CE4100 specific x86_init function overrides and early setup @@ -148,12 +40,12 @@ static void __cpuinit sdv_pci_init(void) */ void __init x86_ce4100_early_setup(void) { - x86_init.oem.arch_setup = sdv_arch_setup; - x86_platform.i8042_detect = ce4100_i8042_detect; - x86_init.resources.probe_roms = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.pci.init = ce4100_pci_init; + x86_init.oem.arch_setup = sdv_arch_setup; + x86_init.resources.probe_roms = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.pci.init = ce4100_pci_init; + x86_init.pci.init_irq = sdv_pci_init; /* * By default, the reboot method is ACPI which is supported by the @@ -164,10 +56,5 @@ void __init x86_ce4100_early_setup(void) */ reboot_type = BOOT_KBD; -#ifdef CONFIG_X86_IO_APIC - x86_init.pci.init_irq = sdv_pci_init; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; -#endif - pm_power_off = ce4100_power_off; } diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts index ce874f872cc6..65fa3d866226 100644 --- a/arch/x86/platform/ce4100/falconfalls.dts +++ b/arch/x86/platform/ce4100/falconfalls.dts @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * CE4100 on Falcon Falls * * (c) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. */ /dts-v1/; / { @@ -252,7 +249,7 @@ gpio@26 { #gpio-cells = <2>; - compatible = "ti,pcf8575"; + compatible = "nxp,pcf8575"; reg = <0x26>; gpio-controller; }; @@ -266,7 +263,7 @@ gpio@26 { #gpio-cells = <2>; - compatible = "ti,pcf8575"; + compatible = "nxp,pcf8575"; reg = <0x26>; gpio-controller; }; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 6db1cc4c7534..500cab4a7f7c 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,2 +1,8 @@ -obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o -obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o +# SPDX-License-Identifier: GPL-2.0 +KASAN_SANITIZE := n +GCOV_PROFILE := n + +obj-$(CONFIG_EFI) += memmap.o quirks.o efi.o efi_$(BITS).o \ + efi_stub_$(BITS).o +obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o +obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c deleted file mode 100644 index 7145ec63c520..000000000000 --- a/arch/x86/platform/efi/efi-bgrt.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2012 Intel Corporation - * Author: Josh Triplett <josh@joshtriplett.org> - * - * Based on the bgrt driver: - * Copyright 2012 Red Hat, Inc <mjg@redhat.com> - * Author: Matthew Garrett - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/acpi.h> -#include <linux/efi.h> -#include <linux/efi-bgrt.h> - -struct acpi_table_bgrt *bgrt_tab; -void *__initdata bgrt_image; -size_t __initdata bgrt_image_size; - -struct bmp_header { - u16 id; - u32 size; -} __packed; - -void __init efi_bgrt_init(void) -{ - acpi_status status; - void __iomem *image; - bool ioremapped = false; - struct bmp_header bmp_header; - - if (acpi_disabled) - return; - - status = acpi_get_table("BGRT", 0, - (struct acpi_table_header **)&bgrt_tab); - if (ACPI_FAILURE(status)) - return; - - if (bgrt_tab->header.length < sizeof(*bgrt_tab)) - return; - if (bgrt_tab->version != 1) - return; - if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) - return; - - image = efi_lookup_mapped_addr(bgrt_tab->image_address); - if (!image) { - image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); - ioremapped = true; - if (!image) - return; - } - - memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); - if (ioremapped) - iounmap(image); - bgrt_image_size = bmp_header.size; - - bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); - if (!bgrt_image) - return; - - if (ioremapped) { - image = ioremap(bgrt_tab->image_address, bmp_header.size); - if (!image) { - kfree(bgrt_image); - bgrt_image = NULL; - return; - } - } - - memcpy_fromio(bgrt_image, image, bgrt_image_size); - if (ioremapped) - iounmap(image); -} diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c8d5577044bb..463b784499a8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Common EFI (Extensible Firmware Interface) support functions * Based on Extensible Firmware Interface Specification version 1.0 @@ -12,6 +13,8 @@ * Bibo Mao <bibo.mao@intel.com> * Chandramouli Narayanan <mouli@linux.intel.com> * Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov <bp@suse.de> - runtime services VA mapping * * Copied from efi_32.c to eliminate the duplicated code between EFI * 32/64 support code. --ying 2007-10-26 @@ -33,9 +36,8 @@ #include <linux/efi.h> #include <linux/efi-bgrt.h> #include <linux/export.h> -#include <linux/bootmem.h> -#include <linux/slab.h> #include <linux/memblock.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/time.h> @@ -45,61 +47,57 @@ #include <asm/setup.h> #include <asm/efi.h> +#include <asm/e820/api.h> #include <asm/time.h> -#include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/x86_init.h> -#include <asm/rtc.h> - -#define EFI_DEBUG 1 - -#define EFI_MIN_RESERVE 5120 +#include <asm/uv/uv.h> -#define EFI_DUMMY_GUID \ - EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) +static unsigned long efi_systab_phys __initdata; +static unsigned long efi_runtime, efi_nr_tables; -static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; +unsigned long efi_fw_vendor, efi_config_table; -struct efi __read_mostly efi = { - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, +static const efi_config_table_type_t arch_tables[] __initconst = { +#ifdef CONFIG_X86_UV + {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" }, +#endif + {}, }; -EXPORT_SYMBOL(efi); - -struct efi_memory_map memmap; - -static struct efi efi_phys __initdata; -static efi_system_table_t efi_systab __initdata; - -unsigned long x86_efi_facility; - -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. - */ -int efi_enabled(int facility) -{ - return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); -static bool __initdata disable_runtime = false; -static int __init setup_noefi(char *arg) -{ - disable_runtime = true; - return 0; -} -early_param("noefi", setup_noefi); +static const unsigned long * const efi_tables[] = { + &efi.acpi, + &efi.acpi20, + &efi.smbios, + &efi.smbios3, +#ifdef CONFIG_X86_UV + &uv_systab_phys, +#endif + &efi_fw_vendor, + &efi_runtime, + &efi_config_table, + &efi.esrt, + &efi_mem_attr_table, +#ifdef CONFIG_EFI_RCI2_TABLE + &rci2_table_phys, +#endif + &efi.tpm_log, + &efi.tpm_final_log, + &efi_rng_seed, +#ifdef CONFIG_LOAD_UEFI_KEYS + &efi.mokvar_table, +#endif +#ifdef CONFIG_EFI_COCO_SECRET + &efi.coco_secret, +#endif +#ifdef CONFIG_UNACCEPTED_MEMORY + &efi.unaccepted, +#endif +}; -int add_efi_memmap; -EXPORT_SYMBOL(add_efi_memmap); +u64 efi_setup; /* efi setup_data physical address */ +static int add_efi_memmap __initdata; static int __init setup_add_efi_memmap(char *arg) { add_efi_memmap = 1; @@ -107,237 +105,21 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static bool efi_no_storage_paranoia; - -static int __init setup_storage_paranoia(char *arg) -{ - efi_no_storage_paranoia = true; - return 0; -} -early_param("efi_no_storage_paranoia", setup_storage_paranoia); - - -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(get_time, tm, tc); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_time(efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt1(set_time, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt3(get_wakeup_time, - enabled, pending, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(set_wakeup_time, - enabled, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) -{ - return efi_call_virt5(get_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt3(get_next_variable, - name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 attr, - unsigned long data_size, - void *data) -{ - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_query_variable_info(u32 attr, - u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt4(query_variable_info, attr, storage_space, - remaining_space, max_variable_size); -} - -static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) -{ - return efi_call_virt1(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system(int reset_type, - efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - efi_call_virt4(reset_system, reset_type, status, - data_size, data); -} - -static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, - unsigned long sg_list) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt3(update_capsule, capsules, count, sg_list); -} - -static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, - u64 *max_size, - int *reset_type) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt4(query_capsule_caps, capsules, count, max_size, - reset_type); -} - -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys4(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - efi_call_phys_epilog(); - return status; -} - -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); - efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -int efi_set_rtc_mmss(const struct timespec *now) -{ - unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - struct rtc_time tm; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't read time!\n"); - return -1; - } - - rtc_time_to_tm(nowtime, &tm); - if (!rtc_valid_tm(&tm)) { - eft.year = tm.tm_year + 1900; - eft.month = tm.tm_mon + 1; - eft.day = tm.tm_mday; - eft.minute = tm.tm_min; - eft.second = tm.tm_sec; - eft.nanosecond = 0; - } else { - printk(KERN_ERR - "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __FUNCTION__, nowtime); - return -1; - } - - status = efi.set_time(&eft); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't write time!\n"); - return -1; - } - return 0; -} - -void efi_get_time(struct timespec *now) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - pr_err("Oops: efitime: can't read time!\n"); - - now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); - now->tv_nsec = 0; -} - /* * Tell the kernel about the EFI memory map. This might include - * more than the max 128 entries that can fit in the e820 legacy - * (zeropage) memory map. + * more than the max 128 entries that can fit in the passed in e820 + * legacy (zeropage) memory map, but the kernel's e820 table can hold + * E820_MAX_ENTRIES. */ static void __init do_add_efi_memmap(void) { - void *p; + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; int e820_type; @@ -348,19 +130,25 @@ static void __init do_add_efi_memmap(void) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->attribute & EFI_MEMORY_WB) - e820_type = E820_RAM; + if (efi_soft_reserve_enabled() + && (md->attribute & EFI_MEMORY_SP)) + e820_type = E820_TYPE_SOFT_RESERVED; + else if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_TYPE_RAM; else - e820_type = E820_RESERVED; + e820_type = E820_TYPE_RESERVED; break; case EFI_ACPI_RECLAIM_MEMORY: - e820_type = E820_ACPI; + e820_type = E820_TYPE_ACPI; break; case EFI_ACPI_MEMORY_NVS: - e820_type = E820_NVS; + e820_type = E820_TYPE_NVS; break; case EFI_UNUSABLE_MEMORY: - e820_type = E820_UNUSABLE; + e820_type = E820_TYPE_UNUSABLE; + break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_TYPE_PMEM; break; default: /* @@ -368,220 +156,290 @@ static void __init do_add_efi_memmap(void) * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE */ - e820_type = E820_RESERVED; + e820_type = E820_TYPE_RESERVED; break; } - e820_add_region(start, size, e820_type); + + e820__range_add(start, size, e820_type); } - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + e820__update_table(e820_table); +} + +/* + * Given add_efi_memmap defaults to 0 and there is no alternative + * e820 mechanism for soft-reserved memory, import the full EFI memory + * map if soft reservations are present and enabled. Otherwise, the + * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is + * the efi=nosoftreserve option. + */ +static bool do_efi_soft_reserve(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return false; + + if (!efi_soft_reserve_enabled()) + return false; + + for_each_efi_memory_desc(md) + if (md->type == EFI_CONVENTIONAL_MEMORY && + (md->attribute & EFI_MEMORY_SP)) + return true; + return false; } int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; - unsigned long pmap; + struct efi_memory_map_data data; + phys_addr_t pmap; + int rv; -#ifdef CONFIG_X86_32 - /* Can't handle data above 4GB at this time */ - if (e->efi_memmap_hi) { + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + /* Can't handle firmware tables above 4GB on i386 */ + if (IS_ENABLED(CONFIG_X86_32) && e->efi_memmap_hi > 0) { pr_err("Memory map is above 4GB, disabling EFI.\n"); return -EINVAL; } - pmap = e->efi_memmap; -#else - pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); -#endif - memmap.phys_map = (void *)pmap; - memmap.nr_map = e->efi_memmap_size / - e->efi_memdesc_size; - memmap.desc_size = e->efi_memdesc_size; - memmap.desc_version = e->efi_memdesc_version; + pmap = (phys_addr_t)(e->efi_memmap | ((u64)e->efi_memmap_hi << 32)); + + data.phys_map = pmap; + data.size = e->efi_memmap_size; + data.desc_size = e->efi_memdesc_size; + data.desc_version = e->efi_memdesc_version; + + if (!efi_enabled(EFI_PARAVIRT)) { + rv = efi_memmap_init_early(&data); + if (rv) + return rv; + } + + if (add_efi_memmap || do_efi_soft_reserve()) + do_add_efi_memmap(); + + WARN(efi.memmap.desc_version != 1, + "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", + efi.memmap.desc_version); - memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); + memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); return 0; } -#if EFI_DEBUG -static void __init print_efi_memmap(void) +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) { - efi_memory_desc_t *md; - void *p; - int i; + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; - for (p = memmap.map, i = 0; - p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - pr_info("mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", - i, md->type, md->attribute, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); } + return false; } -#endif /* EFI_DEBUG */ -void __init efi_reserve_boot_services(void) +static void __init efi_clean_memmap(void) { - void *p; + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - u64 start = md->phys_addr; - u64 size = md->num_pages << EFI_PAGE_SHIFT; + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - /* Only reserve where possible: - * - Not within any already allocated areas - * - Not over any memory area (really needed, if above?) - * - Not within any part of the kernel - * - Not the bios reserved area - */ - if ((start+size >= __pa_symbol(_text) - && start <= __pa_symbol(_end)) || - !e820_all_mapped(start, start+size, E820_RAM) || - memblock_is_region_reserved(start, size)) { - /* Could not reserve, skip it */ - md->num_pages = 0; - memblock_dbg("Could not reserve boot range " - "[0x%010llx-0x%010llx]\n", - start, start+size-1); - } else - memblock_reserve(start, size); + if (n_removal > 0) { + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(&data); + } +} + +/* + * Firmware can use EfiMemoryMappedIO to request that MMIO regions be + * mapped by the OS so they can be accessed by EFI runtime services, but + * should have no other significance to the OS (UEFI r2.10, sec 7.2). + * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO + * regions to E820_TYPE_RESERVED entries, which prevent Linux from + * allocating space from them (see remove_e820_regions()). + * + * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and + * PCI host bridge windows, which means Linux can't allocate BAR space for + * hot-added devices. + * + * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this + * problem. + * + * Retain small EfiMemoryMappedIO regions because on some platforms, these + * describe non-window space that's included in host bridge _CRS. If we + * assign that space to PCI devices, they don't work. + */ +static void __init efi_remove_e820_mmio(void) +{ + efi_memory_desc_t *md; + u64 size, start, end; + int i = 0; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + start = md->phys_addr; + end = start + size - 1; + if (size >= 256*1024) { + pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n", + i, start, end, size >> 20); + e820__range_remove(start, size, + E820_TYPE_RESERVED, 1); + } else { + pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n", + i, start, end, size >> 10); + } + } + i++; } } -void __init efi_unmap_memmap(void) +void __init efi_print_memmap(void) { - clear_bit(EFI_MEMMAP, &x86_efi_facility); - if (memmap.map) { - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; + efi_memory_desc_t *md; + int i = 0; + + for_each_efi_memory_desc(md) { + char buf[64]; + + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", + i++, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } } -void __init efi_free_boot_services(void) +static int __init efi_systab_init(unsigned long phys) { + int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + const efi_table_hdr_t *hdr; + bool over4g = false; void *p; + int ret; - if (!efi_is_native()) - return; + hdr = p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - unsigned long long start = md->phys_addr; - unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + ret = efi_systab_check_header(hdr); + if (ret) { + early_memunmap(p, size); + return ret; + } - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (efi_enabled(EFI_64BIT)) { + const efi_system_table_64_t *systab64 = p; - /* Could not reserve boot area */ - if (!size) - continue; + efi_runtime = systab64->runtime; + over4g = systab64->runtime > U32_MAX; - free_bootmem_late(start, size); - } + if (efi_setup) { + struct efi_setup_data *data; - efi_unmap_memmap(); -} + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); + return -ENOMEM; + } -static int __init efi_systab_init(void *phys) -{ - if (efi_enabled(EFI_64BIT)) { - efi_system_table_64_t *systab64; - u64 tmp = 0; - - systab64 = early_ioremap((unsigned long)phys, - sizeof(*systab64)); - if (systab64 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; - } + efi_fw_vendor = (unsigned long)data->fw_vendor; + efi_config_table = (unsigned long)data->tables; - efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = systab64->fw_vendor; - tmp |= systab64->fw_vendor; - efi_systab.fw_revision = systab64->fw_revision; - efi_systab.con_in_handle = systab64->con_in_handle; - tmp |= systab64->con_in_handle; - efi_systab.con_in = systab64->con_in; - tmp |= systab64->con_in; - efi_systab.con_out_handle = systab64->con_out_handle; - tmp |= systab64->con_out_handle; - efi_systab.con_out = systab64->con_out; - tmp |= systab64->con_out; - efi_systab.stderr_handle = systab64->stderr_handle; - tmp |= systab64->stderr_handle; - efi_systab.stderr = systab64->stderr; - tmp |= systab64->stderr; - efi_systab.runtime = (void *)(unsigned long)systab64->runtime; - tmp |= systab64->runtime; - efi_systab.boottime = (void *)(unsigned long)systab64->boottime; - tmp |= systab64->boottime; - efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = systab64->tables; - tmp |= systab64->tables; - - early_iounmap(systab64, sizeof(*systab64)); -#ifdef CONFIG_X86_32 - if (tmp >> 32) { - pr_err("EFI data located above 4GB, disabling EFI.\n"); - return -EINVAL; - } -#endif - } else { - efi_system_table_32_t *systab32; + over4g |= data->fw_vendor > U32_MAX || + data->tables > U32_MAX; - systab32 = early_ioremap((unsigned long)phys, - sizeof(*systab32)); - if (systab32 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; + early_memunmap(data, sizeof(*data)); + } else { + efi_fw_vendor = systab64->fw_vendor; + efi_config_table = systab64->tables; + + over4g |= systab64->fw_vendor > U32_MAX || + systab64->tables > U32_MAX; } + efi_nr_tables = systab64->nr_tables; + } else { + const efi_system_table_32_t *systab32 = p; - efi_systab.hdr = systab32->hdr; - efi_systab.fw_vendor = systab32->fw_vendor; - efi_systab.fw_revision = systab32->fw_revision; - efi_systab.con_in_handle = systab32->con_in_handle; - efi_systab.con_in = systab32->con_in; - efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = systab32->con_out; - efi_systab.stderr_handle = systab32->stderr_handle; - efi_systab.stderr = systab32->stderr; - efi_systab.runtime = (void *)(unsigned long)systab32->runtime; - efi_systab.boottime = (void *)(unsigned long)systab32->boottime; - efi_systab.nr_tables = systab32->nr_tables; - efi_systab.tables = systab32->tables; - - early_iounmap(systab32, sizeof(*systab32)); + efi_fw_vendor = systab32->fw_vendor; + efi_runtime = systab32->runtime; + efi_config_table = systab32->tables; + efi_nr_tables = systab32->nr_tables; } - efi.systab = &efi_systab; + efi.runtime_version = hdr->revision; - /* - * Verify the EFI Table - */ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { - pr_err("System table signature incorrect!\n"); + efi_systab_report_header(hdr, efi_fw_vendor); + early_memunmap(p, size); + + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); return -EINVAL; } - if ((efi.systab->hdr.revision >> 16) == 0) - pr_err("Warning: System table version " - "%d.%02d, expected 1.00 or greater!\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); return 0; } -static int __init efi_config_init(u64 tables, int nr_tables) +static int __init efi_config_init(const efi_config_table_type_t *arch_tables) { - void *config_tables, *tablep; - int i, sz; + void *config_tables; + int sz, ret; + + if (efi_nr_tables == 0) + return 0; if (efi_enabled(EFI_64BIT)) sz = sizeof(efi_config_table_64_t); @@ -591,302 +449,69 @@ static int __init efi_config_init(u64 tables, int nr_tables) /* * Let's see what config tables the firmware passed to us. */ - config_tables = early_ioremap(tables, nr_tables * sz); + config_tables = early_memremap(efi_config_table, efi_nr_tables * sz); if (config_tables == NULL) { pr_err("Could not map Configuration table!\n"); return -ENOMEM; } - tablep = config_tables; - pr_info(""); - for (i = 0; i < efi.systab->nr_tables; i++) { - efi_guid_t guid; - unsigned long table; - - if (efi_enabled(EFI_64BIT)) { - u64 table64; - guid = ((efi_config_table_64_t *)tablep)->guid; - table64 = ((efi_config_table_64_t *)tablep)->table; - table = table64; -#ifdef CONFIG_X86_32 - if (table64 >> 32) { - pr_cont("\n"); - pr_err("Table located above 4GB, disabling EFI.\n"); - early_iounmap(config_tables, - efi.systab->nr_tables * sz); - return -EINVAL; - } -#endif - } else { - guid = ((efi_config_table_32_t *)tablep)->guid; - table = ((efi_config_table_32_t *)tablep)->table; - } - if (!efi_guidcmp(guid, MPS_TABLE_GUID)) { - efi.mps = table; - pr_cont(" MPS=0x%lx ", table); - } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) { - efi.acpi20 = table; - pr_cont(" ACPI 2.0=0x%lx ", table); - } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) { - efi.acpi = table; - pr_cont(" ACPI=0x%lx ", table); - } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) { - efi.smbios = table; - pr_cont(" SMBIOS=0x%lx ", table); -#ifdef CONFIG_X86_UV - } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) { - efi.uv_systab = table; - pr_cont(" UVsystab=0x%lx ", table); -#endif - } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) { - efi.hcdp = table; - pr_cont(" HCDP=0x%lx ", table); - } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) { - efi.uga = table; - pr_cont(" UGA=0x%lx ", table); - } - tablep += sz; - } - pr_cont("\n"); - early_iounmap(config_tables, efi.systab->nr_tables * sz); - return 0; -} - -static int __init efi_runtime_init(void) -{ - efi_runtime_services_t *runtime; - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - */ - runtime = early_ioremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map - * is invoked. - */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; - early_iounmap(runtime, sizeof(efi_runtime_services_t)); - - return 0; -} - -static int __init efi_memmap_init(void) -{ - /* Map the EFI memory map */ - memmap.map = early_ioremap((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); - if (memmap.map == NULL) { - pr_err("Could not map the memory map!\n"); - return -ENOMEM; - } - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); - - if (add_efi_memmap) - do_add_efi_memmap(); + ret = efi_config_parse_tables(config_tables, efi_nr_tables, + arch_tables); - return 0; + early_memunmap(config_tables, efi_nr_tables * sz); + return ret; } void __init efi_init(void) { - efi_char16_t *c16; - char vendor[100] = "unknown"; - int i = 0; - void *tmp; - -#ifdef CONFIG_X86_32 - if (boot_params.efi_info.efi_systab_hi || - boot_params.efi_info.efi_memmap_hi) { + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { pr_info("Table located above 4GB, disabling EFI.\n"); return; } - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - if (efi_systab_init(efi_phys.systab)) - return; + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + if (efi_systab_init(efi_systab_phys)) + return; - /* - * Show what we know for posterity - */ - c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); - if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; - vendor[i] = '\0'; - } else - pr_err("Could not map the firmware vendor!\n"); - early_iounmap(tmp, 2); - - pr_info("EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) + if (efi_reuse_config(efi_config_table, efi_nr_tables)) return; - set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); + if (efi_config_init(arch_tables)) + return; /* * Note: We currently don't support runtime services on an EFI * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_is_native()) - pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else { - if (disable_runtime || efi_runtime_init()) - return; - set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); - } + if (!efi_runtime_supported()) + pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - if (efi_memmap_init()) + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); return; - - set_bit(EFI_MEMMAP, &x86_efi_facility); - -#ifdef CONFIG_X86_32 - if (efi_is_native()) { - x86_platform.get_wallclock = efi_get_time; - x86_platform.set_wallclock = efi_set_rtc_mmss; - } -#endif - -#if EFI_DEBUG - print_efi_memmap(); -#endif -} - -void __init efi_late_init(void) -{ - efi_bgrt_init(); -} - -void __init efi_set_executable(efi_memory_desc_t *md, bool executable) -{ - u64 addr, npages; - - addr = md->virt_addr; - npages = md->num_pages; - - memrange_efi_to_native(&addr, &npages); - - if (executable) - set_memory_x(addr, npages); - else - set_memory_nx(addr, npages); -} - -static void __init runtime_code_page_mkexec(void) -{ - efi_memory_desc_t *md; - void *p; - - /* Make EFI runtime service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if (md->type != EFI_RUNTIME_SERVICES_CODE) - continue; - - efi_set_executable(md, true); } -} -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM. So, look it up in the existing EFI memory map instead. Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. - */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ - void *p; - if (WARN_ON(!memmap.map)) - return NULL; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - u64 end = md->phys_addr + size; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - if (!md->virt_addr) - continue; - if (phys_addr >= md->phys_addr && phys_addr < end) { - phys_addr += md->virt_addr - md->phys_addr; - return (__force void __iomem *)(unsigned long)phys_addr; - } - } - return NULL; -} + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_clean_memmap(); -void efi_memory_uc(u64 addr, unsigned long size) -{ - unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; - u64 npages; + efi_remove_e820_mmio(); - npages = round_up(size, page_shift) / page_shift; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); + if (efi_enabled(EFI_DBG)) + efi_print_memmap(); } -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ -void __init efi_enter_virtual_mode(void) +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) { efi_memory_desc_t *md, *prev_md = NULL; - efi_status_t status; - unsigned long size; - u64 end, systab, start_pfn, end_pfn; - void *p, *va, *new_memmap = NULL; - int count = 0; - - efi.systab = NULL; - /* - * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI - */ - - if (!efi_is_native()) { - efi_unmap_memmap(); - return; - } - - /* Merge contiguous regions of the same type and attribute */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + for_each_efi_memory_desc(md) { u64 prev_size; - md = p; if (!prev_md) { prev_md = md; @@ -909,217 +534,394 @@ void __init efi_enter_virtual_mode(void) } prev_md = md; } +} - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; - size = md->num_pages << EFI_PAGE_SHIFT; - end = md->phys_addr + size; + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; - start_pfn = PFN_DOWN(md->phys_addr); - end_pfn = PFN_UP(end); - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); - if (!(md->attribute & EFI_MEMORY_RUNTIME)) { - if (!va) - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; - } +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} - md->virt_addr = (u64) (unsigned long) va; +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return efi.memmap.map_end - efi.memmap.desc_size; - if (!va) { - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; - } + entry -= efi.memmap.desc_size; + if (entry < efi.memmap.map) + return NULL; - systab = (u64) (unsigned long) efi_phys.systab; - if (md->phys_addr <= systab && systab < end) { - systab += md->virt_addr - md->phys_addr; - efi.systab = (efi_system_table_t *) (unsigned long) systab; - } - new_memmap = krealloc(new_memmap, - (count + 1) * memmap.desc_size, - GFP_KERNEL); - memcpy(new_memmap + (count * memmap.desc_size), md, - memmap.desc_size); - count++; + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); } - BUG_ON(!efi.systab); + /* Initial call */ + if (!entry) + return efi.memmap.map; - status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + entry += efi.memmap.desc_size; + if (entry >= efi.memmap.map_end) + return NULL; - if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode " - "(status=%lx)!\n", status); - panic("EFI call to SetVirtualAddressMap() failed!"); + return entry; +} + +static bool should_map_region(efi_memory_desc_t *md) +{ + /* + * Runtime regions always require runtime mappings (obviously). + */ + if (md->attribute & EFI_MEMORY_RUNTIME) + return true; + + /* + * 32-bit EFI doesn't suffer from the bug that requires us to + * reserve boot services regions, and mixed mode support + * doesn't exist for 32-bit kernels. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + + /* + * EFI specific purpose memory may be reserved by default + * depending on kernel config and boot options. + */ + if (md->type == EFI_CONVENTIONAL_MEMORY && + efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + return false; + + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + if (efi_is_mixed()) { + if (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_LOADER_DATA || + md->type == EFI_LOADER_CODE) + return true; } /* - * Now that EFI is in virtual mode, update the function - * pointers in the runtime service table to the new virtual addresses. + * Map boot services regions as a workaround for buggy + * firmware that accesses them even when they shouldn't. * - * Call EFI services through wrapper functions. + * See efi_{reserve,free}_boot_services(). */ - efi.runtime_version = efi_systab.hdr.revision; - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; - efi.set_virtual_address_map = NULL; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); - - kfree(new_memmap); + if (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) + return true; - /* clean DUMMY object */ - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - 0, NULL); + return false; } /* - * Convenience functions to obtain memory types and attributes + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. */ -u32 efi_mem_type(unsigned long phys_addr) +static void * __init efi_map_regions(int *count, int *pg_shift) { + void *p, *new_memmap = NULL; + unsigned long left = 0; + unsigned long desc_size; efi_memory_desc_t *md; - void *p; - if (!efi_enabled(EFI_MEMMAP)) - return 0; + desc_size = efi.memmap.desc_size; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; - if ((md->phys_addr <= phys_addr) && - (phys_addr < (md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT)))) - return md->type; + + if (!should_map_region(md)) + continue; + + efi_map_region(md); + + if (left < desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; + } + + memcpy(new_memmap + (*count * desc_size), md, desc_size); + + left -= desc_size; + (*count)++; } - return 0; + + return new_memmap; } -u64 efi_mem_attributes(unsigned long phys_addr) +static void __init kexec_enter_virtual_mode(void) { +#ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; - void *p; + unsigned int num_pages; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && - (phys_addr < (md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT)))) - return md->attribute; + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI. + */ + if (efi_is_mixed()) { + efi_memmap_unmap(); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; } - return 0; + + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for_each_efi_memory_desc(md) + efi_map_region_fixed(md); /* FIXME: add error handling */ + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map. + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + + efi_sync_low_kernel_mappings(); + efi_native_runtime_setup(); + efi_runtime_update_mappings(); +#endif } /* - * Some firmware has serious problems when using more than 50% of the EFI - * variable store, i.e. it triggers bugs that can brick machines. Ensure that - * we never use more than this safe limit. + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * efi_pgd page table. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * - * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable - * store. + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). */ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +static void __init __efi_enter_virtual_mode(void) { + int count = 0, pg_shift = 0; + void *new_memmap = NULL; efi_status_t status; - u64 storage_size, remaining_size, max_size; + unsigned long pa; - if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) - return 0; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + goto err; + } - status = efi.query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - if (status != EFI_SUCCESS) - return status; + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + goto err; + } + + pa = __pa(new_memmap); /* - * Some firmware implementations refuse to boot if there's insufficient - * space in the variable store. We account for that by refusing the - * write if permitting it would reduce the available space to under - * 5KB. This figure was provided by Samsung, so should be safe. + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map that we are about to pass to the + * firmware via SetVirtualAddressMap(). */ - if ((remaining_size - size < EFI_MIN_RESERVE) && - !efi_no_storage_paranoia) { + efi_memmap_unmap(); - /* - * Triggering garbage collection may require that the firmware - * generate a real EFI_OUT_OF_RESOURCES error. We can force - * that by attempting to use more space than is available. - */ - unsigned long dummy_size = remaining_size + 1024; - void *dummy = kzalloc(dummy_size, GFP_ATOMIC); + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { + pr_err("Failed to remap late EFI memory map\n"); + goto err; + } - if (!dummy) - return EFI_OUT_OF_RESOURCES; + if (efi_enabled(EFI_DBG)) { + pr_info("EFI runtime memory map:\n"); + efi_print_memmap(); + } - status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - dummy_size, dummy); + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; - if (status == EFI_SUCCESS) { - /* - * This should have failed, so if it didn't make sure - * that we delete it... - */ - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, - 0, dummy); - } + efi_sync_low_kernel_mappings(); - kfree(dummy); + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa, + efi_systab_phys); + if (status != EFI_SUCCESS) { + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; + } - /* - * The runtime code may now have triggered a garbage collection - * run, so check the variable info again - */ - status = efi.query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); + efi_check_for_embedded_firmwares(); + efi_free_boot_services(); - if (status != EFI_SUCCESS) - return status; + if (!efi_is_mixed()) + efi_native_runtime_setup(); + else + efi_thunk_runtime_setup(); - /* - * There still isn't enough room, so return an error - */ - if (remaining_size - size < EFI_MIN_RESERVE) - return EFI_OUT_OF_RESOURCES; + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. + */ + efi_runtime_update_mappings(); + + /* clean DUMMY object */ + efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); +} + +void __init efi_enter_virtual_mode(void) +{ + if (efi_enabled(EFI_PARAVIRT)) + return; + + efi.runtime = (efi_runtime_services_t *)efi_runtime; + + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); + + efi_dump_pagetable(); +} + +bool efi_is_table_address(unsigned long phys_addr) +{ + unsigned int i; + + if (phys_addr == EFI_INVALID_TABLE_ADDR) + return false; + + for (i = 0; i < ARRAY_SIZE(efi_tables); i++) + if (*(efi_tables[i]) == phys_addr) + return true; + + return false; +} + +#define EFI_FIELD(var) efi_ ## var + +#define EFI_ATTR_SHOW(name) \ +static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \ +} + +EFI_ATTR_SHOW(fw_vendor); +EFI_ATTR_SHOW(runtime); +EFI_ATTR_SHOW(config_table); + +struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor); +struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime); +struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table); + +umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + if (attr == &efi_attr_fw_vendor.attr) { + if (efi_enabled(EFI_PARAVIRT) || + efi_fw_vendor == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_runtime.attr) { + if (efi_runtime == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_config_table.attr) { + if (efi_config_table == EFI_INVALID_TABLE_ADDR) + return 0; } + return attr->mode; +} - return EFI_SUCCESS; +enum efi_secureboot_mode __x86_ima_efi_boot_mode(void) +{ + return boot_params.secure_boot; } -EXPORT_SYMBOL_GPL(efi_query_variable_store); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..b2cc7b4552a1 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Extensible Firmware Interface * @@ -23,47 +24,131 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/efi.h> +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/desc.h> #include <asm/page.h> -#include <asm/pgtable.h> +#include <asm/set_memory.h> #include <asm/tlbflush.h> #include <asm/efi.h> +void __init efi_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc((unsigned long)va, md->num_pages); + } else { + va = ioremap_cache(md->phys_addr, size); + } + + md->virt_addr = (unsigned long)va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", md->phys_addr); +} + /* * To make EFI call EFI runtime service in physical addressing mode we need - * prelog/epilog before/after the invocation to disable interrupt, to - * claim EFI runtime service handler exclusively and to duplicate a memory in - * low memory space say 0 - 3G. + * prolog/epilog before/after the invocation to claim the EFI runtime service + * handler exclusively and to duplicate a memory mapping in low memory space, + * say 0 - 3G. */ -static unsigned long efi_rt_eflags; +int __init efi_alloc_page_tables(void) +{ + return 0; +} + +void efi_sync_low_kernel_mappings(void) {} -void efi_call_phys_prelog(void) +void __init efi_dump_pagetable(void) { - struct desc_ptr gdt_descr; +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, &init_mm); +#endif +} + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} + +void __init efi_map_region_fixed(efi_memory_desc_t *md) {} +void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} - local_irq_save(efi_rt_eflags); +efi_status_t efi_call_svam(efi_runtime_services_t * const *, + u32, u32, u32, void *, u32); +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys) +{ + const efi_system_table_t *systab = (efi_system_table_t *)systab_phys; + struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd; + + /* Current pgd is swapper_pg_dir, we'll restore it later: */ + save_pgd = swapper_pg_dir; load_cr3(initial_page_table); __flush_tlb_all(); - gdt_descr.address = __pa(get_cpu_gdt_table(0)); + gdt_descr.address = get_cpu_gdt_paddr(0); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_svam(&systab->runtime, + memory_map_size, descriptor_size, + descriptor_version, virtual_map, + __pa(&efi.runtime)); + local_irq_restore(flags); + + load_fixmap_gdt(0); + load_cr3(save_pgd); + __flush_tlb_all(); + + return status; } -void efi_call_phys_epilog(void) +void __init efi_runtime_update_mappings(void) { - struct desc_ptr gdt_descr; + if (__supported_pte_mask & _PAGE_NX) { + efi_memory_desc_t *md; - gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; - load_cr3(swapper_pg_dir); - __flush_tlb_all(); + set_memory_x(md->virt_addr, md->num_pages); + } + } +} - local_irq_restore(efi_rt_eflags); +void arch_efi_call_virt_setup(void) +{ + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); +} + +void arch_efi_call_virt_teardown(void) +{ + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..b4409df2105a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * x86_64 specific EFI support functions * Based on Extensible Firmware Interface Specification version 1.0 @@ -15,101 +16,825 @@ * */ +#define pr_fmt(fmt) "efi: " fmt + #include <linux/kernel.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/spinlock.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/ioport.h> -#include <linux/module.h> +#include <linux/mc146818rtc.h> #include <linux/efi.h> +#include <linux/export.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/reboot.h> #include <linux/slab.h> +#include <linux/ucs2_string.h> +#include <linux/cc_platform.h> +#include <linux/sched/task.h> #include <asm/setup.h> #include <asm/page.h> -#include <asm/e820.h> -#include <asm/pgtable.h> +#include <asm/e820/api.h> #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm/efi.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> +#include <asm/realmode.h> +#include <asm/time.h> +#include <asm/pgalloc.h> +#include <asm/sev.h> + +/* + * We allocate runtime services regions top-down, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. + */ +static u64 efi_va = EFI_VA_START; +static struct mm_struct *efi_prev_mm; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd, *efi_pgd; + p4d_t *p4d; + pud_t *pud; + gfp_t gfp_mask; + + gfp_mask = GFP_KERNEL | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); + if (!efi_pgd) + goto fail; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); + if (!p4d) + goto free_pgd; + + pud = pud_alloc(&init_mm, p4d, EFI_VA_END); + if (!pud) + goto free_p4d; + + efi_mm.pgd = efi_pgd; + mm_init_cpumask(&efi_mm); + init_new_context(NULL, &efi_mm); + set_notrack_mm(&efi_mm); + + return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); +fail: + return -ENOMEM; +} + +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + p4d_t *p4d_k, *p4d_efi; + pud_t *pud_k, *pud_efi; + pgd_t *efi_pgd = efi_mm.pgd; + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pgd_k = pgd_offset_k(EFI_VA_END); + p4d_efi = p4d_offset(pgd_efi, 0); + p4d_k = p4d_offset(pgd_k, 0); + + num_entries = p4d_index(EFI_VA_END); + memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + p4d_efi = p4d_offset(pgd_efi, EFI_VA_END); + p4d_k = p4d_offset(pgd_k, EFI_VA_END); + pud_efi = pud_offset(p4d_efi, 0); + pud_k = pud_offset(p4d_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(p4d_efi, EFI_VA_START); + pud_k = pud_offset(p4d_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); +} + +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + phys_addr_t pa; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + pa = slow_virt_to_phys(va); + + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; + + return pa; +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + +int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + extern const u8 __efi64_thunk_ret_tramp[]; + unsigned long pfn, text, pf, rodata, tramp; + struct page *page; + unsigned npages; + pgd_t *pgd = efi_mm.pgd; + + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ + pfn = pa_memmap >> PAGE_SHIFT; + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + /* + * Certain firmware versions are way too sentimental and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + + /* + * When SEV-ES is active, the GHCB as set by the kernel will be used + * by firmware. Create a 1:1 unencrypted mapping for each GHCB. + */ + if (sev_es_efi_map_ghcbs_cas(pgd)) { + pr_err("Failed to create 1:1 mapping for the GHCBs and CAs!\n"); + return 1; + } + + /* + * When making calls to the firmware everything needs to be 1:1 + * mapped and addressable with 32-bit pointers. Map the kernel + * text and allocate a new stack because we can't rely on the + * stack pointer being < 4GB. + */ + if (!efi_is_mixed()) + return 0; + + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } + + efi_mixed_mode_stack_pa = page_to_phys(page + 1); /* stack grows down */ + + npages = (_etext - _text) >> PAGE_SHIFT; + text = __pa(_text); + + if (kernel_unmap_pages_in_pgd(pgd, text, npages)) { + pr_err("Failed to unmap kernel text 1:1 mapping\n"); + return 1; + } + + npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; + rodata = __pa(__start_rodata); + pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { + pr_err("Failed to map kernel rodata 1:1\n"); + return 1; + } -static pgd_t *save_pgd __initdata; -static unsigned long efi_flags __initdata; + tramp = __pa(__efi64_thunk_ret_tramp); + pfn = tramp >> PAGE_SHIFT; -static void __init early_code_mapping_set_exec(int executable) + pf = _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, tramp, 1, pf)) { + pr_err("Failed to map mixed mode return trampoline\n"); + return 1; + } + + return 0; +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) { - efi_memory_desc_t *md; - void *p; + unsigned long flags = _PAGE_RW; + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). + */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_WB)) + flags |= _PAGE_PCD; - if (!(__supported_pte_mask & _PAGE_NX)) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && + md->type != EFI_MEMORY_MAPPED_IO) + flags |= _PAGE_ENC; + + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + unsigned long size = md->num_pages << PAGE_SHIFT; + u64 pa = md->phys_addr; + + /* + * Make sure the 1:1 mappings are present as a catch-all for b0rked + * firmware which doesn't update all internal pointers after switching + * to virtual mode and would otherwise crap on us. + */ + __map_region(md, md->phys_addr); + + /* + * Enforce the 1:1 mapping as the default virtual address when + * booting in EFI mixed mode, because even though we may be + * running a 64-bit kernel, the firmware may only be 32-bit. + */ + if (efi_is_mixed()) { + md->virt_addr = md->phys_addr; return; + } + + efi_va -= size; - /* Make EFI service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_CODE) - efi_set_executable(md, executable); + /* Is PA 2M-aligned? */ + if (!(pa & (PMD_SIZE - 1))) { + efi_va &= PMD_MASK; + } else { + u64 pa_offset = pa & (PMD_SIZE - 1); + u64 prev_va = efi_va; + + /* get us the same offset within this 2M page */ + efi_va = (efi_va & PMD_MASK) + pa_offset; + + if (efi_va > prev_va) + efi_va -= PMD_SIZE; } + + if (efi_va < EFI_VA_END) { + pr_warn(FW_WARN "VA address range overflow!\n"); + return; + } + + /* Do the VA map */ + __map_region(md, efi_va); + md->virt_addr = efi_va; +} + +/* + * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. + * md->virt_addr is the original virtual address which had been mapped in kexec + * 1st kernel. + */ +void __init efi_map_region_fixed(efi_memory_desc_t *md) +{ + __map_region(md, md->phys_addr); + __map_region(md, md->virt_addr); +} + +void __init parse_efi_setup(u64 phys_addr, u32 data_len) +{ + efi_setup = phys_addr + sizeof(struct setup_data); +} + +static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) +{ + unsigned long pfn; + pgd_t *pgd = efi_mm.pgd; + int err1, err2; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf); + if (err1) { + pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf); + if (err2) { + pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + return err1 || err2; } -void __init efi_call_phys_prelog(void) +bool efi_disable_ibt_for_runtime __ro_after_init = true; + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md, + bool has_ibt) { - unsigned long vaddress; - int pgd; - int n_pgds; + unsigned long pf = 0; + + efi_disable_ibt_for_runtime |= !has_ibt; - early_code_mapping_set_exec(1); - local_irq_save(efi_flags); + if (md->attribute & EFI_MEMORY_XP) + pf |= _PAGE_NX; - n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); + if (!(md->attribute & EFI_MEMORY_RO)) + pf |= _PAGE_RW; - for (pgd = 0; pgd < n_pgds; pgd++) { - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + pf |= _PAGE_ENC; + + return efi_update_mappings(md, pf); +} + +void __init efi_runtime_update_mappings(void) +{ + if (efi_enabled(EFI_MEM_ATTR)) { + efi_disable_ibt_for_runtime = false; + efi_memattr_apply_permissions(NULL, efi_update_mem_attr); } - __flush_tlb_all(); } -void __init efi_call_phys_epilog(void) +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, &efi_mm); +#endif +} + +/* + * Makes the calling thread switch to/from efi_mm context. Can be used + * in a kernel thread and user context. Preemption needs to remain disabled + * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm + * can not change under us. + * It should be ensured that there are no concurrent calls to this function. + */ +static void efi_enter_mm(void) +{ + efi_prev_mm = use_temporary_mm(&efi_mm); +} + +static void efi_leave_mm(void) +{ + unuse_temporary_mm(efi_prev_mm); +} + +void arch_efi_call_virt_setup(void) +{ + efi_sync_low_kernel_mappings(); + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); + efi_enter_mm(); +} + +void arch_efi_call_virt_teardown(void) +{ + efi_leave_mm(); + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} + +static DEFINE_SPINLOCK(efi_runtime_lock); + +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) \ +({ \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(efi.runtime->mixed_mode.func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(func...) \ +({ \ + efi_status_t __s; \ + \ + arch_efi_call_virt_setup(); \ + \ + __s = __efi_thunk(func); \ + \ + arch_efi_call_virt_teardown(); \ + \ + __s; \ +}) + +static efi_status_t __init __no_sanitize_address +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + + efi_sync_low_kernel_mappings(); + local_irq_save(flags); + + efi_enter_mm(); + + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); + + efi_leave_mm(); + local_irq_restore(flags); + + return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + return EFI_UNSUPPORTED; +} + +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name, phys_vendor, phys_attr; + u32 phys_data_size, phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + unsigned long flags; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + *vnd = *vendor; + + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vnd); + phys_data = virt_to_phys_or_null_size(data, data_size); + + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); + efi_status_t status; + u32 phys_name_size, phys_name, phys_vendor; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + *vnd = *vendor; + + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vnd); + phys_name = virt_to_phys_or_null_size(name, *name_size); + + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + *vendor = *vnd; + return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ + return EFI_UNSUPPORTED; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + u32 phys_data; + unsigned long flags; + + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_data = virt_to_phys_or_null_size(data, data_size); + + efi_thunk(reset_system, reset_type, status, data_size, phys_data); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); +} + +static efi_status_t +efi_thunk_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) { /* - * After the lock is released, the original page table is restored. + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. */ - int pgd; - int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - for (pgd = 0; pgd < n_pgds; pgd++) - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); - kfree(save_pgd); - __flush_tlb_all(); - local_irq_restore(efi_flags); - early_code_mapping_set_exec(0); + return EFI_UNSUPPORTED; } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type, u64 attribute) +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) { - unsigned long last_map_pfn; + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { - unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type, attribute); - } + spin_lock_irqsave(&efi_runtime_lock, flags); + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + unsigned long flags; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + spin_unlock_irqrestore(&efi_runtime_lock, flags); + + return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +void __init efi_thunk_runtime_setup(void) +{ + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + + efi.get_time = efi_thunk_get_time; + efi.set_time = efi_thunk_set_time; + efi.get_wakeup_time = efi_thunk_get_wakeup_time; + efi.set_wakeup_time = efi_thunk_set_wakeup_time; + efi.get_variable = efi_thunk_get_variable; + efi.get_next_variable = efi_thunk_get_next_variable; + efi.set_variable = efi_thunk_set_variable; + efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking; + efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; + efi.reset_system = efi_thunk_reset_system; + efi.query_variable_info = efi_thunk_query_variable_info; + efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking; + efi.update_capsule = efi_thunk_update_capsule; + efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} + +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys) +{ + const efi_system_table_t *systab = (efi_system_table_t *)systab_phys; + efi_status_t status; + unsigned long flags; + + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + efi_enter_mm(); + + efi_fpu_begin(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = arch_efi_call_virt(efi.runtime, set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + efi_fpu_end(); + + /* grab the virtually remapped EFI runtime services table pointer */ + efi.runtime = READ_ONCE(systab->runtime); - if (!(attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + efi_leave_mm(); - return (void __iomem *)__va(phys_addr); + return status; } diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index fbe66e626c09..f3cfdb1c9a35 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * EFI call stub for IA32. * @@ -6,118 +7,54 @@ */ #include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> #include <asm/page_types.h> -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ + __INIT +SYM_FUNC_START(efi_call_svam) + push %ebp + movl %esp, %ebp + push %ebx -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ + push 16(%esp) + push 16(%esp) + push %ecx + push %edx + movl %eax, %ebx // &systab_phys->runtime -.text -ENTRY(efi_call_phys) /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prelog and - * epilog. + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. */ movl $1f, %edx subl $__PAGE_OFFSET, %edx jmp *%edx 1: - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ + /* disable paging */ movl %cr0, %edx andl $0x7fffffff, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 4. Adjust stack pointer. - */ + /* convert the stack pointer to a flat mapped address */ subl $__PAGE_OFFSET, %esp - /* - * 5. Call the physical function. - */ - jmp *%ecx + /* call the EFI routine */ + movl (%eax), %eax + call *EFI_svam(%eax) -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp + /* grab the virtually remapped EFI runtime services table pointer */ + movl (%ebx), %ecx + movl 36(%esp), %edx // &efi.runtime + movl %ecx, (%edx) - /* - * 7. Restore PG bit - */ + /* re-enable paging */ movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret -ENDPROC(efi_call_phys) -.previous -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 + movl 16(%esp), %ebx + leave + RET +SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Function calling ABI conversion from Linux to EFI for x86_64 * @@ -7,110 +8,24 @@ */ #include <linux/linkage.h> - -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp - -ENTRY(efi_call0) - SAVE_XMM - subq $32, %rsp - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call0) - -ENTRY(efi_call1) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call1) - -ENTRY(efi_call2) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call2) - -ENTRY(efi_call3) - SAVE_XMM - subq $32, %rsp - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call3) - -ENTRY(efi_call4) - SAVE_XMM - subq $32, %rsp - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call4) - -ENTRY(efi_call5) - SAVE_XMM - subq $48, %rsp - mov %r9, 32(%rsp) - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call5) - -ENTRY(efi_call6) - SAVE_XMM - mov (%rsp), %rax - mov 8(%rax), %rax +#include <asm/nospec-branch.h> + +SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. + */ + ANNOTATE_NOCFI_SYM + pushq %rbp + movq %rsp, %rbp + and $~0xf, %rsp + mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) mov %rax, 40(%rsp) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call6) + CALL_NOSPEC rdi + leave + RET +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 000000000000..c4b1144f99f6 --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * + * Support for invoking 32-bit EFI runtime services from a 64-bit + * kernel. + * + * The below thunking functions are only used after ExitBootServices() + * has been called. This simplifies things considerably as compared with + * the early EFI thunking because we can leave all the kernel state + * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime + * services from __KERNEL32_CS. This means we can continue to service + * interrupts across an EFI mixed mode call. + * + * We do however, need to handle the fact that we're running in a full + * 64-bit virtual address space. Things like the stack and instruction + * addresses need to be accessible by the 32-bit firmware, so we rely on + * using the identity mappings in the EFI page table to access the stack + * and kernel text (see efi_setup_page_tables()). + */ + +#include <linux/linkage.h> +#include <linux/objtool.h> +#include <asm/page_types.h> +#include <asm/segment.h> + + .text + .code64 +SYM_FUNC_START(__efi64_thunk) +STACK_FRAME_NON_STANDARD __efi64_thunk + push %rbp + push %rbx + + /* + * Switch to 1:1 mapped 32-bit stack pointer. + */ + movq %rsp, %rax + movq efi_mixed_mode_stack_pa(%rip), %rsp + push %rax + + /* + * Copy args passed via the stack + */ + subq $0x24, %rsp + movq 0x18(%rax), %rbp + movq 0x20(%rax), %rbx + movq 0x28(%rax), %rax + movl %ebp, 0x18(%rsp) + movl %ebx, 0x1c(%rsp) + movl %eax, 0x20(%rsp) + + /* + * Calculate the physical address of the kernel text. + */ + movq $__START_KERNEL_map, %rax + subq phys_base(%rip), %rax + + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp + subq %rax, %rbx + + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) + + /* Switch to 32-bit descriptor */ + pushq $__KERNEL32_CS + pushq %rdi /* EFI runtime service address */ + lretq + + // This return instruction is not needed for correctness, as it will + // never be reached. It only exists to make objtool happy, which will + // otherwise complain about unreachable instructions in the callers. + RET +SYM_FUNC_END(__efi64_thunk) + + .section ".rodata", "a", @progbits + .balign 16 +SYM_DATA_START(__efi64_thunk_ret_tramp) +1: movq 0x20(%rsp), %rsp + pop %rbx + pop %rbp + ret + int3 + + .code32 +2: pushl $__KERNEL_CS + pushl %ebp + lret +SYM_DATA_END(__efi64_thunk_ret_tramp) + + .bss + .balign 8 +SYM_DATA(efi_mixed_mode_stack_pa, .quad 0) diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c new file mode 100644 index 000000000000..023697c88910 --- /dev/null +++ b/arch/x86/platform/efi/memmap.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common EFI memory map functions. + */ + +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/efi.h> +#include <linux/io.h> +#include <asm/early_ioremap.h> +#include <asm/efi.h> +#include <linux/memblock.h> +#include <linux/slab.h> + +static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) +{ + return memblock_phys_alloc(size, SMP_CACHE_BYTES); +} + +static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) +{ + unsigned int order = get_order(size); + struct page *p = alloc_pages(GFP_KERNEL, order); + + if (!p) + return 0; + + return PFN_PHYS(page_to_pfn(p)); +} + +static +void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) +{ + if (flags & EFI_MEMMAP_MEMBLOCK) { + if (slab_is_available()) + memblock_free_late(phys, size); + else + memblock_phys_free(phys, size); + } else if (flags & EFI_MEMMAP_SLAB) { + struct page *p = pfn_to_page(PHYS_PFN(phys)); + unsigned int order = get_order(size); + + __free_pages(p, order); + } +} + +/** + * efi_memmap_alloc - Allocate memory for the EFI memory map + * @num_entries: Number of entries in the allocated map. + * @data: efi memmap installation parameters + * + * Depending on whether mm_init() has already been invoked or not, + * either memblock or "normal" page allocation is used. + * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data) +{ + /* Expect allocation parameters are zero initialized */ + WARN_ON(data->phys_map || data->size); + + data->size = num_entries * efi.memmap.desc_size; + data->desc_version = efi.memmap.desc_version; + data->desc_size = efi.memmap.desc_size; + data->flags &= ~(EFI_MEMMAP_SLAB | EFI_MEMMAP_MEMBLOCK); + data->flags |= efi.memmap.flags & EFI_MEMMAP_LATE; + + if (slab_is_available()) { + data->flags |= EFI_MEMMAP_SLAB; + data->phys_map = __efi_memmap_alloc_late(data->size); + } else { + data->flags |= EFI_MEMMAP_MEMBLOCK; + data->phys_map = __efi_memmap_alloc_early(data->size); + } + + if (!data->phys_map) + return -ENOMEM; + return 0; +} + +/** + * efi_memmap_install - Install a new EFI memory map in efi.memmap + * @data: efi memmap installation parameters + * + * Unlike efi_memmap_init_*(), this function does not allow the caller + * to switch from early to late mappings. It simply uses the existing + * mapping function and installs the new memmap. + * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_install(struct efi_memory_map_data *data) +{ + unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map; + unsigned long flags = efi.memmap.flags; + u64 phys = efi.memmap.phys_map; + int ret; + + efi_memmap_unmap(); + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + ret = __efi_memmap_init(data); + if (ret) + return ret; + + __efi_memmap_free(phys, size, flags); + return 0; +} + +/** + * efi_memmap_split_count - Count number of additional EFI memmap entries + * @md: EFI memory descriptor to split + * @range: Address range (start, end) to split around + * + * Returns the number of additional EFI memmap entries required to + * accommodate @range. + */ +int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range) +{ + u64 m_start, m_end; + u64 start, end; + int count = 0; + + start = md->phys_addr; + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + /* modifying range */ + m_start = range->start; + m_end = range->end; + + if (m_start <= start) { + /* split into 2 parts */ + if (start < m_end && m_end < end) + count++; + } + + if (start < m_start && m_start < end) { + /* split into 3 parts */ + if (m_end < end) + count += 2; + /* split into 2 parts */ + if (end <= m_end) + count++; + } + + return count; +} + +/** + * efi_memmap_insert - Insert a memory region in an EFI memmap + * @old_memmap: The existing EFI memory map structure + * @buf: Address of buffer to store new map + * @mem: Memory map entry to insert + * + * It is suggested that you call efi_memmap_split_count() first + * to see how large @buf needs to be. + */ +void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf, + struct efi_mem_range *mem) +{ + u64 m_start, m_end, m_attr; + efi_memory_desc_t *md; + u64 start, end; + void *old, *new; + + /* modifying range */ + m_start = mem->range.start; + m_end = mem->range.end; + m_attr = mem->attribute; + + /* + * The EFI memory map deals with regions in EFI_PAGE_SIZE + * units. Ensure that the region described by 'mem' is aligned + * correctly. + */ + if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) || + !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) { + WARN_ON(1); + return; + } + + for (old = old_memmap->map, new = buf; + old < old_memmap->map_end; + old += old_memmap->desc_size, new += old_memmap->desc_size) { + + /* copy original EFI memory descriptor */ + memcpy(new, old, old_memmap->desc_size); + md = new; + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= start && end <= m_end) + md->attribute |= m_attr; + + if (m_start <= start && + (start < m_end && m_end < end)) { + /* first part */ + md->attribute |= m_attr; + md->num_pages = (m_end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && m_end < end) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* middle part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->attribute |= m_attr; + md->phys_addr = m_start; + md->num_pages = (m_end - m_start + 1) >> + EFI_PAGE_SHIFT; + /* last part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - m_end) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && + (end <= m_end)) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_start; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + md->attribute |= m_attr; + } + } +} diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c new file mode 100644 index 000000000000..553f330198f2 --- /dev/null +++ b/arch/x86/platform/efi/quirks.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "efi: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/time.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/acpi.h> +#include <linux/dmi.h> + +#include <asm/e820/api.h> +#include <asm/efi.h> +#include <asm/uv/uv.h> +#include <asm/cpu_device_id.h> +#include <asm/realmode.h> +#include <asm/reboot.h> + +#define EFI_MIN_RESERVE 5120 + +#define EFI_DUMMY_GUID \ + EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) + +#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */ +#define QUARK_SECURITY_HEADER_SIZE 0x400 + +/* + * Header prepended to the standard EFI capsule on Quark systems the are based + * on Intel firmware BSP. + * @csh_signature: Unique identifier to sanity check signed module + * presence ("_CSH"). + * @version: Current version of CSH used. Should be one for Quark A0. + * @modulesize: Size of the entire module including the module header + * and payload. + * @security_version_number_index: Index of SVN to use for validation of signed + * module. + * @security_version_number: Used to prevent against roll back of modules. + * @rsvd_module_id: Currently unused for Clanton (Quark). + * @rsvd_module_vendor: Vendor Identifier. For Intel products value is + * 0x00008086. + * @rsvd_date: BCD representation of build date as yyyymmdd, where + * yyyy=4 digit year, mm=1-12, dd=1-31. + * @headersize: Total length of the header including including any + * padding optionally added by the signing tool. + * @hash_algo: What Hash is used in the module signing. + * @cryp_algo: What Crypto is used in the module signing. + * @keysize: Total length of the key data including including any + * padding optionally added by the signing tool. + * @signaturesize: Total length of the signature including including any + * padding optionally added by the signing tool. + * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the + * chain, if there is a next header. + * @rsvd: Reserved, padding structure to required size. + * + * See also QuartSecurityHeader_t in + * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h + * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP + */ +struct quark_security_header { + u32 csh_signature; + u32 version; + u32 modulesize; + u32 security_version_number_index; + u32 security_version_number; + u32 rsvd_module_id; + u32 rsvd_module_vendor; + u32 rsvd_date; + u32 headersize; + u32 hash_algo; + u32 cryp_algo; + u32 keysize; + u32 signaturesize; + u32 rsvd_next_header; + u32 rsvd[2]; +}; + +static const efi_char16_t efi_dummy_name[] = L"DUMMY"; + +static bool efi_no_storage_paranoia; + +/* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. The implementation of garbage collection + * in some FW versions causes stale (deleted) variables to take up space + * longer than intended and space is only freed once the store becomes + * almost completely full. + * + * Enabling this option disables the space checks in + * efi_query_variable_store() and forces garbage collection. + * + * Only enable this option if deleting EFI variables does not free up + * space in your variable store, e.g. if despite deleting variables + * you're unable to create new ones. + */ +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + +/* + * Deleting the dummy variable which kicks off garbage collection +*/ +void efi_delete_dummy_variable(void) +{ + efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL); +} + +u64 efivar_reserved_space(void) +{ + if (efi_no_storage_paranoia) + return 0; + return EFI_MIN_RESERVE; +} +EXPORT_SYMBOL_GPL(efivar_reserved_space); + +/* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) + return 0; + + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + /* + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. + */ + if ((remaining_size - size < EFI_MIN_RESERVE) && + !efi_no_storage_paranoia) { + + /* + * Triggering garbage collection may require that the firmware + * generate a real EFI_OUT_OF_RESOURCES error. We can force + * that by attempting to use more space than is available. + */ + unsigned long dummy_size = remaining_size + 1024; + void *dummy = kzalloc(dummy_size, GFP_KERNEL); + + if (!dummy) + return EFI_OUT_OF_RESOURCES; + + status = efi.set_variable((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + dummy_size, dummy); + + if (status == EFI_SUCCESS) { + /* + * This should have failed, so if it didn't make sure + * that we delete it... + */ + efi_delete_dummy_variable(); + } + + kfree(dummy); + + /* + * The runtime code may now have triggered a garbage collection + * run, so check the variable info again + */ + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + + if (status != EFI_SUCCESS) + return status; + + /* + * There still isn't enough room, so return an error + */ + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + } + + return EFI_SUCCESS; +} +EXPORT_SYMBOL_GPL(efi_query_variable_store); + +/* + * The UEFI specification makes it clear that the operating system is + * free to do whatever it wants with boot services code after + * ExitBootServices() has been called. Ignoring this recommendation a + * significant bunch of EFI implementations continue calling into boot + * services code (SetVirtualAddressMap). In order to work around such + * buggy implementations we reserve boot services region during EFI + * init and make sure it stays executable. Then, after + * SetVirtualAddressMap(), it is discarded. + * + * However, some boot services regions contain data that is required + * by drivers, so we need to track which memory ranges can never be + * freed. This is done by tagging those regions with the + * EFI_MEMORY_RUNTIME attribute. + * + * Any driver that wants to mark a region as reserved must use + * efi_mem_reserve() which will insert a new EFI memory descriptor + * into efi.memmap (splitting existing regions if necessary) and tag + * it with EFI_MEMORY_RUNTIME. + */ +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) +{ + struct efi_memory_map_data data = { 0 }; + struct efi_mem_range mr; + efi_memory_desc_t md; + int num_entries; + void *new; + + if (efi_mem_desc_lookup(addr, &md) || + md.type != EFI_BOOT_SERVICES_DATA) { + pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); + return; + } + + if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { + pr_err("Region spans EFI memory descriptors, %pa\n", &addr); + return; + } + + size += addr % EFI_PAGE_SIZE; + size = round_up(size, EFI_PAGE_SIZE); + addr = round_down(addr, EFI_PAGE_SIZE); + + mr.range.start = addr; + mr.range.end = addr + size - 1; + mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; + + num_entries = efi_memmap_split_count(&md, &mr.range); + num_entries += efi.memmap.nr_map; + + if (efi_memmap_alloc(num_entries, &data) != 0) { + pr_err("Could not allocate boot services memmap\n"); + return; + } + + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); + if (!new) { + pr_err("Failed to map new boot services memmap\n"); + return; + } + + efi_memmap_insert(&efi.memmap, new, &mr); + early_memunmap(new, data.size); + + efi_memmap_install(&data); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); +} + +/* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) + */ +static __init bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820__mapped_all(start, start+size, E820_TYPE_RAM)) + return false; + + return true; +} + +void __init efi_reserve_boot_services(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP)) + return; + + for_each_efi_memory_desc(md) { + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with memblock_free_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. + * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_TYPE_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { + memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * memblock_free_late(). + */ + md->attribute |= EFI_MEMORY_RUNTIME; + } +} + +/* + * Apart from having VA mappings for EFI boot services code/data regions, + * (duplicate) 1:1 mappings were also created as a quirk for buggy firmware. So, + * unmap both 1:1 and VA mappings. + */ +static void __init efi_unmap_pages(efi_memory_desc_t *md) +{ + pgd_t *pgd = efi_mm.pgd; + u64 pa = md->phys_addr; + u64 va = md->virt_addr; + + /* + * EFI mixed mode has all RAM mapped to access arguments while making + * EFI runtime calls, hence don't unmap EFI boot services code/data + * regions. + */ + if (efi_is_mixed()) + return; + + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) + pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa); + + if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages)) + pr_err("Failed to unmap VA mapping for 0x%llx\n", va); +} + +void __init efi_free_boot_services(void) +{ + struct efi_memory_map_data data = { 0 }; + efi_memory_desc_t *md; + int num_entries = 0; + void *new, *new_md; + + /* Keep all regions for /sys/kernel/debug/efi */ + if (efi_enabled(EFI_DBG)) + return; + + for_each_efi_memory_desc(md) { + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + size_t rm_size; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + num_entries++; + continue; + } + + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) { + num_entries++; + continue; + } + + /* + * Before calling set_virtual_address_map(), EFI boot services + * code/data regions were mapped as a quirk for buggy firmware. + * Unmap them from efi_pgd before freeing them up. + */ + efi_unmap_pages(md); + + /* + * Nasty quirk: if all sub-1MB memory is used for boot + * services, we can get here without having allocated the + * real mode trampoline. It's too late to hand boot services + * memory back to the memblock allocator, so instead + * try to manually allocate the trampoline if needed. + * + * I've seen this on a Dell XPS 13 9350 with firmware + * 1.4.4 with SGX enabled booting Linux via Fedora 24's + * grub2-efi on a hard disk. (And no, I don't know why + * this happened, but Linux should still try to boot rather + * panicking early.) + */ + rm_size = real_mode_size_needed(); + if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { + set_real_mode_mem(start); + start += rm_size; + size -= rm_size; + } + + /* + * Don't free memory under 1M for two reasons: + * - BIOS might clobber it + * - Crash kernel needs it to be reserved + */ + if (start + size < SZ_1M) + continue; + if (start < SZ_1M) { + size -= (SZ_1M - start); + start = SZ_1M; + } + + memblock_free_late(start, size); + } + + if (!num_entries) + return; + + if (efi_memmap_alloc(num_entries, &data) != 0) { + pr_err("Failed to allocate new EFI memmap\n"); + return; + } + + new = memremap(data.phys_map, data.size, MEMREMAP_WB); + if (!new) { + pr_err("Failed to map new EFI memmap\n"); + return; + } + + /* + * Build a new EFI memmap that excludes any boot services + * regions that are not tagged EFI_MEMORY_RUNTIME, since those + * regions have now been freed. + */ + new_md = new; + for_each_efi_memory_desc(md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA)) + continue; + + memcpy(new_md, md, efi.memmap.desc_size); + new_md += efi.memmap.desc_size; + } + + memunmap(new); + + if (efi_memmap_install(&data) != 0) { + pr_err("Could not install new EFI memmap\n"); + return; + } +} + +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (nr_tables == 0) + return 0; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + + /* Do not bother to play with mem attr table across kexec */ + if (!efi_guidcmp(guid, EFI_MEMORY_ATTRIBUTES_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = EFI_INVALID_TABLE_ADDR; + + p += sz; + } + early_memunmap(tablep, nr_tables * sz); + +out_memremap: + early_memunmap(data, sizeof(*data)); +out: + return ret; +} + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); + efi_memmap_unmap(); + } +} + +/* + * For most modern platforms the preferred method of powering off is via + * ACPI. However, there are some that are known to require the use of + * EFI runtime services and for which ACPI does not work at all. + * + * Using EFI is a last resort, to be used only if no other option + * exists. + */ +bool efi_reboot_required(void) +{ + if (!acpi_gbl_reduced_hardware) + return false; + + efi_reboot_quirk_mode = EFI_RESET_WARM; + return true; +} + +bool efi_poweroff_required(void) +{ + return acpi_gbl_reduced_hardware || acpi_no_s5; +} + +#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH + +static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, + size_t hdr_bytes) +{ + struct quark_security_header *csh = *pkbuff; + + /* Only process data block that is larger than the security header */ + if (hdr_bytes < sizeof(struct quark_security_header)) + return 0; + + if (csh->csh_signature != QUARK_CSH_SIGNATURE || + csh->headersize != QUARK_SECURITY_HEADER_SIZE) + return 1; + + /* Only process data block if EFI header is included */ + if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE + + sizeof(efi_capsule_header_t)) + return 0; + + pr_debug("Quark security header detected\n"); + + if (csh->rsvd_next_header != 0) { + pr_err("multiple Quark security headers not supported\n"); + return -EINVAL; + } + + *pkbuff += csh->headersize; + cap_info->total_size = csh->headersize; + + /* + * Update the first page pointer to skip over the CSH header. + */ + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; + + return 1; +} + +static const struct x86_cpu_id efi_capsule_quirk_ids[] = { + X86_MATCH_VFM(INTEL_QUARK_X1000, &qrk_capsule_setup_info), + { } +}; + +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes) +{ + int (*quirk_handler)(struct capsule_info *, void **, size_t); + const struct x86_cpu_id *id; + int ret; + + if (hdr_bytes < sizeof(efi_capsule_header_t)) + return 0; + + cap_info->total_size = 0; + + id = x86_match_cpu(efi_capsule_quirk_ids); + if (id) { + /* + * The quirk handler is supposed to return + * - a value > 0 if the setup should continue, after advancing + * kbuff as needed + * - 0 if not enough hdr_bytes are available yet + * - a negative error code otherwise + */ + quirk_handler = (typeof(quirk_handler))id->driver_data; + ret = quirk_handler(cap_info, &kbuff, hdr_bytes); + if (ret <= 0) + return ret; + } + + memcpy(&cap_info->header, kbuff, sizeof(cap_info->header)); + + cap_info->total_size += cap_info->header.imagesize; + + return __efi_capsule_setup_info(cap_info); +} + +#endif + +/* + * If any access by any efi runtime service causes a page fault, then, + * 1. If it's efi_reset_system(), reboot through BIOS. + * 2. If any other efi runtime service, then + * a. Return error status to the efi caller process. + * b. Disable EFI Runtime Services forever and + * c. Freeze efi_rts_wq and schedule new process. + * + * @return: Returns, if the page fault is not handled. This function + * will never return if the page fault is handled successfully. + */ +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* + * If we get an interrupt/NMI while processing an EFI runtime service + * then this is a regular OOPS, not an EFI failure. + */ + if (in_interrupt()) + return; + + /* + * Make sure that an efi runtime service caused the page fault. + * READ_ONCE() because we might be OOPSing in a different thread, + * and we don't want to trip KTSAN while trying to OOPS. + */ + if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE || + current_work() != &efi_rts_work.work) + return; + + /* + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so + * page faulting on these addresses isn't expected. + */ + if (phys_addr <= 0x0fff) + return; + + /* + * Print stack trace as it might be useful to know which EFI Runtime + * Service is buggy. + */ + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n", + phys_addr); + + /* + * Buggy efi_reset_system() is handled differently from other EFI + * Runtime Services as it doesn't use efi_rts_wq. Although, + * native_machine_emergency_restart() says that machine_real_restart() + * could fail, it's better not to complicate this fault handler + * because this case occurs *very* rarely and hence could be improved + * on a need by basis. + */ + if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) { + pr_info("efi_reset_system() buggy! Reboot through BIOS\n"); + machine_real_restart(MRR_BIOS); + return; + } + + /* + * Before calling EFI Runtime Service, the kernel has switched the + * calling process to efi_mm. Hence, switch back to task_mm. + */ + arch_efi_call_virt_teardown(); + + /* Signal error status to the efi caller process */ + efi_rts_work.status = EFI_ABORTED; + complete(&efi_rts_work.efi_rts_comp); + + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n"); + + /* + * Call schedule() in an infinite loop, so that any spurious wake ups + * will never run efi_rts_wq again. + */ + for (;;) { + set_current_state(TASK_IDLE); + schedule(); + } +} diff --git a/arch/x86/platform/efi/runtime-map.c b/arch/x86/platform/efi/runtime-map.c new file mode 100644 index 000000000000..a6f02cef3ca2 --- /dev/null +++ b/arch/x86/platform/efi/runtime-map.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com> + */ + +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/efi.h> +#include <linux/slab.h> + +#include <asm/efi.h> +#include <asm/setup.h> + +struct efi_runtime_map_entry { + efi_memory_desc_t md; + struct kobject kobj; /* kobject for each entry */ +}; + +static struct efi_runtime_map_entry **map_entries; + +struct map_attribute { + struct attribute attr; + ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf); +}; + +static inline struct map_attribute *to_map_attr(struct attribute *attr) +{ + return container_of(attr, struct map_attribute, attr); +} + +static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type); +} + +#define EFI_RUNTIME_FIELD(var) entry->md.var + +#define EFI_RUNTIME_U64_ATTR_SHOW(name) \ +static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \ +{ \ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \ +} + +EFI_RUNTIME_U64_ATTR_SHOW(phys_addr); +EFI_RUNTIME_U64_ATTR_SHOW(virt_addr); +EFI_RUNTIME_U64_ATTR_SHOW(num_pages); +EFI_RUNTIME_U64_ATTR_SHOW(attribute); + +static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj) +{ + return container_of(kobj, struct efi_runtime_map_entry, kobj); +} + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct efi_runtime_map_entry *entry = to_map_entry(kobj); + struct map_attribute *map_attr = to_map_attr(attr); + + return map_attr->show(entry, buf); +} + +static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); + +/* + * These are default attributes that are added for every memmap entry. + */ +static struct attribute *def_attrs[] = { + &map_type_attr.attr, + &map_phys_addr_attr.attr, + &map_virt_addr_attr.attr, + &map_num_pages_attr.attr, + &map_attribute_attr.attr, + NULL +}; +ATTRIBUTE_GROUPS(def); + +static const struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static void map_release(struct kobject *kobj) +{ + struct efi_runtime_map_entry *entry; + + entry = to_map_entry(kobj); + kfree(entry); +} + +static const struct kobj_type __refconst map_ktype = { + .sysfs_ops = &map_attr_ops, + .default_groups = def_groups, + .release = map_release, +}; + +static struct kset *map_kset; + +static struct efi_runtime_map_entry * +add_sysfs_runtime_map_entry(struct kobject *kobj, int nr, + efi_memory_desc_t *md) +{ + int ret; + struct efi_runtime_map_entry *entry; + + if (!map_kset) { + map_kset = kset_create_and_add("runtime-map", NULL, kobj); + if (!map_kset) + return ERR_PTR(-ENOMEM); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + kset_unregister(map_kset); + map_kset = NULL; + return ERR_PTR(-ENOMEM); + } + + memcpy(&entry->md, md, sizeof(efi_memory_desc_t)); + + kobject_init(&entry->kobj, &map_ktype); + entry->kobj.kset = map_kset; + ret = kobject_add(&entry->kobj, NULL, "%d", nr); + if (ret) { + kobject_put(&entry->kobj); + kset_unregister(map_kset); + map_kset = NULL; + return ERR_PTR(ret); + } + + return entry; +} + +int efi_get_runtime_map_size(void) +{ + return efi.memmap.nr_map * efi.memmap.desc_size; +} + +int efi_get_runtime_map_desc_size(void) +{ + return efi.memmap.desc_size; +} + +int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + size_t sz = efi_get_runtime_map_size(); + + if (sz > bufsz) + sz = bufsz; + + memcpy(buf, efi.memmap.map, sz); + return 0; +} + +static int __init efi_runtime_map_init(void) +{ + int i, j, ret = 0; + struct efi_runtime_map_entry *entry; + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_MEMMAP) || !efi_kobj) + return 0; + + map_entries = kcalloc(efi.memmap.nr_map, sizeof(entry), GFP_KERNEL); + if (!map_entries) { + ret = -ENOMEM; + goto out; + } + + i = 0; + for_each_efi_memory_desc(md) { + entry = add_sysfs_runtime_map_entry(efi_kobj, i, md); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto out_add_entry; + } + *(map_entries + i++) = entry; + } + + return 0; +out_add_entry: + for (j = i - 1; j >= 0; j--) { + entry = *(map_entries + j); + kobject_put(&entry->kobj); + } +out: + return ret; +} +subsys_initcall_sync(efi_runtime_map_init); diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile index 5b51194f4c8d..34b53e97a0ad 100644 --- a/arch/x86/platform/geode/Makefile +++ b/arch/x86/platform/geode/Makefile @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_GEODE_COMMON) += geode-common.o obj-$(CONFIG_ALIX) += alix.o obj-$(CONFIG_NET5501) += net5501.o obj-$(CONFIG_GEOS) += geos.o diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 90e23e7679a5..be65cd704e21 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for PCEngines ALIX. * At the moment this means setup of GPIO control of LEDs * on Alix.2/3/6 boards. * - * * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> * and Philip Prindeville <philipp@redfish-solutions.com> @@ -11,102 +11,43 @@ * TODO: There are large similarities with leds-net5501.c * by Alessandro Zummo <a.zummo@towertech.it> * In the future leds-net5501.c should be migrated over to platform - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> -#include <linux/leds.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> -#include <linux/input.h> -#include <linux/gpio_keys.h> +#include <linux/moduleparam.h> #include <linux/dmi.h> #include <asm/geode.h> +#include "geode-common.h" + #define BIOS_SIGNATURE_TINYBIOS 0xf0000 #define BIOS_SIGNATURE_COREBOOT 0x500 #define BIOS_REGION_SIZE 0x10000 +/* + * This driver is not modular, but to keep back compatibility + * with existing use cases, continuing with module_param is + * the easiest way forward. + */ static bool force = 0; module_param(force, bool, 0444); /* FIXME: Award bios is not automatically detected as Alix platform */ MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); -static struct gpio_keys_button alix_gpio_buttons[] = { - { - .code = KEY_RESTART, - .gpio = 24, - .active_low = 1, - .desc = "Reset button", - .type = EV_KEY, - .wakeup = 0, - .debounce_interval = 100, - .can_disable = 0, - } -}; -static struct gpio_keys_platform_data alix_buttons_data = { - .buttons = alix_gpio_buttons, - .nbuttons = ARRAY_SIZE(alix_gpio_buttons), - .poll_interval = 20, -}; - -static struct platform_device alix_buttons_dev = { - .name = "gpio-keys-polled", - .id = 1, - .dev = { - .platform_data = &alix_buttons_data, - } -}; - -static struct gpio_led alix_leds[] = { - { - .name = "alix:1", - .gpio = 6, - .default_trigger = "default-on", - .active_low = 1, - }, - { - .name = "alix:2", - .gpio = 25, - .default_trigger = "default-off", - .active_low = 1, - }, - { - .name = "alix:3", - .gpio = 27, - .default_trigger = "default-off", - .active_low = 1, - }, -}; - -static struct gpio_led_platform_data alix_leds_data = { - .num_leds = ARRAY_SIZE(alix_leds), - .leds = alix_leds, -}; - -static struct platform_device alix_leds_dev = { - .name = "leds-gpio", - .id = -1, - .dev.platform_data = &alix_leds_data, -}; - -static struct __initdata platform_device *alix_devs[] = { - &alix_buttons_dev, - &alix_leds_dev, +static const struct geode_led alix_leds[] __initconst = { + { 6, true }, + { 25, false }, + { 27, false }, }; static void __init register_alix(void) { - /* Setup LED control through leds-gpio driver */ - platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs)); + geode_create_restart_key(24); + geode_create_leds("alix", alix_leds, ARRAY_SIZE(alix_leds)); } static bool __init alix_present(unsigned long bios_phys, @@ -192,9 +133,4 @@ static int __init alix_init(void) return 0; } - -module_init(alix_init); - -MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>"); -MODULE_DESCRIPTION("PCEngines ALIX System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(alix_init); diff --git a/arch/x86/platform/geode/geode-common.c b/arch/x86/platform/geode/geode-common.c new file mode 100644 index 000000000000..8fd78e60bf15 --- /dev/null +++ b/arch/x86/platform/geode/geode-common.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared helpers to register GPIO-connected buttons and LEDs + * on AMD Geode boards. + */ + +#include <linux/err.h> +#include <linux/gpio/machine.h> +#include <linux/gpio/property.h> +#include <linux/input.h> +#include <linux/leds.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "geode-common.h" + +static const struct software_node geode_gpiochip_node = { + .name = "cs5535-gpio", +}; + +static const struct property_entry geode_gpio_keys_props[] = { + PROPERTY_ENTRY_U32("poll-interval", 20), + { } +}; + +static const struct software_node geode_gpio_keys_node = { + .name = "geode-gpio-keys", + .properties = geode_gpio_keys_props, +}; + +static struct property_entry geode_restart_key_props[] = { + { /* Placeholder for GPIO property */ }, + PROPERTY_ENTRY_U32("linux,code", KEY_RESTART), + PROPERTY_ENTRY_STRING("label", "Reset button"), + PROPERTY_ENTRY_U32("debounce-interval", 100), + { } +}; + +static const struct software_node geode_restart_key_node = { + .parent = &geode_gpio_keys_node, + .properties = geode_restart_key_props, +}; + +static const struct software_node *geode_gpio_keys_swnodes[] __initconst = { + &geode_gpiochip_node, + &geode_gpio_keys_node, + &geode_restart_key_node, + NULL +}; + +/* + * Creates gpio-keys-polled device for the restart key. + * + * Note that it needs to be called first, before geode_create_leds(), + * because it registers gpiochip software node used by both gpio-keys and + * leds-gpio devices. + */ +int __init geode_create_restart_key(unsigned int pin) +{ + struct platform_device_info keys_info = { + .name = "gpio-keys-polled", + .id = 1, + }; + struct platform_device *pd; + int err; + + geode_restart_key_props[0] = PROPERTY_ENTRY_GPIO("gpios", + &geode_gpiochip_node, + pin, GPIO_ACTIVE_LOW); + + err = software_node_register_node_group(geode_gpio_keys_swnodes); + if (err) { + pr_err("failed to register gpio-keys software nodes: %d\n", err); + return err; + } + + keys_info.fwnode = software_node_fwnode(&geode_gpio_keys_node); + + pd = platform_device_register_full(&keys_info); + err = PTR_ERR_OR_ZERO(pd); + if (err) { + pr_err("failed to create gpio-keys device: %d\n", err); + software_node_unregister_node_group(geode_gpio_keys_swnodes); + return err; + } + + return 0; +} + +static const struct software_node geode_gpio_leds_node = { + .name = "geode-leds", +}; + +#define MAX_LEDS 3 + +int __init geode_create_leds(const char *label, const struct geode_led *leds, + unsigned int n_leds) +{ + const struct software_node *group[MAX_LEDS + 2] = { 0 }; + struct software_node *swnodes; + struct property_entry *props; + struct platform_device_info led_info = { + .name = "leds-gpio", + .id = PLATFORM_DEVID_NONE, + }; + struct platform_device *led_dev; + const char *node_name; + int err; + int i; + + if (n_leds > MAX_LEDS) { + pr_err("%s: too many LEDs\n", __func__); + return -EINVAL; + } + + swnodes = kcalloc(n_leds, sizeof(*swnodes), GFP_KERNEL); + if (!swnodes) + return -ENOMEM; + + /* + * Each LED is represented by 3 properties: "gpios", + * "linux,default-trigger", and am empty terminator. + */ + props = kcalloc(n_leds * 3, sizeof(*props), GFP_KERNEL); + if (!props) { + err = -ENOMEM; + goto err_free_swnodes; + } + + group[0] = &geode_gpio_leds_node; + for (i = 0; i < n_leds; i++) { + node_name = kasprintf(GFP_KERNEL, "%s:%d", label, i); + if (!node_name) { + err = -ENOMEM; + goto err_free_names; + } + + props[i * 3 + 0] = + PROPERTY_ENTRY_GPIO("gpios", &geode_gpiochip_node, + leds[i].pin, GPIO_ACTIVE_LOW); + props[i * 3 + 1] = + PROPERTY_ENTRY_STRING("linux,default-trigger", + leds[i].default_on ? + "default-on" : "default-off"); + /* props[i * 3 + 2] is an empty terminator */ + + swnodes[i] = SOFTWARE_NODE(node_name, &props[i * 3], + &geode_gpio_leds_node); + group[i + 1] = &swnodes[i]; + } + + err = software_node_register_node_group(group); + if (err) { + pr_err("failed to register LED software nodes: %d\n", err); + goto err_free_names; + } + + led_info.fwnode = software_node_fwnode(&geode_gpio_leds_node); + + led_dev = platform_device_register_full(&led_info); + err = PTR_ERR_OR_ZERO(led_dev); + if (err) { + pr_err("failed to create LED device: %d\n", err); + goto err_unregister_group; + } + + return 0; + +err_unregister_group: + software_node_unregister_node_group(group); +err_free_names: + while (--i >= 0) + kfree(swnodes[i].name); + kfree(props); +err_free_swnodes: + kfree(swnodes); + return err; +} diff --git a/arch/x86/platform/geode/geode-common.h b/arch/x86/platform/geode/geode-common.h new file mode 100644 index 000000000000..9e0afd34bfad --- /dev/null +++ b/arch/x86/platform/geode/geode-common.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared helpers to register GPIO-connected buttons and LEDs + * on AMD Geode boards. + */ + +#ifndef __PLATFORM_GEODE_COMMON_H +#define __PLATFORM_GEODE_COMMON_H + +#include <linux/property.h> + +struct geode_led { + unsigned int pin; + bool default_on; +}; + +int geode_create_restart_key(unsigned int pin); +int geode_create_leds(const char *label, const struct geode_led *leds, + unsigned int n_leds); + +#endif /* __PLATFORM_GEODE_COMMON_H */ diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index c2e6d53558be..98027fb1ec32 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for Traverse Technologies GEOS. * At the moment this means setup of GPIO control of LEDs. @@ -9,93 +10,28 @@ * TODO: There are large similarities with leds-net5501.c * by Alessandro Zummo <a.zummo@towertech.it> * In the future leds-net5501.c should be migrated over to platform - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> -#include <linux/leds.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> -#include <linux/input.h> -#include <linux/gpio_keys.h> #include <linux/dmi.h> #include <asm/geode.h> -static struct gpio_keys_button geos_gpio_buttons[] = { - { - .code = KEY_RESTART, - .gpio = 3, - .active_low = 1, - .desc = "Reset button", - .type = EV_KEY, - .wakeup = 0, - .debounce_interval = 100, - .can_disable = 0, - } -}; -static struct gpio_keys_platform_data geos_buttons_data = { - .buttons = geos_gpio_buttons, - .nbuttons = ARRAY_SIZE(geos_gpio_buttons), - .poll_interval = 20, -}; - -static struct platform_device geos_buttons_dev = { - .name = "gpio-keys-polled", - .id = 1, - .dev = { - .platform_data = &geos_buttons_data, - } -}; - -static struct gpio_led geos_leds[] = { - { - .name = "geos:1", - .gpio = 6, - .default_trigger = "default-on", - .active_low = 1, - }, - { - .name = "geos:2", - .gpio = 25, - .default_trigger = "default-off", - .active_low = 1, - }, - { - .name = "geos:3", - .gpio = 27, - .default_trigger = "default-off", - .active_low = 1, - }, -}; +#include "geode-common.h" -static struct gpio_led_platform_data geos_leds_data = { - .num_leds = ARRAY_SIZE(geos_leds), - .leds = geos_leds, -}; - -static struct platform_device geos_leds_dev = { - .name = "leds-gpio", - .id = -1, - .dev.platform_data = &geos_leds_data, -}; - -static struct __initdata platform_device *geos_devs[] = { - &geos_buttons_dev, - &geos_leds_dev, +static const struct geode_led geos_leds[] __initconst = { + { 6, true }, + { 25, false }, + { 27, false }, }; static void __init register_geos(void) { - /* Setup LED control through leds-gpio driver */ - platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs)); + geode_create_restart_key(3); + geode_create_leds("geos", geos_leds, ARRAY_SIZE(geos_leds)); } static int __init geos_init(void) @@ -120,9 +56,4 @@ static int __init geos_init(void) return 0; } - -module_init(geos_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(geos_init); diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 646e3b5b4bb6..c9cee7dea99b 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -1,92 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for Soekris net5501 * At the moment this means setup of GPIO control of LEDs and buttons * on net5501 boards. * - * * Copyright (C) 2008-2009 Tower Technologies * Written by Alessandro Zummo <a.zummo@towertech.it> * * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> * and Philip Prindeville <philipp@redfish-solutions.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> -#include <linux/leds.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> #include <linux/input.h> -#include <linux/gpio_keys.h> +#include <linux/gpio/machine.h> +#include <linux/gpio/property.h> #include <asm/geode.h> +#include "geode-common.h" + #define BIOS_REGION_BASE 0xffff0000 #define BIOS_REGION_SIZE 0x00010000 -static struct gpio_keys_button net5501_gpio_buttons[] = { - { - .code = KEY_RESTART, - .gpio = 24, - .active_low = 1, - .desc = "Reset button", - .type = EV_KEY, - .wakeup = 0, - .debounce_interval = 100, - .can_disable = 0, - } -}; -static struct gpio_keys_platform_data net5501_buttons_data = { - .buttons = net5501_gpio_buttons, - .nbuttons = ARRAY_SIZE(net5501_gpio_buttons), - .poll_interval = 20, -}; - -static struct platform_device net5501_buttons_dev = { - .name = "gpio-keys-polled", - .id = 1, - .dev = { - .platform_data = &net5501_buttons_data, - } -}; - -static struct gpio_led net5501_leds[] = { - { - .name = "net5501:1", - .gpio = 6, - .default_trigger = "default-on", - .active_low = 0, - }, -}; - -static struct gpio_led_platform_data net5501_leds_data = { - .num_leds = ARRAY_SIZE(net5501_leds), - .leds = net5501_leds, -}; - -static struct platform_device net5501_leds_dev = { - .name = "leds-gpio", - .id = -1, - .dev.platform_data = &net5501_leds_data, -}; - -static struct __initdata platform_device *net5501_devs[] = { - &net5501_buttons_dev, - &net5501_leds_dev, +static const struct geode_led net5501_leds[] __initconst = { + { 6, true }, }; static void __init register_net5501(void) { - /* Setup LED control through leds-gpio driver */ - platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs)); + geode_create_restart_key(24); + geode_create_leds("net5501", net5501_leds, ARRAY_SIZE(net5501_leds)); } struct net5501_board { @@ -146,9 +94,4 @@ static int __init net5501_init(void) return 0; } - -module_init(net5501_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Soekris net5501 System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(net5501_init); diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile deleted file mode 100644 index f030b532fdf3..000000000000 --- a/arch/x86/platform/goldfish/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_GOLDFISH) += goldfish.o diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c deleted file mode 100644 index 1693107a518e..000000000000 --- a/arch/x86/platform/goldfish/goldfish.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2007 Google, Inc. - * Copyright (C) 2011 Intel, Inc. - * Copyright (C) 2013 Intel, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/platform_device.h> - -/* - * Where in virtual device memory the IO devices (timers, system controllers - * and so on) - */ - -#define GOLDFISH_PDEV_BUS_BASE (0xff001000) -#define GOLDFISH_PDEV_BUS_END (0xff7fffff) -#define GOLDFISH_PDEV_BUS_IRQ (4) - -#define GOLDFISH_TTY_BASE (0x2000) - -static struct resource goldfish_pdev_bus_resources[] = { - { - .start = GOLDFISH_PDEV_BUS_BASE, - .end = GOLDFISH_PDEV_BUS_END, - .flags = IORESOURCE_MEM, - }, - { - .start = GOLDFISH_PDEV_BUS_IRQ, - .end = GOLDFISH_PDEV_BUS_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static int __init goldfish_init(void) -{ - platform_device_register_simple("goldfish_pdev_bus", -1, - goldfish_pdev_bus_resources, 2); - return 0; -} -device_initcall(goldfish_init); diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile new file mode 100644 index 000000000000..ddfc08783fb8 --- /dev/null +++ b/arch/x86/platform/intel-mid/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o pwr.o diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c new file mode 100644 index 000000000000..a8e75f8c14fd --- /dev/null +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel MID platform setup code + * + * (C) Copyright 2008, 2012, 2021 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> + */ + +#define pr_fmt(fmt) "intel_mid: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/regulator/machine.h> +#include <linux/scatterlist.h> +#include <linux/irq.h> +#include <linux/export.h> +#include <linux/notifier.h> + +#include <asm/setup.h> +#include <asm/mpspec_def.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/cpu_device_id.h> +#include <asm/io_apic.h> +#include <asm/intel-mid.h> +#include <asm/io.h> +#include <asm/i8259.h> +#include <asm/reboot.h> + +#include <linux/platform_data/x86/intel_scu_ipc.h> + +#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ +#define IPCMSG_COLD_RESET 0xF1 + +static void intel_mid_power_off(void) +{ + /* Shut down South Complex via PWRMU */ + intel_mid_pwr_power_off(); + + /* Only for Tangier, the rest will ignore this command */ + intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_OFF, 1); +}; + +static void intel_mid_reboot(void) +{ + intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_RESET, 0); +} + +static void __init intel_mid_time_init(void) +{ + /* Lapic only, no apbt */ + x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; +} + +static void intel_mid_arch_setup(void) +{ + switch (boot_cpu_data.x86_vfm) { + case INTEL_ATOM_SILVERMONT_MID: + x86_platform.legacy.rtc = 1; + break; + default: + break; + } + + /* + * Intel MID platforms are using explicitly defined regulators. + * + * Let the regulator core know that we do not have any additional + * regulators left. This lets it substitute unprovided regulators with + * dummy ones: + */ + regulator_has_full_constraints(); +} + +/* + * Moorestown does not have external NMI source nor port 0x61 to report + * NMI status. The possible NMI sources are from pmu as a result of NMI + * watchdog or lock debug. Reading io port 0x61 results in 0xff which + * misled NMI handler. + */ +static unsigned char intel_mid_get_nmi_reason(void) +{ + return 0; +} + +/* + * Moorestown specific x86_init function overrides and early setup + * calls. + */ +void __init x86_intel_mid_early_setup(void) +{ + x86_init.resources.probe_roms = x86_init_noop; + x86_init.resources.reserve_resources = x86_init_noop; + + x86_init.timers.timer_init = intel_mid_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + + x86_init.irqs.pre_vector_init = x86_init_noop; + + x86_init.oem.arch_setup = intel_mid_arch_setup; + + x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; + + x86_init.pci.arch_init = intel_mid_pci_init; + x86_init.pci.fixup_irqs = x86_init_noop; + + legacy_pic = &null_legacy_pic; + + /* + * Do nothing for now as everything needed done in + * x86_intel_mid_early_setup() below. + */ + x86_init.acpi.reduced_hw_early_init = x86_init_noop; + + pm_power_off = intel_mid_power_off; + machine_ops.emergency_restart = intel_mid_reboot; + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_init_noop; + set_bit(MP_BUS_ISA, mp_bus_not_pci); +} diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c new file mode 100644 index 000000000000..cd7e0c71adde --- /dev/null +++ b/arch/x86/platform/intel-mid/pwr.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel MID Power Management Unit (PWRMU) device driver + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * Intel MID Power Management Unit device driver handles the South Complex PCI + * devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core + * modifies bits in PMCSR register in the PCI configuration space. This is not + * enough on some SoCs like Intel Tangier. In such case PCI core sets a new + * power state of the device in question through a PM hook registered in struct + * pci_platform_pm_ops (see drivers/pci/pci-mid.c). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/mutex.h> +#include <linux/pci.h> + +#include <asm/intel-mid.h> + +/* Registers */ +#define PM_STS 0x00 +#define PM_CMD 0x04 +#define PM_ICS 0x08 +#define PM_WKC(x) (0x10 + (x) * 4) +#define PM_WKS(x) (0x18 + (x) * 4) +#define PM_SSC(x) (0x20 + (x) * 4) +#define PM_SSS(x) (0x30 + (x) * 4) + +/* Bits in PM_STS */ +#define PM_STS_BUSY (1 << 8) + +/* Bits in PM_CMD */ +#define PM_CMD_CMD(x) ((x) << 0) +#define PM_CMD_IOC (1 << 8) +#define PM_CMD_CM_NOP (0 << 9) +#define PM_CMD_CM_IMMEDIATE (1 << 9) +#define PM_CMD_CM_DELAY (2 << 9) +#define PM_CMD_CM_TRIGGER (3 << 9) + +/* System states */ +#define PM_CMD_SYS_STATE_S5 (5 << 16) + +/* Trigger variants */ +#define PM_CMD_CFG_TRIGGER_NC (3 << 19) + +/* Message to wait for TRIGGER_NC case */ +#define TRIGGER_NC_MSG_2 (2 << 22) + +/* List of commands */ +#define CMD_SET_CFG 0x01 + +/* Bits in PM_ICS */ +#define PM_ICS_INT_STATUS(x) ((x) & 0xff) +#define PM_ICS_IE (1 << 8) +#define PM_ICS_IP (1 << 9) +#define PM_ICS_SW_INT_STS (1 << 10) + +/* List of interrupts */ +#define INT_INVALID 0 +#define INT_CMD_COMPLETE 1 +#define INT_CMD_ERR 2 +#define INT_WAKE_EVENT 3 +#define INT_LSS_POWER_ERR 4 +#define INT_S0iX_MSG_ERR 5 +#define INT_NO_C6 6 +#define INT_TRIGGER_ERR 7 +#define INT_INACTIVITY 8 + +/* South Complex devices */ +#define LSS_MAX_SHARED_DEVS 4 +#define LSS_MAX_DEVS 64 + +#define LSS_WS_BITS 1 /* wake state width */ +#define LSS_PWS_BITS 2 /* power state width */ + +/* Supported device IDs */ +#define PCI_DEVICE_ID_PENWELL 0x0828 +#define PCI_DEVICE_ID_TANGIER 0x11a1 + +struct mid_pwr_dev { + struct pci_dev *pdev; + pci_power_t state; +}; + +struct mid_pwr { + struct device *dev; + void __iomem *regs; + int irq; + bool available; + + struct mutex lock; + struct mid_pwr_dev lss[LSS_MAX_DEVS][LSS_MAX_SHARED_DEVS]; +}; + +static struct mid_pwr *midpwr; + +static u32 mid_pwr_get_state(struct mid_pwr *pwr, int reg) +{ + return readl(pwr->regs + PM_SSS(reg)); +} + +static void mid_pwr_set_state(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_SSC(reg)); +} + +static void mid_pwr_set_wake(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_WKC(reg)); +} + +static void mid_pwr_interrupt_disable(struct mid_pwr *pwr) +{ + writel(~PM_ICS_IE, pwr->regs + PM_ICS); +} + +static bool mid_pwr_is_busy(struct mid_pwr *pwr) +{ + return !!(readl(pwr->regs + PM_STS) & PM_STS_BUSY); +} + +/* Wait 500ms that the latest PWRMU command finished */ +static int mid_pwr_wait(struct mid_pwr *pwr) +{ + unsigned int count = 500000; + bool busy; + + do { + busy = mid_pwr_is_busy(pwr); + if (!busy) + return 0; + udelay(1); + } while (--count); + + return -EBUSY; +} + +static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd) +{ + writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs + PM_CMD); + return mid_pwr_wait(pwr); +} + +static int __update_power_state(struct mid_pwr *pwr, int reg, int bit, int new) +{ + int curstate; + u32 power; + int ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate == new) + return 0; + + /* Update the power state */ + mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new << bit)); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate != new) + return -EAGAIN; + + return 0; +} + +static pci_power_t __find_weakest_power_state(struct mid_pwr_dev *lss, + struct pci_dev *pdev, + pci_power_t state) +{ + pci_power_t weakest = PCI_D3hot; + unsigned int j; + + /* Find device in cache or first free cell */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].pdev == pdev || !lss[j].pdev) + break; + } + + /* Store the desired state in cache */ + if (j < LSS_MAX_SHARED_DEVS) { + lss[j].pdev = pdev; + lss[j].state = state; + } else { + dev_WARN(&pdev->dev, "No room for device in PWRMU LSS cache\n"); + weakest = state; + } + + /* Find the power state we may use */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].state < weakest) + weakest = lss[j].state; + } + + return weakest; +} + +static int __set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state, int id, int reg, int bit) +{ + const char *name; + int ret; + + state = __find_weakest_power_state(pwr->lss[id], pdev, state); + name = pci_power_name(state); + + ret = __update_power_state(pwr, reg, bit, (__force int)state); + if (ret) { + dev_warn(&pdev->dev, "Can't set power state %s: %d\n", name, ret); + return ret; + } + + dev_vdbg(&pdev->dev, "Set power state %s\n", name); + return 0; +} + +static int mid_pwr_set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state) +{ + int id, reg, bit; + int ret; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return id; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + + /* We support states between PCI_D0 and PCI_D3hot */ + if (state < PCI_D0) + state = PCI_D0; + if (state > PCI_D3hot) + state = PCI_D3hot; + + mutex_lock(&pwr->lock); + ret = __set_power_state(pwr, pdev, state, id, reg, bit); + mutex_unlock(&pwr->lock); + return ret; +} + +int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + struct mid_pwr *pwr = midpwr; + int ret = 0; + + might_sleep(); + + if (pwr && pwr->available) + ret = mid_pwr_set_power_state(pwr, pdev, state); + dev_vdbg(&pdev->dev, "set_power_state() returns %d\n", ret); + + return 0; +} + +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev) +{ + struct mid_pwr *pwr = midpwr; + int id, reg, bit; + u32 power; + + if (!pwr || !pwr->available) + return PCI_UNKNOWN; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return PCI_UNKNOWN; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + power = mid_pwr_get_state(pwr, reg); + return (__force pci_power_t)((power >> bit) & 3); +} + +void intel_mid_pwr_power_off(void) +{ + struct mid_pwr *pwr = midpwr; + u32 cmd = PM_CMD_SYS_STATE_S5 | + PM_CMD_CMD(CMD_SET_CFG) | + PM_CMD_CM_TRIGGER | + PM_CMD_CFG_TRIGGER_NC | + TRIGGER_NC_MSG_2; + + /* Send command to SCU */ + writel(cmd, pwr->regs + PM_CMD); + mid_pwr_wait(pwr); +} + +int intel_mid_pwr_get_lss_id(struct pci_dev *pdev) +{ + int vndr; + u8 id; + + /* + * Mapping to PWRMU index is kept in the Logical SubSystem ID byte of + * Vendor capability. + */ + vndr = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!vndr) + return -EINVAL; + + /* Read the Logical SubSystem ID byte */ + pci_read_config_byte(pdev, vndr + INTEL_MID_PWR_LSS_OFFSET, &id); + if (!(id & INTEL_MID_PWR_LSS_TYPE)) + return -ENODEV; + + id &= ~INTEL_MID_PWR_LSS_TYPE; + if (id >= LSS_MAX_DEVS) + return -ERANGE; + + return id; +} + +static irqreturn_t mid_pwr_irq_handler(int irq, void *dev_id) +{ + struct mid_pwr *pwr = dev_id; + u32 ics; + + ics = readl(pwr->regs + PM_ICS); + if (!(ics & PM_ICS_IP)) + return IRQ_NONE; + + writel(ics | PM_ICS_IP, pwr->regs + PM_ICS); + + dev_warn(pwr->dev, "Unexpected IRQ: %#x\n", PM_ICS_INT_STATUS(ics)); + return IRQ_HANDLED; +} + +struct mid_pwr_device_info { + int (*set_initial_state)(struct mid_pwr *pwr); +}; + +static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mid_pwr_device_info *info = (void *)id->driver_data; + struct device *dev = &pdev->dev; + struct mid_pwr *pwr; + int ret; + + ret = pcim_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, "error: could not enable device\n"); + return ret; + } + + pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + pwr->regs = pcim_iomap_region(pdev, 0, "intel_mid_pwr"); + ret = PTR_ERR_OR_ZERO(pwr->regs); + if (ret) { + dev_err(&pdev->dev, "Could not request / ioremap I/O-Mem: %d\n", ret); + return ret; + } + + pwr->dev = dev; + pwr->irq = pdev->irq; + + mutex_init(&pwr->lock); + + /* Disable interrupts */ + mid_pwr_interrupt_disable(pwr); + + if (info && info->set_initial_state) { + ret = info->set_initial_state(pwr); + if (ret) + dev_warn(dev, "Can't set initial state: %d\n", ret); + } + + ret = devm_request_irq(dev, pdev->irq, mid_pwr_irq_handler, + IRQF_NO_SUSPEND, pci_name(pdev), pwr); + if (ret) + return ret; + + pwr->available = true; + midpwr = pwr; + + pci_set_drvdata(pdev, pwr); + return 0; +} + +static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states) +{ + unsigned int i, j; + int ret; + + /* + * Enable wake events. + * + * PWRMU supports up to 32 sources for wake up the system. Ungate them + * all here. + */ + mid_pwr_set_wake(pwr, 0, 0xffffffff); + mid_pwr_set_wake(pwr, 1, 0xffffffff); + + /* + * Power off South Complex devices. + * + * There is a map (see a note below) of 64 devices with 2 bits per each + * on 32-bit HW registers. The following calls set all devices to one + * known initial state, i.e. PCI_D3hot. This is done in conjunction + * with PMCSR setting in arch/x86/pci/intel_mid_pci.c. + * + * NOTE: The actual device mapping is provided by a platform at run + * time using vendor capability of PCI configuration space. + */ + mid_pwr_set_state(pwr, 0, states[0]); + mid_pwr_set_state(pwr, 1, states[1]); + mid_pwr_set_state(pwr, 2, states[2]); + mid_pwr_set_state(pwr, 3, states[3]); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + for (i = 0; i < LSS_MAX_DEVS; i++) { + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) + pwr->lss[i][j].state = PCI_D3hot; + } + + return 0; +} + +static int pnw_set_initial_state(struct mid_pwr *pwr) +{ + /* On Penwell SRAM must stay powered on */ + static const u32 states[] = { + 0xf00fffff, /* PM_SSC(0) */ + 0xffffffff, /* PM_SSC(1) */ + 0xffffffff, /* PM_SSC(2) */ + 0xffffffff, /* PM_SSC(3) */ + }; + return mid_set_initial_state(pwr, states); +} + +static int tng_set_initial_state(struct mid_pwr *pwr) +{ + static const u32 states[] = { + 0xffffffff, /* PM_SSC(0) */ + 0xffffffff, /* PM_SSC(1) */ + 0xffffffff, /* PM_SSC(2) */ + 0xffffffff, /* PM_SSC(3) */ + }; + return mid_set_initial_state(pwr, states); +} + +static const struct mid_pwr_device_info pnw_info = { + .set_initial_state = pnw_set_initial_state, +}; + +static const struct mid_pwr_device_info tng_info = { + .set_initial_state = tng_set_initial_state, +}; + +/* This table should be in sync with the one in drivers/pci/pci-mid.c */ +static const struct pci_device_id mid_pwr_pci_ids[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&pnw_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, + {} +}; + +static struct pci_driver mid_pwr_pci_driver = { + .name = "intel_mid_pwr", + .probe = mid_pwr_probe, + .id_table = mid_pwr_pci_ids, +}; + +builtin_pci_driver(mid_pwr_pci_driver); diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile new file mode 100644 index 000000000000..ed77cb9529ce --- /dev/null +++ b/arch/x86/platform/intel-quark/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_INTEL_IMR) += imr.o +obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c new file mode 100644 index 000000000000..ee25b032c0b3 --- /dev/null +++ b/arch/x86/platform/intel-quark/imr.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * imr.c -- Intel Isolated Memory Region driver + * + * Copyright(c) 2013 Intel Corporation. + * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> + * + * IMR registers define an isolated region of memory that can + * be masked to prohibit certain system agents from accessing memory. + * When a device behind a masked port performs an access - snooped or + * not, an IMR may optionally prevent that transaction from changing + * the state of memory or from getting correct data in response to the + * operation. + * + * Write data will be dropped and reads will return 0xFFFFFFFF, the + * system will reset and system BIOS will print out an error message to + * inform the user that an IMR has been violated. + * + * This code is based on the Linux MTRR code and reference code from + * Intel's Quark BSP EFI, Linux and grub code. + * + * See quark-x1000-datasheet.pdf for register definitions. + * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <asm-generic/sections.h> +#include <asm/cpu_device_id.h> +#include <asm/imr.h> +#include <asm/iosf_mbi.h> +#include <asm/io.h> + +#include <linux/debugfs.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/types.h> + +struct imr_device { + bool init; + struct mutex lock; + int max_imr; + int reg_base; +}; + +static struct imr_device imr_dev; + +/* + * IMR read/write mask control registers. + * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for + * bit definitions. + * + * addr_hi + * 31 Lock bit + * 30:24 Reserved + * 23:2 1 KiB aligned lo address + * 1:0 Reserved + * + * addr_hi + * 31:24 Reserved + * 23:2 1 KiB aligned hi address + * 1:0 Reserved + */ +#define IMR_LOCK BIT(31) + +struct imr_regs { + u32 addr_lo; + u32 addr_hi; + u32 rmask; + u32 wmask; +}; + +#define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32)) +#define IMR_SHIFT 8 +#define imr_to_phys(x) ((x) << IMR_SHIFT) +#define phys_to_imr(x) ((x) >> IMR_SHIFT) + +/** + * imr_is_enabled - true if an IMR is enabled false otherwise. + * + * Determines if an IMR is enabled based on address range and read/write + * mask. An IMR set with an address range set to zero and a read/write + * access mask set to all is considered to be disabled. An IMR in any + * other state - for example set to zero but without read/write access + * all is considered to be enabled. This definition of disabled is how + * firmware switches off an IMR and is maintained in kernel for + * consistency. + * + * @imr: pointer to IMR descriptor. + * @return: true if IMR enabled false if disabled. + */ +static inline int imr_is_enabled(struct imr_regs *imr) +{ + return !(imr->rmask == IMR_READ_ACCESS_ALL && + imr->wmask == IMR_WRITE_ACCESS_ALL && + imr_to_phys(imr->addr_lo) == 0 && + imr_to_phys(imr->addr_hi) == 0); +} + +/** + * imr_read - read an IMR at a given index. + * + * Requires caller to hold imr mutex. + * + * @idev: pointer to imr_device structure. + * @imr_id: IMR entry to read. + * @imr: IMR structure representing address and access masks. + * @return: 0 on success or error code passed from mbi_iosf on failure. + */ +static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) +{ + u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; + int ret; + + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_lo); + if (ret) + return ret; + + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->addr_hi); + if (ret) + return ret; + + ret = iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->rmask); + if (ret) + return ret; + + return iosf_mbi_read(QRK_MBI_UNIT_MM, MBI_REG_READ, reg++, &imr->wmask); +} + +/** + * imr_write - write an IMR at a given index. + * + * Requires caller to hold imr mutex. + * Note lock bits need to be written independently of address bits. + * + * @idev: pointer to imr_device structure. + * @imr_id: IMR entry to write. + * @imr: IMR structure representing address and access masks. + * @return: 0 on success or error code passed from mbi_iosf on failure. + */ +static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) +{ + unsigned long flags; + u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; + int ret; + + local_irq_save(flags); + + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_lo); + if (ret) + goto failed; + + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->addr_hi); + if (ret) + goto failed; + + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->rmask); + if (ret) + goto failed; + + ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, reg++, imr->wmask); + if (ret) + goto failed; + + local_irq_restore(flags); + return 0; +failed: + /* + * If writing to the IOSF failed then we're in an unknown state, + * likely a very bad state. An IMR in an invalid state will almost + * certainly lead to a memory access violation. + */ + local_irq_restore(flags); + WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n", + imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK); + + return ret; +} + +/** + * imr_dbgfs_state_show - print state of IMR registers. + * + * @s: pointer to seq_file for output. + * @unused: unused parameter. + * @return: 0 on success or error code passed from mbi_iosf on failure. + */ +static int imr_dbgfs_state_show(struct seq_file *s, void *unused) +{ + phys_addr_t base; + phys_addr_t end; + int i; + struct imr_device *idev = s->private; + struct imr_regs imr; + size_t size; + int ret = -ENODEV; + + mutex_lock(&idev->lock); + + for (i = 0; i < idev->max_imr; i++) { + + ret = imr_read(idev, i, &imr); + if (ret) + break; + + /* + * Remember to add IMR_ALIGN bytes to size to indicate the + * inherent IMR_ALIGN size bytes contained in the masked away + * lower ten bits. + */ + if (imr_is_enabled(&imr)) { + base = imr_to_phys(imr.addr_lo); + end = imr_to_phys(imr.addr_hi) + IMR_MASK; + size = end - base + 1; + } else { + base = 0; + end = 0; + size = 0; + } + seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx " + "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i, + &base, &end, size, imr.rmask, imr.wmask, + imr_is_enabled(&imr) ? "enabled " : "disabled", + imr.addr_lo & IMR_LOCK ? "locked" : "unlocked"); + } + + mutex_unlock(&idev->lock); + return ret; +} +DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state); + +/** + * imr_debugfs_register - register debugfs hooks. + * + * @idev: pointer to imr_device structure. + */ +static void imr_debugfs_register(struct imr_device *idev) +{ + debugfs_create_file("imr_state", 0444, NULL, idev, + &imr_dbgfs_state_fops); +} + +/** + * imr_check_params - check passed address range IMR alignment and non-zero size + * + * @base: base address of intended IMR. + * @size: size of intended IMR. + * @return: zero on valid range -EINVAL on unaligned base/size. + */ +static int imr_check_params(phys_addr_t base, size_t size) +{ + if ((base & IMR_MASK) || (size & IMR_MASK)) { + pr_err("base %pa size 0x%08zx must align to 1KiB\n", + &base, size); + return -EINVAL; + } + if (size == 0) + return -EINVAL; + + return 0; +} + +/** + * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends. + * + * IMR addr_hi has a built in offset of plus IMR_ALIGN (0x400) bytes from the + * value in the register. We need to subtract IMR_ALIGN bytes from input sizes + * as a result. + * + * @size: input size bytes. + * @return: reduced size. + */ +static inline size_t imr_raw_size(size_t size) +{ + return size - IMR_ALIGN; +} + +/** + * imr_address_overlap - detects an address overlap. + * + * @addr: address to check against an existing IMR. + * @imr: imr being checked. + * @return: true for overlap false for no overlap. + */ +static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr) +{ + return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi); +} + +/** + * imr_add_range - add an Isolated Memory Region. + * + * @base: physical base address of region aligned to 1KiB. + * @size: physical size of region in bytes must be aligned to 1KiB. + * @read_mask: read access mask. + * @write_mask: write access mask. + * @return: zero on success or negative value indicating error. + */ +int imr_add_range(phys_addr_t base, size_t size, + unsigned int rmask, unsigned int wmask) +{ + phys_addr_t end; + unsigned int i; + struct imr_device *idev = &imr_dev; + struct imr_regs imr; + size_t raw_size; + int reg; + int ret; + + if (WARN_ONCE(idev->init == false, "driver not initialized")) + return -ENODEV; + + ret = imr_check_params(base, size); + if (ret) + return ret; + + /* Tweak the size value. */ + raw_size = imr_raw_size(size); + end = base + raw_size; + + /* + * Check for reserved IMR value common to firmware, kernel and grub + * indicating a disabled IMR. + */ + imr.addr_lo = phys_to_imr(base); + imr.addr_hi = phys_to_imr(end); + imr.rmask = rmask; + imr.wmask = wmask; + if (!imr_is_enabled(&imr)) + return -ENOTSUPP; + + mutex_lock(&idev->lock); + + /* + * Find a free IMR while checking for an existing overlapping range. + * Note there's no restriction in silicon to prevent IMR overlaps. + * For the sake of simplicity and ease in defining/debugging an IMR + * memory map we exclude IMR overlaps. + */ + reg = -1; + for (i = 0; i < idev->max_imr; i++) { + ret = imr_read(idev, i, &imr); + if (ret) + goto failed; + + /* Find overlap @ base or end of requested range. */ + ret = -EINVAL; + if (imr_is_enabled(&imr)) { + if (imr_address_overlap(base, &imr)) + goto failed; + if (imr_address_overlap(end, &imr)) + goto failed; + } else { + reg = i; + } + } + + /* Error out if we have no free IMR entries. */ + if (reg == -1) { + ret = -ENOMEM; + goto failed; + } + + pr_debug("add %d phys %pa-%pa size %zx mask 0x%08x wmask 0x%08x\n", + reg, &base, &end, raw_size, rmask, wmask); + + /* Enable IMR at specified range and access mask. */ + imr.addr_lo = phys_to_imr(base); + imr.addr_hi = phys_to_imr(end); + imr.rmask = rmask; + imr.wmask = wmask; + + ret = imr_write(idev, reg, &imr); + if (ret < 0) { + /* + * In the highly unlikely event iosf_mbi_write failed + * attempt to rollback the IMR setup skipping the trapping + * of further IOSF write failures. + */ + imr.addr_lo = 0; + imr.addr_hi = 0; + imr.rmask = IMR_READ_ACCESS_ALL; + imr.wmask = IMR_WRITE_ACCESS_ALL; + imr_write(idev, reg, &imr); + } +failed: + mutex_unlock(&idev->lock); + return ret; +} +EXPORT_SYMBOL_GPL(imr_add_range); + +/** + * __imr_remove_range - delete an Isolated Memory Region. + * + * This function allows you to delete an IMR by its index specified by reg or + * by address range specified by base and size respectively. If you specify an + * index on its own the base and size parameters are ignored. + * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored. + * imr_remove_range(-1, base, size); delete IMR from base to base+size. + * + * @reg: imr index to remove. + * @base: physical base address of region aligned to 1 KiB. + * @size: physical size of region in bytes aligned to 1 KiB. + * @return: -EINVAL on invalid range or out or range id + * -ENODEV if reg is valid but no IMR exists or is locked + * 0 on success. + */ +static int __imr_remove_range(int reg, phys_addr_t base, size_t size) +{ + phys_addr_t end; + bool found = false; + unsigned int i; + struct imr_device *idev = &imr_dev; + struct imr_regs imr; + size_t raw_size; + int ret = 0; + + if (WARN_ONCE(idev->init == false, "driver not initialized")) + return -ENODEV; + + /* + * Validate address range if deleting by address, else we are + * deleting by index where base and size will be ignored. + */ + if (reg == -1) { + ret = imr_check_params(base, size); + if (ret) + return ret; + } + + /* Tweak the size value. */ + raw_size = imr_raw_size(size); + end = base + raw_size; + + mutex_lock(&idev->lock); + + if (reg >= 0) { + /* If a specific IMR is given try to use it. */ + ret = imr_read(idev, reg, &imr); + if (ret) + goto failed; + + if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) { + ret = -ENODEV; + goto failed; + } + found = true; + } else { + /* Search for match based on address range. */ + for (i = 0; i < idev->max_imr; i++) { + ret = imr_read(idev, i, &imr); + if (ret) + goto failed; + + if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) + continue; + + if ((imr_to_phys(imr.addr_lo) == base) && + (imr_to_phys(imr.addr_hi) == end)) { + found = true; + reg = i; + break; + } + } + } + + if (!found) { + ret = -ENODEV; + goto failed; + } + + pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size); + + /* Tear down the IMR. */ + imr.addr_lo = 0; + imr.addr_hi = 0; + imr.rmask = IMR_READ_ACCESS_ALL; + imr.wmask = IMR_WRITE_ACCESS_ALL; + + ret = imr_write(idev, reg, &imr); + +failed: + mutex_unlock(&idev->lock); + return ret; +} + +/** + * imr_remove_range - delete an Isolated Memory Region by address + * + * This function allows you to delete an IMR by an address range specified + * by base and size respectively. + * imr_remove_range(base, size); delete IMR from base to base+size. + * + * @base: physical base address of region aligned to 1 KiB. + * @size: physical size of region in bytes aligned to 1 KiB. + * @return: -EINVAL on invalid range or out or range id + * -ENODEV if reg is valid but no IMR exists or is locked + * 0 on success. + */ +int imr_remove_range(phys_addr_t base, size_t size) +{ + return __imr_remove_range(-1, base, size); +} +EXPORT_SYMBOL_GPL(imr_remove_range); + +/** + * imr_clear - delete an Isolated Memory Region by index + * + * This function allows you to delete an IMR by an address range specified + * by the index of the IMR. Useful for initial sanitization of the IMR + * address map. + * imr_ge(base, size); delete IMR from base to base+size. + * + * @reg: imr index to remove. + * @return: -EINVAL on invalid range or out or range id + * -ENODEV if reg is valid but no IMR exists or is locked + * 0 on success. + */ +static inline int imr_clear(int reg) +{ + return __imr_remove_range(reg, 0, 0); +} + +/** + * imr_fixup_memmap - Tear down IMRs used during bootup. + * + * BIOS and Grub both setup IMRs around compressed kernel, initrd memory + * that need to be removed before the kernel hands out one of the IMR + * encased addresses to a downstream DMA agent such as the SD or Ethernet. + * IMRs on Galileo are setup to immediately reset the system on violation. + * As a result if you're running a root filesystem from SD - you'll need + * the boot-time IMRs torn down or you'll find seemingly random resets when + * using your filesystem. + * + * @idev: pointer to imr_device structure. + * @return: + */ +static void __init imr_fixup_memmap(struct imr_device *idev) +{ + phys_addr_t base = virt_to_phys(&_text); + size_t size = virt_to_phys(&__end_rodata) - base; + unsigned long start, end; + int i; + int ret; + + /* Tear down all existing unlocked IMRs. */ + for (i = 0; i < idev->max_imr; i++) + imr_clear(i); + + start = (unsigned long)_text; + end = (unsigned long)__end_rodata - 1; + + /* + * Setup an unlocked IMR around the physical extent of the kernel + * from the beginning of the .text section to the end of the + * .rodata section as one physically contiguous block. + * + * We don't round up @size since it is already PAGE_SIZE aligned. + * See vmlinux.lds.S for details. + */ + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); + if (ret < 0) { + pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n", + size / 1024, start, end); + } else { + pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n", + size / 1024, start, end); + } + +} + +static const struct x86_cpu_id imr_ids[] __initconst = { + X86_MATCH_VFM(INTEL_QUARK_X1000, NULL), + {} +}; + +/** + * imr_init - entry point for IMR driver. + * + * return: -ENODEV for no IMR support 0 if good to go. + */ +static int __init imr_init(void) +{ + struct imr_device *idev = &imr_dev; + + if (!x86_match_cpu(imr_ids) || !iosf_mbi_available()) + return -ENODEV; + + idev->max_imr = QUARK_X1000_IMR_MAX; + idev->reg_base = QUARK_X1000_IMR_REGBASE; + idev->init = true; + + mutex_init(&idev->lock); + imr_debugfs_register(idev); + imr_fixup_memmap(idev); + return 0; +} +device_initcall(imr_init); diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c new file mode 100644 index 000000000000..657925b0f428 --- /dev/null +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * imr_selftest.c -- Intel Isolated Memory Region self-test driver + * + * Copyright(c) 2013 Intel Corporation. + * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> + * + * IMR self test. The purpose of this module is to run a set of tests on the + * IMR API to validate its sanity. We check for overlapping, reserved + * addresses and setup/teardown sanity. + * + */ + +#include <asm-generic/sections.h> +#include <asm/cpu_device_id.h> +#include <asm/imr.h> +#include <asm/io.h> + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/types.h> + +#define SELFTEST KBUILD_MODNAME ": " +/** + * imr_self_test_result - Print result string for self test. + * + * @res: result code - true if test passed false otherwise. + * @fmt: format string. + * ... variadic argument list. + */ +static __printf(2, 3) +void __init imr_self_test_result(int res, const char *fmt, ...) +{ + va_list vlist; + + /* Print pass/fail. */ + if (res) + pr_info(SELFTEST "pass "); + else + pr_info(SELFTEST "fail "); + + /* Print variable string. */ + va_start(vlist, fmt); + vprintk(fmt, vlist); + va_end(vlist); + + /* Optional warning. */ + WARN(res == 0, "test failed"); +} +#undef SELFTEST + +/** + * imr_self_test + * + * Verify IMR self_test with some simple tests to verify overlap, + * zero sized allocations and 1 KiB sized areas. + * + */ +static void __init imr_self_test(void) +{ + phys_addr_t base = virt_to_phys(&_text); + size_t size = virt_to_phys(&__end_rodata) - base; + const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n"; + int ret; + + /* Test zero zero. */ + ret = imr_add_range(0, 0, 0, 0); + imr_self_test_result(ret < 0, "zero sized IMR\n"); + + /* Test exact overlap. */ + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); + imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); + + /* Test overlap with base inside of existing. */ + base += size - IMR_ALIGN; + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); + imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); + + /* Test overlap with end inside of existing. */ + base -= size + IMR_ALIGN * 2; + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); + imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); + + /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */ + ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL, + IMR_WRITE_ACCESS_ALL); + imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n"); + + /* Test that a 1 KiB IMR @ zero with CPU only will work. */ + ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU); + imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n"); + if (ret >= 0) { + ret = imr_remove_range(0, IMR_ALIGN); + imr_self_test_result(ret == 0, "teardown - cpu-access\n"); + } + + /* Test 2 KiB works. */ + size = IMR_ALIGN * 2; + ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL); + imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n"); + if (ret >= 0) { + ret = imr_remove_range(0, size); + imr_self_test_result(ret == 0, "teardown 2KiB\n"); + } +} + +static const struct x86_cpu_id imr_ids[] __initconst = { + X86_MATCH_VFM(INTEL_QUARK_X1000, NULL), + {} +}; + +/** + * imr_self_test_init - entry point for IMR driver. + * + * return: -ENODEV for no IMR support 0 if good to go. + */ +static int __init imr_self_test_init(void) +{ + if (x86_match_cpu(imr_ids)) + imr_self_test(); + return 0; +} + +/** + * imr_self_test_exit - exit point for IMR code. + * + * return: + */ +device_initcall(imr_self_test_init); diff --git a/arch/x86/platform/intel/Makefile b/arch/x86/platform/intel/Makefile new file mode 100644 index 000000000000..dbee3b00f9d0 --- /dev/null +++ b/arch/x86/platform/intel/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c new file mode 100644 index 000000000000..40ae94db20d8 --- /dev/null +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IOSF-SB MailBox Interface Driver + * Copyright (c) 2013, Intel Corporation. + * + * The IOSF-SB is a fabric bus available on Atom based SOC's that uses a + * mailbox interface (MBI) to communicate with multiple devices. This + * driver implements access to this interface for those platforms that can + * enumerate the device using PCI. + */ + +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/capability.h> +#include <linux/pm_qos.h> +#include <linux/wait.h> + +#include <asm/iosf_mbi.h> + +#define PCI_DEVICE_ID_INTEL_BAYTRAIL 0x0F00 +#define PCI_DEVICE_ID_INTEL_BRASWELL 0x2280 +#define PCI_DEVICE_ID_INTEL_QUARK_X1000 0x0958 +#define PCI_DEVICE_ID_INTEL_TANGIER 0x1170 + +static struct pci_dev *mbi_pdev; +static DEFINE_SPINLOCK(iosf_mbi_lock); + +/**************** Generic iosf_mbi access helpers ****************/ + +static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) +{ + return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE; +} + +static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr) +{ + int result; + + if (!mbi_pdev) + return -ENODEV; + + if (mcrx) { + result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, + mcrx); + if (result < 0) + goto fail_read; + } + + result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); + if (result < 0) + goto fail_read; + + result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); + if (result < 0) + goto fail_read; + + return 0; + +fail_read: + dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); + return pcibios_err_to_errno(result); +} + +static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) +{ + int result; + + if (!mbi_pdev) + return -ENODEV; + + result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); + if (result < 0) + goto fail_write; + + if (mcrx) { + result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, + mcrx); + if (result < 0) + goto fail_write; + } + + result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); + if (result < 0) + goto fail_write; + + return 0; + +fail_write: + dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); + return pcibios_err_to_errno(result); +} + +int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) +{ + u32 mcr, mcrx; + unsigned long flags; + int ret; + + /* Access to the GFX unit is handled by GPU code */ + if (port == BT_MBI_UNIT_GFX) { + WARN_ON(1); + return -EPERM; + } + + mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); + mcrx = offset & MBI_MASK_HI; + + spin_lock_irqsave(&iosf_mbi_lock, flags); + ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr); + spin_unlock_irqrestore(&iosf_mbi_lock, flags); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_read); + +int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) +{ + u32 mcr, mcrx; + unsigned long flags; + int ret; + + /* Access to the GFX unit is handled by GPU code */ + if (port == BT_MBI_UNIT_GFX) { + WARN_ON(1); + return -EPERM; + } + + mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); + mcrx = offset & MBI_MASK_HI; + + spin_lock_irqsave(&iosf_mbi_lock, flags); + ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr); + spin_unlock_irqrestore(&iosf_mbi_lock, flags); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_write); + +int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) +{ + u32 mcr, mcrx; + u32 value; + unsigned long flags; + int ret; + + /* Access to the GFX unit is handled by GPU code */ + if (port == BT_MBI_UNIT_GFX) { + WARN_ON(1); + return -EPERM; + } + + mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); + mcrx = offset & MBI_MASK_HI; + + spin_lock_irqsave(&iosf_mbi_lock, flags); + + /* Read current mdr value */ + ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value); + if (ret < 0) { + spin_unlock_irqrestore(&iosf_mbi_lock, flags); + return ret; + } + + /* Apply mask */ + value &= ~mask; + mdr &= mask; + value |= mdr; + + /* Write back */ + ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value); + + spin_unlock_irqrestore(&iosf_mbi_lock, flags); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_modify); + +bool iosf_mbi_available(void) +{ + /* Mbi isn't hot-pluggable. No remove routine is provided */ + return mbi_pdev; +} +EXPORT_SYMBOL(iosf_mbi_available); + +/* + **************** P-Unit/kernel shared I2C bus arbitration **************** + * + * Some Bay Trail and Cherry Trail devices have the P-Unit and us (the kernel) + * share a single I2C bus to the PMIC. Below are helpers to arbitrate the + * accesses between the kernel and the P-Unit. + * + * See arch/x86/include/asm/iosf_mbi.h for kernel-doc text for each function. + */ + +#define SEMAPHORE_TIMEOUT 500 +#define PUNIT_SEMAPHORE_BYT 0x7 +#define PUNIT_SEMAPHORE_CHT 0x10e +#define PUNIT_SEMAPHORE_BIT BIT(0) +#define PUNIT_SEMAPHORE_ACQUIRE BIT(1) + +static DEFINE_MUTEX(iosf_mbi_pmic_access_mutex); +static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier); +static DECLARE_WAIT_QUEUE_HEAD(iosf_mbi_pmic_access_waitq); +static u32 iosf_mbi_pmic_punit_access_count; +static u32 iosf_mbi_pmic_i2c_access_count; +static u32 iosf_mbi_sem_address; +static unsigned long iosf_mbi_sem_acquired; +static struct pm_qos_request iosf_mbi_pm_qos; + +void iosf_mbi_punit_acquire(void) +{ + /* Wait for any I2C PMIC accesses from in kernel drivers to finish. */ + mutex_lock(&iosf_mbi_pmic_access_mutex); + while (iosf_mbi_pmic_i2c_access_count != 0) { + mutex_unlock(&iosf_mbi_pmic_access_mutex); + wait_event(iosf_mbi_pmic_access_waitq, + iosf_mbi_pmic_i2c_access_count == 0); + mutex_lock(&iosf_mbi_pmic_access_mutex); + } + /* + * We do not need to do anything to allow the PUNIT to safely access + * the PMIC, other then block in kernel accesses to the PMIC. + */ + iosf_mbi_pmic_punit_access_count++; + mutex_unlock(&iosf_mbi_pmic_access_mutex); +} +EXPORT_SYMBOL(iosf_mbi_punit_acquire); + +void iosf_mbi_punit_release(void) +{ + bool do_wakeup; + + mutex_lock(&iosf_mbi_pmic_access_mutex); + iosf_mbi_pmic_punit_access_count--; + do_wakeup = iosf_mbi_pmic_punit_access_count == 0; + mutex_unlock(&iosf_mbi_pmic_access_mutex); + + if (do_wakeup) + wake_up(&iosf_mbi_pmic_access_waitq); +} +EXPORT_SYMBOL(iosf_mbi_punit_release); + +static int iosf_mbi_get_sem(u32 *sem) +{ + int ret; + + ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, + iosf_mbi_sem_address, sem); + if (ret) { + dev_err(&mbi_pdev->dev, "Error P-Unit semaphore read failed\n"); + return ret; + } + + *sem &= PUNIT_SEMAPHORE_BIT; + return 0; +} + +static void iosf_mbi_reset_semaphore(void) +{ + if (iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, + iosf_mbi_sem_address, 0, PUNIT_SEMAPHORE_BIT)) + dev_err(&mbi_pdev->dev, "Error P-Unit semaphore reset failed\n"); + + cpu_latency_qos_update_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE); + + blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier, + MBI_PMIC_BUS_ACCESS_END, NULL); +} + +/* + * This function blocks P-Unit accesses to the PMIC I2C bus, so that kernel + * I2C code, such as e.g. a fuel-gauge driver, can access it safely. + * + * This function may be called by I2C controller code while an I2C driver has + * already blocked P-Unit accesses because it wants them blocked over multiple + * i2c-transfers, for e.g. read-modify-write of an I2C client register. + * + * To allow safe PMIC i2c bus accesses this function takes the following steps: + * + * 1) Some code sends request to the P-Unit which make it access the PMIC + * I2C bus. Testing has shown that the P-Unit does not check its internal + * PMIC bus semaphore for these requests. Callers of these requests call + * iosf_mbi_punit_acquire()/_release() around their P-Unit accesses, these + * functions increase/decrease iosf_mbi_pmic_punit_access_count, so first + * we wait for iosf_mbi_pmic_punit_access_count to become 0. + * + * 2) Check iosf_mbi_pmic_i2c_access_count, if access has already + * been blocked by another caller, we only need to increment + * iosf_mbi_pmic_i2c_access_count and we can skip the other steps. + * + * 3) Some code makes such P-Unit requests from atomic contexts where it + * cannot call iosf_mbi_punit_acquire() as that may sleep. + * As the second step we call a notifier chain which allows any code + * needing P-Unit resources from atomic context to acquire them before + * we take control over the PMIC I2C bus. + * + * 4) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC + * if this happens while the kernel itself is accessing the PMIC I2C bus + * the SoC hangs. + * As the third step we call cpu_latency_qos_update_request() to disallow the + * CPU to enter C6 or C7. + * + * 5) The P-Unit has a PMIC bus semaphore which we can request to stop + * autonomous P-Unit tasks from accessing the PMIC I2C bus while we hold it. + * As the fourth and final step we request this semaphore and wait for our + * request to be acknowledged. + */ +int iosf_mbi_block_punit_i2c_access(void) +{ + unsigned long start, end; + int ret = 0; + u32 sem; + + if (WARN_ON(!mbi_pdev || !iosf_mbi_sem_address)) + return -ENXIO; + + mutex_lock(&iosf_mbi_pmic_access_mutex); + + while (iosf_mbi_pmic_punit_access_count != 0) { + mutex_unlock(&iosf_mbi_pmic_access_mutex); + wait_event(iosf_mbi_pmic_access_waitq, + iosf_mbi_pmic_punit_access_count == 0); + mutex_lock(&iosf_mbi_pmic_access_mutex); + } + + if (iosf_mbi_pmic_i2c_access_count > 0) + goto success; + + blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier, + MBI_PMIC_BUS_ACCESS_BEGIN, NULL); + + /* + * Disallow the CPU to enter C6 or C7 state, entering these states + * requires the P-Unit to talk to the PMIC and if this happens while + * we're holding the semaphore, the SoC hangs. + */ + cpu_latency_qos_update_request(&iosf_mbi_pm_qos, 0); + + /* host driver writes to side band semaphore register */ + ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, + iosf_mbi_sem_address, PUNIT_SEMAPHORE_ACQUIRE); + if (ret) { + dev_err(&mbi_pdev->dev, "Error P-Unit semaphore request failed\n"); + goto error; + } + + /* host driver waits for bit 0 to be set in semaphore register */ + start = jiffies; + end = start + msecs_to_jiffies(SEMAPHORE_TIMEOUT); + do { + ret = iosf_mbi_get_sem(&sem); + if (!ret && sem) { + iosf_mbi_sem_acquired = jiffies; + dev_dbg(&mbi_pdev->dev, "P-Unit semaphore acquired after %ums\n", + jiffies_to_msecs(jiffies - start)); + goto success; + } + + usleep_range(1000, 2000); + } while (time_before(jiffies, end)); + + ret = -ETIMEDOUT; + dev_err(&mbi_pdev->dev, "Error P-Unit semaphore timed out, resetting\n"); +error: + iosf_mbi_reset_semaphore(); + if (!iosf_mbi_get_sem(&sem)) + dev_err(&mbi_pdev->dev, "P-Unit semaphore: %d\n", sem); +success: + if (!WARN_ON(ret)) + iosf_mbi_pmic_i2c_access_count++; + + mutex_unlock(&iosf_mbi_pmic_access_mutex); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_block_punit_i2c_access); + +void iosf_mbi_unblock_punit_i2c_access(void) +{ + bool do_wakeup = false; + + mutex_lock(&iosf_mbi_pmic_access_mutex); + iosf_mbi_pmic_i2c_access_count--; + if (iosf_mbi_pmic_i2c_access_count == 0) { + iosf_mbi_reset_semaphore(); + dev_dbg(&mbi_pdev->dev, "punit semaphore held for %ums\n", + jiffies_to_msecs(jiffies - iosf_mbi_sem_acquired)); + do_wakeup = true; + } + mutex_unlock(&iosf_mbi_pmic_access_mutex); + + if (do_wakeup) + wake_up(&iosf_mbi_pmic_access_waitq); +} +EXPORT_SYMBOL(iosf_mbi_unblock_punit_i2c_access); + +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) +{ + int ret; + + /* Wait for the bus to go inactive before registering */ + iosf_mbi_punit_acquire(); + ret = blocking_notifier_chain_register( + &iosf_mbi_pmic_bus_access_notifier, nb); + iosf_mbi_punit_release(); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier); + +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb) +{ + iosf_mbi_assert_punit_acquired(); + + return blocking_notifier_chain_unregister( + &iosf_mbi_pmic_bus_access_notifier, nb); +} +EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked); + +void iosf_mbi_assert_punit_acquired(void) +{ + WARN_ON(iosf_mbi_pmic_punit_access_count == 0); +} +EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired); + +/**************** iosf_mbi debug code ****************/ + +#ifdef CONFIG_IOSF_MBI_DEBUG +static u32 dbg_mdr; +static u32 dbg_mcr; +static u32 dbg_mcrx; + +static int mcr_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +static int mcr_set(void *data, u64 val) +{ + u8 command = ((u32)val & 0xFF000000) >> 24, + port = ((u32)val & 0x00FF0000) >> 16, + offset = ((u32)val & 0x0000FF00) >> 8; + int err; + + *(u32 *)data = val; + + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + + if (command & 1u) + err = iosf_mbi_write(port, + command, + dbg_mcrx | offset, + dbg_mdr); + else + err = iosf_mbi_read(port, + command, + dbg_mcrx | offset, + &dbg_mdr); + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n"); + +static struct dentry *iosf_dbg; + +static void iosf_sideband_debug_init(void) +{ + iosf_dbg = debugfs_create_dir("iosf_sb", NULL); + + /* mdr */ + debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); + + /* mcrx */ + debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); + + /* mcr - initiates mailbox transaction */ + debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); +} + +static void iosf_debugfs_init(void) +{ + iosf_sideband_debug_init(); +} + +static void iosf_debugfs_remove(void) +{ + debugfs_remove_recursive(iosf_dbg); +} +#else +static inline void iosf_debugfs_init(void) { } +static inline void iosf_debugfs_remove(void) { } +#endif /* CONFIG_IOSF_MBI_DEBUG */ + +static int iosf_mbi_probe(struct pci_dev *pdev, + const struct pci_device_id *dev_id) +{ + int ret; + + ret = pci_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, "error: could not enable device\n"); + return ret; + } + + mbi_pdev = pci_dev_get(pdev); + iosf_mbi_sem_address = dev_id->driver_data; + + return 0; +} + +static const struct pci_device_id iosf_mbi_pci_ids[] = { + { PCI_DEVICE_DATA(INTEL, BAYTRAIL, PUNIT_SEMAPHORE_BYT) }, + { PCI_DEVICE_DATA(INTEL, BRASWELL, PUNIT_SEMAPHORE_CHT) }, + { PCI_DEVICE_DATA(INTEL, QUARK_X1000, 0) }, + { PCI_DEVICE_DATA(INTEL, TANGIER, 0) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids); + +static struct pci_driver iosf_mbi_pci_driver = { + .name = "iosf_mbi_pci", + .probe = iosf_mbi_probe, + .id_table = iosf_mbi_pci_ids, +}; + +static int __init iosf_mbi_init(void) +{ + iosf_debugfs_init(); + + cpu_latency_qos_add_request(&iosf_mbi_pm_qos, PM_QOS_DEFAULT_VALUE); + + return pci_register_driver(&iosf_mbi_pci_driver); +} + +static void __exit iosf_mbi_exit(void) +{ + iosf_debugfs_remove(); + + pci_unregister_driver(&iosf_mbi_pci_driver); + pci_dev_put(mbi_pdev); + mbi_pdev = NULL; + + cpu_latency_qos_remove_request(&iosf_mbi_pm_qos); +} + +module_init(iosf_mbi_init); +module_exit(iosf_mbi_exit); + +MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>"); +MODULE_DESCRIPTION("IOSF Mailbox Interface accessor"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile index db921983a102..354352748428 100644 --- a/arch/x86/platform/iris/Makefile +++ b/arch/x86/platform/iris/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_X86_32_IRIS) += iris.o diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index e6cb80f620af..5591a2d9cfe8 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Eurobraille/Iris power off support. * @@ -5,20 +6,6 @@ * It is shutdown by a special I/O sequence which this module provides. * * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org> - * - * This program is free software ; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation ; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY ; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with the program ; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/moduleparam.h> @@ -27,7 +14,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/delay.h> -#include <linux/init.h> #include <linux/pm.h> #include <asm/io.h> @@ -41,7 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); -MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); static bool force; @@ -77,17 +62,15 @@ static int iris_probe(struct platform_device *pdev) return 0; } -static int iris_remove(struct platform_device *pdev) +static void iris_remove(struct platform_device *pdev) { pm_power_off = old_pm_power_off; printk(KERN_INFO "Iris power_off handler uninstalled.\n"); - return 0; } static struct platform_driver iris_driver = { .driver = { .name = "iris", - .owner = THIS_MODULE, }, .probe = iris_probe, .remove = iris_remove, diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile deleted file mode 100644 index af1da7e623f9..000000000000 --- a/arch/x86/platform/mrst/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_X86_INTEL_MID) += mrst.o -obj-$(CONFIG_X86_INTEL_MID) += vrtc.o -obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c deleted file mode 100644 index 028454f0c3a5..000000000000 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * early_printk_mrst.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements two early consoles named mrst and hsu. - * mrst is based on Maxim3110 spi-uart device, it exists in both - * Moorestown and Medfield platforms, while hsu is based on a High - * Speed UART device which only exists in the Medfield platform - */ - -#include <linux/serial_reg.h> -#include <linux/serial_mfd.h> -#include <linux/kmsg_dump.h> -#include <linux/console.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/io.h> - -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/mrst.h> - -#define MRST_SPI_TIMEOUT 0x200000 -#define MRST_REGBASE_SPI0 0xff128000 -#define MRST_REGBASE_SPI1 0xff128400 -#define MRST_CLK_SPI0_REG 0xff11d86c - -/* Bit fields in CTRLR0 */ -#define SPI_DFS_OFFSET 0 - -#define SPI_FRF_OFFSET 4 -#define SPI_FRF_SPI 0x0 -#define SPI_FRF_SSP 0x1 -#define SPI_FRF_MICROWIRE 0x2 -#define SPI_FRF_RESV 0x3 - -#define SPI_MODE_OFFSET 6 -#define SPI_SCPH_OFFSET 6 -#define SPI_SCOL_OFFSET 7 -#define SPI_TMOD_OFFSET 8 -#define SPI_TMOD_TR 0x0 /* xmit & recv */ -#define SPI_TMOD_TO 0x1 /* xmit only */ -#define SPI_TMOD_RO 0x2 /* recv only */ -#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ - -#define SPI_SLVOE_OFFSET 10 -#define SPI_SRL_OFFSET 11 -#define SPI_CFS_OFFSET 12 - -/* Bit fields in SR, 7 bits */ -#define SR_MASK 0x7f /* cover 7 bits */ -#define SR_BUSY (1 << 0) -#define SR_TF_NOT_FULL (1 << 1) -#define SR_TF_EMPT (1 << 2) -#define SR_RF_NOT_EMPT (1 << 3) -#define SR_RF_FULL (1 << 4) -#define SR_TX_ERR (1 << 5) -#define SR_DCOL (1 << 6) - -struct dw_spi_reg { - u32 ctrl0; - u32 ctrl1; - u32 ssienr; - u32 mwcr; - u32 ser; - u32 baudr; - u32 txfltr; - u32 rxfltr; - u32 txflr; - u32 rxflr; - u32 sr; - u32 imr; - u32 isr; - u32 risr; - u32 txoicr; - u32 rxoicr; - u32 rxuicr; - u32 msticr; - u32 icr; - u32 dmacr; - u32 dmatdlr; - u32 dmardlr; - u32 idr; - u32 version; - - /* Currently operates as 32 bits, though only the low 16 bits matter */ - u32 dr; -} __packed; - -#define dw_readl(dw, name) __raw_readl(&(dw)->name) -#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name) - -/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */ -static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; - -static u32 *pclk_spi0; -/* Always contains an accessible address, start with 0 */ -static struct dw_spi_reg *pspi; - -static struct kmsg_dumper dw_dumper; -static int dumper_registered; - -static void dw_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) -{ - static char line[1024]; - size_t len; - - /* When run to this, we'd better re-init the HW */ - mrst_early_console_init(); - - while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) - early_mrst_console.write(&early_mrst_console, line, len); -} - -/* Set the ratio rate to 115200, 8n1, IRQ disabled */ -static void max3110_write_config(void) -{ - u16 config; - - config = 0xc001; - dw_writel(pspi, dr, config); -} - -/* Translate char to a eligible word and send to max3110 */ -static void max3110_write_data(char c) -{ - u16 data; - - data = 0x8000 | c; - dw_writel(pspi, dr, data); -} - -void mrst_early_console_init(void) -{ - u32 ctrlr0 = 0; - u32 spi0_cdiv; - u32 freq; /* Freqency info only need be searched once */ - - /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */ - pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - MRST_CLK_SPI0_REG); - spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9; - freq = 100000000 / (spi0_cdiv + 1); - - if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL) - mrst_spi_paddr = MRST_REGBASE_SPI1; - - pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - mrst_spi_paddr); - - /* Disable SPI controller */ - dw_writel(pspi, ssienr, 0); - - /* Set control param, 8 bits, transmit only mode */ - ctrlr0 = dw_readl(pspi, ctrl0); - - ctrlr0 &= 0xfcc0; - ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET) - | (SPI_TMOD_TO << SPI_TMOD_OFFSET); - dw_writel(pspi, ctrl0, ctrlr0); - - /* - * Change the spi0 clk to comply with 115200 bps, use 100000 to - * calculate the clk dividor to make the clock a little slower - * than real baud rate. - */ - dw_writel(pspi, baudr, freq/100000); - - /* Disable all INT for early phase */ - dw_writel(pspi, imr, 0x0); - - /* Set the cs to spi-uart */ - dw_writel(pspi, ser, 0x2); - - /* Enable the HW, the last step for HW init */ - dw_writel(pspi, ssienr, 0x1); - - /* Set the default configuration */ - max3110_write_config(); - - /* Register the kmsg dumper */ - if (!dumper_registered) { - dw_dumper.dump = dw_kmsg_dump; - kmsg_dump_register(&dw_dumper); - dumper_registered = 1; - } -} - -/* Slave select should be called in the read/write function */ -static void early_mrst_spi_putc(char c) -{ - unsigned int timeout; - u32 sr; - - timeout = MRST_SPI_TIMEOUT; - /* Early putc needs to make sure the TX FIFO is not full */ - while (--timeout) { - sr = dw_readl(pspi, sr); - if (!(sr & SR_TF_NOT_FULL)) - cpu_relax(); - else - break; - } - - if (!timeout) - pr_warning("MRST earlycon: timed out\n"); - else - max3110_write_data(c); -} - -/* Early SPI only uses polling mode */ -static void early_mrst_spi_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_mrst_spi_putc('\r'); - early_mrst_spi_putc(*str); - str++; - } -} - -struct console early_mrst_console = { - .name = "earlymrst", - .write = early_mrst_spi_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT_BASE 0xffa28080 - -static void __iomem *phsu; - -void hsu_early_console_init(const char *s) -{ - unsigned long paddr, port = 0; - u8 lcr; - - /* - * Select the early HSU console port if specified by user in the - * kernel command line. - */ - if (*s && !kstrtoul(s, 10, &port)) - port = clamp_val(port, 0, 2); - - paddr = HSU_PORT_BASE + port * 0x80; - phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c deleted file mode 100644 index a0a0a4389bbd..000000000000 --- a/arch/x86/platform/mrst/mrst.c +++ /dev/null @@ -1,1052 +0,0 @@ -/* - * mrst.c: Intel Moorestown platform specific setup code - * - * (C) Copyright 2008 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#define pr_fmt(fmt) "mrst: " fmt - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/scatterlist.h> -#include <linux/sfi.h> -#include <linux/intel_pmic_gpio.h> -#include <linux/spi/spi.h> -#include <linux/i2c.h> -#include <linux/i2c/pca953x.h> -#include <linux/gpio_keys.h> -#include <linux/input.h> -#include <linux/platform_device.h> -#include <linux/irq.h> -#include <linux/module.h> -#include <linux/notifier.h> -#include <linux/mfd/intel_msic.h> -#include <linux/gpio.h> -#include <linux/i2c/tc35876x.h> - -#include <asm/setup.h> -#include <asm/mpspec_def.h> -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/io_apic.h> -#include <asm/mrst.h> -#include <asm/mrst-vrtc.h> -#include <asm/io.h> -#include <asm/i8259.h> -#include <asm/intel_scu_ipc.h> -#include <asm/apb_timer.h> -#include <asm/reboot.h> - -/* - * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, - * cmdline option x86_mrst_timer can be used to override the configuration - * to prefer one or the other. - * at runtime, there are basically three timer configurations: - * 1. per cpu apbt clock only - * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only - * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. - * - * by default (without cmdline option), platform code first detects cpu type - * to see if we are on lincroft or penwell, then set up both lapic or apbt - * clocks accordingly. - * i.e. by default, medfield uses configuration #2, moorestown uses #1. - * config #3 is supported but not recommended on medfield. - * - * rating and feature summary: - * lapic (with C3STOP) --------- 100 - * apbt (always-on) ------------ 110 - * lapic (always-on,ARAT) ------ 150 - */ - -__cpuinitdata enum mrst_timer_options mrst_timer_options; - -static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; -static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; -enum mrst_cpu_type __mrst_cpu_chip; -EXPORT_SYMBOL_GPL(__mrst_cpu_chip); - -int sfi_mtimer_num; - -struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; -EXPORT_SYMBOL_GPL(sfi_mrtc_array); -int sfi_mrtc_num; - -static void mrst_power_off(void) -{ -} - -static void mrst_reboot(void) -{ - intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); -} - -/* parse all the mtimer info to a static mtimer array */ -static int __init sfi_parse_mtmr(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_timer_table_entry *pentry; - struct mpc_intsrc mp_irq; - int totallen; - - sb = (struct sfi_table_simple *)table; - if (!sfi_mtimer_num) { - sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, - struct sfi_timer_table_entry); - pentry = (struct sfi_timer_table_entry *) sb->pentry; - totallen = sfi_mtimer_num * sizeof(*pentry); - memcpy(sfi_mtimer_array, pentry, totallen); - } - - pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); - pentry = sfi_mtimer_array; - for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { - pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," - " irq = %d\n", totallen, (u32)pentry->phys_addr, - pentry->freq_hz, pentry->irq); - if (!pentry->irq) - continue; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; -/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; - mp_irq.srcbus = MP_BUS_ISA; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - mp_save_irq(&mp_irq); - } - - return 0; -} - -struct sfi_timer_table_entry *sfi_get_mtmr(int hint) -{ - int i; - if (hint < sfi_mtimer_num) { - if (!sfi_mtimer_usage[hint]) { - pr_debug("hint taken for timer %d irq %d\n",\ - hint, sfi_mtimer_array[hint].irq); - sfi_mtimer_usage[hint] = 1; - return &sfi_mtimer_array[hint]; - } - } - /* take the first timer available */ - for (i = 0; i < sfi_mtimer_num;) { - if (!sfi_mtimer_usage[i]) { - sfi_mtimer_usage[i] = 1; - return &sfi_mtimer_array[i]; - } - i++; - } - return NULL; -} - -void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) -{ - int i; - for (i = 0; i < sfi_mtimer_num;) { - if (mtmr->irq == sfi_mtimer_array[i].irq) { - sfi_mtimer_usage[i] = 0; - return; - } - i++; - } -} - -/* parse all the mrtc info to a global mrtc array */ -int __init sfi_parse_mrtc(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_rtc_table_entry *pentry; - struct mpc_intsrc mp_irq; - - int totallen; - - sb = (struct sfi_table_simple *)table; - if (!sfi_mrtc_num) { - sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, - struct sfi_rtc_table_entry); - pentry = (struct sfi_rtc_table_entry *)sb->pentry; - totallen = sfi_mrtc_num * sizeof(*pentry); - memcpy(sfi_mrtc_array, pentry, totallen); - } - - pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); - pentry = sfi_mrtc_array; - for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { - pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", - totallen, (u32)pentry->phys_addr, pentry->irq); - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = 0xf; /* level trigger and active low */ - mp_irq.srcbus = MP_BUS_ISA; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - mp_save_irq(&mp_irq); - } - return 0; -} - -static unsigned long __init mrst_calibrate_tsc(void) -{ - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb; - - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); - ratio = (hi >> 8) & 0x1f; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("read a zero ratio, should be incorrect!\n"); - pr_err("force tsc ratio to 16 ...\n"); - ratio = 16; - } - rdmsr(MSR_FSB_FREQ, lo, hi); - if ((lo & 0x7) == 0x7) - fsb = PENWELL_FSB_FREQ_83SKU; - else - fsb = PENWELL_FSB_FREQ_100SKU; - fast_calibrate = ratio * fsb; - pr_debug("read penwell tsc %lu khz\n", fast_calibrate); - lapic_timer_frequency = fsb * 1000 / HZ; - /* mark tsc clocksource as reliable */ - set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - - if (fast_calibrate) - return fast_calibrate; - - return 0; -} - -static void __init mrst_time_init(void) -{ - sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); - switch (mrst_timer_options) { - case MRST_TIMER_APBT_ONLY: - break; - case MRST_TIMER_LAPIC_APBT: - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; - x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - break; - default: - if (!boot_cpu_has(X86_FEATURE_ARAT)) - break; - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; - x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - return; - } - /* we need at least one APB timer */ - pre_init_apic_IRQ0(); - apbt_time_init(); -} - -static void __cpuinit mrst_arch_setup(void) -{ - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) - __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; - else { - pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); - __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; - } -} - -/* MID systems don't have i8042 controller */ -static int mrst_i8042_detect(void) -{ - return 0; -} - -/* - * Moorestown does not have external NMI source nor port 0x61 to report - * NMI status. The possible NMI sources are from pmu as a result of NMI - * watchdog or lock debug. Reading io port 0x61 results in 0xff which - * misled NMI handler. - */ -static unsigned char mrst_get_nmi_reason(void) -{ - return 0; -} - -/* - * Moorestown specific x86_init function overrides and early setup - * calls. - */ -void __init x86_mrst_early_setup(void) -{ - x86_init.resources.probe_roms = x86_init_noop; - x86_init.resources.reserve_resources = x86_init_noop; - - x86_init.timers.timer_init = mrst_time_init; - x86_init.timers.setup_percpu_clockev = x86_init_noop; - - x86_init.irqs.pre_vector_init = x86_init_noop; - - x86_init.oem.arch_setup = mrst_arch_setup; - - x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; - - x86_platform.calibrate_tsc = mrst_calibrate_tsc; - x86_platform.i8042_detect = mrst_i8042_detect; - x86_init.timers.wallclock_init = mrst_rtc_init; - x86_platform.get_nmi_reason = mrst_get_nmi_reason; - - x86_init.pci.init = pci_mrst_init; - x86_init.pci.fixup_irqs = x86_init_noop; - - legacy_pic = &null_legacy_pic; - - /* Moorestown specific power_off/restart method */ - pm_power_off = mrst_power_off; - machine_ops.emergency_restart = mrst_reboot; - - /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - set_bit(MP_BUS_ISA, mp_bus_not_pci); -} - -/* - * if user does not want to use per CPU apb timer, just give it a lower rating - * than local apic timer and skip the late per cpu timer init. - */ -static inline int __init setup_x86_mrst_timer(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp("apbt_only", arg) == 0) - mrst_timer_options = MRST_TIMER_APBT_ONLY; - else if (strcmp("lapic_and_apbt", arg) == 0) - mrst_timer_options = MRST_TIMER_LAPIC_APBT; - else { - pr_warning("X86 MRST timer option %s not recognised" - " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", - arg); - return -EINVAL; - } - return 0; -} -__setup("x86_mrst_timer=", setup_x86_mrst_timer); - -/* - * Parsing GPIO table first, since the DEVS table will need this table - * to map the pin name to the actual pin. - */ -static struct sfi_gpio_table_entry *gpio_table; -static int gpio_num_entry; - -static int __init sfi_parse_gpio(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_gpio_table_entry *pentry; - int num, i; - - if (gpio_table) - return 0; - sb = (struct sfi_table_simple *)table; - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); - pentry = (struct sfi_gpio_table_entry *)sb->pentry; - - gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL); - if (!gpio_table) - return -1; - memcpy(gpio_table, pentry, num * sizeof(*pentry)); - gpio_num_entry = num; - - pr_debug("GPIO pin info:\n"); - for (i = 0; i < num; i++, pentry++) - pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," - " pin = %d\n", i, - pentry->controller_name, - pentry->pin_name, - pentry->pin_no); - return 0; -} - -static int get_gpio_by_name(const char *name) -{ - struct sfi_gpio_table_entry *pentry = gpio_table; - int i; - - if (!pentry) - return -1; - for (i = 0; i < gpio_num_entry; i++, pentry++) { - if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) - return pentry->pin_no; - } - return -1; -} - -/* - * Here defines the array of devices platform data that IAFW would export - * through SFI "DEVS" table, we use name and type to match the device and - * its platform data. - */ -struct devs_id { - char name[SFI_NAME_LEN + 1]; - u8 type; - u8 delay; - void *(*get_platform_data)(void *info); -}; - -/* the offset for the mapping of global gpio pin to irq */ -#define MRST_IRQ_OFFSET 0x100 - -static void __init *pmic_gpio_platform_data(void *info) -{ - static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; - int gpio_base = get_gpio_by_name("pmic_gpio_base"); - - if (gpio_base == -1) - gpio_base = 64; - pmic_gpio_pdata.gpio_base = gpio_base; - pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; - pmic_gpio_pdata.gpiointr = 0xffffeff8; - - return &pmic_gpio_pdata; -} - -static void __init *max3111_platform_data(void *info) -{ - struct spi_board_info *spi_info = info; - int intr = get_gpio_by_name("max3111_int"); - - spi_info->mode = SPI_MODE_0; - if (intr == -1) - return NULL; - spi_info->irq = intr + MRST_IRQ_OFFSET; - return NULL; -} - -/* we have multiple max7315 on the board ... */ -#define MAX7315_NUM 2 -static void __init *max7315_platform_data(void *info) -{ - static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; - static int nr; - struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; - struct i2c_board_info *i2c_info = info; - int gpio_base, intr; - char base_pin_name[SFI_NAME_LEN + 1]; - char intr_pin_name[SFI_NAME_LEN + 1]; - - if (nr == MAX7315_NUM) { - pr_err("too many max7315s, we only support %d\n", - MAX7315_NUM); - return NULL; - } - /* we have several max7315 on the board, we only need load several - * instances of the same pca953x driver to cover them - */ - strcpy(i2c_info->type, "max7315"); - if (nr++) { - sprintf(base_pin_name, "max7315_%d_base", nr); - sprintf(intr_pin_name, "max7315_%d_int", nr); - } else { - strcpy(base_pin_name, "max7315_base"); - strcpy(intr_pin_name, "max7315_int"); - } - - gpio_base = get_gpio_by_name(base_pin_name); - intr = get_gpio_by_name(intr_pin_name); - - if (gpio_base == -1) - return NULL; - max7315->gpio_base = gpio_base; - if (intr != -1) { - i2c_info->irq = intr + MRST_IRQ_OFFSET; - max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; - } else { - i2c_info->irq = -1; - max7315->irq_base = -1; - } - return max7315; -} - -static void *tca6416_platform_data(void *info) -{ - static struct pca953x_platform_data tca6416; - struct i2c_board_info *i2c_info = info; - int gpio_base, intr; - char base_pin_name[SFI_NAME_LEN + 1]; - char intr_pin_name[SFI_NAME_LEN + 1]; - - strcpy(i2c_info->type, "tca6416"); - strcpy(base_pin_name, "tca6416_base"); - strcpy(intr_pin_name, "tca6416_int"); - - gpio_base = get_gpio_by_name(base_pin_name); - intr = get_gpio_by_name(intr_pin_name); - - if (gpio_base == -1) - return NULL; - tca6416.gpio_base = gpio_base; - if (intr != -1) { - i2c_info->irq = intr + MRST_IRQ_OFFSET; - tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET; - } else { - i2c_info->irq = -1; - tca6416.irq_base = -1; - } - return &tca6416; -} - -static void *mpu3050_platform_data(void *info) -{ - struct i2c_board_info *i2c_info = info; - int intr = get_gpio_by_name("mpu3050_int"); - - if (intr == -1) - return NULL; - - i2c_info->irq = intr + MRST_IRQ_OFFSET; - return NULL; -} - -static void __init *emc1403_platform_data(void *info) -{ - static short intr2nd_pdata; - struct i2c_board_info *i2c_info = info; - int intr = get_gpio_by_name("thermal_int"); - int intr2nd = get_gpio_by_name("thermal_alert"); - - if (intr == -1 || intr2nd == -1) - return NULL; - - i2c_info->irq = intr + MRST_IRQ_OFFSET; - intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; - - return &intr2nd_pdata; -} - -static void __init *lis331dl_platform_data(void *info) -{ - static short intr2nd_pdata; - struct i2c_board_info *i2c_info = info; - int intr = get_gpio_by_name("accel_int"); - int intr2nd = get_gpio_by_name("accel_2"); - - if (intr == -1 || intr2nd == -1) - return NULL; - - i2c_info->irq = intr + MRST_IRQ_OFFSET; - intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; - - return &intr2nd_pdata; -} - -static void __init *no_platform_data(void *info) -{ - return NULL; -} - -static struct resource msic_resources[] = { - { - .start = INTEL_MSIC_IRQ_PHYS_BASE, - .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct intel_msic_platform_data msic_pdata; - -static struct platform_device msic_device = { - .name = "intel_msic", - .id = -1, - .dev = { - .platform_data = &msic_pdata, - }, - .num_resources = ARRAY_SIZE(msic_resources), - .resource = msic_resources, -}; - -static inline bool mrst_has_msic(void) -{ - return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL; -} - -static int msic_scu_status_change(struct notifier_block *nb, - unsigned long code, void *data) -{ - if (code == SCU_DOWN) { - platform_device_unregister(&msic_device); - return 0; - } - - return platform_device_register(&msic_device); -} - -static int __init msic_init(void) -{ - static struct notifier_block msic_scu_notifier = { - .notifier_call = msic_scu_status_change, - }; - - /* - * We need to be sure that the SCU IPC is ready before MSIC device - * can be registered. - */ - if (mrst_has_msic()) - intel_scu_notifier_add(&msic_scu_notifier); - - return 0; -} -arch_initcall(msic_init); - -/* - * msic_generic_platform_data - sets generic platform data for the block - * @info: pointer to the SFI device table entry for this block - * @block: MSIC block - * - * Function sets IRQ number from the SFI table entry for given device to - * the MSIC platform data. - */ -static void *msic_generic_platform_data(void *info, enum intel_msic_block block) -{ - struct sfi_device_table_entry *entry = info; - - BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST); - msic_pdata.irq[block] = entry->irq; - - return no_platform_data(info); -} - -static void *msic_battery_platform_data(void *info) -{ - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY); -} - -static void *msic_gpio_platform_data(void *info) -{ - static struct intel_msic_gpio_pdata pdata; - int gpio = get_gpio_by_name("msic_gpio_base"); - - if (gpio < 0) - return NULL; - - pdata.gpio_base = gpio; - msic_pdata.gpio = &pdata; - - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO); -} - -static void *msic_audio_platform_data(void *info) -{ - struct platform_device *pdev; - - pdev = platform_device_register_simple("sst-platform", -1, NULL, 0); - if (IS_ERR(pdev)) { - pr_err("failed to create audio platform device\n"); - return NULL; - } - - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO); -} - -static void *msic_power_btn_platform_data(void *info) -{ - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN); -} - -static void *msic_ocd_platform_data(void *info) -{ - static struct intel_msic_ocd_pdata pdata; - int gpio = get_gpio_by_name("ocd_gpio"); - - if (gpio < 0) - return NULL; - - pdata.gpio = gpio; - msic_pdata.ocd = &pdata; - - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD); -} - -static void *msic_thermal_platform_data(void *info) -{ - return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL); -} - -/* tc35876x DSI-LVDS bridge chip and panel platform data */ -static void *tc35876x_platform_data(void *data) -{ - static struct tc35876x_platform_data pdata; - - /* gpio pins set to -1 will not be used by the driver */ - pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN"); - pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN"); - pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3"); - - return &pdata; -} - -static const struct devs_id __initconst device_ids[] = { - {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data}, - {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, - {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data}, - {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, - {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, - {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, - {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data}, - {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, - {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, - {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, - {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data}, - {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data}, - - /* MSIC subdevices */ - {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, - {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data}, - {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data}, - {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data}, - {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data}, - {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data}, - - {}, -}; - -#define MAX_IPCDEVS 24 -static struct platform_device *ipc_devs[MAX_IPCDEVS]; -static int ipc_next_dev; - -#define MAX_SCU_SPI 24 -static struct spi_board_info *spi_devs[MAX_SCU_SPI]; -static int spi_next_dev; - -#define MAX_SCU_I2C 24 -static struct i2c_board_info *i2c_devs[MAX_SCU_I2C]; -static int i2c_bus[MAX_SCU_I2C]; -static int i2c_next_dev; - -static void __init intel_scu_device_register(struct platform_device *pdev) -{ - if(ipc_next_dev == MAX_IPCDEVS) - pr_err("too many SCU IPC devices"); - else - ipc_devs[ipc_next_dev++] = pdev; -} - -static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) -{ - struct spi_board_info *new_dev; - - if (spi_next_dev == MAX_SCU_SPI) { - pr_err("too many SCU SPI devices"); - return; - } - - new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!new_dev) { - pr_err("failed to alloc mem for delayed spi dev %s\n", - sdev->modalias); - return; - } - memcpy(new_dev, sdev, sizeof(*sdev)); - - spi_devs[spi_next_dev++] = new_dev; -} - -static void __init intel_scu_i2c_device_register(int bus, - struct i2c_board_info *idev) -{ - struct i2c_board_info *new_dev; - - if (i2c_next_dev == MAX_SCU_I2C) { - pr_err("too many SCU I2C devices"); - return; - } - - new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); - if (!new_dev) { - pr_err("failed to alloc mem for delayed i2c dev %s\n", - idev->type); - return; - } - memcpy(new_dev, idev, sizeof(*idev)); - - i2c_bus[i2c_next_dev] = bus; - i2c_devs[i2c_next_dev++] = new_dev; -} - -BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); -EXPORT_SYMBOL_GPL(intel_scu_notifier); - -/* Called by IPC driver */ -void intel_scu_devices_create(void) -{ - int i; - - for (i = 0; i < ipc_next_dev; i++) - platform_device_add(ipc_devs[i]); - - for (i = 0; i < spi_next_dev; i++) - spi_register_board_info(spi_devs[i], 1); - - for (i = 0; i < i2c_next_dev; i++) { - struct i2c_adapter *adapter; - struct i2c_client *client; - - adapter = i2c_get_adapter(i2c_bus[i]); - if (adapter) { - client = i2c_new_device(adapter, i2c_devs[i]); - if (!client) - pr_err("can't create i2c device %s\n", - i2c_devs[i]->type); - } else - i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); - } - intel_scu_notifier_post(SCU_AVAILABLE, NULL); -} -EXPORT_SYMBOL_GPL(intel_scu_devices_create); - -/* Called by IPC driver */ -void intel_scu_devices_destroy(void) -{ - int i; - - intel_scu_notifier_post(SCU_DOWN, NULL); - - for (i = 0; i < ipc_next_dev; i++) - platform_device_del(ipc_devs[i]); -} -EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); - -static void __init install_irq_resource(struct platform_device *pdev, int irq) -{ - /* Single threaded */ - static struct resource __initdata res = { - .name = "IRQ", - .flags = IORESOURCE_IRQ, - }; - res.start = irq; - platform_device_add_resources(pdev, &res, 1); -} - -static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry) -{ - const struct devs_id *dev = device_ids; - struct platform_device *pdev; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_IPC && - !strncmp(dev->name, entry->name, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(entry); - break; - } - dev++; - } - - /* - * On Medfield the platform device creation is handled by the MSIC - * MFD driver so we don't need to do it here. - */ - if (mrst_has_msic()) - return; - - pdev = platform_device_alloc(entry->name, 0); - if (pdev == NULL) { - pr_err("out of memory for SFI platform device '%s'.\n", - entry->name); - return; - } - install_irq_resource(pdev, entry->irq); - - pdev->dev.platform_data = pdata; - intel_scu_device_register(pdev); -} - -static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) -{ - const struct devs_id *dev = device_ids; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_SPI && - !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(spi_info); - break; - } - dev++; - } - spi_info->platform_data = pdata; - if (dev->delay) - intel_scu_spi_device_register(spi_info); - else - spi_register_board_info(spi_info, 1); -} - -static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) -{ - const struct devs_id *dev = device_ids; - void *pdata = NULL; - - while (dev->name[0]) { - if (dev->type == SFI_DEV_TYPE_I2C && - !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { - pdata = dev->get_platform_data(i2c_info); - break; - } - dev++; - } - i2c_info->platform_data = pdata; - - if (dev->delay) - intel_scu_i2c_device_register(bus, i2c_info); - else - i2c_register_board_info(bus, i2c_info, 1); - } - - -static int __init sfi_parse_devs(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_device_table_entry *pentry; - struct spi_board_info spi_info; - struct i2c_board_info i2c_info; - int num, i, bus; - int ioapic; - struct io_apic_irq_attr irq_attr; - - sb = (struct sfi_table_simple *)table; - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); - pentry = (struct sfi_device_table_entry *)sb->pentry; - - for (i = 0; i < num; i++, pentry++) { - int irq = pentry->irq; - - if (irq != (u8)0xff) { /* native RTE case */ - /* these SPI2 devices are not exposed to system as PCI - * devices, but they have separate RTE entry in IOAPIC - * so we have to enable them one by one here - */ - ioapic = mp_find_ioapic(irq); - irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = irq; - irq_attr.trigger = 1; - irq_attr.polarity = 1; - io_apic_set_pci_routing(NULL, irq, &irq_attr); - } else - irq = 0; /* No irq */ - - switch (pentry->type) { - case SFI_DEV_TYPE_IPC: - pr_debug("info[%2d]: IPC bus, name = %16.16s, " - "irq = 0x%2x\n", i, pentry->name, pentry->irq); - sfi_handle_ipc_dev(pentry); - break; - case SFI_DEV_TYPE_SPI: - memset(&spi_info, 0, sizeof(spi_info)); - strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); - spi_info.irq = irq; - spi_info.bus_num = pentry->host_num; - spi_info.chip_select = pentry->addr; - spi_info.max_speed_hz = pentry->max_freq; - pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " - "irq = 0x%2x, max_freq = %d, cs = %d\n", i, - spi_info.bus_num, - spi_info.modalias, - spi_info.irq, - spi_info.max_speed_hz, - spi_info.chip_select); - sfi_handle_spi_dev(&spi_info); - break; - case SFI_DEV_TYPE_I2C: - memset(&i2c_info, 0, sizeof(i2c_info)); - bus = pentry->host_num; - strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); - i2c_info.irq = irq; - i2c_info.addr = pentry->addr; - pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " - "irq = 0x%2x, addr = 0x%x\n", i, bus, - i2c_info.type, - i2c_info.irq, - i2c_info.addr); - sfi_handle_i2c_dev(bus, &i2c_info); - break; - case SFI_DEV_TYPE_UART: - case SFI_DEV_TYPE_HSI: - default: - ; - } - } - return 0; -} - -static int __init mrst_platform_init(void) -{ - sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); - sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); - return 0; -} -arch_initcall(mrst_platform_init); - -/* - * we will search these buttons in SFI GPIO table (by name) - * and register them dynamically. Please add all possible - * buttons here, we will shrink them if no GPIO found. - */ -static struct gpio_keys_button gpio_button[] = { - {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, - {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, - {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, - {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, - {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, - {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, - {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, - {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, - {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, - {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, -}; - -static struct gpio_keys_platform_data mrst_gpio_keys = { - .buttons = gpio_button, - .rep = 1, - .nbuttons = -1, /* will fill it after search */ -}; - -static struct platform_device pb_device = { - .name = "gpio-keys", - .id = -1, - .dev = { - .platform_data = &mrst_gpio_keys, - }, -}; - -/* - * Shrink the non-existent buttons, register the gpio button - * device if there is some - */ -static int __init pb_keys_init(void) -{ - struct gpio_keys_button *gb = gpio_button; - int i, num, good = 0; - - num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); - for (i = 0; i < num; i++) { - gb[i].gpio = get_gpio_by_name(gb[i].desc); - pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); - if (gb[i].gpio == -1) - continue; - - if (i != good) - gb[good] = gb[i]; - good++; - } - - if (good) { - mrst_gpio_keys.nbuttons = good; - return platform_device_register(&pb_device); - } - return 0; -} -late_initcall(pb_keys_init); diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c deleted file mode 100644 index 5e355b134ba4..000000000000 --- a/arch/x86/platform/mrst/vrtc.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * vrtc.c: Driver for virtual RTC device on Intel MID platform - * - * (C) Copyright 2009 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Note: - * VRTC is emulated by system controller firmware, the real HW - * RTC is located in the PMIC device. SCU FW shadows PMIC RTC - * in a memory mapped IO space that is visible to the host IA - * processor. - * - * This driver is based on RTC CMOS driver. - */ - -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/init.h> -#include <linux/sfi.h> -#include <linux/platform_device.h> - -#include <asm/mrst.h> -#include <asm/mrst-vrtc.h> -#include <asm/time.h> -#include <asm/fixmap.h> - -static unsigned char __iomem *vrtc_virt_base; - -unsigned char vrtc_cmos_read(unsigned char reg) -{ - unsigned char retval; - - /* vRTC's registers range from 0x0 to 0xD */ - if (reg > 0xd || !vrtc_virt_base) - return 0xff; - - lock_cmos_prefix(reg); - retval = __raw_readb(vrtc_virt_base + (reg << 2)); - lock_cmos_suffix(reg); - return retval; -} -EXPORT_SYMBOL_GPL(vrtc_cmos_read); - -void vrtc_cmos_write(unsigned char val, unsigned char reg) -{ - if (reg > 0xd || !vrtc_virt_base) - return; - - lock_cmos_prefix(reg); - __raw_writeb(val, vrtc_virt_base + (reg << 2)); - lock_cmos_suffix(reg); -} -EXPORT_SYMBOL_GPL(vrtc_cmos_write); - -void vrtc_get_time(struct timespec *now) -{ - u8 sec, min, hour, mday, mon; - unsigned long flags; - u32 year; - - spin_lock_irqsave(&rtc_lock, flags); - - while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) - cpu_relax(); - - sec = vrtc_cmos_read(RTC_SECONDS); - min = vrtc_cmos_read(RTC_MINUTES); - hour = vrtc_cmos_read(RTC_HOURS); - mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); - mon = vrtc_cmos_read(RTC_MONTH); - year = vrtc_cmos_read(RTC_YEAR); - - spin_unlock_irqrestore(&rtc_lock, flags); - - /* vRTC YEAR reg contains the offset to 1972 */ - year += 1972; - - printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " - "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - - now->tv_sec = mktime(year, mon, mday, hour, min, sec); - now->tv_nsec = 0; -} - -int vrtc_set_mmss(const struct timespec *now) -{ - unsigned long flags; - struct rtc_time tm; - int year; - int retval = 0; - - rtc_time_to_tm(now->tv_sec, &tm); - if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { - /* - * tm.year is the number of years since 1900, and the - * vrtc need the years since 1972. - */ - year = tm.tm_year - 72; - spin_lock_irqsave(&rtc_lock, flags); - vrtc_cmos_write(year, RTC_YEAR); - vrtc_cmos_write(tm.tm_mon, RTC_MONTH); - vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH); - vrtc_cmos_write(tm.tm_hour, RTC_HOURS); - vrtc_cmos_write(tm.tm_min, RTC_MINUTES); - vrtc_cmos_write(tm.tm_sec, RTC_SECONDS); - spin_unlock_irqrestore(&rtc_lock, flags); - } else { - printk(KERN_ERR - "%s: Invalid vRTC value: write of %lx to vRTC failed\n", - __FUNCTION__, now->tv_sec); - retval = -EINVAL; - } - return retval; -} - -void __init mrst_rtc_init(void) -{ - unsigned long vrtc_paddr; - - sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); - - vrtc_paddr = sfi_mrtc_array[0].phys_addr; - if (!sfi_mrtc_num || !vrtc_paddr) - return; - - vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC, - vrtc_paddr); - x86_platform.get_wallclock = vrtc_get_time; - x86_platform.set_wallclock = vrtc_set_mmss; -} - -/* - * The Moorestown platform has a memory mapped virtual RTC device that emulates - * the programming interface of the RTC. - */ - -static struct resource vrtc_resources[] = { - [0] = { - .flags = IORESOURCE_MEM, - }, - [1] = { - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device vrtc_device = { - .name = "rtc_mrst", - .id = -1, - .resource = vrtc_resources, - .num_resources = ARRAY_SIZE(vrtc_resources), -}; - -/* Register the RTC device if appropriate */ -static int __init mrst_device_create(void) -{ - /* No Moorestown, no device */ - if (!mrst_identify_cpu()) - return -ENODEV; - /* No timer, no device */ - if (!sfi_mrtc_num) - return -ENODEV; - - /* iomem resource */ - vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr; - vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr + - MRST_VRTC_MAP_SZ; - /* irq resource */ - vrtc_resources[1].start = sfi_mrtc_array[0].irq; - vrtc_resources[1].end = sfi_mrtc_array[0].irq; - - return platform_device_register(&vrtc_device); -} - -module_init(mrst_device_create); diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index fd332c533947..049f92a9379d 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o obj-$(CONFIG_OLPC_XO1_PM) += olpc-xo1-pm.o xo1-wakeup.o obj-$(CONFIG_OLPC_XO1_RTC) += olpc-xo1-rtc.o diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c index ff0174dda810..424eeae12759 100644 --- a/arch/x86/platform/olpc/olpc-xo1-pm.c +++ b/arch/x86/platform/olpc/olpc-xo1-pm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Support for power management features of the OLPC XO-1 laptop * @@ -5,18 +6,12 @@ * Copyright (C) 2010 One Laptop per Child * Copyright (C) 2006 Red Hat, Inc. * Copyright (C) 2006 Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/cs5535.h> #include <linux/platform_device.h> #include <linux/export.h> #include <linux/pm.h> -#include <linux/mfd/core.h> #include <linux/suspend.h> #include <linux/olpc-ec.h> @@ -75,9 +70,9 @@ static int xo1_power_state_enter(suspend_state_t pm_state) return 0; } -asmlinkage int xo1_do_sleep(u8 sleep_state) +asmlinkage __visible int xo1_do_sleep(u8 sleep_state) { - void *pgd_addr = __va(read_cr3()); + void *pgd_addr = __va(read_cr3_pa()); /* Program wakeup mask (using dword access to CS5536_PM1_EN) */ outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS); @@ -124,16 +119,11 @@ static const struct platform_suspend_ops xo1_suspend_ops = { static int xo1_pm_probe(struct platform_device *pdev) { struct resource *res; - int err; /* don't run on non-XOs */ if (!machine_is_olpc()) return -ENODEV; - err = mfd_cell_enable(pdev); - if (err) - return err; - res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!res) { dev_err(&pdev->dev, "can't fetch device resource info\n"); @@ -154,23 +144,19 @@ static int xo1_pm_probe(struct platform_device *pdev) return 0; } -static int xo1_pm_remove(struct platform_device *pdev) +static void xo1_pm_remove(struct platform_device *pdev) { - mfd_cell_disable(pdev); - if (strcmp(pdev->name, "cs5535-pms") == 0) pms_base = 0; else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0) acpi_base = 0; pm_power_off = NULL; - return 0; } static struct platform_driver cs5535_pms_driver = { .driver = { .name = "cs5535-pms", - .owner = THIS_MODULE, }, .probe = xo1_pm_probe, .remove = xo1_pm_remove, @@ -179,7 +165,6 @@ static struct platform_driver cs5535_pms_driver = { static struct platform_driver cs5535_acpi_driver = { .driver = { .name = "olpc-xo1-pm-acpi", - .owner = THIS_MODULE, }, .probe = xo1_pm_probe, .remove = xo1_pm_remove, diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c index a2b4efddd61a..ee77d57bcab7 100644 --- a/arch/x86/platform/olpc/olpc-xo1-rtc.c +++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Support for OLPC XO-1 Real Time Clock (RTC) * * Copyright (C) 2011 One Laptop per Child - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/mc146818rtc.h> @@ -16,6 +12,7 @@ #include <asm/msr.h> #include <asm/olpc.h> +#include <asm/x86_init.h> static void rtc_wake_on(struct device *dev) { @@ -67,14 +64,16 @@ static int __init xo1_rtc_init(void) of_node_put(node); pr_info("olpc-xo1-rtc: Initializing OLPC XO-1 RTC\n"); - rdmsrl(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm); - rdmsrl(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm); - rdmsrl(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century); + rdmsrq(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm); + rdmsrq(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm); + rdmsrq(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century); r = platform_device_register(&xo1_rtc_device); if (r) return r; + x86_platform.legacy.rtc = 0; + device_init_wakeup(&xo1_rtc_device.dev, 1); return 0; } diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c index 9a2e590dd202..30751b42d54e 100644 --- a/arch/x86/platform/olpc/olpc-xo1-sci.c +++ b/arch/x86/platform/olpc/olpc-xo1-sci.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Support for OLPC XO-1 System Control Interrupts (SCI) * * Copyright (C) 2010 One Laptop per Child * Copyright (C) 2006 Red Hat, Inc. * Copyright (C) 2006 Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/cs5535.h> @@ -18,8 +14,6 @@ #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/pm.h> -#include <linux/pm_wakeup.h> -#include <linux/mfd/core.h> #include <linux/power_supply.h> #include <linux/suspend.h> #include <linux/workqueue.h> @@ -57,21 +51,21 @@ static const char * const lid_wake_mode_names[] = { static void battery_status_changed(void) { - struct power_supply *psy = power_supply_get_by_name("olpc-battery"); + struct power_supply *psy = power_supply_get_by_name("olpc_battery"); if (psy) { power_supply_changed(psy); - put_device(psy->dev); + power_supply_put(psy); } } static void ac_status_changed(void) { - struct power_supply *psy = power_supply_get_by_name("olpc-ac"); + struct power_supply *psy = power_supply_get_by_name("olpc_ac"); if (psy) { power_supply_changed(psy); - put_device(psy->dev); + power_supply_put(psy); } } @@ -85,7 +79,7 @@ static void send_ebook_state(void) return; } - if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state) + if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state) return; /* Nothing new to report. */ input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state); @@ -109,7 +103,7 @@ static void detect_lid_state(void) * the edge detector hookup on the gpio inputs on the geode is * odd, to say the least. See http://dev.laptop.org/ticket/5703 * for details, but in a nutshell: we don't use the edge - * detectors. instead, we make use of an anomoly: with the both + * detectors. instead, we make use of an anomaly: with the both * edge detectors turned off, we still get an edge event on a * positive edge transition. to take advantage of this, we use the * front-end inverter to ensure that that's the edge we're always @@ -161,6 +155,12 @@ static ssize_t lid_wake_mode_set(struct device *dev, static DEVICE_ATTR(lid_wake_mode, S_IWUSR | S_IRUGO, lid_wake_mode_show, lid_wake_mode_set); +static struct attribute *lid_attrs[] = { + &dev_attr_lid_wake_mode.attr, + NULL, +}; +ATTRIBUTE_GROUPS(lid); + /* * Process all items in the EC's SCI queue. * @@ -325,7 +325,7 @@ static int setup_sci_interrupt(struct platform_device *pdev) dev_info(&pdev->dev, "SCI unmapped. Mapping to IRQ 3\n"); sci_irq = 3; lo |= 0x00300000; - wrmsrl(0x51400020, lo); + wrmsrq(0x51400020, lo); } /* Select level triggered in PIC */ @@ -514,17 +514,8 @@ static int setup_lid_switch(struct platform_device *pdev) goto err_register; } - r = device_create_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode); - if (r) { - dev_err(&pdev->dev, "failed to create wake mode attr: %d\n", r); - goto err_create_attr; - } - return 0; -err_create_attr: - input_unregister_device(lid_switch_idev); - lid_switch_idev = NULL; err_register: input_free_device(lid_switch_idev); return r; @@ -532,7 +523,6 @@ err_register: static void free_lid_switch(void) { - device_remove_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode); input_unregister_device(lid_switch_idev); } @@ -545,10 +535,6 @@ static int xo1_sci_probe(struct platform_device *pdev) if (!machine_is_olpc()) return -ENODEV; - r = mfd_cell_enable(pdev); - if (r) - return r; - res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!res) { dev_err(&pdev->dev, "can't fetch device resource info\n"); @@ -611,9 +597,8 @@ err_ebook: return r; } -static int xo1_sci_remove(struct platform_device *pdev) +static void xo1_sci_remove(struct platform_device *pdev) { - mfd_cell_disable(pdev); free_irq(sci_irq, pdev); cancel_work_sync(&sci_work); free_ec_sci(); @@ -622,12 +607,12 @@ static int xo1_sci_remove(struct platform_device *pdev) free_ebook_switch(); free_power_button(); acpi_base = 0; - return 0; } static struct platform_driver xo1_sci_driver = { .driver = { .name = "olpc-xo1-sci-acpi", + .dev_groups = lid_groups, }, .probe = xo1_sci_probe, .remove = xo1_sci_remove, diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index fef7d0ba7e3a..68244a3422d1 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Support for OLPC XO-1.5 System Control Interrupts (SCI) * * Copyright (C) 2009-2010 One Laptop per Child - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/device.h> @@ -15,8 +11,7 @@ #include <linux/power_supply.h> #include <linux/olpc-ec.h> -#include <acpi/acpi_bus.h> -#include <acpi/acpi_drivers.h> +#include <linux/acpi.h> #include <asm/olpc.h> #define DRV_NAME "olpc-xo15-sci" @@ -32,7 +27,7 @@ static bool lid_wake_on_close; * wake-on-close. This is implemented as standard by the XO-1.5 DSDT. * * We provide here a sysfs attribute that will additionally enable - * wake-on-close behavior. This is useful (e.g.) when we oportunistically + * wake-on-close behavior. This is useful (e.g.) when we opportunistically * suspend with the display running; if the lid is then closed, we want to * wake up to turn the display off. * @@ -40,18 +35,11 @@ static bool lid_wake_on_close; */ static int set_lid_wake_behavior(bool wake_on_close) { - struct acpi_object_list arg_list; - union acpi_object arg; acpi_status status; - arg_list.count = 1; - arg_list.pointer = &arg; - arg.type = ACPI_TYPE_INTEGER; - arg.integer.value = wake_on_close; - - status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL); + status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close); if (ACPI_FAILURE(status)) { - pr_warning(PFX "failed to set lid behavior\n"); + pr_warn(PFX "failed to set lid behavior\n"); return 1; } @@ -87,21 +75,21 @@ static struct kobj_attribute lid_wake_on_close_attr = static void battery_status_changed(void) { - struct power_supply *psy = power_supply_get_by_name("olpc-battery"); + struct power_supply *psy = power_supply_get_by_name("olpc_battery"); if (psy) { power_supply_changed(psy); - put_device(psy->dev); + power_supply_put(psy); } } static void ac_status_changed(void) { - struct power_supply *psy = power_supply_get_by_name("olpc-ac"); + struct power_supply *psy = power_supply_get_by_name("olpc_ac"); if (psy) { power_supply_changed(psy); - put_device(psy->dev); + power_supply_put(psy); } } @@ -195,15 +183,15 @@ err_sysfs: return r; } -static int xo15_sci_remove(struct acpi_device *device) +static void xo15_sci_remove(struct acpi_device *device) { acpi_disable_gpe(NULL, xo15_sci_gpe); acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); cancel_work_sync(&sci_work); sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr); - return 0; } +#ifdef CONFIG_PM_SLEEP static int xo15_sci_resume(struct device *dev) { /* Enable all EC events */ @@ -215,6 +203,7 @@ static int xo15_sci_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume); diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index 27376081ddec..1d4a00e767ec 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -1,18 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Support for the OLPC DCON and OLPC EC access * * Copyright © 2006 Advanced Micro Devices, Inc. * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/string.h> @@ -30,9 +26,6 @@ struct olpc_platform_t olpc_platform_info; EXPORT_SYMBOL_GPL(olpc_platform_info); -/* EC event mask to be applied during suspend (defining wakeup sources). */ -static u16 ec_wakeup_mask; - /* what the timeout *should* be (in ms) */ #define EC_BASE_TIMEOUT 20 @@ -186,83 +179,6 @@ err: return ret; } -void olpc_ec_wakeup_set(u16 value) -{ - ec_wakeup_mask |= value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set); - -void olpc_ec_wakeup_clear(u16 value) -{ - ec_wakeup_mask &= ~value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear); - -/* - * Returns true if the compile and runtime configurations allow for EC events - * to wake the system. - */ -bool olpc_ec_wakeup_available(void) -{ - if (!machine_is_olpc()) - return false; - - /* - * XO-1 EC wakeups are available when olpc-xo1-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO1_SCI - if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */ - return true; -#endif - - /* - * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO15_SCI - if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */ - return true; -#endif - - return false; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available); - -int olpc_ec_mask_write(u16 bits) -{ - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word = cpu_to_be16(bits); - return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2, - NULL, 0); - } else { - unsigned char ec_byte = bits & 0xff; - return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0); - } -} -EXPORT_SYMBOL_GPL(olpc_ec_mask_write); - -int olpc_ec_sci_query(u16 *sci_value) -{ - int ret; - - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word; - ret = olpc_ec_cmd(EC_EXT_SCI_QUERY, - NULL, 0, (void *) &ec_word, 2); - if (ret == 0) - *sci_value = be16_to_cpu(ec_word); - } else { - unsigned char ec_byte; - ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1); - if (ret == 0) - *sci_value = ec_byte; - } - - return ret; -} -EXPORT_SYMBOL_GPL(olpc_ec_sci_query); - static bool __init check_ofw_architecture(struct device_node *root) { const char *olpc_arch; @@ -296,6 +212,10 @@ static bool __init platform_detect(void) if (success) { olpc_platform_info.boardrev = get_board_revision(root); olpc_platform_info.flags |= OLPC_F_PRESENT; + + pr_info("OLPC board revision %s%X\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4); } of_node_put(root); @@ -311,33 +231,12 @@ static int __init add_xo1_platform_devices(void) return PTR_ERR(pdev); pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - return 0; + return PTR_ERR_OR_ZERO(pdev); } -static int olpc_xo1_ec_probe(struct platform_device *pdev) -{ - /* get the EC revision */ - olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, - (unsigned char *) &olpc_platform_info.ecver, 1); - - /* EC version 0x5f adds support for wide SCI mask */ - if (olpc_platform_info.ecver >= 0x5f) - olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI; - - pr_info("OLPC board revision %s%X (EC=%x)\n", - ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", - olpc_platform_info.boardrev >> 4, - olpc_platform_info.ecver); - - return 0; -} static int olpc_xo1_ec_suspend(struct platform_device *pdev) { - olpc_ec_mask_write(ec_wakeup_mask); - /* * Squelch SCIs while suspended. This is a fix for * <http://dev.laptop.org/ticket/1835>. @@ -361,15 +260,27 @@ static int olpc_xo1_ec_resume(struct platform_device *pdev) } static struct olpc_ec_driver ec_xo1_driver = { - .probe = olpc_xo1_ec_probe, .suspend = olpc_xo1_ec_suspend, .resume = olpc_xo1_ec_resume, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO1_SCI + /* + * XO-1 EC wakeups are available when olpc-xo1-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static struct olpc_ec_driver ec_xo1_5_driver = { - .probe = olpc_xo1_ec_probe, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO15_SCI + /* + * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static int __init olpc_init(void) diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index d6ee92986920..e108ce7dad6a 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * OLPC-specific OFW device tree support code. * @@ -9,17 +10,11 @@ * * Adapted for sparc by David S. Miller davem@davemloft.net * Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/of.h> -#include <linux/of_platform.h> #include <linux/of_pdt.h> #include <asm/olpc.h> #include <asm/olpc_ofw.h> @@ -136,13 +131,12 @@ void * __init prom_early_alloc(unsigned long size) const size_t chunk_size = max(PAGE_SIZE, size); /* - * To mimimize the number of allocations, grab at least + * To minimize the number of allocations, grab at least * PAGE_SIZE of memory (that's an arbitrary choice that's * fast enough on the platforms we care about while minimizing * wasted bootmem) and hand off chunks of it to callers. */ - res = alloc_bootmem(chunk_size); - BUG_ON(!res); + res = memblock_alloc_or_panic(chunk_size, SMP_CACHE_BYTES); prom_early_allocated += chunk_size; memset(res, 0, chunk_size); free_mem = chunk_size; @@ -218,10 +212,25 @@ static u32 __init olpc_dt_get_board_revision(void) return be32_to_cpu(rev); } -void __init olpc_dt_fixup(void) +static int __init olpc_dt_compatible_match(phandle node, const char *compat) +{ + char buf[64], *p; + int plen; + + plen = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf)); + if (plen <= 0) + return 0; + + for (p = buf; p < buf + plen; p += strlen(p) + 1) { + if (strcmp(p, compat) == 0) + return 1; + } + + return 0; +} + +static void __init olpc_dt_fixup(void) { - int r; - char buf[64]; phandle node; u32 board_rev; @@ -229,41 +238,66 @@ void __init olpc_dt_fixup(void) if (!node) return; - /* - * If the battery node has a compatible property, we are running a new - * enough firmware and don't have fixups to make. - */ - r = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf)); - if (r > 0) - return; - - pr_info("PROM DT: Old firmware detected, applying fixes\n"); - - /* Add olpc,xo1-battery compatible marker to battery node */ - olpc_dt_interpret("\" /battery@0\" find-device" - " \" olpc,xo1-battery\" +compatible" - " device-end"); - board_rev = olpc_dt_get_board_revision(); if (!board_rev) return; if (board_rev >= olpc_board_pre(0xd0)) { - /* XO-1.5: add dcon device */ - olpc_dt_interpret("\" /pci/display@1\" find-device" - " new-device" - " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible" - " finish-device device-end"); + /* XO-1.5 */ + + if (olpc_dt_compatible_match(node, "olpc,xo1.5-battery")) + return; + + /* Add olpc,xo1.5-battery compatible marker to battery node */ + olpc_dt_interpret("\" /battery@0\" find-device"); + olpc_dt_interpret(" \" olpc,xo1.5-battery\" +compatible"); + olpc_dt_interpret("device-end"); + + if (olpc_dt_compatible_match(node, "olpc,xo1-battery")) { + /* + * If we have a olpc,xo1-battery compatible, then we're + * running a new enough firmware that already has + * the dcon node. + */ + return; + } + + /* Add dcon device */ + olpc_dt_interpret("\" /pci/display@1\" find-device"); + olpc_dt_interpret(" new-device"); + olpc_dt_interpret(" \" dcon\" device-name"); + olpc_dt_interpret(" \" olpc,xo1-dcon\" +compatible"); + olpc_dt_interpret(" finish-device"); + olpc_dt_interpret("device-end"); } else { - /* XO-1: add dcon device, mark RTC as olpc,xo1-rtc */ - olpc_dt_interpret("\" /pci/display@1,1\" find-device" - " new-device" - " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible" - " finish-device device-end" - " \" /rtc\" find-device" - " \" olpc,xo1-rtc\" +compatible" - " device-end"); + /* XO-1 */ + + if (olpc_dt_compatible_match(node, "olpc,xo1-battery")) { + /* + * If we have a olpc,xo1-battery compatible, then we're + * running a new enough firmware that already has + * the dcon and RTC nodes. + */ + return; + } + + /* Add dcon device, mark RTC as olpc,xo1-rtc */ + olpc_dt_interpret("\" /pci/display@1,1\" find-device"); + olpc_dt_interpret(" new-device"); + olpc_dt_interpret(" \" dcon\" device-name"); + olpc_dt_interpret(" \" olpc,xo1-dcon\" +compatible"); + olpc_dt_interpret(" finish-device"); + olpc_dt_interpret("device-end"); + + olpc_dt_interpret("\" /rtc\" find-device"); + olpc_dt_interpret(" \" olpc,xo1-rtc\" +compatible"); + olpc_dt_interpret("device-end"); } + + /* Add olpc,xo1-battery compatible marker to battery node */ + olpc_dt_interpret("\" /battery@0\" find-device"); + olpc_dt_interpret(" \" olpc,xo1-battery\" +compatible"); + olpc_dt_interpret("device-end"); } void __init olpc_dt_build_devicetree(void) @@ -285,20 +319,3 @@ void __init olpc_dt_build_devicetree(void) pr_info("PROM DT: Built device tree with %u bytes of memory.\n", prom_early_allocated); } - -/* A list of DT node/bus matches that we want to expose as platform devices */ -static struct of_device_id __initdata of_ids[] = { - { .compatible = "olpc,xo1-battery" }, - { .compatible = "olpc,xo1-dcon" }, - { .compatible = "olpc,xo1-rtc" }, - {}, -}; - -static int __init olpc_create_platform_devices(void) -{ - if (machine_is_olpc()) - return of_platform_bus_probe(NULL, of_ids, NULL); - else - return 0; -} -device_initcall(olpc_create_platform_devices); diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c index e7604f62870d..6bab0f0aa8f3 100644 --- a/arch/x86/platform/olpc/olpc_ofw.c +++ b/arch/x86/platform/olpc/olpc_ofw.c @@ -1,10 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/spinlock_types.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/page.h> #include <asm/setup.h> #include <asm/io.h> -#include <asm/pgtable.h> +#include <asm/cpufeature.h> +#include <asm/special_insns.h> #include <asm/olpc_ofw.h> /* address of OFW callback interface; will be NULL if OFW isn't found */ diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S index 948deb289753..3a5abffe5660 100644 --- a/arch/x86/platform/olpc/xo1-wakeup.S +++ b/arch/x86/platform/olpc/xo1-wakeup.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ .text #include <linux/linkage.h> #include <asm/segment.h> @@ -76,7 +77,7 @@ save_registers: pushfl popl saved_context_eflags - ret + RET restore_registers: movl saved_context_ebp, %ebp @@ -87,9 +88,9 @@ restore_registers: pushl saved_context_eflags popfl - ret + RET -ENTRY(do_olpc_suspend_lowlevel) +SYM_CODE_START(do_olpc_suspend_lowlevel) call save_processor_state call save_registers @@ -108,7 +109,8 @@ ret_point: call restore_registers call restore_processor_state - ret + RET +SYM_CODE_END(do_olpc_suspend_lowlevel) .data saved_gdt: .long 0,0 diff --git a/arch/x86/platform/pvh/Makefile b/arch/x86/platform/pvh/Makefile new file mode 100644 index 000000000000..c43fb7964dc4 --- /dev/null +++ b/arch/x86/platform/pvh/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +OBJECT_FILES_NON_STANDARD_head.o := y +KASAN_SANITIZE := n + +obj-$(CONFIG_PVH) += enlighten.o +obj-$(CONFIG_PVH) += head.o diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c new file mode 100644 index 000000000000..2263885d16ba --- /dev/null +++ b/arch/x86/platform/pvh/enlighten.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/acpi.h> + +#include <xen/hvc-console.h> + +#include <asm/bootparam.h> +#include <asm/io_apic.h> +#include <asm/hypervisor.h> +#include <asm/e820/api.h> +#include <asm/x86_init.h> + +#include <asm/xen/interface.h> + +#include <xen/xen.h> +#include <xen/interface/hvm/start_info.h> + +/* + * PVH variables. + * + * pvh_bootparams and pvh_start_info need to live in a data segment since + * they are used after startup_{32|64}, which clear .bss, are invoked. + */ +struct boot_params __initdata pvh_bootparams; +struct hvm_start_info __initdata pvh_start_info; + +const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info); + +static u64 __init pvh_get_root_pointer(void) +{ + return pvh_start_info.rsdp_paddr; +} + +/* + * Xen guests are able to obtain the memory map from the hypervisor via the + * HYPERVISOR_memory_op hypercall. + * If we are trying to boot a Xen PVH guest, it is expected that the kernel + * will have been configured to provide an override for this routine to do + * just that. + */ +void __init __weak mem_map_via_hcall(struct boot_params *ptr __maybe_unused) +{ + xen_raw_printk("Error: Could not find memory map\n"); + BUG(); +} + +static void __init init_pvh_bootparams(bool xen_guest) +{ + if ((pvh_start_info.version > 0) && (pvh_start_info.memmap_entries)) { + struct hvm_memmap_table_entry *ep; + int i; + + ep = __va(pvh_start_info.memmap_paddr); + pvh_bootparams.e820_entries = pvh_start_info.memmap_entries; + + for (i = 0; i < pvh_bootparams.e820_entries ; i++, ep++) { + pvh_bootparams.e820_table[i].addr = ep->addr; + pvh_bootparams.e820_table[i].size = ep->size; + pvh_bootparams.e820_table[i].type = ep->type; + } + } else if (xen_guest) { + mem_map_via_hcall(&pvh_bootparams); + } else { + /* Non-xen guests are not supported by version 0 */ + BUG(); + } + + if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) { + pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr = + ISA_START_ADDRESS; + pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size = + ISA_END_ADDRESS - ISA_START_ADDRESS; + pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type = + E820_TYPE_RESERVED; + pvh_bootparams.e820_entries++; + } else + xen_raw_printk("Warning: Can fit ISA range into e820\n"); + + pvh_bootparams.hdr.cmd_line_ptr = + pvh_start_info.cmdline_paddr; + + /* The first module is always ramdisk. */ + if (pvh_start_info.nr_modules) { + struct hvm_modlist_entry *modaddr = + __va(pvh_start_info.modlist_paddr); + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr; + pvh_bootparams.hdr.ramdisk_size = modaddr->size; + } + + /* + * See Documentation/arch/x86/boot.rst. + * + * Version 2.12 supports Xen entry point but we will use default x86/PC + * environment (i.e. hardware_subarch 0). + */ + pvh_bootparams.hdr.version = (2 << 8) | 12; + pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0; + + x86_init.acpi.get_root_pointer = pvh_get_root_pointer; +} + +/* + * If we are trying to boot a Xen PVH guest, it is expected that the kernel + * will have been configured to provide the required override for this routine. + */ +void __init __weak xen_pvh_init(struct boot_params *boot_params) +{ + xen_raw_printk("Error: Missing xen PVH initialization\n"); + BUG(); +} + +static void __init hypervisor_specific_init(bool xen_guest) +{ + if (xen_guest) + xen_pvh_init(&pvh_bootparams); +} + +/* + * This routine (and those that it might call) should not use + * anything that lives in .bss since that segment will be cleared later. + */ +void __init xen_prepare_pvh(void) +{ + + u32 msr = xen_cpuid_base(); + bool xen_guest = !!msr; + + if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) { + xen_raw_printk("Error: Unexpected magic value (0x%08x)\n", + pvh_start_info.magic); + BUG(); + } + + /* + * This must not compile to "call memset" because memset() may be + * instrumented. + */ + __builtin_memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); + + hypervisor_specific_init(xen_guest); + + init_pvh_bootparams(xen_guest); +} diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S new file mode 100644 index 000000000000..344030c1a81d --- /dev/null +++ b/arch/x86/platform/pvh/head.S @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. + */ + + .code32 + .text +#ifdef CONFIG_X86_32 +#define _pa(x) ((x) - __START_KERNEL_map) +#endif +#define rva(x) ((x) - pvh_start_xen) + +#include <linux/elfnote.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/desc_defs.h> +#include <asm/segment.h> +#include <asm/asm.h> +#include <asm/boot.h> +#include <asm/pgtable.h> +#include <asm/processor-flags.h> +#include <asm/msr.h> +#include <asm/nospec-branch.h> +#include <xen/interface/elfnote.h> + + __INIT + +/* + * Entry point for PVH guests. + * + * Xen ABI specifies the following register state when we come here: + * + * - `ebx`: contains the physical memory address where the loader has placed + * the boot start info structure. + * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared. + * - `cr4`: all bits are cleared. + * - `cs `: must be a 32-bit read/execute code segment with a base of `0` + * and a limit of `0xFFFFFFFF`. The selector value is unspecified. + * - `ds`, `es`: must be a 32-bit read/write data segment with a base of + * `0` and a limit of `0xFFFFFFFF`. The selector values are all + * unspecified. + * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit + * of '0x67'. + * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared. + * Bit 8 (TF) must be cleared. Other bits are all unspecified. + * + * All other processor registers and flag bits are unspecified. The OS is in + * charge of setting up its own stack, GDT and IDT. + */ + +#define PVH_GDT_ENTRY_CS 1 +#define PVH_GDT_ENTRY_DS 2 +#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8) +#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) + +SYM_CODE_START(pvh_start_xen) + UNWIND_HINT_END_OF_STACK + cld + + /* + * See the comment for startup_32 for more details. We need to + * execute a call to get the execution address to be position + * independent, but we don't have a stack. Save and restore the + * magic field of start_info in ebx, and use that as the stack. + */ + mov (%ebx), %eax + leal 4(%ebx), %esp + ANNOTATE_INTRA_FUNCTION_CALL + call 1f +1: popl %ebp + mov %eax, (%ebx) + subl $rva(1b), %ebp + movl $0, %esp + + leal rva(gdt)(%ebp), %eax + addl %eax, 2(%eax) + lgdt (%eax) + + mov $PVH_DS_SEL,%eax + mov %eax,%ds + mov %eax,%es + mov %eax,%ss + + /* Stash hvm_start_info. */ + leal rva(pvh_start_info)(%ebp), %edi + mov %ebx, %esi + movl rva(pvh_start_info_sz)(%ebp), %ecx + shr $2,%ecx + rep movsl + + leal rva(early_stack_end)(%ebp), %esp + + /* Enable PAE mode. */ + mov %cr4, %eax + orl $X86_CR4_PAE, %eax + mov %eax, %cr4 + +#ifdef CONFIG_X86_64 + /* Enable Long mode. */ + mov $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* + * Reuse the non-relocatable symbol emitted for the ELF note to + * subtract the build time physical address of pvh_start_xen() from + * its actual runtime address, without relying on absolute 32-bit ELF + * relocations, as these are not supported by the linker when running + * in -pie mode, and should be avoided in .head.text in general. + */ + mov %ebp, %ebx + subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx + jz .Lpagetable_done + + /* + * Store the resulting load offset in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). + */ + movl %ebx, rva(phys_base)(%ebp) + + /* Fixup page-tables for relocation. */ + leal rva(pvh_init_top_pgt)(%ebp), %edi + movl $PTRS_PER_PGD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + + /* L3 ident has a single entry. */ + leal rva(pvh_level3_ident_pgt)(%ebp), %edi + addl %ebx, 0x00(%edi) + + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi + addl %ebx, (PAGE_SIZE - 16)(%edi) + addl %ebx, (PAGE_SIZE - 8)(%edi) + + /* pvh_level2_ident_pgt is fine - large pages */ + + /* pvh_level2_kernel_pgt needs adjustment - large pages */ + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi + movl $PTRS_PER_PMD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + +.Lpagetable_done: + /* Enable pre-constructed page tables. */ + leal rva(pvh_init_top_pgt)(%ebp), %eax + mov %eax, %cr3 + mov $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + /* Jump to 64-bit mode. */ + pushl $PVH_CS_SEL + leal rva(1f)(%ebp), %eax + pushl %eax + lretl + + /* 64-bit entry point. */ + .code64 +1: + UNWIND_HINT_END_OF_STACK + + /* + * Set up GSBASE. + * Note that on SMP the boot CPU uses the init data section until + * the per-CPU areas are set up. + */ + movl $MSR_GS_BASE,%ecx + xorl %eax, %eax + xorl %edx, %edx + wrmsr + + /* Call xen_prepare_pvh() via the kernel virtual mapping */ + leaq xen_prepare_pvh(%rip), %rax + subq phys_base(%rip), %rax + addq $__START_KERNEL_map, %rax + ANNOTATE_RETPOLINE_SAFE + call *%rax + + /* startup_64 expects boot_params in %rsi. */ + lea pvh_bootparams(%rip), %rsi + jmp startup_64 + +#else /* CONFIG_X86_64 */ + + call mk_early_pgtbl_32 + + mov $_pa(initial_page_table), %eax + mov %eax, %cr3 + + mov %cr0, %eax + or $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + ljmp $PVH_CS_SEL, $1f +1: + call xen_prepare_pvh + mov $_pa(pvh_bootparams), %esi + + /* startup_32 doesn't expect paging and PAE to be on. */ + ljmp $PVH_CS_SEL, $_pa(2f) +2: + mov %cr0, %eax + and $~X86_CR0_PG, %eax + mov %eax, %cr0 + mov %cr4, %eax + and $~X86_CR4_PAE, %eax + mov %eax, %cr4 + + ljmp $PVH_CS_SEL, $_pa(startup_32) +#endif +SYM_CODE_END(pvh_start_xen) + + .section ".init.data","aw" + .balign 8 +SYM_DATA_START_LOCAL(gdt) + .word gdt_end - gdt_start - 1 + .long gdt_start - gdt + .word 0 +SYM_DATA_END(gdt) +SYM_DATA_START_LOCAL(gdt_start) + .quad 0x0000000000000000 /* NULL descriptor */ +#ifdef CONFIG_X86_64 + .quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */ +#else + .quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */ +#endif + .quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */ +SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end) + + .balign 16 +SYM_DATA_START_LOCAL(early_stack) + .fill BOOT_STACK_SIZE, 1, 0 +SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) + +#ifdef CONFIG_X86_64 +/* + * Xen PVH needs a set of identity mapped and kernel high mapping + * page tables. pvh_start_xen starts running on the identity mapped + * page tables, but xen_prepare_pvh calls into the high mapping. + * These page tables need to be relocatable and are only used until + * startup_64 transitions to init_top_pgt. + */ +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(pvh_init_top_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .fill 511, 8, 0 +SYM_DATA_END(pvh_level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) + /* + * Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(pvh_level2_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) + .fill L3_START_KERNEL, 8, 0 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .quad 0 /* no fixmap */ +SYM_DATA_END(pvh_level3_kernel_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) + /* + * Kernel high mapping. + * + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, + * 512 MiB otherwise. + * + * (NOTE: after that starts the module area, see MODULES_VADDR.) + * + * This table is eventually used by the kernel during normal runtime. + * Care must be taken to clear out undesired bits later, like _PAGE_RW + * or _PAGE_GLOBAL in some cases. + */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) +SYM_DATA_END(pvh_level2_kernel_pgt) + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, + .long CONFIG_PHYSICAL_ALIGN; + .long LOAD_PHYSICAL_ADDR; + .long KERNEL_IMAGE_SIZE - 1) +#endif + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry; + xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .) diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile index 762b4c7f4314..981b3e4302e6 100644 --- a/arch/x86/platform/scx200/Makefile +++ b/arch/x86/platform/scx200/Makefile @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c index 3dc9aee41d91..80662b72035d 100644 --- a/arch/x86/platform/scx200/scx200_32.c +++ b/arch/x86/platform/scx200/scx200_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> * diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile deleted file mode 100644 index cc5db1168a5e..000000000000 --- a/arch/x86/platform/sfi/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_SFI) += sfi.o diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c deleted file mode 100644 index bcd1a703e3e6..000000000000 --- a/arch/x86/platform/sfi/sfi.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * sfi.c - x86 architecture SFI support. - * - * Copyright (c) 2009, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#define KMSG_COMPONENT "SFI" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/acpi.h> -#include <linux/init.h> -#include <linux/sfi.h> -#include <linux/io.h> - -#include <asm/io_apic.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/apic.h> - -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; - -/* All CPUs enumerated by SFI must be present and enabled */ -static void __init mp_sfi_register_lapic(u8 id) -{ - if (MAX_LOCAL_APIC - id <= 0) { - pr_warning("Processor #%d invalid (max %d)\n", - id, MAX_LOCAL_APIC); - return; - } - - pr_info("registering lapic[%d]\n", id); - - generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); -} - -static int __init sfi_parse_cpus(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_cpu_table_entry *pentry; - int i; - int cpu_num; - - sb = (struct sfi_table_simple *)table; - cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry); - pentry = (struct sfi_cpu_table_entry *)sb->pentry; - - for (i = 0; i < cpu_num; i++) { - mp_sfi_register_lapic(pentry->apic_id); - pentry++; - } - - smp_found_config = 1; - return 0; -} -#endif /* CONFIG_X86_LOCAL_APIC */ - -#ifdef CONFIG_X86_IO_APIC - -static int __init sfi_parse_ioapic(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_apic_table_entry *pentry; - int i, num; - - sb = (struct sfi_table_simple *)table; - num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); - pentry = (struct sfi_apic_table_entry *)sb->pentry; - - for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_top); - pentry++; - } - - WARN(pic_mode, KERN_WARNING - "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n"); - pic_mode = 0; - return 0; -} -#endif /* CONFIG_X86_IO_APIC */ - -/* - * sfi_platform_init(): register lapics & io-apics - */ -int __init sfi_platform_init(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - register_lapic_address(sfi_lapic_addr); - sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus); -#endif -#ifdef CONFIG_X86_IO_APIC - sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic); -#endif - return 0; -} diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile index c54e348c96a7..910fe9e3ffb4 100644 --- a/arch/x86/platform/ts5500/Makefile +++ b/arch/x86/platform/ts5500/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_TS5500) += ts5500.o diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c index 39febb214e8c..0b67da056fd9 100644 --- a/arch/x86/platform/ts5500/ts5500.c +++ b/arch/x86/platform/ts5500/ts5500.c @@ -1,30 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Technologic Systems TS-5500 Single Board Computer support * - * Copyright (C) 2013 Savoir-faire Linux Inc. + * Copyright (C) 2013-2014 Savoir-faire Linux Inc. * Vivien Didelot <vivien.didelot@savoirfairelinux.com> * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) any later - * version. - * - * * This driver registers the Technologic Systems TS-5500 Single Board Computer * (SBC) and its devices, and exposes information to userspace such as jumpers' * state or available options. For further information about sysfs entries, see * Documentation/ABI/testing/sysfs-platform-ts5500. * - * This code actually supports the TS-5500 platform, but it may be extended to - * support similar Technologic Systems x86-based platforms, such as the TS-5600. + * This code may be extended to support similar x86-based platforms. + * Actually, the TS-5500 and TS-5400 are supported. */ #include <linux/delay.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/leds.h> -#include <linux/module.h> -#include <linux/platform_data/gpio-ts5500.h> +#include <linux/init.h> #include <linux/platform_data/max197.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -32,6 +26,7 @@ /* Product code register */ #define TS5500_PRODUCT_CODE_ADDR 0x74 #define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */ +#define TS5400_PRODUCT_CODE 0x40 /* TS-5400 product code */ /* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */ #define TS5500_SRAM_RS485_ADC_ADDR 0x75 @@ -66,6 +61,7 @@ /** * struct ts5500_sbc - TS-5500 board description + * @name: Board model name. * @id: Board product ID. * @sram: Flag for SRAM option. * @rs485: Flag for RS-485 option. @@ -75,6 +71,7 @@ * @jumpers: Bitfield for jumpers' state. */ struct ts5500_sbc { + const char *name; int id; bool sram; bool rs485; @@ -88,7 +85,7 @@ struct ts5500_sbc { static const struct { const char * const string; const ssize_t offset; -} ts5500_signatures[] __initdata = { +} ts5500_signatures[] __initconst = { { "TS-5x00 AMD Elan", 0xb14 }, }; @@ -122,13 +119,16 @@ static int __init ts5500_detect_config(struct ts5500_sbc *sbc) if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500")) return -EBUSY; - tmp = inb(TS5500_PRODUCT_CODE_ADDR); - if (tmp != TS5500_PRODUCT_CODE) { - pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp); + sbc->id = inb(TS5500_PRODUCT_CODE_ADDR); + if (sbc->id == TS5500_PRODUCT_CODE) { + sbc->name = "TS-5500"; + } else if (sbc->id == TS5400_PRODUCT_CODE) { + sbc->name = "TS-5400"; + } else { + pr_err("ts5500: unknown product code 0x%x\n", sbc->id); ret = -ENODEV; goto cleanup; } - sbc->id = tmp; tmp = inb(TS5500_SRAM_RS485_ADC_ADDR); sbc->sram = tmp & TS5500_SRAM; @@ -147,48 +147,52 @@ cleanup: return ret; } -static ssize_t ts5500_show_id(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t name_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct ts5500_sbc *sbc = dev_get_drvdata(dev); - return sprintf(buf, "0x%.2x\n", sbc->id); + return sprintf(buf, "%s\n", sbc->name); } +static DEVICE_ATTR_RO(name); -static ssize_t ts5500_show_jumpers(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t id_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct ts5500_sbc *sbc = dev_get_drvdata(dev); - return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1); + return sprintf(buf, "0x%.2x\n", sbc->id); } +static DEVICE_ATTR_RO(id); -#define TS5500_SHOW(field) \ - static ssize_t ts5500_show_##field(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ - { \ - struct ts5500_sbc *sbc = dev_get_drvdata(dev); \ - return sprintf(buf, "%d\n", sbc->field); \ - } - -TS5500_SHOW(sram) -TS5500_SHOW(rs485) -TS5500_SHOW(adc) -TS5500_SHOW(ereset) -TS5500_SHOW(itr) +static ssize_t jumpers_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); -static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL); -static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL); -static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL); -static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL); -static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL); -static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL); -static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL); + return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1); +} +static DEVICE_ATTR_RO(jumpers); + +#define TS5500_ATTR_BOOL(_field) \ + static ssize_t _field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ + { \ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); \ + \ + return sprintf(buf, "%d\n", sbc->_field); \ + } \ + static DEVICE_ATTR_RO(_field) + +TS5500_ATTR_BOOL(sram); +TS5500_ATTR_BOOL(rs485); +TS5500_ATTR_BOOL(adc); +TS5500_ATTR_BOOL(ereset); +TS5500_ATTR_BOOL(itr); static struct attribute *ts5500_attributes[] = { &dev_attr_id.attr, + &dev_attr_name.attr, &dev_attr_jumpers.attr, &dev_attr_sram.attr, &dev_attr_rs485.attr, @@ -311,12 +315,14 @@ static int __init ts5500_init(void) if (err) goto error; - ts5500_dio1_pdev.dev.parent = &pdev->dev; - if (platform_device_register(&ts5500_dio1_pdev)) - dev_warn(&pdev->dev, "DIO1 block registration failed\n"); - ts5500_dio2_pdev.dev.parent = &pdev->dev; - if (platform_device_register(&ts5500_dio2_pdev)) - dev_warn(&pdev->dev, "DIO2 block registration failed\n"); + if (sbc->id == TS5500_PRODUCT_CODE) { + ts5500_dio1_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio1_pdev)) + dev_warn(&pdev->dev, "DIO1 block registration failed\n"); + ts5500_dio2_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio2_pdev)) + dev_warn(&pdev->dev, "DIO2 block registration failed\n"); + } if (led_classdev_register(&pdev->dev, &ts5500_led_cdev)) dev_warn(&pdev->dev, "LED registration failed\n"); @@ -333,7 +339,3 @@ error: return err; } device_initcall(ts5500_init); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>"); -MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver"); diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile index 6c40995fefb8..1441dda8edf7 100644 --- a/arch/x86/platform/uv/Makefile +++ b/arch/x86/platform/uv/Makefile @@ -1 +1,2 @@ -obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_time.o uv_nmi.o diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 766612137a62..bf31af3d32d6 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -1,76 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * BIOS run time interface routines. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson <rja@sgi.com> + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> */ #include <linux/efi.h> #include <linux/export.h> +#include <linux/slab.h> #include <asm/efi.h> #include <linux/io.h> +#include <asm/pgalloc.h> #include <asm/uv/bios.h> #include <asm/uv/uv_hub.h> -static struct uv_systab uv_systab; +unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR; -s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +struct uv_systab *uv_systab; + +static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { - struct uv_systab *tab = &uv_systab; + struct uv_systab *tab = uv_systab; s64 ret; - if (!tab->function) + if (!tab || !tab->function) /* * BIOS does not support UV systab */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call6((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + return ret; } -EXPORT_SYMBOL_GPL(uv_bios_call); -s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, - u64 a4, u64 a5) +static s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, + u64 a5) { - unsigned long bios_flags; s64 ret; - local_irq_save(bios_flags); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); - local_irq_restore(bios_flags); + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); + up(&__efi_uv_runtime_lock); return ret; } -s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, - u64 a4, u64 a5) +static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { + unsigned long bios_flags; s64 ret; - preempt_disable(); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); - preempt_enable(); + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + + local_irq_save(bios_flags); + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + up(&__efi_uv_runtime_lock); return ret; } - long sn_partition_id; EXPORT_SYMBOL_GPL(sn_partition_id); long sn_coherency_id; @@ -78,10 +76,7 @@ EXPORT_SYMBOL_GPL(sn_coherency_id); long sn_region_size; EXPORT_SYMBOL_GPL(sn_region_size); long system_serial_number; -EXPORT_SYMBOL_GPL(system_serial_number); int uv_type; -EXPORT_SYMBOL_GPL(uv_type); - s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, long *region, long *ssn) @@ -108,7 +103,6 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, *ssn = v1; return ret; } -EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); int uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, @@ -149,11 +143,8 @@ EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); s64 uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) { - s64 ret; - - ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, - (u64)addr, buf, (u64)len, 0); - return ret; + return uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, + (u64)addr, buf, (u64)len, 0); } EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); @@ -162,7 +153,6 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, (u64)ticks_per_second, 0, 0, 0); } -EXPORT_SYMBOL_GPL(uv_bios_freq_base); /* * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target @@ -181,36 +171,99 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, (u64)decode, (u64)domain, (u64)bus, 0, 0); } -EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); +extern s64 uv_bios_get_master_nasid(u64 size, u64 *master_nasid) +{ + return uv_bios_call(UV_BIOS_EXTRA, 0, UV_BIOS_EXTRA_MASTER_NASID, 0, + size, (u64)master_nasid); +} +EXPORT_SYMBOL_GPL(uv_bios_get_master_nasid); -#ifdef CONFIG_EFI -void uv_bios_init(void) +extern s64 uv_bios_get_heapsize(u64 nasid, u64 size, u64 *heap_size) { - struct uv_systab *tab; + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_GET_HEAPSIZE, + 0, size, (u64)heap_size); +} +EXPORT_SYMBOL_GPL(uv_bios_get_heapsize); - if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || - (efi.uv_systab == (unsigned long)NULL)) { - printk(KERN_CRIT "No EFI UV System Table.\n"); - uv_systab.function = (unsigned long)NULL; - return; - } +extern s64 uv_bios_install_heap(u64 nasid, u64 heap_size, u64 *bios_heap) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_INSTALL_HEAP, + 0, heap_size, (u64)bios_heap); +} +EXPORT_SYMBOL_GPL(uv_bios_install_heap); - tab = (struct uv_systab *)ioremap(efi.uv_systab, - sizeof(struct uv_systab)); - if (strncmp(tab->signature, "UVST", 4) != 0) - printk(KERN_ERR "bad signature in UV system table!"); +extern s64 uv_bios_obj_count(u64 nasid, u64 size, u64 *objcnt) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_OBJECT_COUNT, + 0, size, (u64)objcnt); +} +EXPORT_SYMBOL_GPL(uv_bios_obj_count); - /* - * Copy table to permanent spot for later use. - */ - memcpy(&uv_systab, tab, sizeof(struct uv_systab)); - iounmap(tab); +extern s64 uv_bios_enum_objs(u64 nasid, u64 size, u64 *objbuf) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_OBJECTS, + 0, size, (u64)objbuf); +} +EXPORT_SYMBOL_GPL(uv_bios_enum_objs); + +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 size, u64 *portbuf) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_PORTS, + obj_id, size, (u64)portbuf); +} +EXPORT_SYMBOL_GPL(uv_bios_enum_ports); + +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 size, u64 *buf) +{ + return uv_bios_call(UV_BIOS_GET_GEOINFO, nasid, (u64)buf, size, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_get_geoinfo); + +extern s64 uv_bios_get_pci_topology(u64 size, u64 *buf) +{ + return uv_bios_call(UV_BIOS_GET_PCI_TOPOLOGY, (u64)buf, size, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_get_pci_topology); - printk(KERN_INFO "EFI UV System Table Revision %d\n", - uv_systab.revision); +unsigned long get_uv_systab_phys(bool msg) +{ + if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) || + !uv_systab_phys || efi_runtime_disabled()) { + if (msg) + pr_crit("UV: UVsystab: missing\n"); + return 0; + } + return uv_systab_phys; } -#else /* !CONFIG_EFI */ -void uv_bios_init(void) { } -#endif +int uv_bios_init(void) +{ + unsigned long uv_systab_phys_addr; + + uv_systab = NULL; + uv_systab_phys_addr = get_uv_systab_phys(1); + if (!uv_systab_phys_addr) + return -EEXIST; + + uv_systab = ioremap(uv_systab_phys_addr, sizeof(struct uv_systab)); + if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) { + pr_err("UV: UVsystab: bad signature!\n"); + iounmap(uv_systab); + return -EINVAL; + } + + /* Starting with UV4 the UV systab size is variable */ + if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) { + int size = uv_systab->size; + + iounmap(uv_systab); + uv_systab = ioremap(uv_systab_phys_addr, size); + if (!uv_systab) { + pr_err("UV: UVsystab: ioremap(%d) failed!\n", size); + return -EFAULT; + } + } + pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); + return 0; +} diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c deleted file mode 100644 index 0f92173a12b6..000000000000 --- a/arch/x86/platform/uv/tlb_uv.c +++ /dev/null @@ -1,2149 +0,0 @@ -/* - * SGI UltraViolet TLB flush routines. - * - * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI. - * - * This code is released under the GNU General Public License version 2 or - * later. - */ -#include <linux/seq_file.h> -#include <linux/proc_fs.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/delay.h> - -#include <asm/mmu_context.h> -#include <asm/uv/uv.h> -#include <asm/uv/uv_mmrs.h> -#include <asm/uv/uv_hub.h> -#include <asm/uv/uv_bau.h> -#include <asm/apic.h> -#include <asm/idle.h> -#include <asm/tsc.h> -#include <asm/irq_vectors.h> -#include <asm/timer.h> - -/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ -static int timeout_base_ns[] = { - 20, - 160, - 1280, - 10240, - 81920, - 655360, - 5242880, - 167772160 -}; - -static int timeout_us; -static int nobau; -static int nobau_perm; -static cycles_t congested_cycles; - -/* tunables: */ -static int max_concurr = MAX_BAU_CONCURRENT; -static int max_concurr_const = MAX_BAU_CONCURRENT; -static int plugged_delay = PLUGGED_DELAY; -static int plugsb4reset = PLUGSB4RESET; -static int giveup_limit = GIVEUP_LIMIT; -static int timeoutsb4reset = TIMEOUTSB4RESET; -static int ipi_reset_limit = IPI_RESET_LIMIT; -static int complete_threshold = COMPLETE_THRESHOLD; -static int congested_respns_us = CONGESTED_RESPONSE_US; -static int congested_reps = CONGESTED_REPS; -static int disabled_period = DISABLED_PERIOD; - -static struct tunables tunables[] = { - {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ - {&plugged_delay, PLUGGED_DELAY}, - {&plugsb4reset, PLUGSB4RESET}, - {&timeoutsb4reset, TIMEOUTSB4RESET}, - {&ipi_reset_limit, IPI_RESET_LIMIT}, - {&complete_threshold, COMPLETE_THRESHOLD}, - {&congested_respns_us, CONGESTED_RESPONSE_US}, - {&congested_reps, CONGESTED_REPS}, - {&disabled_period, DISABLED_PERIOD}, - {&giveup_limit, GIVEUP_LIMIT} -}; - -static struct dentry *tunables_dir; -static struct dentry *tunables_file; - -/* these correspond to the statistics printed by ptc_seq_show() */ -static char *stat_description[] = { - "sent: number of shootdown messages sent", - "stime: time spent sending messages", - "numuvhubs: number of hubs targeted with shootdown", - "numuvhubs16: number times 16 or more hubs targeted", - "numuvhubs8: number times 8 or more hubs targeted", - "numuvhubs4: number times 4 or more hubs targeted", - "numuvhubs2: number times 2 or more hubs targeted", - "numuvhubs1: number times 1 hub targeted", - "numcpus: number of cpus targeted with shootdown", - "dto: number of destination timeouts", - "retries: destination timeout retries sent", - "rok: : destination timeouts successfully retried", - "resetp: ipi-style resource resets for plugs", - "resett: ipi-style resource resets for timeouts", - "giveup: fall-backs to ipi-style shootdowns", - "sto: number of source timeouts", - "bz: number of stay-busy's", - "throt: number times spun in throttle", - "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", - "recv: shootdown messages received", - "rtime: time spent processing messages", - "all: shootdown all-tlb messages", - "one: shootdown one-tlb messages", - "mult: interrupts that found multiple messages", - "none: interrupts that found no messages", - "retry: number of retry messages processed", - "canc: number messages canceled by retries", - "nocan: number retries that found nothing to cancel", - "reset: number of ipi-style reset requests processed", - "rcan: number messages canceled by reset requests", - "disable: number times use of the BAU was disabled", - "enable: number times use of the BAU was re-enabled" -}; - -static int __init -setup_nobau(char *arg) -{ - nobau = 1; - return 0; -} -early_param("nobau", setup_nobau); - -/* base pnode in this partition */ -static int uv_base_pnode __read_mostly; - -static DEFINE_PER_CPU(struct ptc_stats, ptcstats); -static DEFINE_PER_CPU(struct bau_control, bau_control); -static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); - -static void -set_bau_on(void) -{ - int cpu; - struct bau_control *bcp; - - if (nobau_perm) { - pr_info("BAU not initialized; cannot be turned on\n"); - return; - } - nobau = 0; - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->nobau = 0; - } - pr_info("BAU turned on\n"); - return; -} - -static void -set_bau_off(void) -{ - int cpu; - struct bau_control *bcp; - - nobau = 1; - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->nobau = 1; - } - pr_info("BAU turned off\n"); - return; -} - -/* - * Determine the first node on a uvhub. 'Nodes' are used for kernel - * memory allocation. - */ -static int __init uvhub_to_first_node(int uvhub) -{ - int node, b; - - for_each_online_node(node) { - b = uv_node_to_blade_id(node); - if (uvhub == b) - return node; - } - return -1; -} - -/* - * Determine the apicid of the first cpu on a uvhub. - */ -static int __init uvhub_to_first_apicid(int uvhub) -{ - int cpu; - - for_each_present_cpu(cpu) - if (uvhub == uv_cpu_to_blade_id(cpu)) - return per_cpu(x86_cpu_to_apicid, cpu); - return -1; -} - -/* - * Free a software acknowledge hardware resource by clearing its Pending - * bit. This will return a reply to the sender. - * If the message has timed out, a reply has already been sent by the - * hardware but the resource has not been released. In that case our - * clear of the Timeout bit (as well) will free the resource. No reply will - * be sent (the hardware will only do one reply per message). - */ -static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp, - int do_acknowledge) -{ - unsigned long dw; - struct bau_pq_entry *msg; - - msg = mdp->msg; - if (!msg->canceled && do_acknowledge) { - dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; - write_mmr_sw_ack(dw); - } - msg->replied_to = 1; - msg->swack_vec = 0; -} - -/* - * Process the receipt of a RETRY message - */ -static void bau_process_retry_msg(struct msg_desc *mdp, - struct bau_control *bcp) -{ - int i; - int cancel_count = 0; - unsigned long msg_res; - unsigned long mmr = 0; - struct bau_pq_entry *msg = mdp->msg; - struct bau_pq_entry *msg2; - struct ptc_stats *stat = bcp->statp; - - stat->d_retries++; - /* - * cancel any message from msg+1 to the retry itself - */ - for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { - if (msg2 > mdp->queue_last) - msg2 = mdp->queue_first; - if (msg2 == msg) - break; - - /* same conditions for cancellation as do_reset */ - if ((msg2->replied_to == 0) && (msg2->canceled == 0) && - (msg2->swack_vec) && ((msg2->swack_vec & - msg->swack_vec) == 0) && - (msg2->sending_cpu == msg->sending_cpu) && - (msg2->msg_type != MSG_NOOP)) { - mmr = read_mmr_sw_ack(); - msg_res = msg2->swack_vec; - /* - * This is a message retry; clear the resources held - * by the previous message only if they timed out. - * If it has not timed out we have an unexpected - * situation to report. - */ - if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { - unsigned long mr; - /* - * Is the resource timed out? - * Make everyone ignore the cancelled message. - */ - msg2->canceled = 1; - stat->d_canceled++; - cancel_count++; - mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; - write_mmr_sw_ack(mr); - } - } - } - if (!cancel_count) - stat->d_nocanceled++; -} - -/* - * Do all the things a cpu should do for a TLB shootdown message. - * Other cpu's may come here at the same time for this message. - */ -static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, - int do_acknowledge) -{ - short socket_ack_count = 0; - short *sp; - struct atomic_short *asp; - struct ptc_stats *stat = bcp->statp; - struct bau_pq_entry *msg = mdp->msg; - struct bau_control *smaster = bcp->socket_master; - - /* - * This must be a normal message, or retry of a normal message - */ - if (msg->address == TLB_FLUSH_ALL) { - local_flush_tlb(); - stat->d_alltlb++; - } else { - __flush_tlb_one(msg->address); - stat->d_onetlb++; - } - stat->d_requestee++; - - /* - * One cpu on each uvhub has the additional job on a RETRY - * of releasing the resource held by the message that is - * being retried. That message is identified by sending - * cpu number. - */ - if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) - bau_process_retry_msg(mdp, bcp); - - /* - * This is a swack message, so we have to reply to it. - * Count each responding cpu on the socket. This avoids - * pinging the count's cache line back and forth between - * the sockets. - */ - sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; - asp = (struct atomic_short *)sp; - socket_ack_count = atom_asr(1, asp); - if (socket_ack_count == bcp->cpus_in_socket) { - int msg_ack_count; - /* - * Both sockets dump their completed count total into - * the message's count. - */ - *sp = 0; - asp = (struct atomic_short *)&msg->acknowledge_count; - msg_ack_count = atom_asr(socket_ack_count, asp); - - if (msg_ack_count == bcp->cpus_in_uvhub) { - /* - * All cpus in uvhub saw it; reply - * (unless we are in the UV2 workaround) - */ - reply_to_message(mdp, bcp, do_acknowledge); - } - } - - return; -} - -/* - * Determine the first cpu on a pnode. - */ -static int pnode_to_first_cpu(int pnode, struct bau_control *smaster) -{ - int cpu; - struct hub_and_pnode *hpp; - - for_each_present_cpu(cpu) { - hpp = &smaster->thp[cpu]; - if (pnode == hpp->pnode) - return cpu; - } - return -1; -} - -/* - * Last resort when we get a large number of destination timeouts is - * to clear resources held by a given cpu. - * Do this with IPI so that all messages in the BAU message queue - * can be identified by their nonzero swack_vec field. - * - * This is entered for a single cpu on the uvhub. - * The sender want's this uvhub to free a specific message's - * swack resources. - */ -static void do_reset(void *ptr) -{ - int i; - struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); - struct reset_args *rap = (struct reset_args *)ptr; - struct bau_pq_entry *msg; - struct ptc_stats *stat = bcp->statp; - - stat->d_resets++; - /* - * We're looking for the given sender, and - * will free its swack resource. - * If all cpu's finally responded after the timeout, its - * message 'replied_to' was set. - */ - for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { - unsigned long msg_res; - /* do_reset: same conditions for cancellation as - bau_process_retry_msg() */ - if ((msg->replied_to == 0) && - (msg->canceled == 0) && - (msg->sending_cpu == rap->sender) && - (msg->swack_vec) && - (msg->msg_type != MSG_NOOP)) { - unsigned long mmr; - unsigned long mr; - /* - * make everyone else ignore this message - */ - msg->canceled = 1; - /* - * only reset the resource if it is still pending - */ - mmr = read_mmr_sw_ack(); - msg_res = msg->swack_vec; - mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; - if (mmr & msg_res) { - stat->d_rcanceled++; - write_mmr_sw_ack(mr); - } - } - } - return; -} - -/* - * Use IPI to get all target uvhubs to release resources held by - * a given sending cpu number. - */ -static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) -{ - int pnode; - int apnode; - int maskbits; - int sender = bcp->cpu; - cpumask_t *mask = bcp->uvhub_master->cpumask; - struct bau_control *smaster = bcp->socket_master; - struct reset_args reset_args; - - reset_args.sender = sender; - cpus_clear(*mask); - /* find a single cpu for each uvhub in this distribution mask */ - maskbits = sizeof(struct pnmask) * BITSPERBYTE; - /* each bit is a pnode relative to the partition base pnode */ - for (pnode = 0; pnode < maskbits; pnode++) { - int cpu; - if (!bau_uvhub_isset(pnode, distribution)) - continue; - apnode = pnode + bcp->partition_base_pnode; - cpu = pnode_to_first_cpu(apnode, smaster); - cpu_set(cpu, *mask); - } - - /* IPI all cpus; preemption is already disabled */ - smp_call_function_many(mask, do_reset, (void *)&reset_args, 1); - return; -} - -static inline unsigned long cycles_2_us(unsigned long long cyc) -{ - unsigned long long ns; - unsigned long us; - int cpu = smp_processor_id(); - - ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; - us = ns / 1000; - return us; -} - -/* - * wait for all cpus on this hub to finish their sends and go quiet - * leaves uvhub_quiesce set so that no new broadcasts are started by - * bau_flush_send_and_wait() - */ -static inline void quiesce_local_uvhub(struct bau_control *hmaster) -{ - atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); -} - -/* - * mark this quiet-requestor as done - */ -static inline void end_uvhub_quiesce(struct bau_control *hmaster) -{ - atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); -} - -static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) -{ - unsigned long descriptor_status; - - descriptor_status = uv_read_local_mmr(mmr_offset); - descriptor_status >>= right_shift; - descriptor_status &= UV_ACT_STATUS_MASK; - return descriptor_status; -} - -/* - * Wait for completion of a broadcast software ack message - * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP - */ -static int uv1_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, - struct bau_control *bcp, long try) -{ - unsigned long descriptor_status; - cycles_t ttm; - struct ptc_stats *stat = bcp->statp; - - descriptor_status = uv1_read_status(mmr_offset, right_shift); - /* spin on the status MMR, waiting for it to go idle */ - while ((descriptor_status != DS_IDLE)) { - /* - * Our software ack messages may be blocked because - * there are no swack resources available. As long - * as none of them has timed out hardware will NACK - * our message and its state will stay IDLE. - */ - if (descriptor_status == DS_SOURCE_TIMEOUT) { - stat->s_stimeout++; - return FLUSH_GIVEUP; - } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { - stat->s_dtimeout++; - ttm = get_cycles(); - - /* - * Our retries may be blocked by all destination - * swack resources being consumed, and a timeout - * pending. In that case hardware returns the - * ERROR that looks like a destination timeout. - */ - if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { - bcp->conseccompletes = 0; - return FLUSH_RETRY_PLUGGED; - } - - bcp->conseccompletes = 0; - return FLUSH_RETRY_TIMEOUT; - } else { - /* - * descriptor_status is still BUSY - */ - cpu_relax(); - } - descriptor_status = uv1_read_status(mmr_offset, right_shift); - } - bcp->conseccompletes++; - return FLUSH_COMPLETE; -} - -/* - * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. - * But not currently used. - */ -static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) -{ - unsigned long descriptor_status; - - descriptor_status = - ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; - return descriptor_status; -} - -/* - * Return whether the status of the descriptor that is normally used for this - * cpu (the one indexed by its hub-relative cpu number) is busy. - * The status of the original 32 descriptors is always reflected in the 64 - * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0. - * The bit provided by the activation_status_2 register is irrelevant to - * the status if it is only being tested for busy or not busy. - */ -int normal_busy(struct bau_control *bcp) -{ - int cpu = bcp->uvhub_cpu; - int mmr_offset; - int right_shift; - - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = cpu * UV_ACT_STATUS_SIZE; - return (((((read_lmmr(mmr_offset) >> right_shift) & - UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY); -} - -/* - * Entered when a bau descriptor has gone into a permanent busy wait because - * of a hardware bug. - * Workaround the bug. - */ -int handle_uv2_busy(struct bau_control *bcp) -{ - struct ptc_stats *stat = bcp->statp; - - stat->s_uv2_wars++; - bcp->busy = 1; - return FLUSH_GIVEUP; -} - -static int uv2_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, - struct bau_control *bcp, long try) -{ - unsigned long descriptor_stat; - cycles_t ttm; - int desc = bcp->uvhub_cpu; - long busy_reps = 0; - struct ptc_stats *stat = bcp->statp; - - descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc); - - /* spin on the status MMR, waiting for it to go idle */ - while (descriptor_stat != UV2H_DESC_IDLE) { - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { - /* - * A h/w bug on the destination side may - * have prevented the message being marked - * pending, thus it doesn't get replied to - * and gets continually nacked until it times - * out with a SOURCE_TIMEOUT. - */ - stat->s_stimeout++; - return FLUSH_GIVEUP; - } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { - ttm = get_cycles(); - - /* - * Our retries may be blocked by all destination - * swack resources being consumed, and a timeout - * pending. In that case hardware returns the - * ERROR that looks like a destination timeout. - * Without using the extended status we have to - * deduce from the short time that this was a - * strong nack. - */ - if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { - bcp->conseccompletes = 0; - stat->s_plugged++; - /* FLUSH_RETRY_PLUGGED causes hang on boot */ - return FLUSH_GIVEUP; - } - stat->s_dtimeout++; - bcp->conseccompletes = 0; - /* FLUSH_RETRY_TIMEOUT causes hang on boot */ - return FLUSH_GIVEUP; - } else { - busy_reps++; - if (busy_reps > 1000000) { - /* not to hammer on the clock */ - busy_reps = 0; - ttm = get_cycles(); - if ((ttm - bcp->send_message) > - bcp->timeout_interval) - return handle_uv2_busy(bcp); - } - /* - * descriptor_stat is still BUSY - */ - cpu_relax(); - } - descriptor_stat = uv2_read_status(mmr_offset, right_shift, - desc); - } - bcp->conseccompletes++; - return FLUSH_COMPLETE; -} - -/* - * There are 2 status registers; each and array[32] of 2 bits. Set up for - * which register to read and position in that register based on cpu in - * current hub. - */ -static int wait_completion(struct bau_desc *bau_desc, - struct bau_control *bcp, long try) -{ - int right_shift; - unsigned long mmr_offset; - int desc = bcp->uvhub_cpu; - - if (desc < UV_CPUS_PER_AS) { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = desc * UV_ACT_STATUS_SIZE; - } else { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); - } - - if (bcp->uvhub_version == 1) - return uv1_wait_completion(bau_desc, mmr_offset, right_shift, - bcp, try); - else - return uv2_wait_completion(bau_desc, mmr_offset, right_shift, - bcp, try); -} - -static inline cycles_t sec_2_cycles(unsigned long sec) -{ - unsigned long ns; - cycles_t cyc; - - ns = sec * 1000000000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - -/* - * Our retries are blocked by all destination sw ack resources being - * in use, and a timeout is pending. In that case hardware immediately - * returns the ERROR that looks like a destination timeout. - */ -static void destination_plugged(struct bau_desc *bau_desc, - struct bau_control *bcp, - struct bau_control *hmaster, struct ptc_stats *stat) -{ - udelay(bcp->plugged_delay); - bcp->plugged_tries++; - - if (bcp->plugged_tries >= bcp->plugsb4reset) { - bcp->plugged_tries = 0; - - quiesce_local_uvhub(hmaster); - - spin_lock(&hmaster->queue_lock); - reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); - - end_uvhub_quiesce(hmaster); - - bcp->ipi_attempts++; - stat->s_resets_plug++; - } -} - -static void destination_timeout(struct bau_desc *bau_desc, - struct bau_control *bcp, struct bau_control *hmaster, - struct ptc_stats *stat) -{ - hmaster->max_concurr = 1; - bcp->timeout_tries++; - if (bcp->timeout_tries >= bcp->timeoutsb4reset) { - bcp->timeout_tries = 0; - - quiesce_local_uvhub(hmaster); - - spin_lock(&hmaster->queue_lock); - reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); - - end_uvhub_quiesce(hmaster); - - bcp->ipi_attempts++; - stat->s_resets_timeout++; - } -} - -/* - * Stop all cpus on a uvhub from using the BAU for a period of time. - * This is reversed by check_enable. - */ -static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) -{ - int tcpu; - struct bau_control *tbcp; - struct bau_control *hmaster; - cycles_t tm1; - - hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); - if (!bcp->baudisabled) { - stat->s_bau_disabled++; - tm1 = get_cycles(); - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - if (tbcp->uvhub_master == hmaster) { - tbcp->baudisabled = 1; - tbcp->set_bau_on_time = - tm1 + bcp->disabled_period; - } - } - } - spin_unlock(&hmaster->disable_lock); -} - -static void count_max_concurr(int stat, struct bau_control *bcp, - struct bau_control *hmaster) -{ - bcp->plugged_tries = 0; - bcp->timeout_tries = 0; - if (stat != FLUSH_COMPLETE) - return; - if (bcp->conseccompletes <= bcp->complete_threshold) - return; - if (hmaster->max_concurr >= hmaster->max_concurr_const) - return; - hmaster->max_concurr++; -} - -static void record_send_stats(cycles_t time1, cycles_t time2, - struct bau_control *bcp, struct ptc_stats *stat, - int completion_status, int try) -{ - cycles_t elapsed; - - if (time2 > time1) { - elapsed = time2 - time1; - stat->s_time += elapsed; - - if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { - bcp->period_requests++; - bcp->period_time += elapsed; - if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->cong_reps) && - ((bcp->period_time / bcp->period_requests) > - congested_cycles)) { - stat->s_congested++; - disable_for_period(bcp, stat); - } - } - } else - stat->s_requestor--; - - if (completion_status == FLUSH_COMPLETE && try > 1) - stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) { - stat->s_giveup++; - if (get_cycles() > bcp->period_end) - bcp->period_giveups = 0; - bcp->period_giveups++; - if (bcp->period_giveups == 1) - bcp->period_end = get_cycles() + bcp->disabled_period; - if (bcp->period_giveups > bcp->giveup_limit) { - disable_for_period(bcp, stat); - stat->s_giveuplimit++; - } - } -} - -/* - * Because of a uv1 hardware bug only a limited number of concurrent - * requests can be made. - */ -static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) -{ - spinlock_t *lock = &hmaster->uvhub_lock; - atomic_t *v; - - v = &hmaster->active_descriptor_count; - if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { - stat->s_throttles++; - do { - cpu_relax(); - } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); - } -} - -/* - * Handle the completion status of a message send. - */ -static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, - struct bau_control *bcp, struct bau_control *hmaster, - struct ptc_stats *stat) -{ - if (completion_status == FLUSH_RETRY_PLUGGED) - destination_plugged(bau_desc, bcp, hmaster, stat); - else if (completion_status == FLUSH_RETRY_TIMEOUT) - destination_timeout(bau_desc, bcp, hmaster, stat); -} - -/* - * Send a broadcast and wait for it to complete. - * - * The flush_mask contains the cpus the broadcast is to be sent to including - * cpus that are on the local uvhub. - * - * Returns 0 if all flushing represented in the mask was done. - * Returns 1 if it gives up entirely and the original cpu mask is to be - * returned to the kernel. - */ -int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, - struct bau_desc *bau_desc) -{ - int seq_number = 0; - int completion_stat = 0; - int uv1 = 0; - long try = 0; - unsigned long index; - cycles_t time1; - cycles_t time2; - struct ptc_stats *stat = bcp->statp; - struct bau_control *hmaster = bcp->uvhub_master; - struct uv1_bau_msg_header *uv1_hdr = NULL; - struct uv2_bau_msg_header *uv2_hdr = NULL; - - if (bcp->uvhub_version == 1) { - uv1 = 1; - uv1_throttle(hmaster, stat); - } - - while (hmaster->uvhub_quiesce) - cpu_relax(); - - time1 = get_cycles(); - if (uv1) - uv1_hdr = &bau_desc->header.uv1_hdr; - else - uv2_hdr = &bau_desc->header.uv2_hdr; - - do { - if (try == 0) { - if (uv1) - uv1_hdr->msg_type = MSG_REGULAR; - else - uv2_hdr->msg_type = MSG_REGULAR; - seq_number = bcp->message_number++; - } else { - if (uv1) - uv1_hdr->msg_type = MSG_RETRY; - else - uv2_hdr->msg_type = MSG_RETRY; - stat->s_retry_messages++; - } - - if (uv1) - uv1_hdr->sequence = seq_number; - else - uv2_hdr->sequence = seq_number; - index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; - bcp->send_message = get_cycles(); - - write_mmr_activation(index); - - try++; - completion_stat = wait_completion(bau_desc, bcp, try); - - handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); - - if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { - bcp->ipi_attempts = 0; - stat->s_overipilimit++; - completion_stat = FLUSH_GIVEUP; - break; - } - cpu_relax(); - } while ((completion_stat == FLUSH_RETRY_PLUGGED) || - (completion_stat == FLUSH_RETRY_TIMEOUT)); - - time2 = get_cycles(); - - count_max_concurr(completion_stat, bcp, hmaster); - - while (hmaster->uvhub_quiesce) - cpu_relax(); - - atomic_dec(&hmaster->active_descriptor_count); - - record_send_stats(time1, time2, bcp, stat, completion_stat, try); - - if (completion_stat == FLUSH_GIVEUP) - /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */ - return 1; - return 0; -} - -/* - * The BAU is disabled for this uvhub. When the disabled time period has - * expired re-enable it. - * Return 0 if it is re-enabled for all cpus on this uvhub. - */ -static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) -{ - int tcpu; - struct bau_control *tbcp; - struct bau_control *hmaster; - - hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); - if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { - stat->s_bau_reenabled++; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - if (tbcp->uvhub_master == hmaster) { - tbcp->baudisabled = 0; - tbcp->period_requests = 0; - tbcp->period_time = 0; - tbcp->period_giveups = 0; - } - } - spin_unlock(&hmaster->disable_lock); - return 0; - } - spin_unlock(&hmaster->disable_lock); - return -1; -} - -static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, - int remotes, struct bau_desc *bau_desc) -{ - stat->s_requestor++; - stat->s_ntargcpu += remotes + locals; - stat->s_ntargremotes += remotes; - stat->s_ntarglocals += locals; - - /* uvhub statistics */ - hubs = bau_uvhub_weight(&bau_desc->distribution); - if (locals) { - stat->s_ntarglocaluvhub++; - stat->s_ntargremoteuvhub += (hubs - 1); - } else - stat->s_ntargremoteuvhub += hubs; - - stat->s_ntarguvhub += hubs; - - if (hubs >= 16) - stat->s_ntarguvhub16++; - else if (hubs >= 8) - stat->s_ntarguvhub8++; - else if (hubs >= 4) - stat->s_ntarguvhub4++; - else if (hubs >= 2) - stat->s_ntarguvhub2++; - else - stat->s_ntarguvhub1++; -} - -/* - * Translate a cpu mask to the uvhub distribution mask in the BAU - * activation descriptor. - */ -static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, - struct bau_desc *bau_desc, int *localsp, int *remotesp) -{ - int cpu; - int pnode; - int cnt = 0; - struct hub_and_pnode *hpp; - - for_each_cpu(cpu, flush_mask) { - /* - * The distribution vector is a bit map of pnodes, relative - * to the partition base pnode (and the partition base nasid - * in the header). - * Translate cpu to pnode and hub using a local memory array. - */ - hpp = &bcp->socket_master->thp[cpu]; - pnode = hpp->pnode - bcp->partition_base_pnode; - bau_uvhub_set(pnode, &bau_desc->distribution); - cnt++; - if (hpp->uvhub == bcp->uvhub) - (*localsp)++; - else - (*remotesp)++; - } - if (!cnt) - return 1; - return 0; -} - -/* - * globally purge translation cache of a virtual address or all TLB's - * @cpumask: mask of all cpu's in which the address is to be removed - * @mm: mm_struct containing virtual address range - * @start: start virtual address to be removed from TLB - * @end: end virtual address to be remove from TLB - * @cpu: the current cpu - * - * This is the entry point for initiating any UV global TLB shootdown. - * - * Purges the translation caches of all specified processors of the given - * virtual address, or purges all TLB's on specified processors. - * - * The caller has derived the cpumask from the mm_struct. This function - * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) - * - * The cpumask is converted into a uvhubmask of the uvhubs containing - * those cpus. - * - * Note that this function should be called with preemption disabled. - * - * Returns NULL if all remote flushing was done. - * Returns pointer to cpumask if some remote flushing remains to be - * done. The returned pointer is valid till preemption is re-enabled. - */ -const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned int cpu) -{ - int locals = 0; - int remotes = 0; - int hubs = 0; - struct bau_desc *bau_desc; - struct cpumask *flush_mask; - struct ptc_stats *stat; - struct bau_control *bcp; - unsigned long descriptor_status; - unsigned long status; - - bcp = &per_cpu(bau_control, cpu); - stat = bcp->statp; - stat->s_enters++; - - if (bcp->nobau) - return cpumask; - - if (bcp->busy) { - descriptor_status = - read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); - status = ((descriptor_status >> (bcp->uvhub_cpu * - UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; - if (status == UV2H_DESC_BUSY) - return cpumask; - bcp->busy = 0; - } - - /* bau was disabled due to slow response */ - if (bcp->baudisabled) { - if (check_enable(bcp, stat)) { - stat->s_ipifordisabled++; - return cpumask; - } - } - - /* - * Each sending cpu has a per-cpu mask which it fills from the caller's - * cpu mask. All cpus are converted to uvhubs and copied to the - * activation descriptor. - */ - flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); - /* don't actually do a shootdown of the local cpu */ - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); - - if (cpu_isset(cpu, *cpumask)) - stat->s_ntargself++; - - bau_desc = bcp->descriptor_base; - bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu); - bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) - return NULL; - - record_send_statistics(stat, locals, hubs, remotes, bau_desc); - - if (!end || (end - start) <= PAGE_SIZE) - bau_desc->payload.address = start; - else - bau_desc->payload.address = TLB_FLUSH_ALL; - bau_desc->payload.sending_cpu = cpu; - /* - * uv_flush_send_and_wait returns 0 if all cpu's were messaged, - * or 1 if it gave up and the original cpumask should be returned. - */ - if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc)) - return NULL; - else - return cpumask; -} - -/* - * Search the message queue for any 'other' unprocessed message with the - * same software acknowledge resource bit vector as the 'msg' message. - */ -struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, - struct bau_control *bcp) -{ - struct bau_pq_entry *msg_next = msg + 1; - unsigned char swack_vec = msg->swack_vec; - - if (msg_next > bcp->queue_last) - msg_next = bcp->queue_first; - while (msg_next != msg) { - if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && - (msg_next->swack_vec == swack_vec)) - return msg_next; - msg_next++; - if (msg_next > bcp->queue_last) - msg_next = bcp->queue_first; - } - return NULL; -} - -/* - * UV2 needs to work around a bug in which an arriving message has not - * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register. - * Such a message must be ignored. - */ -void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) -{ - unsigned long mmr_image; - unsigned char swack_vec; - struct bau_pq_entry *msg = mdp->msg; - struct bau_pq_entry *other_msg; - - mmr_image = read_mmr_sw_ack(); - swack_vec = msg->swack_vec; - - if ((swack_vec & mmr_image) == 0) { - /* - * This message was assigned a swack resource, but no - * reserved acknowlegment is pending. - * The bug has prevented this message from setting the MMR. - */ - /* - * Some message has set the MMR 'pending' bit; it might have - * been another message. Look for that message. - */ - other_msg = find_another_by_swack(msg, bcp); - if (other_msg) { - /* - * There is another. Process this one but do not - * ack it. - */ - bau_process_message(mdp, bcp, 0); - /* - * Let the natural processing of that other message - * acknowledge it. Don't get the processing of sw_ack's - * out of order. - */ - return; - } - } - - /* - * Either the MMR shows this one pending a reply or there is no - * other message using this sw_ack, so it is safe to acknowledge it. - */ - bau_process_message(mdp, bcp, 1); - - return; -} - -/* - * The BAU message interrupt comes here. (registered by set_intr_gate) - * See entry_64.S - * - * We received a broadcast assist message. - * - * Interrupts are disabled; this interrupt could represent - * the receipt of several messages. - * - * All cores/threads on this hub get this interrupt. - * The last one to see it does the software ack. - * (the resource will not be freed until noninterruptable cpus see this - * interrupt; hardware may timeout the s/w ack and reply ERROR) - */ -void uv_bau_message_interrupt(struct pt_regs *regs) -{ - int count = 0; - cycles_t time_start; - struct bau_pq_entry *msg; - struct bau_control *bcp; - struct ptc_stats *stat; - struct msg_desc msgdesc; - - ack_APIC_irq(); - time_start = get_cycles(); - - bcp = &per_cpu(bau_control, smp_processor_id()); - stat = bcp->statp; - - msgdesc.queue_first = bcp->queue_first; - msgdesc.queue_last = bcp->queue_last; - - msg = bcp->bau_msg_head; - while (msg->swack_vec) { - count++; - - msgdesc.msg_slot = msg - msgdesc.queue_first; - msgdesc.msg = msg; - if (bcp->uvhub_version == 2) - process_uv2_message(&msgdesc, bcp); - else - bau_process_message(&msgdesc, bcp, 1); - - msg++; - if (msg > msgdesc.queue_last) - msg = msgdesc.queue_first; - bcp->bau_msg_head = msg; - } - stat->d_time += (get_cycles() - time_start); - if (!count) - stat->d_nomsg++; - else if (count > 1) - stat->d_multmsg++; -} - -/* - * Each target uvhub (i.e. a uvhub that has cpu's) needs to have - * shootdown message timeouts enabled. The timeout does not cause - * an interrupt, but causes an error message to be returned to - * the sender. - */ -static void __init enable_timeouts(void) -{ - int uvhub; - int nuvhubs; - int pnode; - unsigned long mmr_image; - - nuvhubs = uv_num_possible_blades(); - - for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - if (!uv_blade_nr_possible_cpus(uvhub)) - continue; - - pnode = uv_blade_to_pnode(uvhub); - mmr_image = read_mmr_misc_control(pnode); - /* - * Set the timeout period and then lock it in, in three - * steps; captures and locks in the period. - * - * To program the period, the SOFT_ACK_MODE must be off. - */ - mmr_image &= ~(1L << SOFTACK_MSHIFT); - write_mmr_misc_control(pnode, mmr_image); - /* - * Set the 4-bit period. - */ - mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); - mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); - write_mmr_misc_control(pnode, mmr_image); - /* - * UV1: - * Subsequent reversals of the timebase bit (3) cause an - * immediate timeout of one or all INTD resources as - * indicated in bits 2:0 (7 causes all of them to timeout). - */ - mmr_image |= (1L << SOFTACK_MSHIFT); - if (is_uv2_hub()) { - /* hw bug workaround; do not use extended status */ - mmr_image &= ~(1L << UV2_EXT_SHFT); - } - write_mmr_misc_control(pnode, mmr_image); - } -} - -static void *ptc_seq_start(struct seq_file *file, loff_t *offset) -{ - if (*offset < num_possible_cpus()) - return offset; - return NULL; -} - -static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) -{ - (*offset)++; - if (*offset < num_possible_cpus()) - return offset; - return NULL; -} - -static void ptc_seq_stop(struct seq_file *file, void *data) -{ -} - -static inline unsigned long long usec_2_cycles(unsigned long microsec) -{ - unsigned long ns; - unsigned long long cyc; - - ns = microsec * 1000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - -/* - * Display the statistics thru /proc/sgi_uv/ptc_statistics - * 'data' points to the cpu number - * Note: see the descriptions in stat_description[]. - */ -static int ptc_seq_show(struct seq_file *file, void *data) -{ - struct ptc_stats *stat; - struct bau_control *bcp; - int cpu; - - cpu = *(loff_t *)data; - if (!cpu) { - seq_printf(file, - "# cpu bauoff sent stime self locals remotes ncpus localhub "); - seq_printf(file, - "remotehub numuvhubs numuvhubs16 numuvhubs8 "); - seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); - seq_printf(file, - "rok resetp resett giveup sto bz throt disable "); - seq_printf(file, - "enable wars warshw warwaits enters ipidis plugged "); - seq_printf(file, - "ipiover glim cong swack recv rtime all one mult "); - seq_printf(file, - "none retry canc nocan reset rcan\n"); - } - if (cpu < num_possible_cpus() && cpu_online(cpu)) { - bcp = &per_cpu(bau_control, cpu); - stat = bcp->statp; - /* source side statistics */ - seq_printf(file, - "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - cpu, bcp->nobau, stat->s_requestor, - cycles_2_us(stat->s_time), - stat->s_ntargself, stat->s_ntarglocals, - stat->s_ntargremotes, stat->s_ntargcpu, - stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, - stat->s_ntarguvhub, stat->s_ntarguvhub16); - seq_printf(file, "%ld %ld %ld %ld %ld %ld ", - stat->s_ntarguvhub8, stat->s_ntarguvhub4, - stat->s_ntarguvhub2, stat->s_ntarguvhub1, - stat->s_dtimeout, stat->s_strongnacks); - seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", - stat->s_retry_messages, stat->s_retriesok, - stat->s_resets_plug, stat->s_resets_timeout, - stat->s_giveup, stat->s_stimeout, - stat->s_busy, stat->s_throttles); - seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - stat->s_bau_disabled, stat->s_bau_reenabled, - stat->s_uv2_wars, stat->s_uv2_wars_hw, - stat->s_uv2_war_waits, stat->s_enters, - stat->s_ipifordisabled, stat->s_plugged, - stat->s_overipilimit, stat->s_giveuplimit, - stat->s_congested); - - /* destination side statistics */ - seq_printf(file, - "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", - read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), - stat->d_requestee, cycles_2_us(stat->d_time), - stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, - stat->d_nomsg, stat->d_retries, stat->d_canceled, - stat->d_nocanceled, stat->d_resets, - stat->d_rcanceled); - } - return 0; -} - -/* - * Display the tunables thru debugfs - */ -static ssize_t tunables_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char *buf; - int ret; - - buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", - "max_concur plugged_delay plugsb4reset timeoutsb4reset", - "ipi_reset_limit complete_threshold congested_response_us", - "congested_reps disabled_period giveup_limit", - max_concurr, plugged_delay, plugsb4reset, - timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_respns_us, congested_reps, disabled_period, - giveup_limit); - - if (!buf) - return -ENOMEM; - - ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); - kfree(buf); - return ret; -} - -/* - * handle a write to /proc/sgi_uv/ptc_statistics - * -1: reset the statistics - * 0: display meaning of the statistics - */ -static ssize_t ptc_proc_write(struct file *file, const char __user *user, - size_t count, loff_t *data) -{ - int cpu; - int i; - int elements; - long input_arg; - char optstr[64]; - struct ptc_stats *stat; - - if (count == 0 || count > sizeof(optstr)) - return -EINVAL; - if (copy_from_user(optstr, user, count)) - return -EFAULT; - optstr[count - 1] = '\0'; - - if (!strcmp(optstr, "on")) { - set_bau_on(); - return count; - } else if (!strcmp(optstr, "off")) { - set_bau_off(); - return count; - } - - if (strict_strtol(optstr, 10, &input_arg) < 0) { - printk(KERN_DEBUG "%s is invalid\n", optstr); - return -EINVAL; - } - - if (input_arg == 0) { - elements = ARRAY_SIZE(stat_description); - printk(KERN_DEBUG "# cpu: cpu number\n"); - printk(KERN_DEBUG "Sender statistics:\n"); - for (i = 0; i < elements; i++) - printk(KERN_DEBUG "%s\n", stat_description[i]); - } else if (input_arg == -1) { - for_each_present_cpu(cpu) { - stat = &per_cpu(ptcstats, cpu); - memset(stat, 0, sizeof(struct ptc_stats)); - } - } - - return count; -} - -static int local_atoi(const char *name) -{ - int val = 0; - - for (;; name++) { - switch (*name) { - case '0' ... '9': - val = 10*val+(*name-'0'); - break; - default: - return val; - } - } -} - -/* - * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. - * Zero values reset them to defaults. - */ -static int parse_tunables_write(struct bau_control *bcp, char *instr, - int count) -{ - char *p; - char *q; - int cnt = 0; - int val; - int e = ARRAY_SIZE(tunables); - - p = instr + strspn(instr, WHITESPACE); - q = p; - for (; *p; p = q + strspn(q, WHITESPACE)) { - q = p + strcspn(p, WHITESPACE); - cnt++; - if (q == p) - break; - } - if (cnt != e) { - printk(KERN_INFO "bau tunable error: should be %d values\n", e); - return -EINVAL; - } - - p = instr + strspn(instr, WHITESPACE); - q = p; - for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { - q = p + strcspn(p, WHITESPACE); - val = local_atoi(p); - switch (cnt) { - case 0: - if (val == 0) { - max_concurr = MAX_BAU_CONCURRENT; - max_concurr_const = MAX_BAU_CONCURRENT; - continue; - } - if (val < 1 || val > bcp->cpus_in_uvhub) { - printk(KERN_DEBUG - "Error: BAU max concurrent %d is invalid\n", - val); - return -EINVAL; - } - max_concurr = val; - max_concurr_const = val; - continue; - default: - if (val == 0) - *tunables[cnt].tunp = tunables[cnt].deflt; - else - *tunables[cnt].tunp = val; - continue; - } - if (q == p) - break; - } - return 0; -} - -/* - * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables) - */ -static ssize_t tunables_write(struct file *file, const char __user *user, - size_t count, loff_t *data) -{ - int cpu; - int ret; - char instr[100]; - struct bau_control *bcp; - - if (count == 0 || count > sizeof(instr)-1) - return -EINVAL; - if (copy_from_user(instr, user, count)) - return -EFAULT; - - instr[count] = '\0'; - - cpu = get_cpu(); - bcp = &per_cpu(bau_control, cpu); - ret = parse_tunables_write(bcp, instr, count); - put_cpu(); - if (ret) - return ret; - - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->max_concurr = max_concurr; - bcp->max_concurr_const = max_concurr; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->cong_response_us = congested_respns_us; - bcp->cong_reps = congested_reps; - bcp->disabled_period = sec_2_cycles(disabled_period); - bcp->giveup_limit = giveup_limit; - } - return count; -} - -static const struct seq_operations uv_ptc_seq_ops = { - .start = ptc_seq_start, - .next = ptc_seq_next, - .stop = ptc_seq_stop, - .show = ptc_seq_show -}; - -static int ptc_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &uv_ptc_seq_ops); -} - -static int tunables_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static const struct file_operations proc_uv_ptc_operations = { - .open = ptc_proc_open, - .read = seq_read, - .write = ptc_proc_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations tunables_fops = { - .open = tunables_open, - .read = tunables_read, - .write = tunables_write, - .llseek = default_llseek, -}; - -static int __init uv_ptc_init(void) -{ - struct proc_dir_entry *proc_uv_ptc; - - if (!is_uv_system()) - return 0; - - proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, - &proc_uv_ptc_operations); - if (!proc_uv_ptc) { - printk(KERN_ERR "unable to create %s proc entry\n", - UV_PTC_BASENAME); - return -EINVAL; - } - - tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); - if (!tunables_dir) { - printk(KERN_ERR "unable to create debugfs directory %s\n", - UV_BAU_TUNABLES_DIR); - return -EINVAL; - } - tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, - tunables_dir, NULL, &tunables_fops); - if (!tunables_file) { - printk(KERN_ERR "unable to create debugfs file %s\n", - UV_BAU_TUNABLES_FILE); - return -EINVAL; - } - return 0; -} - -/* - * Initialize the sending side's sending buffers. - */ -static void activation_descriptor_init(int node, int pnode, int base_pnode) -{ - int i; - int cpu; - int uv1 = 0; - unsigned long gpa; - unsigned long m; - unsigned long n; - size_t dsize; - struct bau_desc *bau_desc; - struct bau_desc *bd2; - struct uv1_bau_msg_header *uv1_hdr; - struct uv2_bau_msg_header *uv2_hdr; - struct bau_control *bcp; - - /* - * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) - * per cpu; and one per cpu on the uvhub (ADP_SZ) - */ - dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; - bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); - BUG_ON(!bau_desc); - - gpa = uv_gpa(bau_desc); - n = uv_gpa_to_gnode(gpa); - m = uv_gpa_to_offset(gpa); - if (is_uv1_hub()) - uv1 = 1; - - /* the 14-bit pnode */ - write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); - /* - * Initializing all 8 (ITEMS_PER_DESC) descriptors for each - * cpu even though we only use the first one; one descriptor can - * describe a broadcast to 256 uv hubs. - */ - for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { - memset(bd2, 0, sizeof(struct bau_desc)); - if (uv1) { - uv1_hdr = &bd2->header.uv1_hdr; - uv1_hdr->swack_flag = 1; - /* - * The base_dest_nasid set in the message header - * is the nasid of the first uvhub in the partition. - * The bit map will indicate destination pnode numbers - * relative to that base. They may not be consecutive - * if nasid striding is being used. - */ - uv1_hdr->base_dest_nasid = - UV_PNODE_TO_NASID(base_pnode); - uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID; - uv1_hdr->command = UV_NET_ENDPOINT_INTD; - uv1_hdr->int_both = 1; - /* - * all others need to be set to zero: - * fairness chaining multilevel count replied_to - */ - } else { - /* - * BIOS uses legacy mode, but UV2 hardware always - * uses native mode for selective broadcasts. - */ - uv2_hdr = &bd2->header.uv2_hdr; - uv2_hdr->swack_flag = 1; - uv2_hdr->base_dest_nasid = - UV_PNODE_TO_NASID(base_pnode); - uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID; - uv2_hdr->command = UV_NET_ENDPOINT_INTD; - } - } - for_each_present_cpu(cpu) { - if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) - continue; - bcp = &per_cpu(bau_control, cpu); - bcp->descriptor_base = bau_desc; - } -} - -/* - * initialize the destination side's receiving buffers - * entered for each uvhub in the partition - * - node is first node (kernel memory notion) on the uvhub - * - pnode is the uvhub's physical identifier - */ -static void pq_init(int node, int pnode) -{ - int cpu; - size_t plsize; - char *cp; - void *vp; - unsigned long pn; - unsigned long first; - unsigned long pn_first; - unsigned long last; - struct bau_pq_entry *pqp; - struct bau_control *bcp; - - plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); - vp = kmalloc_node(plsize, GFP_KERNEL, node); - pqp = (struct bau_pq_entry *)vp; - BUG_ON(!pqp); - - cp = (char *)pqp + 31; - pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); - - for_each_present_cpu(cpu) { - if (pnode != uv_cpu_to_pnode(cpu)) - continue; - /* for every cpu on this pnode: */ - bcp = &per_cpu(bau_control, cpu); - bcp->queue_first = pqp; - bcp->bau_msg_head = pqp; - bcp->queue_last = pqp + (DEST_Q_SIZE - 1); - } - /* - * need the gnode of where the memory was really allocated - */ - pn = uv_gpa_to_gnode(uv_gpa(pqp)); - first = uv_physnodeaddr(pqp); - pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; - last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); - write_mmr_payload_first(pnode, pn_first); - write_mmr_payload_tail(pnode, first); - write_mmr_payload_last(pnode, last); - write_gmmr_sw_ack(pnode, 0xffffUL); - - /* in effect, all msg_type's are set to MSG_NOOP */ - memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); -} - -/* - * Initialization of each UV hub's structures - */ -static void __init init_uvhub(int uvhub, int vector, int base_pnode) -{ - int node; - int pnode; - unsigned long apicid; - - node = uvhub_to_first_node(uvhub); - pnode = uv_blade_to_pnode(uvhub); - - activation_descriptor_init(node, pnode, base_pnode); - - pq_init(node, pnode); - /* - * The below initialization can't be in firmware because the - * messaging IRQ will be determined by the OS. - */ - apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; - write_mmr_data_config(pnode, ((apicid << 32) | vector)); -} - -/* - * We will set BAU_MISC_CONTROL with a timeout period. - * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. - * So the destination timeout period has to be calculated from them. - */ -static int calculate_destination_timeout(void) -{ - unsigned long mmr_image; - int mult1; - int mult2; - int index; - int base; - int ret; - unsigned long ts_ns; - - if (is_uv1_hub()) { - mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; - mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); - index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; - mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); - mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - ts_ns = timeout_base_ns[index]; - ts_ns *= (mult1 * mult2); - ret = ts_ns / 1000; - } else { - /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ - mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL); - mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; - if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) - base = 80; - else - base = 10; - mult1 = mmr_image & UV2_ACK_MASK; - ret = mult1 * base; - } - return ret; -} - -static void __init init_per_cpu_tunables(void) -{ - int cpu; - struct bau_control *bcp; - - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->baudisabled = 0; - if (nobau) - bcp->nobau = 1; - bcp->statp = &per_cpu(ptcstats, cpu); - /* time interval to catch a hardware stay-busy bug */ - bcp->timeout_interval = usec_2_cycles(2*timeout_us); - bcp->max_concurr = max_concurr; - bcp->max_concurr_const = max_concurr; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->cong_response_us = congested_respns_us; - bcp->cong_reps = congested_reps; - bcp->disabled_period = sec_2_cycles(disabled_period); - bcp->giveup_limit = giveup_limit; - spin_lock_init(&bcp->queue_lock); - spin_lock_init(&bcp->uvhub_lock); - spin_lock_init(&bcp->disable_lock); - } -} - -/* - * Scan all cpus to collect blade and socket summaries. - */ -static int __init get_cpu_topology(int base_pnode, - struct uvhub_desc *uvhub_descs, - unsigned char *uvhub_mask) -{ - int cpu; - int pnode; - int uvhub; - int socket; - struct bau_control *bcp; - struct uvhub_desc *bdp; - struct socket_desc *sdp; - - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - - memset(bcp, 0, sizeof(struct bau_control)); - - pnode = uv_cpu_hub_info(cpu)->pnode; - if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { - printk(KERN_EMERG - "cpu %d pnode %d-%d beyond %d; BAU disabled\n", - cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); - return 1; - } - - bcp->osnode = cpu_to_node(cpu); - bcp->partition_base_pnode = base_pnode; - - uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; - *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); - bdp = &uvhub_descs[uvhub]; - - bdp->num_cpus++; - bdp->uvhub = uvhub; - bdp->pnode = pnode; - - /* kludge: 'assuming' one node per socket, and assuming that - disabling a socket just leaves a gap in node numbers */ - socket = bcp->osnode & 1; - bdp->socket_mask |= (1 << socket); - sdp = &bdp->socket[socket]; - sdp->cpu_number[sdp->num_cpus] = cpu; - sdp->num_cpus++; - if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { - printk(KERN_EMERG "%d cpus per socket invalid\n", - sdp->num_cpus); - return 1; - } - } - return 0; -} - -/* - * Each socket is to get a local array of pnodes/hubs. - */ -static void make_per_cpu_thp(struct bau_control *smaster) -{ - int cpu; - size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); - - smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); - memset(smaster->thp, 0, hpsz); - for_each_present_cpu(cpu) { - smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; - smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; - } -} - -/* - * Each uvhub is to get a local cpumask. - */ -static void make_per_hub_cpumask(struct bau_control *hmaster) -{ - int sz = sizeof(cpumask_t); - - hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode); -} - -/* - * Initialize all the per_cpu information for the cpu's on a given socket, - * given what has been gathered into the socket_desc struct. - * And reports the chosen hub and socket masters back to the caller. - */ -static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, - struct bau_control **smasterp, - struct bau_control **hmasterp) -{ - int i; - int cpu; - struct bau_control *bcp; - - for (i = 0; i < sdp->num_cpus; i++) { - cpu = sdp->cpu_number[i]; - bcp = &per_cpu(bau_control, cpu); - bcp->cpu = cpu; - if (i == 0) { - *smasterp = bcp; - if (!(*hmasterp)) - *hmasterp = bcp; - } - bcp->cpus_in_uvhub = bdp->num_cpus; - bcp->cpus_in_socket = sdp->num_cpus; - bcp->socket_master = *smasterp; - bcp->uvhub = bdp->uvhub; - if (is_uv1_hub()) - bcp->uvhub_version = 1; - else if (is_uv2_hub()) - bcp->uvhub_version = 2; - else { - printk(KERN_EMERG "uvhub version not 1 or 2\n"); - return 1; - } - bcp->uvhub_master = *hmasterp; - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; - if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { - printk(KERN_EMERG "%d cpus per uvhub invalid\n", - bcp->uvhub_cpu); - return 1; - } - } - return 0; -} - -/* - * Summarize the blade and socket topology into the per_cpu structures. - */ -static int __init summarize_uvhub_sockets(int nuvhubs, - struct uvhub_desc *uvhub_descs, - unsigned char *uvhub_mask) -{ - int socket; - int uvhub; - unsigned short socket_mask; - - for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - struct uvhub_desc *bdp; - struct bau_control *smaster = NULL; - struct bau_control *hmaster = NULL; - - if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) - continue; - - bdp = &uvhub_descs[uvhub]; - socket_mask = bdp->socket_mask; - socket = 0; - while (socket_mask) { - struct socket_desc *sdp; - if ((socket_mask & 1)) { - sdp = &bdp->socket[socket]; - if (scan_sock(sdp, bdp, &smaster, &hmaster)) - return 1; - make_per_cpu_thp(smaster); - } - socket++; - socket_mask = (socket_mask >> 1); - } - make_per_hub_cpumask(hmaster); - } - return 0; -} - -/* - * initialize the bau_control structure for each cpu - */ -static int __init init_per_cpu(int nuvhubs, int base_part_pnode) -{ - unsigned char *uvhub_mask; - void *vp; - struct uvhub_desc *uvhub_descs; - - timeout_us = calculate_destination_timeout(); - - vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); - uvhub_descs = (struct uvhub_desc *)vp; - memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); - uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); - - if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) - goto fail; - - if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) - goto fail; - - kfree(uvhub_descs); - kfree(uvhub_mask); - init_per_cpu_tunables(); - return 0; - -fail: - kfree(uvhub_descs); - kfree(uvhub_mask); - return 1; -} - -/* - * Initialization of BAU-related structures - */ -static int __init uv_bau_init(void) -{ - int uvhub; - int pnode; - int nuvhubs; - int cur_cpu; - int cpus; - int vector; - cpumask_var_t *mask; - - if (!is_uv_system()) - return 0; - - for_each_possible_cpu(cur_cpu) { - mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); - } - - nuvhubs = uv_num_possible_blades(); - congested_cycles = usec_2_cycles(congested_respns_us); - - uv_base_pnode = 0x7fffffff; - for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - cpus = uv_blade_nr_possible_cpus(uvhub); - if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) - uv_base_pnode = uv_blade_to_pnode(uvhub); - } - - enable_timeouts(); - - if (init_per_cpu(nuvhubs, uv_base_pnode)) { - set_bau_off(); - nobau_perm = 1; - return 0; - } - - vector = UV_BAU_MESSAGE; - for_each_possible_blade(uvhub) - if (uv_blade_nr_possible_cpus(uvhub)) - init_uvhub(uvhub, vector, uv_base_pnode); - - alloc_intr_gate(vector, uv_bau_message_intr1); - - for_each_possible_blade(uvhub) { - if (uv_blade_nr_possible_cpus(uvhub)) { - unsigned long val; - unsigned long mmr; - pnode = uv_blade_to_pnode(uvhub); - /* INIT the bau */ - val = 1L << 63; - write_gmmr_activation(pnode, val); - mmr = 1; /* should be 1 to broadcast to both sockets */ - if (!is_uv1_hub()) - write_mmr_data_broadcast(pnode, mmr); - } - } - - return 0; -} -core_initcall(uv_bau_init); -fs_initcall(uv_ptc_init); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index acf7752da952..4f200ac96ce0 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -8,226 +8,169 @@ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/rbtree.h> #include <linux/slab.h> #include <linux/irq.h> +#include <asm/irqdomain.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> #include <asm/uv/uv_hub.h> /* MMR offset and pnode of hub sourcing interrupts for a given irq */ -struct uv_irq_2_mmr_pnode{ - struct rb_node list; +struct uv_irq_2_mmr_pnode { unsigned long offset; int pnode; - int irq; }; -static DEFINE_SPINLOCK(uv_irq_lock); -static struct rb_root uv_irq_root; +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = APIC_DELIVERY_MODE_FIXED; + entry->dest_mode = apic->dest_mode_logical; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cfg->dest_apicid; + + uv_write_global_mmr64(info->pnode, info->offset, mmr_value); +} static void uv_noop(struct irq_data *data) { } -static void uv_ack_apic(struct irq_data *data) +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) { - ack_APIC_irq(); + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + uv_program_mmr(cfg, data->chip_data); + vector_schedule_cleanup(cfg); + } + + return ret; } static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, .irq_unmask = uv_noop, - .irq_eoi = uv_ack_apic, + .irq_eoi = apic_ack_irq, .irq_set_affinity = uv_set_irq_affinity, }; -/* - * Add offset and pnode information of the hub sourcing interrupts to the - * rb tree for a specific irq. - */ -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct rb_node **link = &uv_irq_root.rb_node; - struct rb_node *parent = NULL; - struct uv_irq_2_mmr_pnode *n; - struct uv_irq_2_mmr_pnode *e; - unsigned long irqflags; - - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, - uv_blade_to_memory_nid(blade)); - if (!n) + struct uv_irq_2_mmr_pnode *chip_data; + struct irq_alloc_info *info = arg; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV) + return -EINVAL; + + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL, + irq_data_get_node(irq_data)); + if (!chip_data) return -ENOMEM; - n->irq = irq; - n->offset = offset; - n->pnode = uv_blade_to_pnode(blade); - spin_lock_irqsave(&uv_irq_lock, irqflags); - /* Find the right place in the rbtree: */ - while (*link) { - parent = *link; - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); - - if (unlikely(irq == e->irq)) { - /* irq entry exists */ - e->pnode = uv_blade_to_pnode(blade); - e->offset = offset; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - kfree(n); - return 0; - } - - if (irq < e->irq) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + if (info->uv.limit == UV_AFFINITY_CPU) + irq_set_status_flags(virq, IRQ_NO_BALANCING); + + chip_data->pnode = uv_blade_to_pnode(info->uv.blade); + chip_data->offset = info->uv.offset; + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data, + handle_percpu_irq, NULL, info->uv.name); + } else { + kfree(chip_data); } - /* Insert the node into the rbtree. */ - rb_link_node(&n->list, parent, link); - rb_insert_color(&n->list, &uv_irq_root); - - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; + return ret; } -/* Retrieve offset and pnode information from the rb tree for a specific irq */ -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +static void uv_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - - if (e->irq == irq) { - *offset = e->offset; - *pnode = e->pnode; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; - } - - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return -1; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_clear_status_flags(virq, IRQ_NO_BALANCING); + irq_domain_free_irqs_top(domain, virq, nr_irqs); } /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static int -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) +static int uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = irq_get_chip_data(irq); - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode, err; - unsigned int dest; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); - if (err != 0) - return err; - - if (limit == UV_AFFINITY_CPU) - irq_set_status_flags(irq, IRQ_NO_BALANCING); - else - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); + return 0; } /* * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +static void uv_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } -static int -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest; - unsigned long mmr_value, mmr_offset; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); +static const struct irq_domain_ops uv_domain_ops = { + .alloc = uv_domain_alloc, + .free = uv_domain_free, + .activate = uv_domain_activate, + .deactivate = uv_domain_deactivate, +}; - return IRQ_SET_MASK_OK_NOCOPY; +static struct irq_domain *uv_get_irq_domain(void) +{ + static struct irq_domain *uv_domain; + static DEFINE_MUTEX(uv_lock); + struct fwnode_handle *fn; + + mutex_lock(&uv_lock); + if (uv_domain) + goto out; + + fn = irq_domain_alloc_named_fwnode("UV-CORE"); + if (!fn) + goto out; + + uv_domain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, fn, + &uv_domain_ops, NULL); + if (!uv_domain) + irq_domain_free_fwnode(fn); +out: + mutex_unlock(&uv_lock); + + return uv_domain; } /* @@ -238,21 +181,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int irq, ret; + struct irq_alloc_info info; + struct irq_domain *domain = uv_get_irq_domain(); - irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - - if (irq <= 0) - return -EBUSY; + if (!domain) + return -ENOMEM; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, - limit); - if (ret == irq) - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); - else - destroy_irq(irq); + init_irq_alloc_info(&info, cpumask_of(cpu)); + info.type = X86_IRQ_ALLOC_TYPE_UV; + info.uv.limit = limit; + info.uv.blade = mmr_blade; + info.uv.offset = mmr_offset; + info.uv.name = irq_name; - return ret; + return irq_domain_alloc_irqs(domain, 1, + uv_blade_to_memory_nid(mmr_blade), &info); } EXPORT_SYMBOL_GPL(uv_setup_irq); @@ -265,26 +208,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); */ void uv_teardown_irq(unsigned int irq) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - if (e->irq == irq) { - arch_disable_uv_irq(e->pnode, e->offset); - rb_erase(n, &uv_irq_root); - kfree(e); - break; - } - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - destroy_irq(irq); + irq_domain_free_irqs(irq, 1); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c new file mode 100644 index 000000000000..5c50e550ab63 --- /dev/null +++ b/arch/x86/platform/uv/uv_nmi.c @@ -0,0 +1,1102 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SGI NMI support routines + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) Mike Travis + */ + +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/kdb.h> +#include <linux/kexec.h> +#include <linux/kgdb.h> +#include <linux/moduleparam.h> +#include <linux/nmi.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/clocksource.h> + +#include <asm/apic.h> +#include <asm/current.h> +#include <asm/kdebug.h> +#include <asm/local64.h> +#include <asm/nmi.h> +#include <asm/reboot.h> +#include <asm/traps.h> +#include <asm/uv/uv.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_mmrs.h> + +/* + * UV handler for NMI + * + * Handle system-wide NMI events generated by the global 'power nmi' command. + * + * Basic operation is to field the NMI interrupt on each CPU and wait + * until all CPU's have arrived into the nmi handler. If some CPU's do not + * make it into the handler, try and force them in with the IPI(NMI) signal. + * + * We also have to lessen UV Hub MMR accesses as much as possible as this + * disrupts the UV Hub's primary mission of directing NumaLink traffic and + * can cause system problems to occur. + * + * To do this we register our primary NMI notifier on the NMI_UNKNOWN + * chain. This reduces the number of false NMI calls when the perf + * tools are running which generate an enormous number of NMIs per + * second (~4M/s for 1024 CPU threads). Our secondary NMI handler is + * very short as it only checks that if it has been "pinged" with the + * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR. + * + */ + +static struct uv_hub_nmi_s **uv_hub_nmi_list; + +DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); + +/* Newer SMM NMI handler, not present in all systems */ +static unsigned long uvh_nmi_mmrx; /* UVH_EVENT_OCCURRED0/1 */ +static unsigned long uvh_nmi_mmrx_clear; /* UVH_EVENT_OCCURRED0/1_ALIAS */ +static int uvh_nmi_mmrx_shift; /* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */ +static char *uvh_nmi_mmrx_type; /* "EXTIO_INT0" */ + +/* Non-zero indicates newer SMM NMI handler present */ +static unsigned long uvh_nmi_mmrx_supported; /* UVH_EXTIO_INT0_BROADCAST */ + +/* Indicates to BIOS that we want to use the newer SMM NMI handler */ +static unsigned long uvh_nmi_mmrx_req; /* UVH_BIOS_KERNEL_MMR_ALIAS_2 */ +static int uvh_nmi_mmrx_req_shift; /* 62 */ + +/* UV hubless values */ +#define NMI_CONTROL_PORT 0x70 +#define NMI_DUMMY_PORT 0x71 +#define PAD_OWN_GPP_D_0 0x2c +#define GPI_NMI_STS_GPP_D_0 0x164 +#define GPI_NMI_ENA_GPP_D_0 0x174 +#define STS_GPP_D_0_MASK 0x1 +#define PAD_CFG_DW0_GPP_D_0 0x4c0 +#define GPIROUTNMI (1ul << 17) +#define PCH_PCR_GPIO_1_BASE 0xfdae0000ul +#define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset)) + +static u64 *pch_base; +static unsigned long nmi_mmr; +static unsigned long nmi_mmr_clear; +static unsigned long nmi_mmr_pending; + +static atomic_t uv_in_nmi; +static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); +static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); +static atomic_t uv_nmi_slave_continue; +static cpumask_var_t uv_nmi_cpu_mask; + +static atomic_t uv_nmi_kexec_failed; + +/* Values for uv_nmi_slave_continue */ +#define SLAVE_CLEAR 0 +#define SLAVE_CONTINUE 1 +#define SLAVE_EXIT 2 + +/* + * Default is all stack dumps go to the console and buffer. + * Lower level to send to log buffer only. + */ +static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; +module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644); + +/* + * The following values show statistics on how perf events are affecting + * this system. + */ +static int param_get_local64(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg)); +} + +static int param_set_local64(const char *val, const struct kernel_param *kp) +{ + /* Clear on any write */ + local64_set((local64_t *)kp->arg, 0); + return 0; +} + +static const struct kernel_param_ops param_ops_local64 = { + .get = param_get_local64, + .set = param_set_local64, +}; +#define param_check_local64(name, p) __param_check(name, p, local64_t) + +static local64_t uv_nmi_count; +module_param_named(nmi_count, uv_nmi_count, local64, 0644); + +static local64_t uv_nmi_misses; +module_param_named(nmi_misses, uv_nmi_misses, local64, 0644); + +static local64_t uv_nmi_ping_count; +module_param_named(ping_count, uv_nmi_ping_count, local64, 0644); + +static local64_t uv_nmi_ping_misses; +module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644); + +/* + * Following values allow tuning for large systems under heavy loading + */ +static int uv_nmi_initial_delay = 100; +module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644); + +static int uv_nmi_slave_delay = 100; +module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644); + +static int uv_nmi_loop_delay = 100; +module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644); + +static int uv_nmi_trigger_delay = 10000; +module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644); + +static int uv_nmi_wait_count = 100; +module_param_named(wait_count, uv_nmi_wait_count, int, 0644); + +static int uv_nmi_retry_count = 500; +module_param_named(retry_count, uv_nmi_retry_count, int, 0644); + +static bool uv_pch_intr_enable = true; +static bool uv_pch_intr_now_enabled; +module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644); + +static bool uv_pch_init_enable = true; +module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644); + +static int uv_nmi_debug; +module_param_named(debug, uv_nmi_debug, int, 0644); + +#define nmi_debug(fmt, ...) \ + do { \ + if (uv_nmi_debug) \ + pr_info(fmt, ##__VA_ARGS__); \ + } while (0) + +/* Valid NMI Actions */ +enum action_t { + nmi_act_kdump, + nmi_act_dump, + nmi_act_ips, + nmi_act_kdb, + nmi_act_kgdb, + nmi_act_health, + nmi_act_max +}; + +static const char * const actions[nmi_act_max] = { + [nmi_act_kdump] = "kdump", + [nmi_act_dump] = "dump", + [nmi_act_ips] = "ips", + [nmi_act_kdb] = "kdb", + [nmi_act_kgdb] = "kgdb", + [nmi_act_health] = "health", +}; + +static const char * const actions_desc[nmi_act_max] = { + [nmi_act_kdump] = "do kernel crash dump", + [nmi_act_dump] = "dump process stack for each cpu", + [nmi_act_ips] = "dump Inst Ptr info for each cpu", + [nmi_act_kdb] = "enter KDB (needs kgdboc= assignment)", + [nmi_act_kgdb] = "enter KGDB (needs gdb target remote)", + [nmi_act_health] = "check if CPUs respond to NMI", +}; + +static enum action_t uv_nmi_action = nmi_act_dump; + +static int param_get_action(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%s\n", actions[uv_nmi_action]); +} + +static int param_set_action(const char *val, const struct kernel_param *kp) +{ + int i, n = ARRAY_SIZE(actions); + + i = sysfs_match_string(actions, val); + if (i >= 0) { + uv_nmi_action = i; + pr_info("UV: New NMI action:%s\n", actions[i]); + return 0; + } + + pr_err("UV: Invalid NMI action. Valid actions are:\n"); + for (i = 0; i < n; i++) + pr_err("UV: %-8s - %s\n", actions[i], actions_desc[i]); + + return -EINVAL; +} + +static const struct kernel_param_ops param_ops_action = { + .get = param_get_action, + .set = param_set_action, +}; +#define param_check_action(name, p) __param_check(name, p, enum action_t) + +module_param_named(action, uv_nmi_action, action, 0644); + +/* Setup which NMI support is present in system */ +static void uv_nmi_setup_mmrs(void) +{ + bool new_nmi_method_only = false; + + /* First determine arch specific MMRs to handshake with BIOS */ + if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { /* UV2,3,4 setup */ + uvh_nmi_mmrx = UVH_EVENT_OCCURRED0; + uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS; + uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT; + uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0"; + + uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST; + uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2; + uvh_nmi_mmrx_req_shift = 62; + + } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */ + uvh_nmi_mmrx = UVH_EVENT_OCCURRED1; + uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS; + uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT; + uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0"; + + new_nmi_method_only = true; /* Newer nmi always valid on UV5+ */ + uvh_nmi_mmrx_req = 0; /* no request bit to clear */ + + } else { + pr_err("UV:%s:NMI support not available on this system\n", __func__); + return; + } + + /* Then find out if new NMI is supported */ + if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) { + if (uvh_nmi_mmrx_req) + uv_write_local_mmr(uvh_nmi_mmrx_req, + 1UL << uvh_nmi_mmrx_req_shift); + nmi_mmr = uvh_nmi_mmrx; + nmi_mmr_clear = uvh_nmi_mmrx_clear; + nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift; + pr_info("UV: SMI NMI support: %s\n", uvh_nmi_mmrx_type); + } else { + nmi_mmr = UVH_NMI_MMR; + nmi_mmr_clear = UVH_NMI_MMR_CLEAR; + nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT; + pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE); + } +} + +/* Read NMI MMR and check if NMI flag was set by BMC. */ +static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi) +{ + hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr); + atomic_inc(&hub_nmi->read_mmr_count); + return !!(hub_nmi->nmi_value & nmi_mmr_pending); +} + +static inline void uv_local_mmr_clear_nmi(void) +{ + uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending); +} + +/* + * UV hubless NMI handler functions + */ +static inline void uv_reassert_nmi(void) +{ + /* (from arch/x86/include/asm/mach_traps.h) */ + outb(0x8f, NMI_CONTROL_PORT); + inb(NMI_DUMMY_PORT); /* dummy read */ + outb(0x0f, NMI_CONTROL_PORT); + inb(NMI_DUMMY_PORT); /* dummy read */ +} + +static void uv_init_hubless_pch_io(int offset, int mask, int data) +{ + int *addr = PCH_PCR_GPIO_ADDRESS(offset); + int readd = readl(addr); + + if (mask) { /* OR in new data */ + int writed = (readd & ~mask) | data; + + nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n", + addr, readd, ~mask, data, writed); + writel(writed, addr); + } else if (readd & data) { /* clear status bit */ + nmi_debug("UV:PCH: %p = %x\n", addr, data); + writel(data, addr); + } + + (void)readl(addr); /* flush write data */ +} + +static void uv_nmi_setup_hubless_intr(void) +{ + uv_pch_intr_now_enabled = uv_pch_intr_enable; + + uv_init_hubless_pch_io( + PAD_CFG_DW0_GPP_D_0, GPIROUTNMI, + uv_pch_intr_now_enabled ? GPIROUTNMI : 0); + + nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n", + uv_pch_intr_now_enabled ? "enabled" : "disabled"); +} + +static struct init_nmi { + unsigned int offset; + unsigned int mask; + unsigned int data; +} init_nmi[] = { + { /* HOSTSW_OWN_GPP_D_0 */ + .offset = 0x84, + .mask = 0x1, + .data = 0x0, /* ACPI Mode */ + }, + +/* Clear status: */ + { /* GPI_INT_STS_GPP_D_0 */ + .offset = 0x104, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_GPE_STS_GPP_D_0 */ + .offset = 0x124, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_SMI_STS_GPP_D_0 */ + .offset = 0x144, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_NMI_STS_GPP_D_0 */ + .offset = 0x164, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + +/* Disable interrupts: */ + { /* GPI_INT_EN_GPP_D_0 */ + .offset = 0x114, + .mask = 0x1, + .data = 0x0, /* Disable interrupt generation */ + }, + { /* GPI_GPE_EN_GPP_D_0 */ + .offset = 0x134, + .mask = 0x1, + .data = 0x0, /* Disable interrupt generation */ + }, + { /* GPI_SMI_EN_GPP_D_0 */ + .offset = 0x154, + .mask = 0x1, + .data = 0x0, /* Disable interrupt generation */ + }, + { /* GPI_NMI_EN_GPP_D_0 */ + .offset = 0x174, + .mask = 0x1, + .data = 0x0, /* Disable interrupt generation */ + }, + +/* Setup GPP_D_0 Pad Config: */ + { /* PAD_CFG_DW0_GPP_D_0 */ + .offset = 0x4c0, + .mask = 0xffffffff, + .data = 0x82020100, +/* + * 31:30 Pad Reset Config (PADRSTCFG): = 2h # PLTRST# (default) + * + * 29 RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly + * from RX buffer (default) + * + * 28 RX Raw Override to '1' (RXRAW1): = 0 # No Override + * + * 26:25 RX Level/Edge Configuration (RXEVCFG): + * = 0h # Level + * = 1h # Edge + * + * 23 RX Invert (RXINV): = 0 # No Inversion (signal active high) + * + * 20 GPIO Input Route IOxAPIC (GPIROUTIOXAPIC): + * = 0 # Routing does not cause peripheral IRQ... + * # (we want an NMI not an IRQ) + * + * 19 GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI. + * 18 GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI. + * 17 GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI. + * + * 11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad. + * 9 GPIO RX Disable (GPIORXDIS): + * = 0 # Enable the input buffer (active low enable) + * + * 8 GPIO TX Disable (GPIOTXDIS): + * = 1 # Disable the output buffer; i.e. Hi-Z + * + * 1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state.. + * 0 GPIO TX State (GPIOTXSTATE): + * = 0 # (Leave at default) + */ + }, + +/* Pad Config DW1 */ + { /* PAD_CFG_DW1_GPP_D_0 */ + .offset = 0x4c4, + .mask = 0x3c00, + .data = 0, /* Termination = none (default) */ + }, +}; + +static void uv_init_hubless_pch_d0(void) +{ + int i, read; + + read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0); + if (read != 0) { + pr_info("UV: Hubless NMI already configured\n"); + return; + } + + nmi_debug("UV: Initializing UV Hubless NMI on PCH\n"); + for (i = 0; i < ARRAY_SIZE(init_nmi); i++) { + uv_init_hubless_pch_io(init_nmi[i].offset, + init_nmi[i].mask, + init_nmi[i].data); + } +} + +static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi) +{ + int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0); + int status = *pstat; + + hub_nmi->nmi_value = status; + atomic_inc(&hub_nmi->read_mmr_count); + + if (!(status & STS_GPP_D_0_MASK)) /* Not a UV external NMI */ + return 0; + + *pstat = STS_GPP_D_0_MASK; /* Is a UV NMI: clear GPP_D_0 status */ + (void)*pstat; /* Flush write */ + + return 1; +} + +static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi) +{ + if (hub_nmi->hub_present) + return uv_nmi_test_mmr(hub_nmi); + + if (hub_nmi->pch_owner) /* Only PCH owner can check status */ + return uv_nmi_test_hubless(hub_nmi); + + return -1; +} + +/* + * If first CPU in on this hub, set hub_nmi "in_nmi" and "owner" values and + * return true. If first CPU in on the system, set global "in_nmi" flag. + */ +static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi) +{ + int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1); + + if (first) { + atomic_set(&hub_nmi->cpu_owner, cpu); + if (atomic_add_unless(&uv_in_nmi, 1, 1)) + atomic_set(&uv_nmi_cpu, cpu); + + atomic_inc(&hub_nmi->nmi_count); + } + return first; +} + +/* Check if this is a system NMI event */ +static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) +{ + int cpu = smp_processor_id(); + int nmi = 0; + int nmi_detected = 0; + + local64_inc(&uv_nmi_count); + this_cpu_inc(uv_cpu_nmi.queries); + + do { + nmi = atomic_read(&hub_nmi->in_nmi); + if (nmi) + break; + + if (raw_spin_trylock(&hub_nmi->nmi_lock)) { + nmi_detected = uv_test_nmi(hub_nmi); + + /* Check flag for UV external NMI */ + if (nmi_detected > 0) { + uv_set_in_nmi(cpu, hub_nmi); + nmi = 1; + break; + } + + /* A non-PCH node in a hubless system waits for NMI */ + else if (nmi_detected < 0) + goto slave_wait; + + /* MMR/PCH NMI flag is clear */ + raw_spin_unlock(&hub_nmi->nmi_lock); + + } else { + + /* Wait a moment for the HUB NMI locker to set flag */ +slave_wait: cpu_relax(); + udelay(uv_nmi_slave_delay); + + /* Re-check hub in_nmi flag */ + nmi = atomic_read(&hub_nmi->in_nmi); + if (nmi) + break; + } + + /* + * Check if this BMC missed setting the MMR NMI flag (or) + * UV hubless system where only PCH owner can check flag + */ + if (!nmi) { + nmi = atomic_read(&uv_in_nmi); + if (nmi) + uv_set_in_nmi(cpu, hub_nmi); + } + + /* If we're holding the hub lock, release it now */ + if (nmi_detected < 0) + raw_spin_unlock(&hub_nmi->nmi_lock); + + } while (0); + + if (!nmi) + local64_inc(&uv_nmi_misses); + + return nmi; +} + +/* Need to reset the NMI MMR register, but only once per hub. */ +static inline void uv_clear_nmi(int cpu) +{ + struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; + + if (cpu == atomic_read(&hub_nmi->cpu_owner)) { + atomic_set(&hub_nmi->cpu_owner, -1); + atomic_set(&hub_nmi->in_nmi, 0); + if (hub_nmi->hub_present) + uv_local_mmr_clear_nmi(); + else + uv_reassert_nmi(); + raw_spin_unlock(&hub_nmi->nmi_lock); + } +} + +/* Ping non-responding CPU's attempting to force them into the NMI handler */ +static void uv_nmi_nr_cpus_ping(void) +{ + int cpu; + + for_each_cpu(cpu, uv_nmi_cpu_mask) + uv_cpu_nmi_per(cpu).pinging = 1; + + __apic_send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI); +} + +/* Clean up flags for CPU's that ignored both NMI and ping */ +static void uv_nmi_cleanup_mask(void) +{ + int cpu; + + for_each_cpu(cpu, uv_nmi_cpu_mask) { + uv_cpu_nmi_per(cpu).pinging = 0; + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT; + cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); + } +} + +/* Loop waiting as CPU's enter NMI handler */ +static int uv_nmi_wait_cpus(int first) +{ + int i, j, k, n = num_online_cpus(); + int last_k = 0, waiting = 0; + int cpu = smp_processor_id(); + + if (first) { + cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask); + k = 0; + } else { + k = n - cpumask_weight(uv_nmi_cpu_mask); + } + + /* PCH NMI causes only one CPU to respond */ + if (first && uv_pch_intr_now_enabled) { + cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); + return n - k - 1; + } + + udelay(uv_nmi_initial_delay); + for (i = 0; i < uv_nmi_retry_count; i++) { + int loop_delay = uv_nmi_loop_delay; + + for_each_cpu(j, uv_nmi_cpu_mask) { + if (uv_cpu_nmi_per(j).state) { + cpumask_clear_cpu(j, uv_nmi_cpu_mask); + if (++k >= n) + break; + } + } + if (k >= n) { /* all in? */ + k = n; + break; + } + if (last_k != k) { /* abort if no new CPU's coming in */ + last_k = k; + waiting = 0; + } else if (++waiting > uv_nmi_wait_count) + break; + + /* Extend delay if waiting only for CPU 0: */ + if (waiting && (n - k) == 1 && + cpumask_test_cpu(0, uv_nmi_cpu_mask)) + loop_delay *= 100; + + udelay(loop_delay); + } + atomic_set(&uv_nmi_cpus_in_nmi, k); + return n - k; +} + +/* Wait until all slave CPU's have entered UV NMI handler */ +static void uv_nmi_wait(int master) +{ + /* Indicate this CPU is in: */ + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN); + + /* If not the first CPU in (the master), then we are a slave CPU */ + if (!master) + return; + + do { + /* Wait for all other CPU's to gather here */ + if (!uv_nmi_wait_cpus(1)) + break; + + /* If not all made it in, send IPI NMI to them */ + pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n", + cpumask_weight(uv_nmi_cpu_mask), + cpumask_pr_args(uv_nmi_cpu_mask)); + + uv_nmi_nr_cpus_ping(); + + /* If all CPU's are in, then done */ + if (!uv_nmi_wait_cpus(0)) + break; + + pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n", + cpumask_weight(uv_nmi_cpu_mask), + cpumask_pr_args(uv_nmi_cpu_mask)); + } while (0); + + pr_alert("UV: %d of %d CPUs in NMI\n", + atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus()); +} + +/* Dump Instruction Pointer header */ +static void uv_nmi_dump_cpu_ip_hdr(void) +{ + pr_info("\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n", + "CPU", "PID", "COMMAND", "IP"); +} + +/* Dump Instruction Pointer info */ +static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs) +{ + pr_info("UV: %4d %6d %-32.32s %pS", + cpu, current->pid, current->comm, (void *)regs->ip); +} + +/* + * Dump this CPU's state. If action was set to "kdump" and the crash_kexec + * failed, then we provide "dump" as an alternate action. Action "dump" now + * also includes the show "ips" (instruction pointers) action whereas the + * action "ips" only displays instruction pointers for the non-idle CPU's. + * This is an abbreviated form of the "ps" command. + */ +static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) +{ + const char *dots = " ................................. "; + + if (cpu == 0) + uv_nmi_dump_cpu_ip_hdr(); + + if (current->pid != 0 || uv_nmi_action != nmi_act_ips) + uv_nmi_dump_cpu_ip(cpu, regs); + + if (uv_nmi_action == nmi_act_dump) { + pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu); + show_regs(regs); + } + + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); +} + +/* Trigger a slave CPU to dump its state */ +static void uv_nmi_trigger_dump(int cpu) +{ + int retry = uv_nmi_trigger_delay; + + if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN) + return; + + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP; + do { + cpu_relax(); + udelay(10); + if (uv_cpu_nmi_per(cpu).state + != UV_NMI_STATE_DUMP) + return; + } while (--retry > 0); + + pr_crit("UV: CPU %d stuck in process dump function\n", cpu); + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE; +} + +/* Wait until all CPU's ready to exit */ +static void uv_nmi_sync_exit(int master) +{ + atomic_dec(&uv_nmi_cpus_in_nmi); + if (master) { + while (atomic_read(&uv_nmi_cpus_in_nmi) > 0) + cpu_relax(); + atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR); + } else { + while (atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + } +} + +/* Current "health" check is to check which CPU's are responsive */ +static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master) +{ + if (master) { + int in = atomic_read(&uv_nmi_cpus_in_nmi); + int out = num_online_cpus() - in; + + pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out); + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + } else { + while (!atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + } + uv_nmi_sync_exit(master); +} + +/* Walk through CPU list and dump state of each */ +static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) +{ + if (master) { + int tcpu; + int ignored = 0; + int saved_console_loglevel = console_loglevel; + + pr_alert("UV: tracing %s for %d CPUs from CPU %d\n", + uv_nmi_action == nmi_act_ips ? "IPs" : "processes", + atomic_read(&uv_nmi_cpus_in_nmi), cpu); + + console_loglevel = uv_nmi_loglevel; + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + for_each_online_cpu(tcpu) { + if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask)) + ignored++; + else if (tcpu == cpu) + uv_nmi_dump_state_cpu(tcpu, regs); + else + uv_nmi_trigger_dump(tcpu); + } + if (ignored) + pr_alert("UV: %d CPUs ignored NMI\n", ignored); + + console_loglevel = saved_console_loglevel; + pr_alert("UV: process trace complete\n"); + } else { + while (!atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) + cpu_relax(); + uv_nmi_dump_state_cpu(cpu, regs); + } + uv_nmi_sync_exit(master); +} + +static void uv_nmi_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); +} + +static void uv_nmi_kdump(int cpu, int main, struct pt_regs *regs) +{ + /* Check if kdump kernel loaded for both main and secondary CPUs */ + if (!kexec_crash_image) { + if (main) + pr_err("UV: NMI error: kdump kernel not loaded\n"); + return; + } + + /* Call crash to dump system state */ + if (main) { + pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu); + crash_kexec(regs); + + pr_emerg("UV: crash_kexec unexpectedly returned\n"); + atomic_set(&uv_nmi_kexec_failed, 1); + + } else { /* secondary */ + + /* If kdump kernel fails, secondaries will exit this loop */ + while (atomic_read(&uv_nmi_kexec_failed) == 0) { + + /* Once shootdown cpus starts, they do not return */ + run_crash_ipi_callback(regs); + + mdelay(10); + } + } +} + +#ifdef CONFIG_KGDB +#ifdef CONFIG_KGDB_KDB +static inline int uv_nmi_kdb_reason(void) +{ + return KDB_REASON_SYSTEM_NMI; +} +#else /* !CONFIG_KGDB_KDB */ +static inline int uv_nmi_kdb_reason(void) +{ + /* Ensure user is expecting to attach gdb remote */ + if (uv_nmi_action == nmi_act_kgdb) + return 0; + + pr_err("UV: NMI error: KDB is not enabled in this kernel\n"); + return -1; +} +#endif /* CONFIG_KGDB_KDB */ + +/* + * Call KGDB/KDB from NMI handler + * + * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or + * 'kdb' has no affect on which is used. See the KGDB documentation for further + * information. + */ +static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) +{ + if (master) { + int reason = uv_nmi_kdb_reason(); + int ret; + + if (reason < 0) + return; + + /* Call KGDB NMI handler as MASTER */ + ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason, + &uv_nmi_slave_continue); + if (ret) { + pr_alert("KGDB returned error, is kgdboc set?\n"); + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + } + } else { + /* Wait for KGDB signal that it's ready for slaves to enter */ + int sig; + + do { + cpu_relax(); + sig = atomic_read(&uv_nmi_slave_continue); + } while (!sig); + + /* Call KGDB as slave */ + if (sig == SLAVE_CONTINUE) + kgdb_nmicallback(cpu, regs); + } + uv_nmi_sync_exit(master); +} + +#else /* !CONFIG_KGDB */ +static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) +{ + pr_err("UV: NMI error: KGDB is not enabled in this kernel\n"); +} +#endif /* !CONFIG_KGDB */ + +/* + * UV NMI handler + */ +static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +{ + struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; + int cpu = smp_processor_id(); + int master = 0; + unsigned long flags; + + local_irq_save(flags); + + /* If not a UV System NMI, ignore */ + if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { + local_irq_restore(flags); + return NMI_DONE; + } + + /* Indicate we are the first CPU into the NMI handler */ + master = (atomic_read(&uv_nmi_cpu) == cpu); + + /* If NMI action is "kdump", then attempt to do it */ + if (uv_nmi_action == nmi_act_kdump) { + uv_nmi_kdump(cpu, master, regs); + + /* Unexpected return, revert action to "dump" */ + if (master) + uv_nmi_action = nmi_act_dump; + } + + /* Pause as all CPU's enter the NMI handler */ + uv_nmi_wait(master); + + /* Process actions other than "kdump": */ + switch (uv_nmi_action) { + case nmi_act_health: + uv_nmi_action_health(cpu, regs, master); + break; + case nmi_act_ips: + case nmi_act_dump: + uv_nmi_dump_state(cpu, regs, master); + break; + case nmi_act_kdb: + case nmi_act_kgdb: + uv_call_kgdb_kdb(cpu, regs, master); + break; + default: + if (master) + pr_alert("UV: unknown NMI action: %d\n", uv_nmi_action); + uv_nmi_sync_exit(master); + break; + } + + /* Clear per_cpu "in_nmi" flag */ + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT); + + /* Clear MMR NMI flag on each hub */ + uv_clear_nmi(cpu); + + /* Clear global flags */ + if (master) { + if (!cpumask_empty(uv_nmi_cpu_mask)) + uv_nmi_cleanup_mask(); + atomic_set(&uv_nmi_cpus_in_nmi, -1); + atomic_set(&uv_nmi_cpu, -1); + atomic_set(&uv_in_nmi, 0); + atomic_set(&uv_nmi_kexec_failed, 0); + atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR); + } + + uv_nmi_touch_watchdogs(); + local_irq_restore(flags); + + return NMI_HANDLED; +} + +/* + * NMI handler for pulling in CPU's when perf events are grabbing our NMI + */ +static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) +{ + int ret; + + this_cpu_inc(uv_cpu_nmi.queries); + if (!this_cpu_read(uv_cpu_nmi.pinging)) { + local64_inc(&uv_nmi_ping_misses); + return NMI_DONE; + } + + this_cpu_inc(uv_cpu_nmi.pings); + local64_inc(&uv_nmi_ping_count); + ret = uv_handle_nmi(reason, regs); + this_cpu_write(uv_cpu_nmi.pinging, 0); + return ret; +} + +static void uv_register_nmi_notifier(void) +{ + if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) + pr_warn("UV: NMI handler failed to register\n"); + + if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping")) + pr_warn("UV: PING NMI handler failed to register\n"); +} + +void uv_nmi_init(void) +{ + unsigned int value; + + /* + * Unmask NMI on all CPU's + */ + value = apic_read(APIC_LVT1) | APIC_DM_NMI; + value &= ~APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); +} + +/* Setup HUB NMI info */ +static void __init uv_nmi_setup_common(bool hubbed) +{ + int size = sizeof(void *) * (1 << NODES_SHIFT); + int cpu; + + uv_hub_nmi_list = kzalloc(size, GFP_KERNEL); + nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size); + BUG_ON(!uv_hub_nmi_list); + size = sizeof(struct uv_hub_nmi_s); + for_each_present_cpu(cpu) { + int nid = cpu_to_node(cpu); + if (uv_hub_nmi_list[nid] == NULL) { + uv_hub_nmi_list[nid] = kzalloc_node(size, + GFP_KERNEL, nid); + BUG_ON(!uv_hub_nmi_list[nid]); + raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock)); + atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1); + uv_hub_nmi_list[nid]->hub_present = hubbed; + uv_hub_nmi_list[nid]->pch_owner = (nid == 0); + } + uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; + } + BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); +} + +/* Setup for UV Hub systems */ +void __init uv_nmi_setup(void) +{ + uv_nmi_setup_mmrs(); + uv_nmi_setup_common(true); + uv_register_nmi_notifier(); + pr_info("UV: Hub NMI enabled\n"); +} + +/* Setup for UV Hubless systems */ +void __init uv_nmi_setup_hubless(void) +{ + uv_nmi_setup_common(false); + pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE); + nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n", + pch_base, PCH_PCR_GPIO_1_BASE); + if (uv_pch_init_enable) + uv_init_hubless_pch_d0(); + uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0, + STS_GPP_D_0_MASK, STS_GPP_D_0_MASK); + uv_nmi_setup_hubless_intr(); + /* Ensure NMI enabled in Processor Interface Reg: */ + uv_reassert_nmi(); + uv_register_nmi_notifier(); + pr_info("UV: PCH NMI enabled\n"); +} diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c deleted file mode 100644 index 5d4ba301e776..000000000000 --- a/arch/x86/platform/uv/uv_sysfs.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson - */ - -#include <linux/device.h> -#include <asm/uv/bios.h> -#include <asm/uv/uv.h> - -struct kobject *sgi_uv_kobj; - -static ssize_t partition_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); -} - -static ssize_t coherence_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); -} - -static struct kobj_attribute partition_id_attr = - __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); - -static struct kobj_attribute coherence_id_attr = - __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); - - -static int __init sgi_uv_sysfs_init(void) -{ - unsigned long ret; - - if (!is_uv_system()) - return -ENODEV; - - if (!sgi_uv_kobj) - sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); - if (!sgi_uv_kobj) { - printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); - return -EINVAL; - } - - ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); - return ret; - } - - ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); - return ret; - } - - return 0; -} - -device_initcall(sgi_uv_sysfs_init); diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 5c86786bbfd2..3712afc3534d 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * SGI RTC clock/timer routines. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved. * Copyright (c) Dimitri Sivanich */ @@ -30,28 +18,27 @@ #define RTC_NAME "sgi_rtc" -static cycle_t uv_read_rtc(struct clocksource *cs); +static u64 uv_read_rtc(struct clocksource *cs); static int uv_rtc_next_event(unsigned long, struct clock_event_device *); -static void uv_rtc_timer_setup(enum clock_event_mode, - struct clock_event_device *); +static int uv_rtc_shutdown(struct clock_event_device *evt); static struct clocksource clocksource_uv = { .name = RTC_NAME, .rating = 299, .read = uv_read_rtc, - .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, + .mask = (u64)UVH_RTC_REAL_TIME_CLOCK_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static struct clock_event_device clock_event_device_uv = { - .name = RTC_NAME, - .features = CLOCK_EVT_FEAT_ONESHOT, - .shift = 20, - .rating = 400, - .irq = -1, - .set_next_event = uv_rtc_next_event, - .set_mode = uv_rtc_timer_setup, - .event_handler = NULL, + .name = RTC_NAME, + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 20, + .rating = 400, + .irq = -1, + .set_next_event = uv_rtc_next_event, + .set_state_shutdown = uv_rtc_shutdown, + .event_handler = NULL, }; static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); @@ -66,7 +53,7 @@ struct uv_rtc_timer_head { struct { int lcpu; /* systemwide logical cpu number */ u64 expires; /* next timer expiration for this cpu */ - } cpu[1]; + } cpu[] __counted_by(ncpus); }; /* @@ -88,7 +75,6 @@ static void uv_rtc_send_IPI(int cpu) apicid = cpu_physical_id(cpu); pnode = uv_apicid_to_pnode(apicid); - apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); @@ -99,32 +85,23 @@ static void uv_rtc_send_IPI(int cpu) /* Check for an RTC interrupt pending */ static int uv_intr_pending(int pnode) { - if (is_uv1_hub()) - return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & - UV1H_EVENT_OCCURRED0_RTC1_MASK; - else if (is_uvx_hub()) - return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) & - UVXH_EVENT_OCCURRED2_RTC_1_MASK; - return 0; + return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED2) & + UVH_EVENT_OCCURRED2_RTC_1_MASK; } /* Setup interrupt and return non-zero if early expiration occurred. */ static int uv_setup_intr(int cpu, u64 expires) { u64 val; - unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; + unsigned long apicid = cpu_physical_id(cpu); int pnode = uv_cpu_to_pnode(cpu); uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, UVH_RTC1_INT_CONFIG_M_MASK); uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); - if (is_uv1_hub()) - uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, - UV1H_EVENT_OCCURRED0_RTC1_MASK); - else - uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS, - UVXH_EVENT_OCCURRED2_RTC_1_MASK); + uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED2_ALIAS, + UVH_EVENT_OCCURRED2_RTC_1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); @@ -159,20 +136,19 @@ static __init int uv_rtc_allocate_timers(void) { int cpu; - blade_info = kzalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); + blade_info = kcalloc(uv_possible_blades, sizeof(void *), GFP_KERNEL); if (!blade_info) return -ENOMEM; for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); int bid = uv_cpu_to_blade_id(cpu); - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; if (!head) { - head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + - (uv_blade_nr_possible_cpus(bid) * - 2 * sizeof(u64)), + head = kmalloc_node(struct_size(head, cpu, + uv_blade_nr_possible_cpus(bid)), GFP_KERNEL, nid); if (!head) { uv_rtc_deallocate_timers(); @@ -227,7 +203,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) int pnode = uv_cpu_to_pnode(cpu); int bid = uv_cpu_to_blade_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); u64 *t = &head->cpu[bcpu].expires; unsigned long flags; int next_cpu; @@ -263,7 +239,7 @@ static int uv_rtc_unset_timer(int cpu, int force) int pnode = uv_cpu_to_pnode(cpu); int bid = uv_cpu_to_blade_id(cpu); struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + int bcpu = uv_cpu_blade_processor_id(cpu); u64 *t = &head->cpu[bcpu].expires; unsigned long flags; int rc = 0; @@ -294,10 +270,10 @@ static int uv_rtc_unset_timer(int cpu, int force) * Read the RTC. * * Starting with HUB rev 2.0, the UV RTC register is replicated across all - * cachelines of it's own page. This allows faster simultaneous reads + * cachelines of its own page. This allows faster simultaneous reads * from a given socket. */ -static cycle_t uv_read_rtc(struct clocksource *cs) +static u64 uv_read_rtc(struct clocksource *cs) { unsigned long offset; @@ -306,7 +282,7 @@ static cycle_t uv_read_rtc(struct clocksource *cs) else offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + return (u64)uv_read_local_mmr(UVH_RTC | offset); } /* @@ -321,24 +297,14 @@ static int uv_rtc_next_event(unsigned long delta, } /* - * Setup the RTC timer in oneshot mode + * Shutdown the RTC timer */ -static void uv_rtc_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) +static int uv_rtc_shutdown(struct clock_event_device *evt) { int ced_cpu = cpumask_first(evt->cpumask); - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here yet */ - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - uv_rtc_unset_timer(ced_cpu, 1); - break; - } + uv_rtc_unset_timer(ced_cpu, 1); + return 0; } static void uv_rtc_interrupt(void) @@ -365,7 +331,7 @@ __setup("uvrtcevt", uv_enable_evt_rtc); static __init void uv_rtc_register_clockevents(struct work_struct *dummy) { - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); + struct clock_event_device *ced = this_cpu_ptr(&cpu_ced); *ced = clock_event_device_uv; ced->cpumask = cpumask_of(smp_processor_id()); @@ -401,9 +367,11 @@ static __init int uv_rtc_setup_clock(void) clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / sn_rtc_cycles_per_second; + clock_event_device_uv.min_delta_ticks = 1; clock_event_device_uv.max_delta_ns = clocksource_uv.mask * (NSEC_PER_SEC / sn_rtc_cycles_per_second); + clock_event_device_uv.max_delta_ticks = clocksource_uv.mask; rc = schedule_on_each_cpu(uv_rtc_register_clockevents); if (rc) { diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile deleted file mode 100644 index 91bc17ab2fd5..000000000000 --- a/arch/x86/platform/visws/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c deleted file mode 100644 index 94d8a39332ec..000000000000 --- a/arch/x86/platform/visws/visws_quirks.c +++ /dev/null @@ -1,608 +0,0 @@ -/* - * SGI Visual Workstation support and quirks, unmaintained. - * - * Split out from setup.c by davej@suse.de - * - * Copyright (C) 1999 Bent Hagemark, Ingo Molnar - * - * SGI Visual Workstation interrupt controller - * - * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC - * which serves as the main interrupt controller in the system. Non-legacy - * hardware in the system uses this controller directly. Legacy devices - * are connected to the PIIX4 which in turn has its 8259(s) connected to - * a of the Cobalt APIC entry. - * - * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com - * - * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru> - */ -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> - -#include <asm/visws/cobalt.h> -#include <asm/visws/piix4.h> -#include <asm/io_apic.h> -#include <asm/fixmap.h> -#include <asm/reboot.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/time.h> -#include <asm/io.h> - -#include <linux/kernel_stat.h> - -#include <asm/i8259.h> -#include <asm/irq_vectors.h> -#include <asm/visws/lithium.h> - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/pci_ids.h> - -extern int no_broadcast; - -char visws_board_type = -1; -char visws_board_rev = -1; - -static void __init visws_time_init(void) -{ - printk(KERN_INFO "Starting Cobalt Timer system clock\n"); - - /* Set the countdown value */ - co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); - - /* Start the timer */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); - - /* Enable (unmask) the timer interrupt */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); - - setup_default_timer_irq(); -} - -/* Replaces the default init_ISA_irqs in the generic setup */ -static void __init visws_pre_intr_init(void); - -/* Quirk for machine specific memory setup. */ - -#define MB (1024 * 1024) - -unsigned long sgivwfb_mem_phys; -unsigned long sgivwfb_mem_size; -EXPORT_SYMBOL(sgivwfb_mem_phys); -EXPORT_SYMBOL(sgivwfb_mem_size); - -long long mem_size __initdata = 0; - -static char * __init visws_memory_setup(void) -{ - long long gfx_mem_size = 8 * MB; - - mem_size = boot_params.alt_mem_k; - - if (!mem_size) { - printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); - mem_size = 128 * MB; - } - - /* - * this hardcodes the graphics memory to 8 MB - * it really should be sized dynamically (or at least - * set as a boot param) - */ - if (!sgivwfb_mem_size) { - printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); - sgivwfb_mem_size = 8 * MB; - } - - /* - * Trim to nearest MB - */ - sgivwfb_mem_size &= ~((1 << 20) - 1); - sgivwfb_mem_phys = mem_size - gfx_mem_size; - - e820_add_region(0, LOWMEMSIZE(), E820_RAM); - e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); - - return "PROM"; -} - -static void visws_machine_emergency_restart(void) -{ - /* - * Visual Workstations restart after this - * register is poked on the PIIX4 - */ - outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); -} - -static void visws_machine_power_off(void) -{ - unsigned short pm_status; -/* extern unsigned int pci_bus0; */ - - while ((pm_status = inw(PMSTS_PORT)) & 0x100) - outw(pm_status, PMSTS_PORT); - - outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); - - mdelay(10); - -#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) - -/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ - outl(PIIX_SPECIAL_STOP, 0xCFC); -} - -static void __init visws_get_smp_config(unsigned int early) -{ -} - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesn't have a BIOS(-configuration table). - * No problem for Linux. - */ - -static void __init MP_processor_info(struct mpc_cpu *m) -{ - int ver, logical_apicid; - physid_mask_t apic_cpus; - - if (!(m->cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->apicid; - printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", - m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); - - if (m->cpuflag & CPU_BOOTPROCESSOR) - boot_cpu_physical_apicid = m->apicid; - - ver = m->apicver; - if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", - m->apicid, MAX_LOCAL_APIC); - return; - } - - apic->apicid_to_cpu_present(m->apicid, &apic_cpus); - physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->apicid); - ver = 0x10; - } - apic_version[m->apicid] = ver; -} - -static void __init visws_find_smp_config(void) -{ - struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); - unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); - - if (ncpus > CO_CPU_MAX) { - printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", - ncpus, mp); - - ncpus = CO_CPU_MAX; - } - - if (ncpus > setup_max_cpus) - ncpus = setup_max_cpus; - -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 1; -#endif - while (ncpus--) - MP_processor_info(mp++); - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -static void visws_trap_init(void); - -void __init visws_early_detect(void) -{ - int raw; - - visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) - >> PIIX_GPI_BD_SHIFT; - - if (visws_board_type < 0) - return; - - /* - * Override the default platform setup functions - */ - x86_init.resources.memory_setup = visws_memory_setup; - x86_init.mpparse.get_smp_config = visws_get_smp_config; - x86_init.mpparse.find_smp_config = visws_find_smp_config; - x86_init.irqs.pre_vector_init = visws_pre_intr_init; - x86_init.irqs.trap_init = visws_trap_init; - x86_init.timers.timer_init = visws_time_init; - x86_init.pci.init = pci_visws_init; - x86_init.pci.init_irq = x86_init_noop; - - /* - * Install reboot quirks: - */ - pm_power_off = visws_machine_power_off; - machine_ops.emergency_restart = visws_machine_emergency_restart; - - /* - * Do not use broadcast IPIs: - */ - no_broadcast = 0; - -#ifdef CONFIG_X86_IO_APIC - /* - * Turn off IO-APIC detection and initialization: - */ - skip_ioapic_setup = 1; -#endif - - /* - * Get Board rev. - * First, we have to initialize the 307 part to allow us access - * to the GPIO registers. Let's map them at 0x0fc0 which is right - * after the PIIX4 PM section. - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable GPIO registers. */ - - /* - * Now, we have to map the power management section to write - * a bit which enables access to the GPIO registers. - * What lunatic came up with this shit? - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable PM registers. */ - - /* - * Now, write the PM register which enables the GPIO registers. - */ - outb_p(SIO_PM_FER2, SIO_PM_INDEX); - outb_p(SIO_PM_GP_EN, SIO_PM_DATA); - - /* - * Now, initialize the GPIO registers. - * We want them all to be inputs which is the - * power on default, so let's leave them alone. - * So, let's just read the board rev! - */ - raw = inb_p(SIO_GP_DATA1); - raw &= 0x7f; /* 7 bits of valid board revision ID. */ - - if (visws_board_type == VISWS_320) { - if (raw < 0x6) { - visws_board_rev = 4; - } else if (raw < 0xc) { - visws_board_rev = 5; - } else { - visws_board_rev = 6; - } - } else if (visws_board_type == VISWS_540) { - visws_board_rev = 2; - } else { - visws_board_rev = raw; - } - - printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", - (visws_board_type == VISWS_320 ? "320" : - (visws_board_type == VISWS_540 ? "540" : - "unknown")), visws_board_rev); -} - -#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) -#define BCD (LI_INTB | LI_INTC | LI_INTD) -#define ALLDEVS (A01234 | BCD) - -static __init void lithium_init(void) -{ - set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); - set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); - - if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - li_pcia_write16(LI_PCI_INTEN, ALLDEVS); - li_pcib_write16(LI_PCI_INTEN, ALLDEVS); -} - -static __init void cobalt_init(void) -{ - /* - * On normal SMP PC this is used only with SMP, but we have to - * use it and set it up here to start the Cobalt clock - */ - set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - setup_local_APIC(); - printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", - (unsigned int)apic_read(APIC_LVR), - (unsigned int)apic_read(APIC_ID)); - - set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); - set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); - printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", - co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); - - /* Enable Cobalt APIC being careful to NOT change the ID! */ - co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); - - printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", - co_apic_read(CO_APIC_ID)); -} - -static void __init visws_trap_init(void) -{ - lithium_init(); - cobalt_init(); -} - -/* - * IRQ controller / APIC support: - */ - -static DEFINE_SPINLOCK(cobalt_lock); - -/* - * Set the given Cobalt APIC Redirection Table entry to point - * to the given IDT vector/index. - */ -static inline void co_apic_set(int entry, int irq) -{ - co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); - co_apic_write(CO_APIC_HI(entry), 0); -} - -/* - * Cobalt (IO)-APIC functions to handle PCI devices. - */ -static inline int co_apic_ide0_hack(void) -{ - extern char visws_board_type; - extern char visws_board_rev; - - if (visws_board_type == VISWS_320 && visws_board_rev == 5) - return 5; - return CO_APIC_IDE0; -} - -static int is_co_apic(unsigned int irq) -{ - if (IS_CO_APIC(irq)) - return CO_APIC(irq); - - switch (irq) { - case 0: return CO_APIC_CPU; - case CO_IRQ_IDE0: return co_apic_ide0_hack(); - case CO_IRQ_IDE1: return CO_APIC_IDE1; - default: return -1; - } -} - - -/* - * This is the SGI Cobalt (IO-)APIC: - */ -static void enable_cobalt_irq(struct irq_data *data) -{ - co_apic_set(is_co_apic(data->irq), data->irq); -} - -static void disable_cobalt_irq(struct irq_data *data) -{ - int entry = is_co_apic(data->irq); - - co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); - co_apic_read(CO_APIC_LO(entry)); -} - -static void ack_cobalt_irq(struct irq_data *data) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - disable_cobalt_irq(data); - apic_write(APIC_EOI, APIC_EOI_ACK); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip cobalt_irq_type = { - .name = "Cobalt-APIC", - .irq_enable = enable_cobalt_irq, - .irq_disable = disable_cobalt_irq, - .irq_ack = ack_cobalt_irq, -}; - - -/* - * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt - * -- not the manner expected by the code in i8259.c. - * - * there is a 'master' physical interrupt source that gets sent to - * the CPU. But in the chipset there are various 'virtual' interrupts - * waiting to be handled. We represent this to Linux through a 'master' - * interrupt controller type, and through a special virtual interrupt- - * controller. Device drivers only see the virtual interrupt sources. - */ -static unsigned int startup_piix4_master_irq(struct irq_data *data) -{ - legacy_pic->init(0); - enable_cobalt_irq(data); - return 0; -} - -static struct irq_chip piix4_master_irq_type = { - .name = "PIIX4-master", - .irq_startup = startup_piix4_master_irq, - .irq_ack = ack_cobalt_irq, -}; - -static void pii4_mask(struct irq_data *data) { } - -static struct irq_chip piix4_virtual_irq_type = { - .name = "PIIX4-virtual", - .irq_mask = pii4_mask, -}; - -/* - * PIIX4-8259 master/virtual functions to handle interrupt requests - * from legacy devices: floppy, parallel, serial, rtc. - * - * None of these get Cobalt APIC entries, neither do they have IDT - * entries. These interrupts are purely virtual and distributed from - * the 'master' interrupt source: CO_IRQ_8259. - * - * When the 8259 interrupts its handler figures out which of these - * devices is interrupting and dispatches to its handler. - * - * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ - * enable_irq gets the right irq. This 'master' irq is never directly - * manipulated by any driver. - */ -static irqreturn_t piix4_master_intr(int irq, void *dev_id) -{ - unsigned long flags; - int realirq; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - /* Find out what's interrupting in the PIIX4 master 8259 */ - outb(0x0c, 0x20); /* OCW3 Poll command */ - realirq = inb(0x20); - - /* - * Bit 7 == 0 means invalid/spurious - */ - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq &= 7; - - if (unlikely(realirq == 2)) { - outb(0x0c, 0xa0); - realirq = inb(0xa0); - - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq = (realirq & 7) + 8; - } - - /* mask and ack interrupt */ - cached_irq_mask |= 1 << realirq; - if (unlikely(realirq > 7)) { - inb(0xa1); - outb(cached_slave_mask, 0xa1); - outb(0x60 + (realirq & 7), 0xa0); - outb(0x60 + 2, 0x20); - } else { - inb(0x21); - outb(cached_master_mask, 0x21); - outb(0x60 + realirq, 0x20); - } - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - /* - * handle this 'virtual interrupt' as a Cobalt one now. - */ - generic_handle_irq(realirq); - - return IRQ_HANDLED; - -out_unlock: - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - return IRQ_NONE; -} - -static struct irqaction master_action = { - .handler = piix4_master_intr, - .name = "PIIX4-8259", - .flags = IRQF_NO_THREAD, -}; - -static struct irqaction cascade_action = { - .handler = no_action, - .name = "cascade", - .flags = IRQF_NO_THREAD, -}; - -static inline void set_piix4_virtual_irq_type(void) -{ - piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask; - piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask; - piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask; -} - -static void __init visws_pre_intr_init(void) -{ - int i; - - set_piix4_virtual_irq_type(); - - for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - struct irq_chip *chip = NULL; - - if (i == 0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE1) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_8259) - chip = &piix4_master_irq_type; - else if (i < CO_IRQ_APIC0) - chip = &piix4_virtual_irq_type; - else if (IS_CO_APIC(i)) - chip = &cobalt_irq_type; - - if (chip) - irq_set_chip(i, chip); - } - - setup_irq(CO_IRQ_8259, &master_action); - setup_irq(2, &cascade_action); -} |
