Diffstat (limited to 'arch')
36 files changed, 349 insertions(+), 178 deletions(-)
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5c7adf100a58..9d5fd00d9e91 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -39,7 +39,7 @@ config ARC
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
-	select PERF_USE_VMALLOC
+	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index e114000a84f5..74d070cd3c13 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -16,6 +16,12 @@
 		ranges = <0x00000000 0x0 0xe0000000 0x10000000>;
 		interrupt-parent = <&mb_intc>;
 
+		creg_rst: reset-controller@11220 {
+			compatible = "snps,axs10x-reset";
+			#reset-cells = <1>;
+			reg = <0x11220 0x4>;
+		};
+
 		i2sclk: i2sclk@100a0 {
 			compatible = "snps,axs10x-i2s-pll-clock";
 			reg = <0x100a0 0x10>;
@@ -73,6 +79,8 @@
 			clocks = <&apbclk>;
 			clock-names = "stmmaceth";
 			max-speed = <100>;
+			resets = <&creg_rst 5>;
+			reset-names = "stmmaceth";
 		};
 
 		ehci@0x40000 {
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index b1c56d35f2a9..49bfbd879caa 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -11,12 +11,14 @@
 
 /* Build Configuration Registers */
 #define ARC_REG_AUX_DCCM	0x18	/* DCCM Base Addr ARCv2 */
+#define ARC_REG_ERP_CTRL	0x3F	/* ARCv2 Error protection control */
 #define ARC_REG_DCCM_BASE_BUILD	0x61	/* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR		0x62
 #define ARC_REG_VECBASE_BCR	0x68
 #define ARC_REG_PERIBASE_BCR	0x69
 #define ARC_REG_FP_BCR		0x6B	/* ARCompact: Single-Precision FPU */
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
+#define ARC_REG_ERP_BUILD	0xc7	/* ARCv2 Error protection Build: ECC/Parity */
 #define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_SLC_BCR		0xce
 #define ARC_REG_DCCM_BUILD	0x74	/* DCCM size (common) */
@@ -32,11 +34,14 @@
 #define ARC_REG_D_UNCACH_BCR	0x6A
 #define ARC_REG_BPU_BCR		0xc0
 #define ARC_REG_ISA_CFG_BCR	0xc1
+#define ARC_REG_LPB_BUILD	0xE9	/* ARCv2 Loop Buffer Build */
 #define ARC_REG_RTT_BCR		0xF2
 #define ARC_REG_IRQ_BCR		0xF3
+#define ARC_REG_MICRO_ARCH_BCR	0xF9	/* ARCv2 Product revision */
 #define ARC_REG_SMART_BCR	0xFF
 #define ARC_REG_CLUSTER_BCR	0xcf
 #define ARC_REG_AUX_ICCM	0x208	/* ICCM Base Addr (ARCv2) */
+#define ARC_REG_LPB_CTRL	0x488	/* ARCv2 Loop Buffer control */
 
 /* Common for ARCompact and ARCv2 status register */
 #define ARC_REG_STATUS32	0x0A
@@ -229,6 +234,32 @@ struct bcr_bpu_arcv2 {
 #endif
 };
 
+/* Error Protection Build: ECC/Parity */
+struct bcr_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad3:5, mmu:3, pad2:4, ic:3, dc:3, pad1:6, ver:8;
+#else
+	unsigned int ver:8, pad1:6, dc:3, ic:3, pad2:4, mmu:3, pad3:5;
+#endif
+};
+
+/* Error Protection Control */
+struct ctl_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:27, mpd:1, pad1:2, dpd:1, dpi:1;
+#else
+	unsigned int dpi:1, dpd:1, pad1:2, mpd:1, pad2:27;
+#endif
+};
+
+struct bcr_lpb {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:16, entries:8, ver:8;
+#else
+	unsigned int ver:8, entries:8, pad:16;
+#endif
+};
+
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int info:24, ver:8;
@@ -270,7 +301,7 @@ struct cpuinfo_arc {
 	struct cpuinfo_arc_ccm iccm, dccm;
 	struct {
 		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-			     fpu_sp:1, fpu_dp:1, dual_iss_enb:1, dual_iss_exist:1, pad2:4,
+			     fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
 			     debug:1, ap:1, smart:1, rtt:1, pad3:4,
 			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
 	} extn;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 2ce24e74f879..8aec462d90fb 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -336,15 +336,12 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
-		idx = find_first_zero_bit(pmu_cpu->used_mask,
-					  arc_pmu->n_counters);
-		if (idx == arc_pmu->n_counters)
-			return -EAGAIN;
-
-		__set_bit(idx, pmu_cpu->used_mask);
-		hwc->idx = idx;
-	}
+	idx = ffz(pmu_cpu->used_mask[0]);
+	if (idx == arc_pmu->n_counters)
+		return -EAGAIN;
+
+	__set_bit(idx, pmu_cpu->used_mask);
+	hwc->idx = idx;
 
 	write_aux_reg(ARC_REG_PCT_INDEX, idx);
 
@@ -377,21 +374,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 	struct perf_sample_data data;
 	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
 	struct pt_regs *regs;
-	int active_ints;
+	unsigned int active_ints;
 	int idx;
 
 	arc_pmu_disable(&arc_pmu->pmu);
 
 	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+	if (!active_ints)
+		goto done;
 
 	regs = get_irq_regs();
 
-	for (idx = 0; idx < arc_pmu->n_counters; idx++) {
-		struct perf_event *event = pmu_cpu->act_counter[idx];
+	do {
+		struct perf_event *event;
 		struct hw_perf_event *hwc;
 
-		if (!(active_ints & (1 << idx)))
-			continue;
+		idx = __ffs(active_ints);
 
 		/* Reset interrupt flag by writing of 1 */
 		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
@@ -404,19 +402,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 		write_aux_reg(ARC_REG_PCT_INT_CTRL,
 			read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
 
+		event = pmu_cpu->act_counter[idx];
 		hwc = &event->hw;
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
 		arc_perf_event_update(event, &event->hw, event->hw.idx);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!arc_pmu_event_set_period(event))
-			continue;
+		if (arc_pmu_event_set_period(event)) {
+			if (perf_event_overflow(event, &data, regs))
+				arc_pmu_stop(event, 0);
+		}
 
-		if (perf_event_overflow(event, &data, regs))
-			arc_pmu_stop(event, 0);
-	}
+		active_ints &= ~(1U << idx);
+	} while (active_ints);
 
+done:
 	arc_pmu_enable(&arc_pmu->pmu);
 
 	return IRQ_HANDLED;
@@ -461,6 +462,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		pr_err("This core does not have performance counters!\n");
 		return -ENODEV;
 	}
+	BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
 	BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
 
 	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index fb83844daeea..7ef7d9a8ff89 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,8 +199,10 @@ static void read_arc_build_cfg_regs(void)
 			unsigned int exec_ctrl;
 
 			READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-			cpu->extn.dual_iss_exist = 1;
-			cpu->extn.dual_iss_enb = exec_ctrl & 1;
+			cpu->extn.dual_enb = exec_ctrl & 1;
+
+			/* dual issue always present for this core */
+			cpu->extn.dual = 1;
 		}
 	}
 
@@ -253,7 +255,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 		       cpu_id, cpu->name, cpu->details,
 		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
 		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-		       IS_AVAIL3(cpu->extn.dual_iss_exist, cpu->extn.dual_iss_enb, " Dual-Issue"));
+		       IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
 
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
 		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
@@ -293,11 +295,26 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 
 	if (cpu->bpu.ver)
 		n += scnprintf(buf + n, len - n,
-			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
 			      IS_AVAIL1(cpu->bpu.full, "full"),
 			      IS_AVAIL1(!cpu->bpu.full, "partial"),
 			      cpu->bpu.num_cache, cpu->bpu.num_pred);
 
+	if (is_isa_arcv2()) {
+		struct bcr_lpb lpb;
+
+		READ_BCR(ARC_REG_LPB_BUILD, lpb);
+		if (lpb.ver) {
+			unsigned int ctl;
+			ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+			n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
+				       lpb.entries,
+				       IS_DISABLED_RUN(!ctl));
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "\n");
 	return buf;
 }
 
@@ -326,6 +343,24 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
 			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
+	if (is_isa_arcv2()) {
+
+		/* Error Protection: ECC/Parity */
+		struct bcr_erp erp;
+		READ_BCR(ARC_REG_ERP_BUILD, erp);
+
+		if (erp.ver) {
+			struct ctl_erp ctl;
+			READ_BCR(ARC_REG_ERP_CTRL, ctl);
+
+			/* inverted bits: 0 means enabled */
+			n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+				       IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
+				       IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
+				       IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+		}
+	}
+
 	n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
 		       EF_ARC_OSABI_CURRENT >> 8,
 		       EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 8ceefbf72fb0..4097764fea23 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -762,21 +762,23 @@ void read_decode_mmu_bcr(void)
 	tmp = read_aux_reg(ARC_REG_MMU_BCR);
 	mmu->ver = (tmp >> 24);
 
-	if (mmu->ver <= 2) {
-		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-		mmu->pg_sz_k = TO_KB(0x2000);
-		mmu->sets = 1 << mmu2->sets;
-		mmu->ways = 1 << mmu2->ways;
-		mmu->u_dtlb = mmu2->u_dtlb;
-		mmu->u_itlb = mmu2->u_itlb;
-	} else if (mmu->ver == 3) {
-		mmu3 = (struct bcr_mmu_3 *)&tmp;
-		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
-		mmu->sets = 1 << mmu3->sets;
-		mmu->ways = 1 << mmu3->ways;
-		mmu->u_dtlb = mmu3->u_dtlb;
-		mmu->u_itlb = mmu3->u_itlb;
-		mmu->sasid = mmu3->sasid;
+	if (is_isa_arcompact()) {
+		if (mmu->ver <= 2) {
+			mmu2 = (struct bcr_mmu_1_2 *)&tmp;
+			mmu->pg_sz_k = TO_KB(0x2000);
+			mmu->sets = 1 << mmu2->sets;
+			mmu->ways = 1 << mmu2->ways;
+			mmu->u_dtlb = mmu2->u_dtlb;
+			mmu->u_itlb = mmu2->u_itlb;
+		} else {
+			mmu3 = (struct bcr_mmu_3 *)&tmp;
+			mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
+			mmu->sets = 1 << mmu3->sets;
+			mmu->ways = 1 << mmu3->ways;
+			mmu->u_dtlb = mmu3->u_dtlb;
+			mmu->u_itlb = mmu3->u_itlb;
+			mmu->sasid = mmu3->sasid;
+		}
 	} else {
 		mmu4 = (struct bcr_mmu_4 *)&tmp;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -818,8 +820,9 @@ int pae40_exist_but_not_enab(void)
 
 void arc_mmu_init(void)
 {
-	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	char str[256];
+	int compat = 0;
 
 	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
@@ -834,15 +837,21 @@ void arc_mmu_init(void)
 	 */
 	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
 
-	/* For efficiency sake, kernel is compile time built for a MMU ver
-	 * This must match the hardware it is running on.
-	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
-	 * hardware doesn't understand cmds such as WriteNI, or IVUTLB
-	 * On the other hand, Linux built for V1 if run on MMU V2 will do
-	 * un-needed workarounds to prevent memcpy thrashing.
-	 * Similarly MMU V3 has new features which won't work on older MMU
+	/*
+	 * Ensure that MMU features assumed by kernel exist in hardware.
+	 * For older ARC700 cpus, it has to be exact match, since the MMU
+	 * revisions were not backwards compatible (MMUv3 TLB layout changed
+	 * so even if kernel for v2 didn't use any new cmds of v3, it would
+	 * still not work.
+	 * For HS cpus, MMUv4 was baseline and v5 is backwards compatible
+	 * (will run older software).
 	 */
-	if (mmu->ver != CONFIG_ARC_MMU_VER) {
+	if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
+		compat = 1;
+	else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
+		compat = 1;
+
+	if (!compat) {
 		panic("MMU ver %d doesn't match kernel built for %d...\n",
 		      mmu->ver, CONFIG_ARC_MMU_VER);
 	}
diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig
index c54d1ae57fe0..4e0df7b7a248 100644
--- a/arch/arc/plat-axs10x/Kconfig
+++ b/arch/arc/plat-axs10x/Kconfig
@@ -14,6 +14,8 @@ menuconfig ARC_PLAT_AXS10X
 	select MIGHT_HAVE_PCI
 	select GENERIC_IRQ_CHIP
 	select GPIOLIB
+	select AXS101 if ISA_ARCOMPACT
+	select AXS103 if ISA_ARCV2
 	help
 	  Support for the ARC AXS10x Software Development Platforms.
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index cf14ebc36916..f1ac6790da5f 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -111,13 +111,6 @@ static void __init axs10x_early_init(void)
 
 	axs10x_enable_gpio_intc_wire();
 
-	/*
-	 * Reset ethernet IP core.
-	 * TODO: get rid of this quirk after axs10x reset driver (or simple
-	 * reset driver) will be available in upstream.
-	 */
-	iowrite32((1 << 5), (void __iomem *) CREG_MB_SW_RESET);
-
 	scnprintf(mb, 32, "MainBoard v%d", mb_rev);
 	axs10x_print_board_ver(CREG_MB_VER, mb);
 }
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index f24628db5409..e2bd35b6780c 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -4,6 +4,7 @@
 #
 
 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -23,6 +24,8 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select ARM_GIC
+	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
@@ -36,6 +39,8 @@ config KVM
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
 	  Support hosting virtualized guest machines.
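A note on the arc_pmu_add()/arc_pmu_intr() rework above: counter allocation now uses ffz() on a single 32-bit word (hence the new BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32)), and the interrupt handler walks only the bits that are actually set in ARC_REG_PCT_INT_ACT instead of scanning all counters. A minimal userspace sketch of that bit-scan pattern, with GCC's __builtin_ctz standing in for the kernel's __ffs(); the names and values here are illustrative only:

#include <stdio.h>

/* Placeholder for the per-counter overflow handling done in arc_pmu_intr(). */
static void service_counter(int idx)
{
	printf("servicing overflowed counter %d\n", idx);
}

int main(void)
{
	/* Pretend counters 0, 3 and 5 raised an interrupt. */
	unsigned int active_ints = (1u << 0) | (1u << 3) | (1u << 5);

	if (!active_ints)
		return 0;	/* nothing pending: bail out early */

	do {
		/* Lowest set bit; __builtin_ctz(0) is undefined, like __ffs(0). */
		int idx = __builtin_ctz(active_ints);

		service_counter(idx);
		active_ints &= ~(1u << idx);	/* clear the serviced bit */
	} while (active_ints);

	return 0;
}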
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f550abd64a25..48de846f2246 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
 obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
 obj-y += $(KVM)/arm/vgic/vgic-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-v3.o
+obj-y += $(KVM)/arm/vgic/vgic-v4.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c
index b592f79a1742..fa8e7dd4d898 100644
--- a/arch/arm/mach-pxa/cm-x255.c
+++ b/arch/arm/mach-pxa/cm-x255.c
@@ -14,7 +14,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/nand-gpio.h>
-
+#include <linux/gpio/machine.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
 
@@ -176,6 +176,17 @@ static inline void cmx255_init_nor(void) {}
 #endif
 
 #if defined(CONFIG_MTD_NAND_GPIO) || defined(CONFIG_MTD_NAND_GPIO_MODULE)
+
+static struct gpiod_lookup_table cmx255_nand_gpiod_table = {
+	.dev_id = "gpio-nand",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CS, "nce", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CLE, "cle", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_NAND_ALE, "ale", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO_NAND_RB, "rdy", GPIO_ACTIVE_HIGH),
+	},
+};
+
 static struct resource cmx255_nand_resource[] = {
 	[0] = {
 		.start = PXA_CS1_PHYS,
@@ -198,11 +209,6 @@ static struct mtd_partition cmx255_nand_parts[] = {
 };
 
 static struct gpio_nand_platdata cmx255_nand_platdata = {
-	.gpio_nce = GPIO_NAND_CS,
-	.gpio_cle = GPIO_NAND_CLE,
-	.gpio_ale = GPIO_NAND_ALE,
-	.gpio_rdy = GPIO_NAND_RB,
-	.gpio_nwp = -1,
 	.parts = cmx255_nand_parts,
 	.num_parts = ARRAY_SIZE(cmx255_nand_parts),
 	.chip_delay = 25,
@@ -220,6 +226,7 @@ static struct platform_device cmx255_nand = {
 
 static void __init cmx255_init_nand(void)
 {
+	gpiod_add_lookup_table(&cmx255_nand_gpiod_table);
 	platform_device_register(&cmx255_nand);
 }
 #else
diff --git a/arch/arm/mach-uniphier/Makefile b/arch/arm/mach-uniphier/Makefile
index 6bea3d3a2dd7..e69de29bb2d1 100644
--- a/arch/arm/mach-uniphier/Makefile
+++ b/arch/arm/mach-uniphier/Makefile
@@ -1 +0,0 @@
-obj- += dummy.o
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 13f81f971390..2257dfcc44cc 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -4,6 +4,7 @@
 #
 
 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -36,6 +37,8 @@ config KVM
 	select HAVE_KVM_MSI
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQ_ROUTING
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 861acbbac385..87c4f7ae24de 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile
index 09ba7e894bad..d8787c9a499e 100644
--- a/arch/mips/boot/dts/brcm/Makefile
+++ b/arch/mips/boot/dts/brcm/Makefile
@@ -35,6 +35,3 @@ dtb-$(CONFIG_DT_NONE) += \
 	bcm97435svmb.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/cavium-octeon/Makefile b/arch/mips/boot/dts/cavium-octeon/Makefile
index f5d01b31df50..24a8efcd7b03 100644
--- a/arch/mips/boot/dts/cavium-octeon/Makefile
+++ b/arch/mips/boot/dts/cavium-octeon/Makefile
@@ -2,6 +2,3 @@ dtb-$(CONFIG_CAVIUM_OCTEON_SOC) += octeon_3xxx.dtb octeon_68xx.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile
index 3eb2597a4d6c..441a3c16efb0 100644
--- a/arch/mips/boot/dts/img/Makefile
+++ b/arch/mips/boot/dts/img/Makefile
@@ -3,6 +3,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb
 
 dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb
 obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile
index 035769269cbc..6a31759839b4 100644
--- a/arch/mips/boot/dts/ingenic/Makefile
+++ b/arch/mips/boot/dts/ingenic/Makefile
@@ -3,6 +3,3 @@ dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb
 dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/lantiq/Makefile b/arch/mips/boot/dts/lantiq/Makefile
index 00e2e540ed3f..51ab9c1dff42 100644
--- a/arch/mips/boot/dts/lantiq/Makefile
+++ b/arch/mips/boot/dts/lantiq/Makefile
@@ -2,6 +2,3 @@ dtb-$(CONFIG_DT_EASY50712) += easy50712.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/mti/Makefile b/arch/mips/boot/dts/mti/Makefile
index 480af498a9dd..3508720cb6d9 100644
--- a/arch/mips/boot/dts/mti/Makefile
+++ b/arch/mips/boot/dts/mti/Makefile
@@ -3,6 +3,3 @@ dtb-$(CONFIG_MIPS_MALTA) += malta.dtb
 dtb-$(CONFIG_LEGACY_BOARD_SEAD3) += sead3.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/netlogic/Makefile b/arch/mips/boot/dts/netlogic/Makefile
index 2b99450d7433..d630b27950f0 100644
--- a/arch/mips/boot/dts/netlogic/Makefile
+++ b/arch/mips/boot/dts/netlogic/Makefile
@@ -6,6 +6,3 @@ dtb-$(CONFIG_DT_XLP_GVP) += xlp_gvp.dtb
 dtb-$(CONFIG_DT_XLP_RVP) += xlp_rvp.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/ni/Makefile b/arch/mips/boot/dts/ni/Makefile
index 6cd9c606f025..9e2c9faede47 100644
--- a/arch/mips/boot/dts/ni/Makefile
+++ b/arch/mips/boot/dts/ni/Makefile
@@ -1,4 +1 @@
 dtb-$(CONFIG_FIT_IMAGE_FDT_NI169445) += 169445.dtb
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
index a139a0fbd7b7..ba9bcef8fde9 100644
--- a/arch/mips/boot/dts/pic32/Makefile
+++ b/arch/mips/boot/dts/pic32/Makefile
@@ -5,6 +5,3 @@ dtb-$(CONFIG_DTB_PIC32_NONE) += \
 	pic32mzda_sk.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/qca/Makefile b/arch/mips/boot/dts/qca/Makefile
index 639adeac90af..4451cf45b0ad 100644
--- a/arch/mips/boot/dts/qca/Makefile
+++ b/arch/mips/boot/dts/qca/Makefile
@@ -5,6 +5,3 @@ dtb-$(CONFIG_ATH79) += ar9331_dpt_module.dtb
 dtb-$(CONFIG_ATH79) += ar9331_dragino_ms14.dtb
 dtb-$(CONFIG_ATH79) += ar9331_omega.dtb
 dtb-$(CONFIG_ATH79) += ar9331_tl_mr3020.dtb
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/ralink/Makefile b/arch/mips/boot/dts/ralink/Makefile
index 323c8bcfb602..94bee5b38b53 100644
--- a/arch/mips/boot/dts/ralink/Makefile
+++ b/arch/mips/boot/dts/ralink/Makefile
@@ -7,6 +7,3 @@ dtb-$(CONFIG_DTB_OMEGA2P) += omega2p.dtb
 dtb-$(CONFIG_DTB_VOCORE2) += vocore2.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile
index 616322405ade..9987e0e378c5 100644
--- a/arch/mips/boot/dts/xilfpga/Makefile
+++ b/arch/mips/boot/dts/xilfpga/Makefile
@@ -2,6 +2,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb
 
 obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
-
-# Force kbuild to make empty built-in.o if necessary
-obj- += dummy.o
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c282710f..fad0e6ff460f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -21,11 +21,6 @@
 #include <asm/opal.h>
 
 /*
- * For static allocation of some of the structures.
- */
-#define IMC_MAX_PMUS	32
-
-/*
  * Compatibility macros for IMC devices
  */
 #define IMC_DTB_COMPAT	"ibm,opal-in-memory-counters"
@@ -125,4 +120,5 @@ enum {
 extern int init_imc_pmu(struct device_node *parent,
 				struct imc_pmu *pmu_ptr, int pmu_id);
 extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
 #endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 602e0fde19b4..8bdc2f96c5d6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -735,8 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
 	 */
 	if ((version & 0xffffff00) == 0x004e0100)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
-	else if ((version & 0xffffefff) == 0x004e0200)
-		cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
+	else if ((version & 0xffffefff) == 0x004e0201)
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 }
 
 static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c9de03e0c1f1..d469224c4ada 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,6 +21,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
+#include <asm/setup.h>
 
 static int __patch_instruction(unsigned int *addr, unsigned int instr)
 {
@@ -146,11 +147,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
 	 * During early early boot patch_instruction is called
 	 * when text_poke_area is not ready, but we still need
 	 * to allow patching. We just do the plain old patching
-	 * We use slab_is_available and per cpu read * via this_cpu_read
-	 * of text_poke_area. Per-CPU areas might not be up early
-	 * this can create problems with just using this_cpu_read()
 	 */
-	if (!slab_is_available() || !this_cpu_read(text_poke_area))
+	if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
 		return __patch_instruction(addr, instr);
 
 	local_irq_save(flags);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 564fff06f5c1..23ec2c5e3b78 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -122,7 +122,8 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	return !slice_area_is_free(mm, start, end - start);
 }
 
-static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned long i;
 
@@ -133,15 +134,16 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 		if (!slice_low_has_vma(mm, i))
 			ret->low_slices |= 1u << i;
 
-	if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
+	if (high_limit <= SLICE_LOW_TOP)
 		return;
 
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
 		if (!slice_high_has_vma(mm, i))
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
@@ -156,8 +158,11 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
 			ret->low_slices |= 1u << i;
 
+	if (high_limit <= SLICE_LOW_TOP)
+		return;
+
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,6 +174,10 @@ static int slice_check_fit(struct mm_struct *mm,
 			   struct slice_mask mask, struct slice_mask available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+	/*
+	 * Make sure we just do bit compare only to the max
+	 * addr limit and not the full bit map size.
+	 */
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	bitmap_and(result, mask.high_slices,
@@ -472,7 +481,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	slice_mask_for_size(mm, psize, &good_mask);
+	slice_mask_for_size(mm, psize, &good_mask, high_limit);
 	slice_print_mask(" good_mask", good_mask);
 
 	/*
@@ -497,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		if (fixed)
 			slice_or_mask(&good_mask, &compat_mask);
 	}
@@ -530,11 +539,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 			return newaddr;
 		}
 	}
-
-	/* We don't fit in the good mask, check what other slices are
+	/*
+	 * We don't fit in the good mask, check what other slices are
 	 * empty and thus can be converted
 	 */
-	slice_mask_for_free(mm, &potential_mask);
+	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
@@ -744,17 +753,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 {
 	struct slice_mask mask, available;
 	unsigned int psize = mm->context.user_psize;
+	unsigned long high_limit = mm->context.slb_addr_limit;
 
 	if (radix_enabled())
 		return 0;
 
 	slice_range_to_mask(addr, len, &mask);
-	slice_mask_for_size(mm, psize, &available);
+	slice_mask_for_size(mm, psize, &available, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		slice_or_mask(&available, &compat_mask);
 	}
#endif
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 36344117c680..0ead3cd73caa 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -26,7 +26,7 @@
 */
 static DEFINE_MUTEX(nest_init_lock);
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
-static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
 struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
@@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
 static void nest_change_cpu_context(int old_cpu, int new_cpu)
 {
 	struct imc_pmu **pn = per_nest_pmu_arr;
-	int i;
 
 	if (old_cpu < 0 || new_cpu < 0)
 		return;
 
-	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+	while (*pn) {
 		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+		pn++;
+	}
 }
 
 static int ppc_nest_imc_cpu_offline(unsigned int cpu)
@@ -467,7 +468,7 @@ static int nest_imc_event_init(struct perf_event *event)
 	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
 	 * Get the base memory addresss for this cpu.
 	 */
-	chip_id = topology_physical_package_id(event->cpu);
+	chip_id = cpu_to_chip_id(event->cpu);
 	pcni = pmu->mem_info;
 	do {
 		if (pcni->id == chip_id) {
@@ -524,19 +525,19 @@ static int nest_imc_event_init(struct perf_event *event)
 */
 static int core_imc_mem_init(int cpu, int size)
 {
-	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	int nid, rc = 0, core_id = (cpu / threads_per_core);
 	struct imc_mem_info *mem_info;
 
 	/*
 	 * alloc_pages_node() will allocate memory for core in the
 	 * local node only.
 	 */
-	phys_id = topology_physical_package_id(cpu);
+	nid = cpu_to_node(cpu);
 	mem_info = &core_imc_pmu->mem_info[core_id];
 	mem_info->id = core_id;
 
 	/* We need only vbase for core counters */
-	mem_info->vbase = page_address(alloc_pages_node(phys_id,
+	mem_info->vbase = page_address(alloc_pages_node(nid,
 					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 					  __GFP_NOWARN, get_order(size)));
 	if (!mem_info->vbase)
@@ -797,14 +798,14 @@ static int core_imc_event_init(struct perf_event *event)
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
 	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
-	int phys_id = topology_physical_package_id(cpu_id);
+	int nid = cpu_to_node(cpu_id);
 
 	if (!local_mem) {
 		/*
 		 * This case could happen only once at start, since we dont
 		 * free the memory in cpu offline path.
 		 */
-		local_mem = page_address(alloc_pages_node(phys_id,
+		local_mem = page_address(alloc_pages_node(nid,
 				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 				  __GFP_NOWARN, get_order(size)));
 		if (!local_mem)
@@ -1194,6 +1195,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
+	kfree(per_nest_pmu_arr);
 	return;
 }
 
@@ -1218,6 +1220,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 			return -ENOMEM;
 
 		/* Needed for hotplug/migration */
+		if (!per_nest_pmu_arr) {
+			per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+						   sizeof(struct imc_pmu *),
+						   GFP_KERNEL);
+			if (!per_nest_pmu_arr)
+				return -ENOMEM;
+		}
 		per_nest_pmu_arr[pmu_index] = pmu_ptr;
 		break;
 	case IMC_DOMAIN_CORE:
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531fae20..465ea105b771 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void)
 	put_online_cpus();
 }
 
+int get_max_nest_dev(void)
+{
+	struct device_node *node;
+	u32 pmu_units = 0, type;
+
+	for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(node, "type", &type))
+			continue;
+
+		if (type == IMC_TYPE_CHIP)
+			pmu_units++;
+	}
+
+	return pmu_units;
+}
+
 static int opal_imc_counters_probe(struct platform_device *pdev)
 {
 	struct device_node *imc_dev = pdev->dev.of_node;
@@ -191,8 +207,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 			break;
 		}
 
-		if (!imc_pmu_create(imc_dev, pmu_count, domain))
-			pmu_count++;
+		if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
+			if (domain == IMC_DOMAIN_NEST)
+				pmu_count++;
+		}
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index c488621dbec3..aebbe95c9230 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -135,6 +135,7 @@ int chip_to_vas_id(int chipid)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(chip_to_vas_id);
 
 static int vas_probe(struct platform_device *pdev)
 {
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index d9280482a2f8..c68add8df3ae 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -10,7 +10,6 @@ config UML
 	select HAVE_DEBUG_KMEMLEAK
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
-	select GENERIC_IO
 	select GENERIC_CLOCKEVENTS
 	select HAVE_GCC_PLUGINS
 	select TTY # Needed for line.c
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b71daed3cca2..59e13a79c2e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	u32 ecx = msr->index;
 	u64 data = msr->data;
 
 	switch (ecx) {
+	case MSR_IA32_CR_PAT:
+		if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+			return 1;
+		vcpu->arch.pat = data;
+		svm->vmcb->save.g_pat = data;
+		mark_dirty(svm->vmcb, VMCB_NPT);
+		break;
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr);
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3522a989d0..714a0673ec3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
 static bool __read_mostly enable_vpid = 1;
 module_param_named(vpid, enable_vpid, bool, 0444);
 
+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
 static bool __read_mostly flexpriority_enabled = 1;
 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 
@@ -202,6 +205,10 @@ struct loaded_vmcs {
 	bool nmi_known_unmasked;
 	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
 	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
 		SECONDARY_EXEC_ENABLE_INVPCID;
 }
 
+static inline bool cpu_has_virtual_nmis(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static inline bool cpu_has_vmx_wbinvd_exit(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
 		(vmcs12->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
-	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
 static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
 {
 	return vmcs12->pin_based_vm_exec_control &
@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-		PIN_BASED_VIRTUAL_NMIS;
-	opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+		PIN_BASED_VMX_PREEMPTION_TIMER;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;
@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 
 	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+	if (!enable_vnmi)
+		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
 	/* Enable the preemption timer dynamically */
 	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	return pin_based_exec_ctrl;
@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
-	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+	if (!enable_vnmi ||
+	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
 		enable_irq_window(vcpu);
 		return;
 	}
@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (!enable_vnmi) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+		vmx->loaded_vmcs->vnmi_blocked_time = 0;
+	}
+
 	++vcpu->stat.nmi_injections;
 	vmx->loaded_vmcs->nmi_known_unmasked = false;
 
@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	bool masked;
 
+	if (!enable_vnmi)
+		return vmx->loaded_vmcs->soft_vnmi_blocked;
 	if (vmx->loaded_vmcs->nmi_known_unmasked)
 		return false;
 	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
-	if (masked)
-		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-			      GUEST_INTR_STATE_NMI);
-	else
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_NMI);
+	if (!enable_vnmi) {
+		if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+			vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+			vmx->loaded_vmcs->vnmi_blocked_time = 0;
+		}
+	} else {
+		vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+		if (masked)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+					GUEST_INTR_STATE_NMI);
+	}
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 	if (to_vmx(vcpu)->nested.nested_run_pending)
 		return 0;
 
+	if (!enable_vnmi &&
+	    to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+		return 0;
+
 	return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
 		   | GUEST_INTR_STATE_NMI));
@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * AAK134, BY25.
 	 */
 	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			enable_vnmi &&
 			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
 
@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu)
 {
+	WARN_ON_ONCE(!enable_vnmi);
 	vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
 			CPU_BASED_VIRTUAL_NMI_PENDING);
 	++vcpu->stat.nmi_window_exits;
@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 
+	if (!cpu_has_virtual_nmis())
+		enable_vnmi = 0;
+
 	/*
 	 * set_apic_access_page_addr() is used to reload apic access
 	 * page upon invalidation. No need to do anything if not
@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
 	}
 
 	/* Create a new VMCS */
-	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();
@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	 * "blocked by NMI" bit has to be set before next VM entry.
 	 */
 	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			enable_vnmi &&
 			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);
@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked)) {
+		if (vmx_interrupt_allowed(vcpu)) {
+			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+		} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+			   vcpu->arch.nmi_pending) {
+			/*
+			 * This CPU don't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 
 	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-	if (vmx->loaded_vmcs->nmi_known_unmasked)
-		return;
-	/*
-	 * Can't use vmx->exit_intr_info since we're not sure what
-	 * the exit reason is.
-	 */
-	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
-	vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
-	/*
-	 * SDM 3: 27.7.1.2 (September 2008)
-	 * Re-set bit "block by NMI" before VM entry if vmexit caused by
-	 * a guest IRET fault.
-	 * SDM 3: 23.2.2 (September 2008)
-	 * Bit 12 is undefined in any of the following cases:
-	 *  If the VM exit sets the valid bit in the IDT-vectoring
-	 *   information field.
-	 *  If the VM exit is due to a double fault.
-	 */
-	if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
-	    vector != DF_VECTOR && !idtv_info_valid)
-		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-			      GUEST_INTR_STATE_NMI);
-	else
-		vmx->loaded_vmcs->nmi_known_unmasked =
-			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
-			  & GUEST_INTR_STATE_NMI);
+	if (enable_vnmi) {
+		if (vmx->loaded_vmcs->nmi_known_unmasked)
+			return;
+		/*
+		 * Can't use vmx->exit_intr_info since we're not sure what
+		 * the exit reason is.
+		 */
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+		/*
+		 * SDM 3: 27.7.1.2 (September 2008)
+		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
+		 * a guest IRET fault.
+		 * SDM 3: 23.2.2 (September 2008)
+		 * Bit 12 is undefined in any of the following cases:
+		 *  If the VM exit sets the valid bit in the IDT-vectoring
+		 *   information field.
+		 *  If the VM exit is due to a double fault.
+		 */
+		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+		    vector != DF_VECTOR && !idtv_info_valid)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmx->loaded_vmcs->nmi_known_unmasked =
+				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+				  & GUEST_INTR_STATE_NMI);
+	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(),
+					      vmx->loaded_vmcs->entry_time));
 }
 
 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr, cr3, cr4;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->entry_time = ktime_get();
+
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
 	if (vmx->emulation_required)
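The enable_vnmi fallback above keeps the NMI-blocked state in software on CPUs without virtual-NMI support: vmx_vcpu_run() stamps entry_time before entering the guest, vmx_recover_nmi_blocking() accumulates the blocked interval on exit, and vmx_handle_exit() force-clears the state after one second of futile waiting. A minimal userspace sketch of that accounting pattern, using POSIX clock_gettime() in place of the kernel's ktime helpers; the struct and hook names here are illustrative, not KVM API:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define VNMI_TIMEOUT_NS 1000000000LL	/* give up after 1 s, as vmx_handle_exit() does */

struct soft_vnmi {
	int blocked;			/* mirrors soft_vnmi_blocked */
	int64_t blocked_time_ns;	/* mirrors vnmi_blocked_time */
	struct timespec entry_time;	/* mirrors entry_time */
};

static int64_t ns_since(const struct timespec *t)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - t->tv_sec) * 1000000000LL +
	       (now.tv_nsec - t->tv_nsec);
}

/* Stamp entry time before running the guest (vmx_vcpu_run()). */
static void vcpu_entry(struct soft_vnmi *s)
{
	if (s->blocked)
		clock_gettime(CLOCK_MONOTONIC, &s->entry_time);
}

/* Accumulate blocked time on exit (vmx_recover_nmi_blocking()). */
static void vcpu_exit(struct soft_vnmi *s)
{
	if (s->blocked)
		s->blocked_time_ns += ns_since(&s->entry_time);
}

/* Break out of a stuck NMI-blocked window (vmx_handle_exit()). */
static void check_timeout(struct soft_vnmi *s)
{
	if (s->blocked && s->blocked_time_ns > VNMI_TIMEOUT_NS) {
		fprintf(stderr, "breaking out of NMI-blocked state after 1 s\n");
		s->blocked = 0;
	}
}

int main(void)
{
	struct soft_vnmi vnmi = { .blocked = 1 };

	vcpu_entry(&vnmi);
	/* ... guest would run here ... */
	vcpu_exit(&vnmi);
	check_timeout(&vnmi);

	printf("blocked=%d after %" PRId64 " ns\n",
	       vnmi.blocked, vnmi.blocked_time_ns);
	return 0;
}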