Diffstat (limited to 'arch')
99 files changed, 2892 insertions(+), 410 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a208bfe367b5..61a0cb15067e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -380,7 +380,7 @@ config ARCH_EP93XX bool "EP93xx-based" select ARCH_HAS_HOLES_MEMORYMODEL select ARM_AMBA - select ARM_PATCH_PHYS_VIRT + imply ARM_PATCH_PHYS_VIRT select ARM_VIC select AUTO_ZRELADDR select CLKDEV_LOOKUP diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 895fa6cfa15a..563901e0ec07 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -75,7 +75,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -87,7 +87,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index a423e8ebfb37..67e72bc72e80 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -301,25 +301,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>, <&vpif_display_pins>; status = "okay"; - - /* VPIF capture port */ - port@0 { - vpif_input_ch0: endpoint@0 { - reg = <0>; - bus-width = <8>; - }; - - vpif_input_ch1: endpoint@1 { - reg = <1>; - bus-width = <8>; - data-shift = <8>; - }; - }; - - /* VPIF display port */ - port@1 { - vpif_output_ch0: endpoint { - bus-width = <8>; - }; - }; }; diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index b837fec70eec..a0f0916156e6 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -318,11 +318,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>; status = "okay"; - - /* VPIF capture port */ - port { - vpif_ch0: endpoint { - bus-width = <8>; - }; - }; }; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index 1865976db5f9..c72a2132aa82 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -68,6 +68,34 @@ DM816X_IOPAD(0x0d08, MUX_MODE0) /* USB1_DRVVBUS */ >; }; + + nandflash_pins: nandflash_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0b38, PULL_UP | MUX_MODE0) /* PINCTRL207 GPMC_CS0*/ + DM816X_IOPAD(0x0b60, PULL_ENA | MUX_MODE0) /* PINCTRL217 GPMC_ADV_ALE */ + DM816X_IOPAD(0x0b54, PULL_UP | PULL_ENA | MUX_MODE0) /* PINCTRL214 GPMC_OE_RE */ + DM816X_IOPAD(0x0b58, PULL_ENA | MUX_MODE0) /* PINCTRL215 GPMC_BE0_CLE */ + DM816X_IOPAD(0x0b50, PULL_UP | MUX_MODE0) /* PINCTRL213 GPMC_WE */ + DM816X_IOPAD(0x0b6c, MUX_MODE0) /* PINCTRL220 GPMC_WAIT */ + DM816X_IOPAD(0x0be4, PULL_ENA | MUX_MODE0) /* PINCTRL250 GPMC_CLK */ + DM816X_IOPAD(0x0ba4, MUX_MODE0) /* PINCTRL234 GPMC_D0 */ + DM816X_IOPAD(0x0ba8, MUX_MODE0) /* PINCTRL234 GPMC_D1 */ + DM816X_IOPAD(0x0bac, MUX_MODE0) /* PINCTRL234 GPMC_D2 */ + DM816X_IOPAD(0x0bb0, MUX_MODE0) /* PINCTRL234 GPMC_D3 */ + DM816X_IOPAD(0x0bb4, MUX_MODE0) /* PINCTRL234 GPMC_D4 */ + DM816X_IOPAD(0x0bb8, MUX_MODE0) /* PINCTRL234 GPMC_D5 */ + DM816X_IOPAD(0x0bbc, MUX_MODE0) /* PINCTRL234 GPMC_D6 */ + DM816X_IOPAD(0x0bc0, MUX_MODE0) /* PINCTRL234 GPMC_D7 */ + DM816X_IOPAD(0x0bc4, MUX_MODE0) /* PINCTRL234 GPMC_D8 */ + DM816X_IOPAD(0x0bc8, MUX_MODE0) /* PINCTRL234 GPMC_D9 */ + DM816X_IOPAD(0x0bcc, MUX_MODE0) /* PINCTRL234 GPMC_D10 */ + 
DM816X_IOPAD(0x0bd0, MUX_MODE0) /* PINCTRL234 GPMC_D11 */ + DM816X_IOPAD(0x0bd4, MUX_MODE0) /* PINCTRL234 GPMC_D12 */ + DM816X_IOPAD(0x0bd8, MUX_MODE0) /* PINCTRL234 GPMC_D13 */ + DM816X_IOPAD(0x0bdc, MUX_MODE0) /* PINCTRL234 GPMC_D14 */ + DM816X_IOPAD(0x0be0, MUX_MODE0) /* PINCTRL234 GPMC_D15 */ + >; + }; }; &i2c1 { @@ -90,6 +118,8 @@ &gpmc { ranges = <0 0 0x04000000 0x01000000>; /* CS0: 16MB for NAND */ + pinctrl-names = "default"; + pinctrl-0 = <&nandflash_pins>; nand@0,0 { compatible = "ti,omap2-nand"; @@ -98,9 +128,11 @@ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ #address-cells = <1>; #size-cells = <1>; ti,nand-ecc-opt = "bch8"; + ti,elm-id = <&elm>; nand-bus-width = <16>; gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; @@ -164,7 +196,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <4>; cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 59cbf958fcc3..566b2a8c8b96 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -145,7 +145,7 @@ }; elm: elm@48080000 { - compatible = "ti,816-elm"; + compatible = "ti,am3352-elm"; ti,hwmods = "elm"; reg = <0x48080000 0x2000>; interrupts = <4>; diff --git a/arch/arm/boot/dts/dra71-evm.dts b/arch/arm/boot/dts/dra71-evm.dts index 4d57a55473af..a6298eb56978 100644 --- a/arch/arm/boot/dts/dra71-evm.dts +++ b/arch/arm/boot/dts/dra71-evm.dts @@ -190,7 +190,7 @@ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; dp83867_1: ethernet-phy@3 { @@ -198,7 +198,7 @@ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; ti,tx-internal-delay = <DP83867_RGMIIDCTL_250_PS>; ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_8_B_NIB>; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; }; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index 497a9470c888..5739389f5bb8 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -59,6 +59,9 @@ compatible = "samsung,exynos4210-audss-clock"; reg = <0x03810000 0x0C>; #clock-cells = <1>; + clocks = <&clock CLK_FIN_PLL>, <&clock CLK_FOUT_EPLL>, + <&clock CLK_SCLK_AUDIO0>, <&clock CLK_SCLK_AUDIO0>; + clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in"; }; i2s0: i2s@03830000 { diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 2484f11761ea..858e1fed762a 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1126,8 +1126,8 @@ }; }; - gpu: mali@ffa30000 { - compatible = "rockchip,rk3288-mali", "arm,mali-t760", "arm,mali-midgard"; + gpu: gpu@ffa30000 { + compatible = "rockchip,rk3288-mali", "arm,mali-t760"; reg = <0xffa30000 0x10000>; interrupts = <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 8923ba625b76..19a8f4fcfab5 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -44,7 +44,9 @@ #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/clock/sun8i-a83t-ccu.h> #include <dt-bindings/clock/sun8i-r-ccu.h> 
+#include <dt-bindings/reset/sun8i-a83t-ccu.h> / { interrupt-parent = <&gic>; @@ -175,8 +177,8 @@ compatible = "allwinner,sun8i-a83t-dma"; reg = <0x01c02000 0x1000>; interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu 21>; - resets = <&ccu 7>; + clocks = <&ccu CLK_BUS_DMA>; + resets = <&ccu RST_BUS_DMA>; #dma-cells = <1>; }; @@ -195,7 +197,7 @@ <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; reg = <0x01c20800 0x400>; - clocks = <&ccu 45>, <&osc24M>, <&osc16Md512>; + clocks = <&ccu CLK_BUS_PIO>, <&osc24M>, <&osc16Md512>; clock-names = "apb", "hosc", "losc"; gpio-controller; interrupt-controller; @@ -247,8 +249,8 @@ "allwinner,sun8i-h3-spdif"; reg = <0x01c21000 0x400>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu 44>, <&ccu 76>; - resets = <&ccu 32>; + clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; + resets = <&ccu RST_BUS_SPDIF>; clock-names = "apb", "spdif"; dmas = <&dma 2>; dma-names = "tx"; @@ -263,8 +265,8 @@ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>; reg-shift = <2>; reg-io-width = <4>; - clocks = <&ccu 53>; - resets = <&ccu 40>; + clocks = <&ccu CLK_BUS_UART0>; + resets = <&ccu RST_BUS_UART0>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 6f2162608006..d38282b9e5d4 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -394,7 +394,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun8i-h3-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x104>; + reg = <0x01c30000 0x10000>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; diff --git a/arch/arm/boot/dts/tango4-vantage-1172.dts b/arch/arm/boot/dts/tango4-vantage-1172.dts index 86d8df98802f..13bcc460bcb2 100644 --- a/arch/arm/boot/dts/tango4-vantage-1172.dts +++ b/arch/arm/boot/dts/tango4-vantage-1172.dts @@ -22,7 +22,7 @@ }; ð0 { - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; phy-handle = <ð0_phy>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3f2eb76243e3..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->fullmm = !(start | (end+1)); @@ -166,8 +167,14 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index b5625d009288..e568c8c6f69c 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1166,7 +1166,7 @@ static struct tvp514x_platform_data tvp5146_pdata = { #define TVP514X_STD_ALL (V4L2_STD_NTSC | V4L2_STD_PAL) -static const struct vpif_input da850_ch0_inputs[] = { +static struct vpif_input da850_ch0_inputs[] = { { .input = { .index = 0, @@ -1181,7 +1181,7 @@ 
static const struct vpif_input da850_ch0_inputs[] = { }, }; -static const struct vpif_input da850_ch1_inputs[] = { +static struct vpif_input da850_ch1_inputs[] = { { .input = { .index = 0, diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index f5dce9b4e617..f77a4f766050 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -218,6 +218,15 @@ int clk_set_parent(struct clk *clk, struct clk *parent) } EXPORT_SYMBOL(clk_set_parent); +struct clk *clk_get_parent(struct clk *clk) +{ + if (!clk) + return NULL; + + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + int clk_register(struct clk *clk) { if (clk == NULL || IS_ERR(clk)) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 39ef3b613912..beec5f16443a 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -475,6 +475,26 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + static char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; static char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index 7a0c13bf4269..844e8ac593e2 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -95,8 +95,10 @@ static inline void __indirect_writeb(u8 value, volatile void __iomem *p) } static inline void __indirect_writesb(volatile void __iomem *bus_addr, - const u8 *vaddr, int count) + const void *p, int count) { + const u8 *vaddr = p; + while (count--) writeb(*vaddr++, bus_addr); } @@ -118,8 +120,10 @@ static inline void __indirect_writew(u16 value, volatile void __iomem *p) } static inline void __indirect_writesw(volatile void __iomem *bus_addr, - const u16 *vaddr, int count) + const void *p, int count) { + const u16 *vaddr = p; + while (count--) writew(*vaddr++, bus_addr); } @@ -137,8 +141,9 @@ static inline void __indirect_writel(u32 value, volatile void __iomem *p) } static inline void __indirect_writesl(volatile void __iomem *bus_addr, - const u32 *vaddr, int count) + const void *p, int count) { + const u32 *vaddr = p; while (count--) writel(*vaddr++, bus_addr); } @@ -160,8 +165,10 @@ static inline u8 __indirect_readb(const volatile void __iomem *p) } static inline void __indirect_readsb(const volatile void __iomem *bus_addr, - u8 *vaddr, u32 count) + void *p, u32 count) { + u8 *vaddr = p; + while (count--) *vaddr++ = readb(bus_addr); } @@ -183,8 +190,10 @@ static inline u16 __indirect_readw(const volatile void __iomem *p) } static inline void __indirect_readsw(const volatile void __iomem *bus_addr, - u16 *vaddr, u32 count) + void *p, u32 count) { + u16 *vaddr = p; + while (count--) *vaddr++ = readw(bus_addr); } @@ -204,8 +213,10 @@ static inline u32 __indirect_readl(const volatile void __iomem *p) } static inline void __indirect_readsl(const volatile void __iomem *bus_addr, - u32 *vaddr, u32 count) + void *p, u32 count) { + u32 *vaddr = p; + while (count--) *vaddr++ = readl(bus_addr); } @@ -523,8 +534,15 @@ static inline void iowrite32_rep(void __iomem *addr, const void *vaddr, #endif } -#define ioport_map(port, nr) ((void 
__iomem*)(port + PIO_OFFSET)) -#define ioport_unmap(addr) +#define ioport_map(port, nr) ioport_map(port, nr) +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return ((void __iomem*)((port) + PIO_OFFSET)); +} +#define ioport_unmap(addr) ioport_unmap(addr) +static inline void ioport_unmap(void __iomem *addr) +{ +} #endif /* CONFIG_PCI */ #endif /* __ASM_ARM_ARCH_IO_H */ diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 3330ac7cfbef..671c7a09ab3d 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -238,7 +238,7 @@ void pxa_usb_phy_deinit(void __iomem *phy_reg) #endif #if IS_ENABLED(CONFIG_USB_SUPPORT) -static u64 usb_dma_mask = ~(u32)0; +static u64 __maybe_unused usb_dma_mask = ~(u32)0; #if IS_ENABLED(CONFIG_USB_MV_UDC) struct resource pxa168_u2o_resources[] = { diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c index e62273aacb43..4ffbbd217e82 100644 --- a/arch/arm/mach-mvebu/platsmp.c +++ b/arch/arm/mach-mvebu/platsmp.c @@ -211,7 +211,7 @@ static int mv98dx3236_resume_set_cpu_boot_addr(int hw_cpu, void *boot_addr) return PTR_ERR(base); writel(0, base + MV98DX3236_CPU_RESUME_CTRL_REG); - writel(virt_to_phys(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); + writel(__pa_symbol(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); iounmap(base); diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 6613a6ff5dbc..6cbc69c92913 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -510,6 +510,7 @@ static void __init ams_delta_init(void) static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) { struct modem_private_data *priv = port->private_data; + int ret; if (IS_ERR(priv->regulator)) return; @@ -518,9 +519,16 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) return; if (state == 0) - regulator_enable(priv->regulator); + ret = regulator_enable(priv->regulator); else if (old == 0) - regulator_disable(priv->regulator); + ret = regulator_disable(priv->regulator); + else + ret = 0; + + if (ret) + dev_warn(port->dev, + "ams_delta modem_pm: failed to %sable regulator: %d\n", + state ? "dis" : "en", ret); } static struct plat_serial8250_port ams_delta_modem_ports[] = { diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 4dfb99504810..95ac1929aede 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -441,13 +441,11 @@ static struct spi_board_info __initdata mistral_boardinfo[] = { { .chip_select = 0, } }; -#ifdef CONFIG_PM static irqreturn_t osk_mistral_wake_interrupt(int irq, void *ignored) { return IRQ_HANDLED; } -#endif static void __init osk_mistral_init(void) { @@ -515,7 +513,6 @@ static void __init osk_mistral_init(void) gpio_direction_input(OMAP_MPUIO(2)); irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING); -#ifdef CONFIG_PM /* share the IRQ in case someone wants to use the * button for more than wakeup from system sleep. 
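A quick aside on the ioport_map() rework above: defining a macro with the same name as the static inline is a standard kernel idiom that lets generic headers detect an architecture override with #ifdef, while the inline restores the argument type checking the old object-like macro lacked. A minimal standalone sketch, using generic names and an offset that are not taken from this patch:

#define example_ioport_map example_ioport_map
static inline void __iomem *example_ioport_map(unsigned long port,
					       unsigned int nr)
{
	/* hypothetical fixed PIO window, standing in for PIO_OFFSET */
	return (void __iomem *)(port + 0x10000UL);
}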
*/ @@ -529,7 +526,6 @@ static void __init osk_mistral_init(void) ret); } else enable_irq_wake(irq); -#endif } else printk(KERN_ERR "OSK+Mistral: wakeup button is awol\n"); diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index dc9e34e670a2..b1e661bb5521 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -28,7 +28,7 @@ static const struct of_device_id omap_dt_match_table[] __initconst = { { } }; -static void __init omap_generic_init(void) +static void __init __maybe_unused omap_generic_init(void) { pdata_quirks_init(omap_dt_match_table); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 1d739d1a0a65..1cd20e4d56b0 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -410,7 +410,7 @@ static int _set_hwmod_postsetup_state(struct omap_hwmod *oh, void *data) return omap_hwmod_set_postsetup_state(oh, *(u8 *)data); } -static void __init omap_hwmod_init_postsetup(void) +static void __init __maybe_unused omap_hwmod_init_postsetup(void) { u8 postsetup_state; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index d44e0e2f1106..841ba19d64a6 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -486,7 +486,6 @@ int __init omap3_pm_init(void) ret = request_irq(omap_prcm_event_to_irq("io"), _prcm_int_handle_io, IRQF_SHARED | IRQF_NO_SUSPEND, "pm_io", omap3_pm_init); - enable_irq(omap_prcm_event_to_irq("io")); if (ret) { pr_err("pm: Failed to request pm_io irq\n"); diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 382e236fbfd9..64f6451499a7 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -692,7 +692,6 @@ static int omap3xxx_prm_late_init(void) { struct device_node *np; int irq_num; - int ret; if (!(prm_features & PRM_HAS_IO_WAKEUP)) return 0; @@ -712,12 +711,8 @@ static int omap3xxx_prm_late_init(void) } omap3xxx_prm_enable_io_wakeup(); - ret = omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); - if (!ret) - irq_set_status_flags(omap_prcm_event_to_irq("io"), - IRQ_NOAUTOEN); - return ret; + return omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); } static void __exit omap3xxx_prm_exit(void) diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 87e86a4a9ead..3ab5df1ce900 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -337,6 +337,27 @@ static void omap44xx_prm_reconfigure_io_chain(void) } /** + * omap44xx_prm_enable_io_wakeup - enable wakeup events from I/O wakeup latches + * + * Activates the I/O wakeup event latches and allows events logged by + * those latches to signal a wakeup event to the PRCM. For I/O wakeups + * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and + * omap44xx_prm_reconfigure_io_chain() must be called. No return value. + */ +static void __init omap44xx_prm_enable_io_wakeup(void) +{ + s32 inst = omap4_prmst_get_prm_dev_inst(); + + if (inst == PRM_INSTANCE_UNKNOWN) + return; + + omap4_prm_rmw_inst_reg_bits(OMAP4430_GLOBAL_WUEN_MASK, + OMAP4430_GLOBAL_WUEN_MASK, + inst, + omap4_prcm_irq_setup.pm_ctrl); +} + +/** * omap44xx_prm_read_reset_sources - return the last SoC reset source * * Return a u32 representing the last reset sources of the SoC. 
The @@ -668,6 +689,8 @@ struct pwrdm_ops omap4_pwrdm_operations = { .pwrdm_has_voltdm = omap4_check_vcvp, }; +static int omap44xx_prm_late_init(void); + /* * XXX document */ @@ -675,6 +698,7 @@ static struct prm_ll_data omap44xx_prm_ll_data = { .read_reset_sources = &omap44xx_prm_read_reset_sources, .was_any_context_lost_old = &omap44xx_prm_was_any_context_lost_old, .clear_context_loss_flags_old = &omap44xx_prm_clear_context_loss_flags_old, + .late_init = &omap44xx_prm_late_init, .assert_hardreset = omap4_prminst_assert_hardreset, .deassert_hardreset = omap4_prminst_deassert_hardreset, .is_hardreset_asserted = omap4_prminst_is_hardreset_asserted, @@ -711,6 +735,37 @@ int __init omap44xx_prm_init(const struct omap_prcm_init_data *data) return prm_register(&omap44xx_prm_ll_data); } +static int omap44xx_prm_late_init(void) +{ + int irq_num; + + if (!(prm_features & PRM_HAS_IO_WAKEUP)) + return 0; + + irq_num = of_irq_get(prm_init_data->np, 0); + /* + * Already have OMAP4 IRQ num. For all other platforms, we need + * IRQ numbers from DT + */ + if (irq_num < 0 && !(prm_init_data->flags & PRM_IRQ_DEFAULT)) { + if (irq_num == -EPROBE_DEFER) + return irq_num; + + /* Have nothing to do */ + return 0; + } + + /* Once OMAP4 DT is filled as well */ + if (irq_num >= 0) { + omap4_prcm_irq_setup.irq = irq_num; + omap4_prcm_irq_setup.xlate_irq = NULL; + } + + omap44xx_prm_enable_io_wakeup(); + + return omap_prcm_register_chain_handler(&omap4_prcm_irq_setup); +} + static void __exit omap44xx_prm_exit(void) { prm_unregister(&omap44xx_prm_ll_data); diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index 8cadb302a7d2..ffe05c27087e 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -15,7 +15,7 @@ #include <linux/of_platform.h> #include "common.h" -static void __init sirfsoc_init_late(void) +static void __init __maybe_unused sirfsoc_init_late(void) { sirfsoc_pm_init(); } diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 76fbc115ec33..ce7d97babb0f 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -566,6 +566,7 @@ config MACH_ICONTROL config ARCH_PXA_ESERIES bool "PXA based Toshiba e-series PDAs" select FB_W100 + select FB select PXA25x config MACH_E330 diff --git a/arch/arm/mach-pxa/include/mach/mtd-xip.h b/arch/arm/mach-pxa/include/mach/mtd-xip.h index 990d2bf2fb45..9bf4ea6a6f74 100644 --- a/arch/arm/mach-pxa/include/mach/mtd-xip.h +++ b/arch/arm/mach-pxa/include/mach/mtd-xip.h @@ -17,11 +17,15 @@ #include <mach/regs-ost.h> -#define xip_irqpending() (ICIP & ICMR) +/* restored July 2017, this did not build since 2011! 
*/ + +#define ICIP io_p2v(0x40d00000) +#define ICMR io_p2v(0x40d00004) +#define xip_irqpending() (readl(ICIP) & readl(ICMR)) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl(OSCR) +#define xip_elapsed_since(x) (signed)((readl(OSCR) - (x)) / 4) /* * xip_cpu_idle() is used when waiting for a delay equal or larger than diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h index aa79fa47373a..622d4e5df029 100644 --- a/arch/arm/mach-rpc/include/mach/hardware.h +++ b/arch/arm/mach-rpc/include/mach/hardware.h @@ -25,8 +25,8 @@ * *_SIZE is the size of the region * *_BASE is the virtual address */ -#define RAM_SIZE 0x10000000 -#define RAM_START 0x10000000 +#define RPC_RAM_SIZE 0x10000000 +#define RPC_RAM_START 0x10000000 #define EASI_SIZE 0x08000000 /* EASI I/O */ #define EASI_START 0x08000000 diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index 0db46895c82a..7d52cd97d96e 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -35,6 +35,31 @@ struct clk clk_##_name = { \ static DEFINE_SPINLOCK(clocks_lock); +/* Dummy clk routine to build generic kernel parts that may be using them */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + return clk_get_rate(clk); +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + static void clk_gpio27_enable(struct clk *clk) { /* diff --git a/arch/arm/mach-sa1100/include/mach/mtd-xip.h b/arch/arm/mach-sa1100/include/mach/mtd-xip.h index b3d684098fbf..cb76096a2e36 100644 --- a/arch/arm/mach-sa1100/include/mach/mtd-xip.h +++ b/arch/arm/mach-sa1100/include/mach/mtd-xip.h @@ -20,7 +20,7 @@ #define xip_irqpending() (ICIP & ICMR) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl_relaxed(OSCR) +#define xip_elapsed_since(x) (signed)((readl_relaxed(OSCR) - (x)) / 4) #endif /* __ARCH_SA1100_MTD_XIP_H__ */ diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 73e3adbc1330..44438f344dc8 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -67,8 +67,12 @@ static int regulator_quirk_notify(struct notifier_block *nb, { struct device *dev = data; struct i2c_client *client; + static bool done; u32 mon; + if (done) + return 0; + mon = ioread32(irqc + IRQC_MONITOR); dev_dbg(dev, "%s: %ld, IRQC_MONITOR = 0x%x\n", __func__, action, mon); if (mon & REGULATOR_IRQ_MASK) @@ -99,7 +103,7 @@ static int regulator_quirk_notify(struct notifier_block *nb, remove: dev_info(dev, "IRQ2 is not asserted, removing quirk\n"); - bus_unregister_notifier(&i2c_bus_type, nb); + done = true; iounmap(irqc); return 0; } diff --git a/arch/arm/mach-w90x900/clock.c b/arch/arm/mach-w90x900/clock.c index ac6fd1a2cb59..3f93fac98d97 100644 --- a/arch/arm/mach-w90x900/clock.c +++ b/arch/arm/mach-w90x900/clock.c @@ -93,3 +93,32 @@ void nuc900_subclk_enable(struct clk *clk, int enable) 
__raw_writel(clken, W90X900_VA_CLKPWR + SUBCLK); } + +/* dummy functions, should not be called */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + WARN_ON(clk); + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 9d00622ce845..bd0f33b77f57 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -452,7 +452,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun50i-a64-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x100>; + reg = <0x01c30000 0x10000>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 35b8c88c3220..738ed689ff69 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -400,7 +400,7 @@ }; pwm_AO_ab: pwm@550 { - compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; + compatible = "amlogic,meson-gx-ao-pwm", "amlogic,meson-gxbb-ao-pwm"; reg = <0x0 0x00550 0x0 0x10>; #pwm-cells = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 72c5a9f64ca8..94567eb17875 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -109,8 +109,8 @@ status = "okay"; pinctrl-0 = <&pwm_ao_a_3_pins>, <&pwm_ao_b_pins>; pinctrl-names = "default"; - clocks = <&clkc CLKID_FCLK_DIV4>; - clock-names = "clkin0"; + clocks = <&xtal> , <&xtal>; + clock-names = "clkin0", "clkin1" ; }; &pwm_ef { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 890821d6e52b..266fbcf3e47f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -10,12 +10,20 @@ #include <dt-bindings/input/input.h> -#include "meson-gxl-s905x-p212.dtsi" +#include "meson-gxl-s905x.dtsi" / { compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl"; model = "Libre Technology CC"; + aliases { + serial0 = &uart_AO; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + cvbs-connector { compatible = "composite-video-connector"; @@ -26,6 +34,11 @@ }; }; + emmc_pwrseq: emmc-pwrseq { + compatible = "mmc-pwrseq-emmc"; + reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>; + }; + hdmi-connector { compatible = "hdmi-connector"; type = "a"; @@ -53,6 +66,39 @@ linux,default-trigger = "heartbeat"; }; }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x0 0x0 0x80000000>; + }; + + vcc_3v3: regulator-vcc_3v3 { + compatible = "regulator-fixed"; + regulator-name = "VCC_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + vcc_card: regulator-vcc-card { + compatible = "regulator-gpio"; + + regulator-name = "VCC_CARD"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + + 
gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_HIGH>; + gpios-states = <0>; + + states = <3300000 0>, + <1800000 1>; + }; + + vddio_boot: regulator-vddio_boot { + compatible = "regulator-fixed"; + regulator-name = "VDDIO_BOOT"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; }; &cvbs_vdac_port { @@ -61,6 +107,16 @@ }; }; +ðmac { + status = "okay"; +}; + +&ir { + status = "okay"; + pinctrl-0 = <&remote_input_ao_pins>; + pinctrl-names = "default"; +}; + &hdmi_tx { status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; @@ -73,20 +129,43 @@ }; }; -/* - * The following devices exists but are exposed on the general - * purpose GPIO header. End user may well decide to use those pins - * for another purpose - */ +/* SD card */ +&sd_emmc_b { + status = "okay"; + pinctrl-0 = <&sdcard_pins>; + pinctrl-names = "default"; + + bus-width = <4>; + cap-sd-highspeed; + max-frequency = <100000000>; + disable-wp; + + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; + cd-inverted; -&sd_emmc_a { - status = "disabled"; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vcc_card>; }; -&uart_A { - status = "disabled"; +/* eMMC */ +&sd_emmc_c { + status = "okay"; + pinctrl-0 = <&emmc_pins>; + pinctrl-names = "default"; + + bus-width = <8>; + cap-mmc-highspeed; + max-frequency = <50000000>; + non-removable; + disable-wp; + + mmc-pwrseq = <&emmc_pwrseq>; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vddio_boot>; }; -&wifi32k { - status = "disabled"; +&uart_AO { + status = "okay"; + pinctrl-0 = <&uart_ao_a_pins>; + pinctrl-names = "default"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index dbcc3d4e2ed5..51763d674050 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -219,7 +219,7 @@ reg = <0x18800 0x100>, <0x18C00 0x20>; gpiosb: gpio { #gpio-cells = <2>; - gpio-ranges = <&pinctrl_sb 0 0 29>; + gpio-ranges = <&pinctrl_sb 0 0 30>; gpio-controller; interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index 726528ce54e9..4c68605675a8 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -270,6 +270,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cpm_clk 1 26>; + dma-coherent; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 95f8e5f607f6..923f354b02f0 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -64,7 +64,7 @@ compatible = "marvell,armada-8k-rtc"; reg = <0x284000 0x20>, <0x284080 0x24>; reg-names = "rtc", "rtc-soc"; - interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <ICU_GRP_NSR 77 IRQ_TYPE_LEVEL_HIGH>; }; cps_ethernet: ethernet@0 { @@ -261,6 +261,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cps_clk 1 26>; + dma-coherent; /* * The cryptographic engine found on the cp110 * master is enabled by default at the SoC diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index aef35e0b685a..a451996f590a 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -508,7 +508,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - 
clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index b5c6ee07d7f9..d1a3f3b7a0ab 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -281,7 +281,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6c7d147eed54..b4ca115b3be1 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -476,6 +476,7 @@ CONFIG_QCOM_CLK_SMD_RPM=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8994=y CONFIG_MSM_MMCC_8996=y +CONFIG_HWSPINLOCK=y CONFIG_HWSPINLOCK_QCOM=y CONFIG_ARM_MHU=y CONFIG_PLATFORM_MHU=y diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 32f82723338a..ef39dcb9ca6a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -64,8 +64,10 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) << VA_BITS) -#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) +#define VA_START (UL(0xffffffffffffffff) - \ + (UL(1) << VA_BITS) + 1) +#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ + (UL(1) << (VA_BITS - 1)) + 1) #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..8a62648848e5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -523,7 +523,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; - pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0)); + pt_regs_write_reg(regs, rt, arch_timer_get_rate()); regs->pc += 4; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 77862881ae86..2e070d3baf9f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - kvm_pmu_overflow_set(vcpu, p->regval & mask); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c7861c9864e6..2509e4fe6992 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -163,26 +163,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* only preserve the access flags and write permission */ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - /* - * PTE_RDONLY is cleared by default in the asm below, so set it in - * back if necessary (read-only or clean PTE). - */ + /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* * Setting the flags must be done atomically to avoid racing with the - * hardware update of the access/dirty state. + * hardware update of the access/dirty state. The PTE_RDONLY bit must + * be set to the most permissive (lowest value) of *ptep and entry + * (calculated as: a & b == ~(~a | ~b)). 
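As a sanity check of the identity quoted in the comment above, here is a small C model of the resulting eor/orr/eor sequence, reduced to one flag bit (illustrative only, not kernel code):

static unsigned long merge_pte(unsigned long old, unsigned long entry,
			       unsigned long rdonly_bit)
{
	entry ^= rdonly_bit;	/* pre-negate RDONLY in the new value   */
	old   ^= rdonly_bit;	/* negate RDONLY in the stored PTE      */
	old   |= entry;		/* set flags; RDONLY is now ~a | ~b     */
	old   ^= rdonly_bit;	/* negate back: RDONLY = ~(~a|~b) = a&b */
	return old;
}

For the RDONLY bit this yields the AND of the two values, i.e. read-only only if both copies are read-only, which is the most permissive result; every other bit is simply OR-ed in, as before the change.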
*/ + pte_val(entry) ^= PTE_RDONLY; asm volatile("// ptep_set_access_flags\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_RDONLY\n" + " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" " orr %0, %0, %4 // set flags\n" + " eor %0, %0, %3 // negate final PTE_RDONLY\n" " stxr %w1, %0, %2\n" " cbnz %w1, 1b\n" : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + : "L" (PTE_RDONLY), "r" (pte_val(entry))); flush_tlb_fix_spurious_fault(vma, address); return 1; diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index fced197b9626..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); @@ -185,8 +186,11 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start * collected. */ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. diff --git a/arch/mips/include/asm/mach-ralink/ralink_regs.h b/arch/mips/include/asm/mach-ralink/ralink_regs.h index 9df1a53bcb36..b4e7dfa214eb 100644 --- a/arch/mips/include/asm/mach-ralink/ralink_regs.h +++ b/arch/mips/include/asm/mach-ralink/ralink_regs.h @@ -13,6 +13,8 @@ #ifndef _RALINK_REGS_H_ #define _RALINK_REGS_H_ +#include <linux/io.h> + enum ralink_soc_type { RALINK_UNKNOWN = 0, RT2880_SOC, diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c new file mode 100644 index 000000000000..3f87b96da5c4 --- /dev/null +++ b/arch/mips/net/ebpf_jit.c @@ -0,0 +1,1950 @@ +/* + * Just-In-Time compiler for eBPF filters on MIPS + * + * Copyright (c) 2017 Cavium, Inc. + * + * Based on code from: + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/slab.h> +#include <asm/bitops.h> +#include <asm/byteorder.h> +#include <asm/cacheflush.h> +#include <asm/cpu-features.h> +#include <asm/uasm.h> + +/* Registers used by JIT */ +#define MIPS_R_ZERO 0 +#define MIPS_R_AT 1 +#define MIPS_R_V0 2 /* BPF_R0 */ +#define MIPS_R_V1 3 +#define MIPS_R_A0 4 /* BPF_R1 */ +#define MIPS_R_A1 5 /* BPF_R2 */ +#define MIPS_R_A2 6 /* BPF_R3 */ +#define MIPS_R_A3 7 /* BPF_R4 */ +#define MIPS_R_A4 8 /* BPF_R5 */ +#define MIPS_R_T4 12 /* BPF_AX */ +#define MIPS_R_T5 13 +#define MIPS_R_T6 14 +#define MIPS_R_T7 15 +#define MIPS_R_S0 16 /* BPF_R6 */ +#define MIPS_R_S1 17 /* BPF_R7 */ +#define MIPS_R_S2 18 /* BPF_R8 */ +#define MIPS_R_S3 19 /* BPF_R9 */ +#define MIPS_R_S4 20 /* BPF_TCC */ +#define MIPS_R_S5 21 +#define MIPS_R_S6 22 +#define MIPS_R_S7 23 +#define MIPS_R_T8 24 +#define MIPS_R_T9 25 +#define MIPS_R_SP 29 +#define MIPS_R_RA 31 + +/* eBPF flags */ +#define EBPF_SAVE_S0 BIT(0) +#define EBPF_SAVE_S1 BIT(1) +#define EBPF_SAVE_S2 BIT(2) +#define EBPF_SAVE_S3 BIT(3) +#define EBPF_SAVE_S4 BIT(4) +#define EBPF_SAVE_RA BIT(5) +#define EBPF_SEEN_FP BIT(6) +#define EBPF_SEEN_TC BIT(7) +#define EBPF_TCC_IN_V1 BIT(8) + +/* + * For the mips64 ISA, we need to track the value range or type for + * each JIT register. The BPF machine requires zero extended 32-bit + * values, but the mips64 ISA requires sign extended 32-bit values. + * At each point in the BPF program we track the state of every + * register so that we can zero extend or sign extend as the BPF + * semantics require. + */ +enum reg_val_type { + /* uninitialized */ + REG_UNKNOWN, + /* not known to be 32-bit compatible. */ + REG_64BIT, + /* 32-bit compatible, no truncation needed for 64-bit ops. */ + REG_64BIT_32BIT, + /* 32-bit compatible, need truncation for 64-bit ops. */ + REG_32BIT, + /* 32-bit zero extended. */ + REG_32BIT_ZERO_EX, + /* 32-bit no sign/zero extension needed. */ + REG_32BIT_POS +}; + +/* + * high bit of offsets indicates if long branch conversion done at + * this insn. + */ +#define OFFSETS_B_CONV BIT(31) + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @stack_size: eBPF stack size + * @tmp_offset: eBPF $sp offset to 8-byte temporary memory + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + * @reg_val_types Packed enum reg_val_type for each register. + */ +struct jit_ctx { + const struct bpf_prog *skf; + int stack_size; + int tmp_offset; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; + u64 *reg_val_types; + unsigned int long_b_conversion:1; + unsigned int gen_b_offsets:1; +}; + +static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type) +{ + *rvt &= ~(7ull << (reg * 3)); + *rvt |= ((u64)type << (reg * 3)); +} + +static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, + int index, int reg) +{ + return (ctx->reg_val_types[index] >> (reg * 3)) & 7; +} + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) 
\ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +static unsigned int j_target(struct jit_ctx *ctx, int target_idx) +{ + unsigned long target_va, base_va; + unsigned int r; + + if (!ctx->target) + return 0; + + base_va = (unsigned long)ctx->target; + target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV); + + if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful)) + return (unsigned int)-1; + r = target_va & 0x0ffffffful; + return r; +} + +/* Compute the immediate value for PC-relative branches. */ +static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (!ctx->gen_b_offsets) + return 0; + + /* + * We want a pc-relative branch. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) - + (ctx->idx * 4) - 4; +} + +int bpf_jit_enable __read_mostly; + +enum which_ebpf_reg { + src_reg, + src_reg_no_fp, + dst_reg, + dst_reg_fp_ok +}; + +/* + * For eBPF, the register mapping naturally falls out of the + * requirements of eBPF and the MIPS n64 ABI. We don't maintain a + * separate frame pointer, so BPF_REG_10 relative accesses are + * adjusted to be $sp relative. + */ +int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn, + enum which_ebpf_reg w) +{ + int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? + insn->src_reg : insn->dst_reg; + + switch (ebpf_reg) { + case BPF_REG_0: + return MIPS_R_V0; + case BPF_REG_1: + return MIPS_R_A0; + case BPF_REG_2: + return MIPS_R_A1; + case BPF_REG_3: + return MIPS_R_A2; + case BPF_REG_4: + return MIPS_R_A3; + case BPF_REG_5: + return MIPS_R_A4; + case BPF_REG_6: + ctx->flags |= EBPF_SAVE_S0; + return MIPS_R_S0; + case BPF_REG_7: + ctx->flags |= EBPF_SAVE_S1; + return MIPS_R_S1; + case BPF_REG_8: + ctx->flags |= EBPF_SAVE_S2; + return MIPS_R_S2; + case BPF_REG_9: + ctx->flags |= EBPF_SAVE_S3; + return MIPS_R_S3; + case BPF_REG_10: + if (w == dst_reg || w == src_reg_no_fp) + goto bad_reg; + ctx->flags |= EBPF_SEEN_FP; + /* + * Needs special handling, return something that + * cannot be clobbered just in case. + */ + return MIPS_R_ZERO; + case BPF_REG_AX: + return MIPS_R_T4; + default: +bad_reg: + WARN(1, "Illegal bpf reg: %d\n", ebpf_reg); + return -EINVAL; + } +} +/* + * eBPF stack frame will be something like: + * + * Entry $sp ------> +--------------------------------+ + * | $ra (optional) | + * +--------------------------------+ + * | $s0 (optional) | + * +--------------------------------+ + * | $s1 (optional) | + * +--------------------------------+ + * | $s2 (optional) | + * +--------------------------------+ + * | $s3 (optional) | + * +--------------------------------+ + * | $s4 (optional) | + * +--------------------------------+ + * | tmp-storage (if $ra saved) | + * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10 + * | BPF_REG_10 relative storage | + * | MAX_BPF_STACK (optional) | + * | . | + * | . | + * | . | + * $sp --------> +--------------------------------+ + * + * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized + * area is not allocated. 
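For concreteness, a worked sizing under this layout (treating MAX_BPF_STACK as 512 bytes, an assumption about this kernel's value rather than something stated in the patch):

/*
 * Example: a program that calls a helper (EBPF_SAVE_RA, +16 counting
 * the 8-byte tmp area), uses BPF_REG_6 (EBPF_SAVE_S0, +8) and touches
 * its stack through BPF_REG_10 (locals_size = 512) gets
 *     stack_adjust = 16 + 8 + 512 = 536 bytes, tmp_offset = 512,
 * matching the arithmetic in gen_int_prologue() below.
 */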
+ */ +static int gen_int_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0; + int store_offset; + int locals_size; + + if (ctx->flags & EBPF_SAVE_RA) + /* + * If RA we are doing a function call and may need + * extra 8-byte tmp area. + */ + stack_adjust += 16; + if (ctx->flags & EBPF_SAVE_S0) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S1) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S2) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S3) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S4) + stack_adjust += 8; + + BUILD_BUG_ON(MAX_BPF_STACK & 7); + locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; + + stack_adjust += locals_size; + ctx->tmp_offset = locals_size; + + ctx->stack_size = stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in $v1 from the caller. + */ + emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust); + else + return 0; + + store_offset = stack_adjust - 8; + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + + if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) + emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); + + return 0; +} + +static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) +{ + const struct bpf_prog *prog = ctx->skf; + int stack_adjust = ctx->stack_size; + int store_offset = stack_adjust - 8; + int r0 = MIPS_R_V0; + + if (dest_reg == MIPS_R_RA && + get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX) + /* Don't let zero extended value escape. 
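The "sll r0, r0, 0" emitted below leans on a MIPS64 convention worth spelling out:

/*
 * MIPS64 expects 32-bit values to live sign-extended in 64-bit
 * registers, and a shift-by-zero re-canonicalizes: for example
 * 0x00000000_80000000 (zero-extended) becomes 0xffffffff_80000000.
 * That is why a REG_32BIT_ZERO_EX value in BPF_REG_0 is converted
 * before it can escape through the epilogue.
 */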
*/ + emit_instr(ctx, sll, r0, r0, 0); + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + emit_instr(ctx, jr, dest_reg); + + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust); + else + emit_instr(ctx, nop); + + return 0; +} + +static void gen_imm_to_reg(const struct bpf_insn *insn, int reg, + struct jit_ctx *ctx) +{ + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm); + } else { + int lower = (s16)(insn->imm & 0xffff); + int upper = insn->imm - lower; + + emit_instr(ctx, lui, reg, upper >> 16); + emit_instr(ctx, addiu, reg, reg, lower); + } + +} + +static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int idx) +{ + int upper_bound, lower_bound; + int dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + + if (dst < 0) + return dst; + + switch (BPF_OP(insn->code)) { + case BPF_MOV: + case BPF_ADD: + upper_bound = S16_MAX; + lower_bound = S16_MIN; + break; + case BPF_SUB: + upper_bound = -(int)S16_MIN; + lower_bound = -(int)S16_MAX; + break; + case BPF_AND: + case BPF_OR: + case BPF_XOR: + upper_bound = 0xffff; + lower_bound = 0; + break; + case BPF_RSH: + case BPF_LSH: + case BPF_ARSH: + /* Shift amounts are truncated, no need for bounds */ + upper_bound = S32_MAX; + lower_bound = S32_MIN; + break; + default: + return -EINVAL; + } + + /* + * Immediate move clobbers the register, so no sign/zero + * extension needed. 
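The lower/upper split in gen_imm_to_reg() above is worth one worked number, since addiu sign-extends its 16-bit immediate:

/*
 * imm = 0x12348000:
 *   lower = (s16)0x8000      = -32768 (0xffff8000)
 *   upper = imm - lower      = 0x12350000
 *   lui   reg, 0x1235       -> reg = 0x12350000
 *   addiu reg, reg, -32768  -> reg = 0x12348000
 * The extra 0x10000 folded into upper cancels the sign extension
 * that addiu applies to lower.
 */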
+ */ + if (BPF_CLASS(insn->code) == BPF_ALU64 && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + /* BPF_ALU | BPF_LSH doesn't need separate sign extension */ + if (BPF_CLASS(insn->code) == BPF_ALU && + BPF_OP(insn->code) != BPF_LSH && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT) + emit_instr(ctx, sll, dst, dst, 0); + + if (insn->imm >= lower_bound && insn->imm <= upper_bound) { + /* single insn immediate case */ + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_MOV: + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, andi, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, ori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddiu, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, daddiu, dst, dst, -insn->imm); + break; + case BPF_ALU64 | BPF_RSH: + emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_RSH: + emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_LSH: + emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_LSH: + emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_ARSH: + emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_ARSH: + emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU | BPF_MOV: + emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addiu, dst, dst, insn->imm); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, addiu, dst, dst, -insn->imm); + break; + default: + return -EINVAL; + } + } else { + /* multi insn immediate case */ + if (BPF_OP(insn->code) == BPF_MOV) { + gen_imm_to_reg(insn, dst, ctx); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, and, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, or, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xor, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, subu, dst, dst, MIPS_R_AT); + break; + default: + return -EINVAL; + } + } + } + + return 0; +} + +static void * __must_check +ool_skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return skb_header_pointer(skb, offset, len, buffer); +} + +static int size_to_len(const struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + return 1; + case BPF_H: + return 2; + case BPF_W: + return 4; + case BPF_DW: + return 8; + } + return 0; +} + +static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) +{ + if (value >= 0xffffffffffff8000ull || value < 0x8000ull) { + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, 
(int)value); + } else if (value >= 0xffffffff80000000ull || + (value < 0x80000000 && value > 0xffff)) { + emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16)); + emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff)); + } else { + int i; + bool seen_part = false; + int needed_shift = 0; + + for (i = 0; i < 4; i++) { + u64 part = (value >> (16 * (3 - i))) & 0xffff; + + if (seen_part && needed_shift > 0 && (part || i == 3)) { + emit_instr(ctx, dsll_safe, dst, dst, needed_shift); + needed_shift = 0; + } + if (part) { + if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) { + emit_instr(ctx, lui, dst, (s32)(s16)part); + needed_shift = -16; + } else { + emit_instr(ctx, ori, dst, + seen_part ? dst : MIPS_R_ZERO, + (unsigned int)part); + } + seen_part = true; + } + if (seen_part) + needed_shift += 16; + } + } +} + +static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) +{ + int off, b_off; + + ctx->flags |= EBPF_SEEN_TC; + /* + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1); + emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* + * if (--TCC < 0) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, daddiu, MIPS_R_T5, + (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bltz, MIPS_R_T5, b_off); + /* + * prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1); + off = offsetof(struct bpf_array, ptrs); + emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* Delay slot */ + emit_instr(ctx, nop); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT); + /* All systems are go... propagate TCC */ + emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO); + /* Skip first instruction (TCC initialization) */ + emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4); + return build_int_epilogue(ctx, MIPS_R_T9); +} + +static bool use_bbit_insns(void) +{ + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return true; + default: + return false; + } +} + +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + +/* Returns the number of insn slots consumed. 
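A note on is_bad_offset() above: the [-0x20000, 0x1ffff] window is exactly the reach of a classic MIPS conditional branch.

/*
 * The branch field is a signed 16-bit count of 32-bit words, so the
 * reachable byte offsets run from -32768 * 4 = -0x20000 up to
 * 32767 * 4 = 0x1fffc (offsets are word multiples, so comparing
 * against 0x1ffff is safe). Anything further away needs the
 * long-branch conversion flagged by OFFSETS_B_CONV.
 */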
*/ +static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int this_idx, int exit_idx) +{ + int src, dst, r, td, ts, mem_off, b_off; + bool need_swap, did_move, cmp_eq; + unsigned int target; + u64 t64; + s64 t64s; + + switch (insn->code) { + case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_OR | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_AND | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_LSH | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_RSH | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_XOR | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_ARSH | BPF_K: /* ALU32_IMM */ + r = gen_imm_insn(insn, ctx, this_idx); + if (r < 0) + return r; + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, dmultu, MIPS_R_AT, dst); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, multu, dst, MIPS_R_AT); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */ + case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, divu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break;
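Two MIPS64 idioms recur throughout build_one_insn(). Because MIPS64 keeps 32-bit values sign-extended in 64-bit registers, "sll dst, dst, 0" re-canonicalizes (sign-extends) the low word, while "dinsu dst, $zero, 32, 32" zero-extends it by clearing bits 63..32. A minimal C model of the two effects (helper names are illustrative, not from this file):

static inline u64 sign_extend_w(u64 reg)
{
	/* effect of "sll dst, dst, 0" on a 64-bit register */
	return (u64)(s64)(s32)reg;
}

static inline u64 zero_extend_w(u64 reg)
{
	/* effect of "dinsu dst, $zero, 32, 32" */
	return reg & 0xffffffffull;
}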
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, ddivu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + did_move = false; + if (insn->src_reg == BPF_REG_10) { + if (BPF_OP(insn->code) == BPF_MOV) { + emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK); + did_move = true; + } else { + emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK); + src = MIPS_R_AT; + } + } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, daddu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, dmultu, dst, src); + emit_instr(ctx, mflo, dst); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, ddivu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, dsllv, dst, dst, src); + break; + case BPF_RSH: +
emit_instr(ctx, dsrlv, dst, dst, src); + break; + case BPF_ARSH: + emit_instr(ctx, dsrav, dst, dst, src); + break; + default: + pr_err("ALU64_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + did_move = false; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + /* sign extend */ + emit_instr(ctx, sll, tmp_reg, src, 0); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, addu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, addu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, subu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, mul, dst, dst, src); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, divu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, sllv, dst, dst, src); + break; + case BPF_RSH: + emit_instr(ctx, srlv, dst, dst, src); + break; + default: + pr_err("ALU_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_JMP | BPF_EXIT: + if (this_idx + 1 < exit_idx) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + if (insn->imm == 0) { + src = MIPS_R_ZERO; + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + src = MIPS_R_AT; + } + goto jeq_common; + case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */ + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (td == REG_32BIT && ts != 
REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } else if (ts == REG_32BIT && td != REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, dst, 0); + dst = MIPS_R_AT; + } + if (BPF_OP(insn->code) == BPF_JSET) { + emit_instr(ctx, and, MIPS_R_AT, dst, src); + cmp_eq = false; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JSGT) { + emit_instr(ctx, dsubu, MIPS_R_AT, dst, src); + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, blez, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bgtz, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + break; + } else if (BPF_OP(insn->code) == BPF_JSGE) { + emit_instr(ctx, slt, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGT) { + /* dst or src could be AT */ + emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + /* SP known to be non-zero, movz becomes boolean not */ + emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8); + emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGE) { + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else { /* JNE/JEQ case */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + } +jeq_common: + /* + * If the next insn is EXIT and we are jumping around + * only it, invert the sense of the compare and + * conditionally jump to the exit. Poor man's branch + * chaining. + */ + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, bne, dst, src, b_off); + else + emit_instr(ctx, beq, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + return 2; /* We consumed the exit.
*/ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, beq, dst, src, b_off); + else + emit_instr(ctx, bne, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JSGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (insn->imm == 0) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bltz, dst, b_off); + else + emit_instr(ctx, blez, dst, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bgez, dst, b_off); + else + emit_instr(ctx, bgtz, dst, b_off); + emit_instr(ctx, nop); + break; + } + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = insn->imm + (cmp_eq ? 0 : 1); + if (t64s >= S16_MIN && t64s <= S16_MAX) { + emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + cmp_eq = (BPF_OP(insn->code) == BPF_JGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1); + if (t64s >= 0 && t64s <= S16_MAX) { + emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + break; + } + t64 = (u32)insn->imm; + emit_const_to_reg(ctx, MIPS_R_AT, t64); + emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = false; + goto jeq_common; + + case BPF_JMP | BPF_JA: + /* + * Prefer relative branch for easier debugging, but + * fall back if needed. 
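The OFFSETS_B_CONV paths above implement the long-branch fallback: is_bad_offset() limits a MIPS conditional branch to an 18-bit signed byte offset (about +/-128 KB), so an out-of-range branch is rewritten as an inverted-condition branch over an absolute jump. Schematically (a sketch of the emitted shape, with b_off = 4 * 3 stepping over the jump and its delay slot):

	bne	dst, src, 1f	# inverted sense of the original compare
	 nop			# branch delay slot
	j	target		# absolute jump within the 256 MB region
	 nop
1:				# original fall-through path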
+ */ + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + emit_instr(ctx, j, target); + } else { + emit_instr(ctx, b, b_off); + } + emit_instr(ctx, nop); + break; + case BPF_LD | BPF_DW | BPF_IMM: + if (insn->src_reg != 0) + return -EINVAL; + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32); + emit_const_to_reg(ctx, dst, t64); + return 2; /* Double slot insn */ + + case BPF_JMP | BPF_CALL: + ctx->flags |= EBPF_SAVE_RA; + t64s = (s64)insn->imm + (s64)__bpf_call_base; + emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot */ + emit_instr(ctx, nop); + break; + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(ctx, this_idx)) + return -EINVAL; + break; + + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_DW | BPF_ABS: + ctx->flags |= EBPF_SAVE_RA; + + gen_imm_to_reg(insn, MIPS_R_A1, ctx); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + + if (insn->imm < 0) { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper); + } else { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + } + goto ld_skb_common; + + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_DW | BPF_IND: + ctx->flags |= EBPF_SAVE_RA; + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, MIPS_R_A1, src, 0); + src = MIPS_R_A1; + } + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src); + } + /* truncate to 32-bit int */ + emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO); + + emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper); + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT); + +ld_skb_common: + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot move */ + emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO); + + /* Check the error value */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3); + emit_instr(ctx, nop); + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } else { + emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + +#ifdef __BIG_ENDIAN + need_swap = false; +#else + need_swap = true; +#endif + dst = MIPS_R_V0; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, 0, MIPS_R_V0); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, 0, MIPS_R_V0); + if 
(need_swap) + emit_instr(ctx, wsbh, dst, dst); + break; + case BPF_W: + emit_instr(ctx, lw, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + break; + case BPF_DW: + emit_instr(ctx, ld, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + break; + } + + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (insn->imm == 64 && td == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm != 64 && + (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + +#ifdef __BIG_ENDIAN + need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE); +#else + need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE); +#endif + if (insn->imm == 16) { + if (need_swap) + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, andi, dst, dst, 0xffff); + } else if (insn->imm == 32) { + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + } else { /* 64-bit*/ + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + } + break; + + case BPF_ST | BPF_B | BPF_MEM: + case BPF_ST | BPF_H | BPF_MEM: + case BPF_ST | BPF_W | BPF_MEM: + case BPF_ST | BPF_DW | BPF_MEM: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst); + break; + case BPF_DW: + emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst); + break; + } + break; + + case BPF_LDX | BPF_B | BPF_MEM: + case BPF_LDX | BPF_H | BPF_MEM: + case BPF_LDX | BPF_W | BPF_MEM: + case BPF_LDX | BPF_DW | BPF_MEM: + if (insn->src_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + src = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + mem_off = insn->off; + } + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, mem_off, src); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, mem_off, src); + break; + case BPF_W: + emit_instr(ctx, lw, dst, mem_off, src); + break; + case BPF_DW: + emit_instr(ctx, ld, dst, mem_off, src); + break; + } + break; + + case BPF_STX | BPF_B | BPF_MEM: + case BPF_STX | BPF_H | BPF_MEM: + case BPF_STX | BPF_W | BPF_MEM: + case BPF_STX | BPF_DW | BPF_MEM: + case BPF_STX | BPF_W | BPF_XADD: + case BPF_STX | BPF_DW | BPF_XADD: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return dst; + if (BPF_MODE(insn->code) == BPF_XADD) { + switch (BPF_SIZE(insn->code)) { + case BPF_W: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { 
+ emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } + emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst); + /* + * On failure back up to LL (-4 + * instructions of 4 bytes each) + */ + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + } + } else { /* BPF_MEM */ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, src, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, src, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, src, mem_off, dst); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, sd, src, mem_off, dst); + break; + } + } + break; + + default: + pr_err("NOT HANDLED %d - (%02x)\n", + this_idx, (unsigned int)insn->code); + return -EINVAL; + } + return 1; +}
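The BPF_XADD cases in build_one_insn() above emit the classic MIPS ll/sc retry loop; the beq with offset -4 * 4 branches back to the ll whenever the store-conditional fails. A rough user-space C equivalent of the 32-bit form (a sketch; the asm constraints are an assumption, not taken from the kernel):

static inline void xadd_w(int *addr, int val)
{
	int tmp;

	__asm__ __volatile__(
	"1:	ll	%0, %1		\n"	/* load-linked */
	"	addu	%0, %0, %2	\n"	/* add the increment */
	"	sc	%0, %1		\n"	/* store-conditional */
	"	beqz	%0, 1b		\n"	/* %0 == 0 means sc failed: retry */
	"	 nop			\n"
	: "=&r" (tmp), "+m" (*addr)
	: "r" (val)
	: "memory");
}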
+ +#define RVT_VISITED_MASK 0xc000000000000000ull +#define RVT_FALL_THROUGH 0x4000000000000000ull +#define RVT_BRANCH_TAKEN 0x8000000000000000ull +#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) + +static int build_int_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + int i, r; + + for (i = 0; i < prog->len; ) { + insn = prog->insnsi + i; + if ((ctx->reg_val_types[i] & RVT_VISITED_MASK) == 0) { + /* dead instruction, don't emit it. */ + i++; + continue; + } + + if (ctx->target == NULL) + ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4); + + r = build_one_insn(insn, ctx, i, prog->len); + if (r < 0) + return r; + i += r; + } + /* epilogue offset */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + /* + * All exits have an offset of the epilogue; some offsets may + * not have been set due to branch-around threading, so set + * them now. + */ + if (ctx->target == NULL) + for (i = 0; i < prog->len; i++) { + insn = prog->insnsi + i; + if (insn->code == (BPF_JMP | BPF_EXIT)) + ctx->offsets[i] = ctx->idx * 4; + } + return 0; +} + +/* return the last idx processed, or negative for error */ +static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt, + int start_idx, bool follow_taken) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + u64 exit_rvt = initial_rvt; + u64 *rvt = ctx->reg_val_types; + int idx; + int reg; + + for (idx = start_idx; idx < prog->len; idx++) { + rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; + insn = prog->insnsi + idx; + switch (BPF_CLASS(insn->code)) { + case BPF_ALU: + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_MUL: + case BPF_DIV: + case BPF_OR: + case BPF_AND: + case BPF_LSH: + case BPF_RSH: + case BPF_NEG: + case BPF_MOD: + case BPF_XOR: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + case BPF_MOV: + if (BPF_SRC(insn->code)) { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } else { + /* IMM to REG move */ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } + break; + case BPF_END: + if (insn->imm == 64) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + else if (insn->imm == 32) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + else /* insn->imm == 16 */ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_ALU64: + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (BPF_SRC(insn->code)) { + /* REG to REG move */ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } else { + /* IMM to REG move */ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + } + break; + default: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LD: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + if (BPF_MODE(insn->code) == BPF_IMM) { + s64 val; + + val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); + if (val > 0 && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else if (val >= S32_MIN && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + rvt[idx] |= RVT_DONE; + idx++; + } else { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + if (BPF_MODE(insn->code) == BPF_IMM) + set_reg_val_type(&exit_rvt, insn->dst_reg, + insn->imm >= 0 ?
REG_32BIT_POS : REG_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LDX: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_JMP: + switch (BPF_OP(insn->code)) { + case BPF_EXIT: + rvt[idx] = RVT_DONE | exit_rvt; + rvt[prog->len] = exit_rvt; + return idx; + case BPF_JA: + rvt[idx] |= RVT_DONE; + idx += insn->off; + break; + case BPF_JEQ: + case BPF_JGT: + case BPF_JGE: + case BPF_JSET: + case BPF_JNE: + case BPF_JSGT: + case BPF_JSGE: + if (follow_taken) { + rvt[idx] |= RVT_BRANCH_TAKEN; + idx += insn->off; + follow_taken = false; + } else { + rvt[idx] |= RVT_FALL_THROUGH; + } + break; + case BPF_CALL: + set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); + /* Upon call return, argument registers are clobbered. */ + for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + rvt[idx] |= RVT_DONE; + break; + default: + WARN(1, "Unhandled BPF_JMP case.\n"); + rvt[idx] |= RVT_DONE; + break; + } + break; + default: + rvt[idx] |= RVT_DONE; + break; + } + } + return idx; +} + +/* + * Track the value range (i.e. 32-bit vs. 64-bit) of each register at + * each eBPF insn. This allows unneeded sign and zero extension + * operations to be omitted. + * + * Doesn't yet handle confluence of control paths with conflicting + * ranges, but it is good enough for most sane code. + */ +static int reg_val_propagate(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + u64 exit_rvt; + int reg; + int i; + + /* + * 11 registers * 3 bits/reg leaves top bits free for other + * uses. Bits 62..63 are used to see if we have visited an insn. + */ + exit_rvt = 0; + + /* Upon entry, argument registers are 64-bit. */ + for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + /* + * First follow all conditional branches on the fall-through + * edge of control flow. + */ + reg_val_propagate_range(ctx, exit_rvt, 0, false); +restart_search: + /* + * Then repeatedly find the first conditional branch where + * both edges of control flow have not been taken, and follow + * the branch taken edge. We will end up restarting the + * search once per conditional branch insn. + */ + for (i = 0; i < prog->len; i++) { + u64 rvt = ctx->reg_val_types[i]; + + if ((rvt & RVT_VISITED_MASK) == RVT_DONE || + (rvt & RVT_VISITED_MASK) == 0) + continue; + if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) { + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true); + } else { /* RVT_BRANCH_TAKEN */ + WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n"); + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false); + } + goto restart_search; + } + /* + * Eventually all conditional branches have been followed on + * both branches and we are done. Any insn that has not been + * visited at this point is dead. + */ + + return 0; +}
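The 3-bits-per-register packing described in reg_val_propagate()'s comment above (with bits 62..63 reserved for the RVT_VISITED_MASK flags) is manipulated through set_reg_val_type() and get_reg_val_type(), which are defined earlier in the file and outside this hunk. A plausible shape, offered only as a sketch:

static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
{
	*rvt &= ~(7ull << (reg * 3));	/* clear the register's 3-bit field */
	*rvt |= ((u64)type << (reg * 3));
}

static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
					  int index, int reg)
{
	return (ctx->reg_val_types[index] >> (reg * 3)) & 7;
}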
+ +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *p; + + /* We are guaranteed to have aligned memory. */ + for (p = area; size >= sizeof(u32); size -= sizeof(u32)) + uasm_i_break(&p, BRK_BUG); /* Increments p */ +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *orig_prog = prog; + bool tmp_blinded = false; + struct bpf_prog *tmp; + struct bpf_binary_header *header = NULL; + struct jit_ctx ctx; + unsigned int image_size; + u8 *image_ptr; + + if (!bpf_jit_enable || !cpu_has_mips64r2) + return prog; + + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + goto out_err; + + ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL); + if (ctx.reg_val_types == NULL) + goto out_err; + + ctx.skf = prog; + + if (reg_val_propagate(&ctx)) + goto out_err; + + /* + * First pass discovers used resources and instruction offsets + * assuming short branches are used. + */ + if (build_int_body(&ctx)) + goto out_err; + + /* + * If no calls are made (EBPF_SAVE_RA), then the tail call count + * is kept in $v1, else we must save it in $s4. + */ + if (ctx.flags & EBPF_SEEN_TC) { + if (ctx.flags & EBPF_SAVE_RA) + ctx.flags |= EBPF_SAVE_S4; + else + ctx.flags |= EBPF_TCC_IN_V1; + } + + /* + * Second pass generates offsets; if any branches are out of + * range a jump-around long sequence is generated, and we have + * to try again from the beginning to generate the new + * offsets. This is done until no additional conversions are + * necessary. + */ + do { + ctx.idx = 0; + ctx.gen_b_offsets = 1; + ctx.long_b_conversion = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + } while (ctx.long_b_conversion); + + image_size = 4 * ctx.idx; + + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) + goto out_err; + + ctx.target = (u32 *)image_ptr; + + /* Third pass generates the code */ + ctx.idx = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + + /* Update the icache */ + flush_icache_range((unsigned long)ctx.target, + (unsigned long)(ctx.target + ctx.idx * sizeof(u32))); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(prog->len, image_size, 2, ctx.target); + + bpf_jit_binary_lock_ro(header); + prog->bpf_func = (void *)ctx.target; + prog->jited = 1; + prog->jited_len = image_size; +out_normal: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + kfree(ctx.offsets); + kfree(ctx.reg_val_types); + + return prog; + +out_err: + prog = orig_prog; + if (header) + bpf_jit_binary_free(header); + goto out_normal; +} diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 094a0ee4af46..9be8b08ae46b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/bug.h> #include <asm/mipsregs.h> #include <asm/mach-ralink/ralink_regs.h> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9eb8f43..dda1f558ef35 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series).
The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 88fe0aad4390..bc208136bbb2 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -34,7 +34,7 @@ struct thread_info { /* thread information allocation */ -#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 85a92db70afc..19c0c141bc3f 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -587,13 +587,12 @@ void flush_cache_range(struct vm_area_struct *vma, if (parisc_requires_coherency()) flush_tlb_range(vma, start, end); - if ((end - start) >= parisc_cache_flush_threshold) { + if ((end - start) >= parisc_cache_flush_threshold + || vma->vm_mm->context != mfsp(3)) { flush_cache_all(); return; } - BUG_ON(vma->vm_mm->context != mfsp(3)); - flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 5404e4086cb9..0ca254085a66 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -380,7 +380,7 @@ static inline int eirr_to_irq(unsigned long eirr) /* * IRQ STACK - used for irq handler */ -#define IRQ_STACK_SIZE (4096 << 2) /* 16k irq stack size */ +#define IRQ_STACK_SIZE (4096 << 3) /* 32k irq stack size */ union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7814a7b1523..6f952fe1f084 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -25,12 +25,20 @@ compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -isystem $(shell $(CROSS32CC) -print-file-name=include) \ -D$(compress-y) +BOOTCC := $(CC) ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 +else +BOOTCFLAGS += -m32 +ifdef CROSS32_COMPILE + BOOTCC := $(CROSS32_COMPILE)gcc +endif endif + +BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) + ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian else @@ -183,10 +191,10 @@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ - cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< quiet_cmd_bootas = BOOTAS $@ - cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 0695ce047d56..34fc9bbfca9e 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y 
CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5175028c56ce..c5246d29f385 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1a61aa20dfba..fd5d98a0b95c 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..818a58fc3f4f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -608,9 +608,17 @@ static inline pte_t pte_mkdevmap(pte_t pte) return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); } +/* + * This is potentially called with a pmd as the argument, in which case it's not + * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set. + * That's because the bit we use for _PAGE_DEVMAP is not reserved for software + * use in page directory entries (ie. non-ptes). + */ static inline int pte_devmap(pte_t pte) { - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP)); + u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); + + return (pte_raw(pte) & mask) == mask; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 49d8422767b4..e925c1c99c71 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,17 +223,27 @@ system_call_exit: andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- .Lsyscall_exit_work - /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */ - li r7,MSR_FP + andi. r0,r8,MSR_FP + beq 2f #ifdef CONFIG_ALTIVEC - oris r7,r7,MSR_VEC@h + andis. r0,r8,MSR_VEC@h + bne 3f #endif - and r0,r8,r7 - cmpd r0,r7 - bne .Lsyscall_restore_math -.Lsyscall_restore_math_cont: +2: addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI + mtmsrd r10,1 /* Restore RI */ +#endif + bl restore_math +#ifdef CONFIG_PPC_BOOK3S + li r11,0 + mtmsrd r11,1 +#endif + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO - cmpld r3,r11 +3: cmpld r3,r11 ld r5,_CCR(r1) bge- .Lsyscall_error .Lsyscall_error_cont: @@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r5,_CCR(r1) b .Lsyscall_error_cont -.Lsyscall_restore_math: - /* - * Some initial tests from restore_math to avoid the heavyweight - * C code entry and MSR manipulations. - */ - LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK) - and. 
r0,r0,r8 - bne 1f - - ld r7,PACACURRENT(r13) - lbz r0,THREAD+THREAD_LOAD_FP(r7) -#ifdef CONFIG_ALTIVEC - lbz r6,THREAD+THREAD_LOAD_VEC(r7) - add r0,r0,r6 -#endif - cmpdi r0,0 - beq .Lsyscall_restore_math_cont - -1: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S - li r10,MSR_RI - mtmsrd r10,1 /* Restore RI */ -#endif - bl restore_math -#ifdef CONFIG_PPC_BOOK3S - li r11,0 - mtmsrd r11,1 -#endif - /* Restore volatiles, reload MSR from updated one */ - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - b .Lsyscall_restore_math_cont - /* Traced system call support */ .Lsyscall_dotrace: bl save_nvgprs diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9029afd1fa2a..f14f3c04ec7e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100) std r10,PACA_EXGEN+EX_R13(r13); \ EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) +/* + * Branch to soft_nmi_interrupt using the emergency stack. The emergency + * stack is one that is usable by maskable interrupts so long as MSR_EE + * remains off. It is used for recovery when something has corrupted the + * normal kernel stack, for example. The "soft NMI" must not use the process + * stack because we want irq disabled sections to avoid touching the stack + * at all (other than PMU interrupts), so use the emergency stack for this, + * and run it entirely with interrupts hard disabled. + */ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) - ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, system_reset, soft_nmi_interrupt, diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..e6252c5a57a4 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -460,11 +460,17 @@ pnv_restore_hyp_resource_arch300: /* * Workaround for POWER9, if we lost resources, the ERAT * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). + * to reload MMCR0 (see comment above). We also need to set + * then clear bit 60 in MMCRA to ensure the PMU starts running. */ blt cr3,1f PPC_INVALIDATE_ERAT ld r1,PACAR1(r13) + mfspr r4,SPRN_MMCRA + ori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 + xori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 ld r4,_MMCR0(r1) mtspr SPRN_MMCR0,r4 1: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0bcec745a672..f291f7826abc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void) /* Clear bit 0 which we wouldn't clear otherwise */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) { + local_paca->irq_happened |= PACA_IRQ_DEC; + happened |= PACA_IRQ_DEC; + } + } + } /* * Force the delivery of pending soft-disabled interrupts on PS3. 
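The __check_irq_replay() change above leans on decrementer_check_overflow(), which lives in arch/powerpc/kernel/time.c and is not part of this diff. What it reports, roughly (a sketch inferred from the surrounding usage, not the verbatim source):

static inline bool decrementer_check_overflow(void)
{
	u64 now = get_tb_or_rtc();
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

	/* the decrementer rolled over if the next event time has passed */
	return now >= *next_tb;
}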
@@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) + if (happened & PACA_IRQ_DEC) return 0x900; /* Finally check if an external interrupt happened */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..ec480966f9bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - /* - * Syscall exit makes a similar initial check before branching - * to restore_math. Keep them in synch. - */ if (!msr_tm_active(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 925a4ef90559..660ed39e9c9a 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * If task is not current, it will have been flushed already to * it's thread_struct during __switch_to(). * - * A reclaim flushes ALL the state. + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. */ - if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(TM_CAUSE_SIGNAL); + if (tsk != current) + return; + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&(tsk->thread)); + } } #else static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 997c88d54acf..8d3320562c70 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) hard_irq_disable(); while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) static void nmi_ipi_lock(void) { while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); } static void nmi_ipi_unlock(void) @@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_lock_start(&flags); while (nmi_ipi_busy_count) { nmi_ipi_unlock_end(&flags); - cpu_relax(); + spin_until_cond(nmi_ipi_busy_count == 0); nmi_ipi_lock_start(&flags); } @@ -1003,21 +1003,13 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static __init long smp_setup_cpu_workfn(void *data __always_unused) -{ - smp_ops->setup_cpu(boot_cpuid); - return 0; -} - void __init smp_cpus_done(unsigned int max_cpus) { /* - * We want the setup_cpu() here to be called on the boot CPU, but - * init might run on any CPU, so make sure it's invoked on the boot - * CPU. + * We are running pinned to the boot CPU, see rest_init(). 
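The spin_until_cond() calls introduced in smp.c and watchdog.c come from include/linux/processor.h; the macro busy-waits on a condition while letting the architecture lower and restore SMT thread priority around the poll loop. Roughly (a sketch of that header's definition):

#define spin_until_cond(cond)				\
do {							\
	if (unlikely(!(cond))) {			\
		spin_begin();				\
		do {					\
			spin_cpu_relax();		\
		} while (!(cond));			\
		spin_end();				\
	}						\
} while (0)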
*/ if (smp_ops && smp_ops->setup_cpu) - work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); + smp_ops->setup_cpu(boot_cpuid); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..34721a257a77 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags) * This may be called from low level interrupt handlers at some * point in future. */ - local_irq_save(*flags); - while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) - cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); /* Make it soft-NMI safe */ + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) { + raw_local_irq_restore(*flags); + spin_until_cond(!test_bit(0, &__wd_smp_lock)); + raw_local_irq_save(*flags); + hard_irq_disable(); + } } static inline void wd_smp_unlock(unsigned long *flags) { clear_bit_unlock(0, &__wd_smp_lock); - local_irq_restore(*flags); + raw_local_irq_restore(*flags); } static void wd_lockup_ipi(struct pt_regs *regs) @@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs) nmi_panic(regs, "Hard LOCKUP"); } -static void set_cpu_stuck(int cpu, u64 tb) +static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { - cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); - cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); + cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb) &wd_smp_cpus_stuck); } } +static void set_cpu_stuck(int cpu, u64 tb) +{ + set_cpumask_stuck(cpumask_of(cpu), tb); +} static void watchdog_smp_panic(int cpu, u64 tb) { @@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb) } smp_flush_nmi_ipi(1000000); - /* Take the stuck CPU out of the watch group */ - for_each_cpu(c, &wd_smp_cpus_pending) - set_cpu_stuck(c, tb); + /* Take the stuck CPUs out of the watch group */ + set_cpumask_stuck(&wd_smp_cpus_pending, tb); -out: wd_smp_unlock(&flags); printk_safe_flush(); @@ -152,6 +159,11 @@ out: if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + + return; + +out: + wd_smp_unlock(&flags); } static void wd_smp_clear_cpu_pending(int cpu, u64 tb) @@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data) void arch_touch_nmi_watchdog(void) { + unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - watchdog_timer_interrupt(cpu); + if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) + watchdog_timer_interrupt(cpu); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu) static int start_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); return 0; @@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu) if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; + wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); if (cpumask_weight(&wd_cpus_enabled) == 1) { cpumask_set_cpu(cpu, &wd_smp_cpus_pending); wd_smp_last_reset_tb = get_tb(); } - smp_wmb(); + wd_smp_unlock(&flags); + start_watchdog_timer_on(cpu); return 0; @@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu) static int stop_wd_on_cpu(unsigned int cpu) { + unsigned 
long flags; + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; /* Can happen in CPU unplug case */ stop_watchdog_timer_on(cpu); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_unlock(&flags); + wd_smp_clear_cpu_pending(cpu, get_tb()); return 0; diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index d7c9b186954d..763ffca9628d 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto err; ret = of_irq_to_resource(np, 0, &res[1]); - if (!ret) + if (ret <= 0) goto err; pdev = platform_device_alloc("mpc83xx_spi", i); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..a553aeea7af6 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE; */ static u64 pnv_deepest_stop_psscr_val; static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; static int pnv_save_sprs_for_deep_states(void) @@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void) update_subcore_sibling_mask(); - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + int rc = pnv_save_sprs_for_deep_states(); + + if (likely(!rc)) + return; + + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. 
+ */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = + pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = + pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } + } } u32 pnv_get_supported_cpuidle_states(void) @@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) pnv_deepest_stop_psscr_val; srr1 = power9_idle_stop(psscr); - } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { + } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && + (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { srr1 = power7_idle_insn(PNV_THREAD_WINKLE); } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { @@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, max_residency_ns = residency_ns[i]; pnv_deepest_stop_psscr_val = psscr_val[i]; pnv_deepest_stop_psscr_mask = psscr_mask[i]; + pnv_deepest_stop_flag = flags[i]; deepest_stop_found = true; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 437613588df1..b900eb1d5e17 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1852,6 +1852,14 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) /* 4GB offset bypasses 32-bit space */ set_dma_offset(&pdev->dev, (1ULL << 32)); set_dma_ops(&pdev->dev, &dma_direct_ops); + } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) { + /* + * Fail the request if a DMA mask between 32 and 64 bits + * was requested but couldn't be fulfilled. Ideally we + * would do this for 64-bits but historically we have + * always fallen back to 32-bits. 
+ */ + return -ENOMEM; } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7317b3108a88..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -47,10 +47,9 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -76,9 +75,15 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 01c6fbc3e85b..1803797fc885 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1253,7 +1253,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 46e0d635e36f..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -47,9 +48,10 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index c74d3701ad68..207a43a2d8b3 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 @@ -23,7 +22,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -69,7 +67,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_PROC_KCORE=y @@ -82,7 +79,6 @@ CONFIG_NLS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_CRYPTO_NULL=m diff --git 
a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index b2e650d1764f..ca8609d7292f 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -1,5 +1,4 @@ CONFIG_64BIT=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -184,7 +183,6 @@ CONFIG_HID_TOPSEED=y CONFIG_HID_THRUSTMASTER=y CONFIG_HID_ZEROPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_TT_NEWSCHED is not set CONFIG_USB_OHCI_HCD=y @@ -210,8 +208,6 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENTS=y CONFIG_KEYS=y diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 2cddcda4f85f..87841d687f8d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[MM_TSB_BASE], @@ -40,9 +42,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[MM_TSB_BASE])); + , __pa(&mm->context.tsb_descr[MM_TSB_BASE]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update its TSB because at that point the cpu_vm_mask * only had cpu1 set in it.
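The new secondary_ctx parameter exists so that loading the secondary context register and switching the TSB happen inside one interrupts-disabled sequence in the assembly helper, instead of as two separate steps with a window in between. A sketch of the caller-side conversion this enables (CTX_HWBITS and the compatibility macro are from the hunk above):

	/* Old sequence: between these two calls the MMU can observe the
	 * new secondary context paired with the previous mm's TSB. */
	load_secondary_context(mm);
	tsb_context_switch(mm);

	/* New sequence: a non-zero ctx tells __tsb_context_switch to also
	 * write SECONDARY_CONTEXT inside the same PSTATE_IE-disabled region. */
	tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));

Existing callers that only want the TSB switched keep working through the macro: tsb_context_switch(mm) expands to tsb_context_switch_ctx(mm, 0), and the assembly skips the context-register write when %o4 is zero.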
*/ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 1d8321c827a8..1b1286d05069 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -47,10 +47,26 @@ #define SUN4V_CHIP_NIAGARA5 0x05 #define SUN4V_CHIP_SPARC_M6 0x06 #define SUN4V_CHIP_SPARC_M7 0x07 +#define SUN4V_CHIP_SPARC_M8 0x08 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff +/* + * The following CPU_ID_xxx constants are used + * to identify the CPU type in the setup phase + * (see head_64.S) + */ +#define CPU_ID_NIAGARA1 ('1') +#define CPU_ID_NIAGARA2 ('2') +#define CPU_ID_NIAGARA3 ('3') +#define CPU_ID_NIAGARA4 ('4') +#define CPU_ID_NIAGARA5 ('5') +#define CPU_ID_M6 ('6') +#define CPU_ID_M7 ('7') +#define CPU_ID_M8 ('8') +#define CPU_ID_SONOMA1 ('N') + #ifndef __ASSEMBLY__ enum ultra_tlb_layout { diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 493e023a468a..ef4f18f7a674 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "sparc-m7"; break; + case SUN4V_CHIP_SPARC_M8: + sparc_cpu_type = "SPARC-M8"; + sparc_fpu_type = "SPARC-M8 integrated FPU"; + sparc_pmu_type = "sparc-m8"; + break; + case SUN4V_CHIP_SPARC_SN: sparc_cpu_type = "SPARC-SN"; sparc_fpu_type = "SPARC-SN integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 45c820e1cba5..90d550bbfeef 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC_M6: case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 41a407328667..78e0211753d2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -424,22 +424,25 @@ EXPORT_SYMBOL(sun4v_chip_type) nop 70: ldub [%g1 + 7], %g2 - cmp %g2, '3' + cmp %g2, CPU_ID_NIAGARA3 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA3, %g4 - cmp %g2, '4' + cmp %g2, CPU_ID_NIAGARA4 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA4, %g4 - cmp %g2, '5' + cmp %g2, CPU_ID_NIAGARA5 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 - cmp %g2, '6' + cmp %g2, CPU_ID_M6 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M6, %g4 - cmp %g2, '7' + cmp %g2, CPU_ID_M7 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 - cmp %g2, 'N' + cmp %g2, CPU_ID_M8 + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M8, %g4 + cmp %g2, CPU_ID_SONOMA1 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_SN, %g4 ba,pt %xcc, 49f @@ -448,10 +451,10 @@ EXPORT_SYMBOL(sun4v_chip_type) 91: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 ldub [%g1 + 17], %g2 - cmp %g2, '1' + cmp %g2, CPU_ID_NIAGARA1 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA1, %g4 - cmp %g2, '2' + cmp %g2, CPU_ID_NIAGARA2 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA2, %g4 @@ -602,6 +605,9 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_SPARC_M7 be,pt %xcc, niagara4_patch nop + cmp %g1, SUN4V_CHIP_SPARC_M8 + be,pt %xcc, niagara4_patch + nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch nop diff --git a/arch/sparc/kernel/setup_64.c 
b/arch/sparc/kernel/setup_64.c index 4d9c3e13c150..150ee7d4b059 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -288,10 +288,17 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); - if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || - sun4v_chip_type == SUN4V_CHIP_SPARC_SN) + + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: + case SUN4V_CHIP_SPARC_SN: sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, &__sun_m7_2insn_patch_end); + break; + default: + break; + } sun4v_hvapi_init(); } @@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; @@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; @@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | @@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 07c0df924960..db872dbfafe9 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -360,6 +360,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -372,6 +373,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3c40ebd50f92..afa0099f3748 100644 --- a/arch/sparc/mm/init_64.c +++ 
b/arch/sparc/mm/init_64.c @@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde } #ifdef CONFIG_HUGETLB_PAGE +static void __init add_huge_page_size(unsigned long size) +{ + unsigned int order; + + if (size_to_hstate(size)) + return; + + order = ilog2(size) - PAGE_SHIFT; + hugetlb_add_hstate(order); +} + +static int __init hugetlbpage_init(void) +{ + add_huge_page_size(1UL << HPAGE_64K_SHIFT); + add_huge_page_size(1UL << HPAGE_SHIFT); + add_huge_page_size(1UL << HPAGE_256MB_SHIFT); + add_huge_page_size(1UL << HPAGE_2GB_SHIFT); + + return 0; +} + +arch_initcall(hugetlbpage_init); + static int __init setup_hugepagesz(char *string) { unsigned long long hugepage_size; @@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char *string) goto out; } - hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT); + add_huge_page_size(hugepage_size); rc = 1; out: @@ -1921,12 +1944,22 @@ static void __init setup_page_offset(void) break; case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC_SN: - default: /* M7 and later support 52-bit virtual addresses. */ sparc64_va_hole_top = 0xfff8000000000000UL; sparc64_va_hole_bottom = 0x0008000000000000UL; max_phys_bits = 49; break; + case SUN4V_CHIP_SPARC_M8: + default: + /* M8 and later support 54-bit virtual addresses. + * However, restricting M8 and above VA bits to 53 + * as 4-level page table cannot support more than + * 53 VA bits. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + max_phys_bits = 51; + break; } } @@ -2138,6 +2171,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: pagecv_flag = 0x00; break; @@ -2290,6 +2324,7 @@ void __init paging_init(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: page_cache4v_flag = _PAGE_CP_4V; break; diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 600a2e9bfee2..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -80,13 +81,19 @@ tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu +/* arch_tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. 
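The force argument threaded through the arch_tlb_finish_mmu() variants in this series (s390, sh, um) is supplied by a new generic wrapper. Sketched from memory of the generic mm side of the same series (mm_tlb_flush_nested() is the detector introduced there; shown for context, not as part of this arch code):

void tlb_finish_mmu(struct mmu_gather *tlb,
		    unsigned long start, unsigned long end)
{
	/*
	 * If parallel threads modify PTEs on the same range under a
	 * non-exclusive lock (e.g. mmap_sem held for read) and defer
	 * their TLB flushes by batching, one thread can keep a stale
	 * TLB entry alive. Detect that nesting and force a flush of
	 * the full range even when this gather batched nothing.
	 */
	bool force = mm_tlb_flush_nested(tlb->mm);

	arch_tlb_finish_mmu(tlb, start, end, force);
}

That is why the sh and um implementations flush when force is set even though fullmm is clear, and why the s390 variant resets tlb->start/tlb->end before flushing.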
*/ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index d869c8671e36..7cf7c70b6ef2 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -8,20 +8,25 @@ * This file is licensed under GPLv2. */ -#include <linux/jiffies.h> +#include <linux/delay.h> +#include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> #include <linux/smp.h> struct aperfmperf_sample { unsigned int khz; - unsigned long jiffies; + ktime_t time; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU(struct aperfmperf_sample, samples); +#define APERFMPERF_CACHE_THRESHOLD_MS 10 +#define APERFMPERF_REFRESH_DELAY_MS 20 +#define APERFMPERF_STALE_THRESHOLD_MS 1000 + /* * aperfmperf_snapshot_khz() * On the current CPU, snapshot APERF, MPERF, and jiffies @@ -33,9 +38,11 @@ static void aperfmperf_snapshot_khz(void *dummy) u64 aperf, aperf_delta; u64 mperf, mperf_delta; struct aperfmperf_sample *s = this_cpu_ptr(&samples); + ktime_t now = ktime_get(); + s64 time_delta = ktime_ms_delta(now, s->time); - /* Don't bother re-computing within 10 ms */ - if (time_before(jiffies, s->jiffies + HZ/100)) + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return; rdmsrl(MSR_IA32_APERF, aperf); @@ -51,22 +58,21 @@ static void aperfmperf_snapshot_khz(void *dummy) if (mperf_delta == 0) return; - /* - * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then - * khz = (cpu_khz * aperf_delta) / mperf_delta - */ - if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta) - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); - else /* khz = aperf_delta / (mperf_delta / cpu_khz) */ - s->khz = div64_u64(aperf_delta, - div64_u64(mperf_delta, cpu_khz)); - s->jiffies = jiffies; + s->time = now; s->aperf = aperf; s->mperf = mperf; + + /* If the previous iteration was too long ago, discard it. 
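For reference, the frequency math this function performs: MPERF counts at a fixed TSC-class rate while APERF counts at the actual delivered clock, so the effective frequency over a sampling window is khz = cpu_khz * delta_APERF / delta_MPERF. A worked example with invented counter deltas:

	delta_APERF = 3,000,000
	delta_MPERF = 2,000,000
	cpu_khz     = 2,400,000		/* 2.4 GHz base frequency */

	khz = 2,400,000 * 3,000,000 / 2,000,000 = 3,600,000	/* 3.6 GHz */

A ratio above 1 means the CPU spent the window in turbo; below 1 means it ran below its base clock.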
*/ + if (time_delta > APERFMPERF_STALE_THRESHOLD_MS) + s->khz = 0; + else + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); } unsigned int arch_freq_get_on_cpu(int cpu) { + unsigned int khz; + if (!cpu_khz) return 0; @@ -74,6 +80,12 @@ unsigned int arch_freq_get_on_cpu(int cpu) return 0; smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); + khz = per_cpu(samples.khz, cpu); + if (khz) + return khz; + + msleep(APERFMPERF_REFRESH_DELAY_MS); + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); return per_cpu(samples.khz, cpu); } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 16f82a3aaec7..8ce4212e2b8d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -345,21 +345,10 @@ static int hpet_shutdown(struct clock_event_device *evt, int timer) return 0; } -static int hpet_resume(struct clock_event_device *evt, int timer) -{ - if (!timer) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); - irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_hardirq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } +static int hpet_resume(struct clock_event_device *evt) +{ + hpet_enable_legacy_int(); hpet_print_config(); - return 0; } @@ -417,7 +406,7 @@ static int hpet_legacy_set_periodic(struct clock_event_device *evt) static int hpet_legacy_resume(struct clock_event_device *evt) { - return hpet_resume(evt, 0); + return hpet_resume(evt); } static int hpet_legacy_next_event(unsigned long delta, @@ -510,8 +499,14 @@ static int hpet_msi_set_periodic(struct clock_event_device *evt) static int hpet_msi_resume(struct clock_event_device *evt) { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + struct irq_data *data = irq_get_irq_data(hdev->irq); + struct msi_msg msg; - return hpet_resume(evt, hdev->num); + /* Restore the MSI msg and unmask the interrupt */ + irq_chip_compose_msi_msg(data, &msg); + hpet_msi_write(hdev, &msg); + hpet_msi_unmask(data); + return 0; } static int hpet_msi_next_event(unsigned long delta, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 71c17a5be983..d04e30e3c0ff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_swait(&n.wq, &wait); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4d8141e533c3..1107626938cc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = error_code; + + /* + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be + * written only when inject_pending_event runs (DR6 would be written here
This should be conditional on a new capability---if the + * capability is disabled, kvm_multiple_exception would write the + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. + */ if (svm->vcpu.arch.exception.nested_apf) svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 39a6222bf968..9b21b1223035 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -416,13 +416,10 @@ struct nested_vmx { /* The guest-physical address of the current VMCS L1 keeps for L2 */ gpa_t current_vmptr; - /* The host-usable pointer to the above */ - struct page *current_vmcs12_page; - struct vmcs12 *current_vmcs12; /* * Cache of the guest's VMCS, existing outside of guest memory. * Loaded from guest memory during VMPTRLD. Flushed to guest - * memory during VMXOFF, VMCLEAR, VMPTRLD. + * memory during VMCLEAR and VMPTRLD. */ struct vmcs12 *cached_vmcs12; /* @@ -927,6 +924,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); +static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, + u16 error_code); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2428,6 +2429,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); } +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, + unsigned long exit_qual) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned int nr = vcpu->arch.exception.nr; + u32 intr_info = nr | INTR_INFO_VALID_MASK; + + if (vcpu->arch.exception.has_error_code) { + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } + + if (kvm_exception_is_soft(nr)) + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && + vmx_get_nmi_mask(vcpu)) + intr_info |= INTR_INFO_UNBLOCK_NMI; + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); +} + /* * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. @@ -2437,23 +2462,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); unsigned int nr = vcpu->arch.exception.nr; - if (!((vmcs12->exception_bitmap & (1u << nr)) || - (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) - return 0; + if (nr == PF_VECTOR) { + if (vcpu->arch.exception.nested_apf) { + nested_vmx_inject_exception_vmexit(vcpu, + vcpu->arch.apf.nested_apf_token); + return 1; + } + /* + * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 + * can be written only when inject_pending_event runs. This should be + * conditional on a new capability---if the capability is disabled, + * kvm_multiple_exception would write the ancillary information to + * CR2 or DR6, for backwards ABI-compatibility. 
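nested_vmx_is_page_fault_vmexit(), used just below, implements the VMX page-fault filtering controls: a #PF is reflected to L1 iff the PFEC_MASK/PFEC_MATCH comparison result, XORed with the exception-bitmap bit for vector 14, is true. The helper is only forward-declared in the hunks shown; a sketch consistent with the vmcs12 field names used in this file:

static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	bool inequality, bit;

	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	inequality = (error_code & vmcs12->page_fault_error_code_mask) !=
		vmcs12->page_fault_error_code_match;

	/* SDM semantics: exit when the match result differs from the bitmap
	 * bit, so L1 can ask for exits on matching faults only, or on
	 * everything except matching faults. */
	return inequality ^ bit;
}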
+ */ + if (nested_vmx_is_page_fault_vmexit(vmcs12, + vcpu->arch.exception.error_code)) { + nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); + return 1; + } + } else { + unsigned long exit_qual = 0; + if (nr == DB_VECTOR) + exit_qual = vcpu->arch.dr6; - if (vcpu->arch.exception.nested_apf) { - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, - vcpu->arch.apf.nested_apf_token); - return 1; + if (vmcs12->exception_bitmap & (1u << nr)) { + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + return 1; + } } - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; + return 0; } static void vmx_queue_exception(struct kvm_vcpu *vcpu) @@ -2667,7 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_exit_handled() will not pass related exits to L1. + * nested_vmx_exit_reflected() will not pass related exits to L1. * These rules have exceptions below. */ @@ -4955,6 +4995,28 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4962,18 +5024,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4985,6 +5044,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, @@ -7134,34 +7195,32 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } +static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) +{ + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmcs_write64(VMCS_LINK_POINTER, -1ull); +} + static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { if (vmx->nested.current_vmptr == -1ull) return; - /* 
current_vmptr and current_vmcs12 are always set/reset together */ - if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) - return; - if (enable_shadow_vmcs) { /* copy to memory all shadowed fields in case they were modified */ copy_shadow_to_vmcs12(vmx); vmx->nested.sync_shadow_vmcs = false; - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_SHADOW_VMCS); - vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; /* Flush VMCS12 to guest memory */ - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12, - VMCS12_SIZE); + kvm_vcpu_write_guest_page(&vmx->vcpu, + vmx->nested.current_vmptr >> PAGE_SHIFT, + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; } /* @@ -7175,12 +7234,14 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); - nested_release_vmcs12(vmx); + vmx->nested.posted_intr_nv = -1; + vmx->nested.current_vmptr = -1ull; if (vmx->nested.msr_bitmap) { free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } if (enable_shadow_vmcs) { + vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); free_vmcs(vmx->vmcs01.shadow_vmcs); vmx->vmcs01.shadow_vmcs = NULL; @@ -7579,14 +7640,14 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) } nested_release_vmcs12(vmx); - vmx->nested.current_vmcs12 = new_vmcs12; - vmx->nested.current_vmcs12_page = page; /* * Load VMCS12 from guest memory since it is not already * cached. */ - memcpy(vmx->nested.cached_vmcs12, - vmx->nested.current_vmcs12, VMCS12_SIZE); + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); + kunmap(page); + nested_release_page_clean(page); + set_current_vmptr(vmx, vmptr); } @@ -8019,12 +8080,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ -static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) +static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_reason = vmx->exit_reason; trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, vmcs_readl(EXIT_QUALIFICATION), @@ -8033,6 +8093,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU + * may write to these pages via their host physical address while + * L2 is running, bypassing any address-translation-based dirty + * tracking (e.g. EPT write protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; @@ -8169,6 +8241,29 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) } } +static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * At this point, the exit interruption info in exit_intr_info + * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT + * we need to query the in-kernel LAPIC. 
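To make the guard below concrete: the VM-exit interruption-information field packs the vector in bits 7:0, the event type in bits 10:8, "error code valid" in bit 11, and "valid" in bit 31. A hardware #PF delivered with an error code therefore encodes as 0x80000b0e, and only such exits carry a meaningful VM_EXIT_INTR_ERROR_CODE:

	u32 info = 0x80000b0e;	/* valid | errcode valid | hw exception (3) | vector 14 */

	/* INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK == 0x80000800;
	 * 0x80000b0e & 0x80000800 == 0x80000800, so the error code is
	 * copied into vmcs12->vm_exit_intr_error_code before reflecting. */

An external-interrupt exit never sets bit 11 here, consistent with the comment above that this field is only meaningful for EXCEPTION_NMI exits.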
+ */ + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); + if ((exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { *info1 = vmcs_readl(EXIT_QUALIFICATION); @@ -8415,12 +8510,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu, exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; - } + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) + return nested_vmx_reflect_vmexit(vcpu, exit_reason); if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { dump_vmcs(); @@ -9223,7 +9314,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; @@ -9509,12 +9599,15 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - else + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { + vmcs12->vm_exit_intr_error_code = fault->error_code; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | + INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, + fault->address); + } else { kvm_inject_page_fault(vcpu, fault); + } } static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, @@ -10094,12 +10187,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when * !enable_ept, EB.PF is 1, so the "or" will always be 1. - * - * A problem with this approach (when !enable_ept) is that L1 may be - * injected with more page faults than it asked for. This could have - * caused problems, but in practice existing hypervisors don't care. - * To fix this, we will need to emulate the PFEC checking (on the L1 - * page tables), using walk_addr(), when injecting PFs to L1. */ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, enable_ept ? 
vmcs12->page_fault_error_code_mask : 0); @@ -10847,13 +10934,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vm_exit_reason = exit_reason; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - if ((vmcs12->vm_exit_intr_info & - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) - vmcs12->vm_exit_intr_error_code = - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + vmcs12->idt_vectoring_info_field = 0; vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); @@ -11049,8 +11131,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); - if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - && nested_exit_intr_ack_set(vcpu)) { + /* + * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of + * the VM-exit interrupt information (valid interrupt) is always set to + * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need + * kvm_cpu_has_interrupt(). See the commit message for details. + */ + if (nested_exit_intr_ack_set(vcpu) && + exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + kvm_cpu_has_interrupt(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c97c82814c4..d734aa8c5b4f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3159,15 +3159,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_set_hflags(vcpu, hflags); vcpu->arch.smi_pending = events->smi.pending; - if (events->smi.smm_inside_nmi) - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; - else - vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (lapic_in_kernel(vcpu)) { - if (events->smi.latched_init) - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + + if (events->smi.smm) { + if (events->smi.smm_inside_nmi) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; + if (lapic_in_kernel(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + } } } @@ -6215,6 +6218,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.shorthand = 0; lapic_irq.dest_mode = 0; + lapic_irq.level = 0; lapic_irq.dest_id = apicid; lapic_irq.msi_redir_hint = false; diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 2d716ebc5a5e..dff7cc39437c 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += bug.h generic-y += clkdev.h +generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h generic-y += emergency-restart.h @@ -17,6 +18,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += param.h generic-y += percpu.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h deleted file mode 100644 index 1deeb8ebbb1b..000000000000 --- a/arch/xtensa/include/asm/device.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#ifndef _ASM_XTENSA_DEVICE_H -#define 
_ASM_XTENSA_DEVICE_H - -struct dev_archdata { -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_XTENSA_DEVICE_H */ diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h deleted file mode 100644 index 0a70e780ef2a..000000000000 --- a/arch/xtensa/include/asm/param.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/param.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ -#ifndef _XTENSA_PARAM_H -#define _XTENSA_PARAM_H - -#include <uapi/asm/param.h> - -# define HZ CONFIG_HZ /* internal timer frequency */ -# define USER_HZ 100 /* for user interfaces in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* frequnzy at which times() counts */ -#endif /* _XTENSA_PARAM_H */ diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d159e9b9c018..672391003e40 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) } EXPORT_SYMBOL(__sync_fetch_and_or_4); -#ifdef CONFIG_NET /* * Networking support */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -#endif /* CONFIG_NET */ /* * Architecture-specific symbols diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 1a804a2f9a5b..3c75c4e597da 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) clear_page_alias(kvaddr, paddr); preempt_enable(); } +EXPORT_SYMBOL(clear_user_highpage); void copy_user_highpage(struct page *dst, struct page *src, unsigned long vaddr, struct vm_area_struct *vma) @@ -119,10 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src, copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr); preempt_enable(); } - -#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +EXPORT_SYMBOL(copy_user_highpage); /* * Any time the kernel writes to a user page cache page, or it is about to @@ -176,7 +174,7 @@ void flush_dcache_page(struct page *page) /* There shouldn't be an entry in the cache for this page anymore. */ } - +EXPORT_SYMBOL(flush_dcache_page); /* * For now, flush the whole cache. FIXME?? @@ -188,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma, __flush_invalidate_dcache_all(); __invalidate_icache_all(); } +EXPORT_SYMBOL(local_flush_cache_range); /* * Remove any entry in the cache for this page. @@ -207,8 +206,9 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); } +EXPORT_SYMBOL(local_flush_cache_page); -#endif +#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ void update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) @@ -225,7 +225,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) flush_tlb_page(vma, addr); -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) { unsigned long phys = page_to_phys(page); @@ -256,7 +256,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) * flush_dcache_page() on the page. 
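The conditional being widened in these cache.c hunks drops XCHAL_DCACHE_IS_WRITEBACK because virtual aliasing is a property of cache geometry, not of write policy: whenever a direct-indexed way is larger than a page, the index bits above the page offset come from the virtual address, and even a write-through cache can serve stale data through a second mapping. A worked example with assumed sizes (16 KiB way, 4 KiB pages), using alias macros in the style of xtensa's asm/page.h (definitions sketched, not quoted):

	/* Assumed: DCACHE_WAY_SIZE = 0x4000, PAGE_SIZE = 0x1000, PAGE_SHIFT = 12 */
	#define DCACHE_ALIAS_MASK	(PAGE_MASK & (DCACHE_WAY_SIZE - 1))	/* 0x3000 */
	#define DCACHE_ALIAS(a)		(((a) & DCACHE_ALIAS_MASK) >> PAGE_SHIFT)

	/* The same physical page mapped at vaddr 0x1000 and vaddr 0x3000:
	 * DCACHE_ALIAS(0x1000) == 1, DCACHE_ALIAS(0x3000) == 3, i.e. two
	 * different cache colors, so the kernel must flush explicitly. */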
*/ -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src,