diff options
445 files changed, 12347 insertions, 10098 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index c75e5d6b8fa8..a6eb7dcd4dd5 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml \ kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - 80211.xml sh.xml regulator.xml w1.xml \ + sh.xml regulator.xml w1.xml \ writing_musb_glue_layer.xml iio.xml ifeq ($(DOCBOOKS),) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 21e2d8863705..be7c0d9506b1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -106,6 +106,16 @@ use by PCI Format: <irq>,<irq>... + acpi_mask_gpe= [HW,ACPI] + Due to the existence of _Lxx/_Exx, some GPEs triggered + by unsupported hardware/firmware features can result in + GPE floodings that cannot be automatically disabled by + the GPE dispatcher. + This facility can be used to prevent such uncontrolled + GPE floodings. + Format: <int> + Support masking of GPEs numbered from 0x00 to 0x7f. + acpi_no_auto_serialize [HW,ACPI] Disable auto-serialization of AML methods AML control methods that contain the opcodes to create @@ -3811,10 +3821,11 @@ it if 0 is given (See Documentation/cgroup-v1/memory.txt) swiotlb= [ARM,IA-64,PPC,MIPS,X86] - Format: { <int> | force } + Format: { <int> | force | noforce } <int> -- Number of I/O TLB slabs force -- force using of bounce buffers even if they wouldn't be automatically used by the kernel + noforce -- Never use bounce buffers (for debugging) switches= [HW,M68k] diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 51642159aedb..c0a3bb5a6e4e 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -54,9 +54,9 @@ This is the hardware sector size of the device, in bytes. io_poll (RW) ------------ -When read, this file shows the total number of block IO polls and how -many returned success. Writing '0' to this file will disable polling -for this device. Writing any non-zero value will enable this feature. +When read, this file shows whether polling is enabled (1) or disabled +(0). Writing '0' to this file will disable polling for this device. +Writing any non-zero value will enable this feature. io_poll_delay (RW) ------------------ diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt new file mode 100644 index 000000000000..c204725e5873 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt @@ -0,0 +1,27 @@ +MediaTek cryptographic accelerators + +Required properties: +- compatible: Should be "mediatek,eip97-crypto" +- reg: Address and length of the register set for the device +- interrupts: Should contain the five crypto engines interrupts in numeric + order. These are global system and four descriptor rings. +- clocks: the clock used by the core +- clock-names: the names of the clock listed in the clocks property. These are + "ethif", "cryp" +- power-domains: Must contain a reference to the PM domain. + + +Example: + crypto: crypto@1b240000 { + compatible = "mediatek,eip97-crypto"; + reg = <0 0x1b240000 0 0x20000>; + interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>; + clocks = <&topckgen CLK_TOP_ETHIF_SEL>, + <ðsys CLK_ETHSYS_CRYPTO>; + clock-names = "ethif","cryp"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>; + }; diff --git a/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt index 3e5b9793341f..8682ab6d4a50 100644 --- a/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt +++ b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt @@ -8,8 +8,9 @@ This driver provides a simple power button event via an Interrupt. Required properties: - compatible: should be "ti,tps65217-pwrbutton" or "ti,tps65218-pwrbutton" -Required properties for TPS65218: +Required properties: - interrupts: should be one of the following + - <2>: For controllers compatible with tps65217 - <3 IRQ_TYPE_EDGE_BOTH>: For controllers compatible with tps65218 Examples: @@ -17,6 +18,7 @@ Examples: &tps { tps65217-pwrbutton { compatible = "ti,tps65217-pwrbutton"; + interrupts = <2>; }; }; diff --git a/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt b/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt index 98d131acee95..a11072c5a866 100644 --- a/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt +++ b/Documentation/devicetree/bindings/power/supply/tps65217_charger.txt @@ -2,11 +2,16 @@ TPS65217 Charger Required Properties: -compatible: "ti,tps65217-charger" +-interrupts: TPS65217 interrupt numbers for the AC and USB charger input change. + Should be <0> for the USB charger and <1> for the AC adapter. +-interrupt-names: Should be "USB" and "AC" This node is a subnode of the tps65217 PMIC. Example: tps65217-charger { - compatible = "ti,tps65090-charger"; + compatible = "ti,tps65217-charger"; + interrupts = <0>, <1>; + interrupt-names = "USB", "AC"; }; diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index 0bb0b5fc9512..6d9ff316b608 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -55,21 +55,6 @@ Device Drivers DMA Management .. kernel-doc:: drivers/base/dma-mapping.c :export: -Device Drivers Power Management -------------------------------- - -.. kernel-doc:: drivers/base/power/main.c - :export: - -Device Drivers ACPI Support ---------------------------- - -.. kernel-doc:: drivers/acpi/scan.c - :export: - -.. kernel-doc:: drivers/acpi/scan.c - :internal: - Device drivers PnP support -------------------------- diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 9ed15f86c17c..15d8d16934fd 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -5,8 +5,8 @@ platform_labels - INTEGER possible to configure forwarding for label values equal to or greater than the number of platform labels. - A dense utliziation of the entries in the platform label table - is possible and expected aas the platform labels are locally + A dense utilization of the entries in the platform label table + is possible and expected as the platform labels are locally allocated. If the number of platform label table entries is set to 0 no diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt index a445da098bc6..3f76c0c37920 100644 --- a/Documentation/unaligned-memory-access.txt +++ b/Documentation/unaligned-memory-access.txt @@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2) #else const u16 *a = (const u16 *)addr1; const u16 *b = (const u16 *)addr2; - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index b38afec35edc..d226c7a5ba8b 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -127,22 +127,22 @@ the VFIO when devices are unbound from the driver. Physical Device Driver Interface -------------------------------- -The physical device driver interface provides the parent_ops[3] structure to -define the APIs to manage work in the mediated core driver that is related to -the physical device. +The physical device driver interface provides the mdev_parent_ops[3] structure +to define the APIs to manage work in the mediated core driver that is related +to the physical device. -The structures in the parent_ops structure are as follows: +The structures in the mdev_parent_ops structure are as follows: * dev_attr_groups: attributes of the parent device * mdev_attr_groups: attributes of the mediated device * supported_config: attributes to define supported configurations -The functions in the parent_ops structure are as follows: +The functions in the mdev_parent_ops structure are as follows: * create: allocate basic resources in a driver for a mediated device * remove: free resources in a driver when a mediated device is destroyed -The callbacks in the parent_ops structure are as follows: +The callbacks in the mdev_parent_ops structure are as follows: * open: open callback of mediated device * close: close callback of mediated device @@ -151,14 +151,14 @@ The callbacks in the parent_ops structure are as follows: * write: write emulation callback * mmap: mmap emulation callback -A driver should use the parent_ops structure in the function call to register -itself with the mdev core driver: +A driver should use the mdev_parent_ops structure in the function call to +register itself with the mdev core driver: extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); -However, the parent_ops structure is not required in the function call that a -driver should use to unregister itself with the mdev core driver: +However, the mdev_parent_ops structure is not required in the function call +that a driver should use to unregister itself with the mdev core driver: extern void mdev_unregister_device(struct device *dev); @@ -223,6 +223,9 @@ Directories and files under the sysfs for Each Physical Device sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name); + (or using mdev_parent_dev(mdev) to arrive at the parent device outside + of the core mdev code) + * device_api This attribute should show which device API is being created, for example, @@ -394,5 +397,5 @@ References [1] See Documentation/vfio.txt for more information on VFIO. [2] struct mdev_driver in include/linux/mdev.h -[3] struct parent_ops in include/linux/mdev.h +[3] struct mdev_parent_ops in include/linux/mdev.h [4] struct vfio_iommu_driver_ops in include/linux/vfio.h diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..5f0420a0da5b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3800,6 +3800,7 @@ F: include/linux/devcoredump.h DEVICE FREQUENCY (DEVFREQ) M: MyungJoo Ham <myungjoo.ham@samsung.com> M: Kyungmin Park <kyungmin.park@samsung.com> +R: Chanwoo Choi <cw00.choi@samsung.com> L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git S: Maintained @@ -5080,9 +5081,11 @@ F: drivers/net/wan/dlci.c F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER +M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> L: linux-fbdev@vger.kernel.org +T: git git://github.com/bzolnier/linux.git Q: http://patchwork.kernel.org/project/linux-fbdev/list/ -S: Orphan +S: Maintained F: Documentation/fb/ F: drivers/video/ F: include/video/ @@ -5504,6 +5507,7 @@ M: Alex Elder <elder@kernel.org> M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> S: Maintained F: drivers/staging/greybus/ +L: greybus-dev@lists.linaro.org GREYBUS AUDIO PROTOCOLS DRIVERS M: Vaibhav Agarwal <vaibhav.sr@gmail.com> @@ -5961,6 +5965,7 @@ F: drivers/media/platform/sti/hva Hyper-V CORE AND DRIVERS M: "K. Y. Srinivasan" <kys@microsoft.com> M: Haiyang Zhang <haiyangz@microsoft.com> +M: Stephen Hemminger <sthemmin@microsoft.com> L: devel@linuxdriverproject.org S: Maintained F: arch/x86/include/asm/mshyperv.h @@ -8852,17 +8857,22 @@ F: drivers/video/fbdev/nvidia/ NVM EXPRESS DRIVER M: Keith Busch <keith.busch@intel.com> M: Jens Axboe <axboe@fb.com> +M: Christoph Hellwig <hch@lst.de> +M: Sagi Grimberg <sagi@grimberg.me> L: linux-nvme@lists.infradead.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git -W: https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/ +T: git://git.infradead.org/nvme.git +W: http://git.infradead.org/nvme.git S: Supported F: drivers/nvme/host/ F: include/linux/nvme.h +F: include/uapi/linux/nvme_ioctl.h NVM EXPRESS TARGET DRIVER M: Christoph Hellwig <hch@lst.de> M: Sagi Grimberg <sagi@grimberg.me> L: linux-nvme@lists.infradead.org +T: git://git.infradead.org/nvme.git +W: http://git.infradead.org/nvme.git S: Supported F: drivers/nvme/target/ @@ -9842,7 +9852,7 @@ M: Mark Rutland <mark.rutland@arm.com> M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> L: linux-arm-kernel@lists.infradead.org S: Maintained -F: drivers/firmware/psci.c +F: drivers/firmware/psci*.c F: include/linux/psci.h F: include/uapi/linux/psci.h @@ -13527,11 +13537,11 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Dave Chinner <david@fromorbit.com> +M: Darrick J. Wong <darrick.wong@oracle.com> M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org W: http://xfs.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git +T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported F: Documentation/filesystems/xfs.txt F: fs/xfs/ @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc3 NAME = Roaring Lionus # *DOCUMENTATION* diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5fab553fd03a..186c4c214e0a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1502,8 +1502,7 @@ source kernel/Kconfig.preempt config HZ_FIXED int - default 200 if ARCH_EBSA110 || ARCH_S3C24XX || \ - ARCH_S5PV210 || ARCH_EXYNOS4 + default 200 if ARCH_EBSA110 default 128 if SOC_AT91RM9200 default 0 diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index cccdbcb557b6..7327250f0bb6 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -501,6 +501,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ logicpd-torpedo-37xx-devkit.dtb \ + logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ omap3-beagle-xm.dtb \ diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index dc561d505bbe..3e32dd18fd25 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -6,8 +6,6 @@ * published by the Free Software Foundation. */ -#include <dt-bindings/mfd/tps65217.h> - / { cpus { cpu@0 { @@ -319,13 +317,13 @@ ti,pmic-shutdown-controller; charger { - interrupts = <TPS65217_IRQ_AC>, <TPS65217_IRQ_USB>; - interrupts-names = "AC", "USB"; + interrupts = <0>, <1>; + interrupt-names = "USB", "AC"; status = "okay"; }; pwrbutton { - interrupts = <TPS65217_IRQ_PB>; + interrupts = <2>; status = "okay"; }; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 64c8aa9057a3..18d72a245e88 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c0; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index ac55f93fc91e..2df9e6050c2f 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; memory@0 { device_type = "memory"; diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index d6e43e5184c1..ad68d1eb3bc3 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -62,11 +62,6 @@ linux,default-trigger = "mmc0"; }; }; - - extcon_usb2: extcon_usb2 { - compatible = "linux,extcon-usb-gpio"; - id-gpio = <&gpio5 7 GPIO_ACTIVE_HIGH>; - }; }; &mmc1 { @@ -79,3 +74,8 @@ &omap_dwc3_2 { extcon = <&extcon_usb2>; }; + +&extcon_usb2 { + id-gpio = <&gpio5 7 GPIO_ACTIVE_HIGH>; + vbus-gpio = <&gpio7 22 GPIO_ACTIVE_HIGH>; +}; diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts index 27d9149cedba..8350b4b34b08 100644 --- a/arch/arm/boot/dts/am572x-idk.dts +++ b/arch/arm/boot/dts/am572x-idk.dts @@ -23,11 +23,6 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; - extcon_usb2: extcon_usb2 { - compatible = "linux,extcon-usb-gpio"; - id-gpio = <&gpio3 16 GPIO_ACTIVE_HIGH>; - }; - status-leds { compatible = "gpio-leds"; cpu0-led { @@ -76,6 +71,11 @@ extcon = <&extcon_usb2>; }; +&extcon_usb2 { + id-gpio = <&gpio3 16 GPIO_ACTIVE_HIGH>; + vbus-gpio = <&gpio3 26 GPIO_ACTIVE_HIGH>; +}; + &mmc1 { status = "okay"; vmmc-supply = <&v3_3d>; @@ -87,3 +87,7 @@ &sn65hvs882 { load-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; }; + +&pcie1 { + gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; +}; diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 555ae21f2b9a..814a720d5c3d 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -303,6 +303,13 @@ gpio-controller; #gpio-cells = <2>; }; + + extcon_usb2: tps659038_usb { + compatible = "ti,palmas-usb-vid"; + ti,enable-vbus-detection; + ti,enable-id-detection; + /* ID & VBUS GPIOs provided in board dts */ + }; }; }; @@ -369,7 +376,7 @@ }; &usb2 { - dr_mode = "otg"; + dr_mode = "peripheral"; }; &mmc2 { diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index 1facc5f12cef..81b8cecb5820 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 61dd2f6b02bc..6db652ae9bd5 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index addb7530cfbe..1faf24acd521 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -18,6 +18,7 @@ compatible = "ti,dra7xx"; interrupt-parent = <&crossbar_mpu>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/dra72-evm-tps65917.dtsi b/arch/arm/boot/dts/dra72-evm-tps65917.dtsi index ee6dac44edf1..e6df676886c0 100644 --- a/arch/arm/boot/dts/dra72-evm-tps65917.dtsi +++ b/arch/arm/boot/dts/dra72-evm-tps65917.dtsi @@ -132,3 +132,19 @@ ti,palmas-long-press-seconds = <6>; }; }; + +&usb2_phy1 { + phy-supply = <&ldo4_reg>; +}; + +&usb2_phy2 { + phy-supply = <&ldo4_reg>; +}; + +&dss { + vdda_video-supply = <&ldo5_reg>; +}; + +&mmc1 { + vmmc_aux-supply = <&ldo1_reg>; +}; diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 685916e3d8a1..85cd8be22f71 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -31,11 +31,11 @@ }; }; - avic: avic-interrupt-controller@60000000 { + avic: interrupt-controller@68000000 { compatible = "fsl,imx31-avic", "fsl,avic"; interrupt-controller; #interrupt-cells = <1>; - reg = <0x60000000 0x100000>; + reg = <0x68000000 0x100000>; }; soc { diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index e476d01959ea..26d060484728 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -533,7 +533,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17071 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17071 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17071 - MX6QDL_PAD_NANDF_CS2__GPIO6_IO15 0x000b0 >; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 53e6e63cbb02..89b834f3fa17 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1100,6 +1100,7 @@ interrupts = <0 14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_EIM_SLOW>; fsl,weim-cs-gpr = <&gpr>; + status = "disabled"; }; ocotp: ocotp@021bc000 { diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 4fd6de29f07d..19cbd879c448 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -900,6 +900,7 @@ reg = <0x021b8000 0x4000>; interrupts = <0 14 IRQ_TYPE_LEVEL_HIGH>; fsl,weim-cs-gpr = <&gpr>; + status = "disabled"; }; ocotp: ocotp@021bc000 { diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 076a30f9bcae..10f333016197 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -977,6 +977,7 @@ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SX_CLK_EIM_SLOW>; fsl,weim-cs-gpr = <&gpr>; + status = "disabled"; }; ocotp: ocotp@021bc000 { diff --git a/arch/arm/boot/dts/omap2.dtsi b/arch/arm/boot/dts/omap2.dtsi index 4f793a025a72..f1d6de8b3c19 100644 --- a/arch/arm/boot/dts/omap2.dtsi +++ b/arch/arm/boot/dts/omap2.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { serial0 = &uart1; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 87ca50b53002..4d448f145ed1 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -734,6 +734,8 @@ vmmc_aux-supply = <&vsim>; bus-width = <8>; non-removable; + no-sdio; + no-sd; }; &mmc3 { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index ecf5eb584c75..a3ff4933dbc1 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 8087456b5fbe..578c53f08309 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -15,6 +15,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 968c67a49dbd..7cd92babc41a 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -17,6 +17,7 @@ compatible = "ti,omap5"; interrupt-parent = <&wakeupgen>; + chosen { }; aliases { i2c0 = &i2c1; diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 268bd470c865..407a4610f4a7 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -4,6 +4,7 @@ #include <dt-bindings/clock/qcom,gcc-msm8960.h> #include <dt-bindings/reset/qcom,gcc-msm8960.h> #include <dt-bindings/clock/qcom,mmcc-msm8960.h> +#include <dt-bindings/clock/qcom,rpmcc.h> #include <dt-bindings/soc/qcom,gsbi.h> #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -303,6 +304,9 @@ firmware { scm { compatible = "qcom,scm-apq8064"; + + clocks = <&rpmcc RPM_DAYTONA_FABRIC_CLK>; + clock-names = "core"; }; }; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts index 102838fcc588..15f4fd3f4695 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts @@ -81,7 +81,7 @@ #address-cells = <0>; interrupt-controller; reg = <0 0x2c001000 0 0x1000>, - <0 0x2c002000 0 0x1000>, + <0 0x2c002000 0 0x2000>, <0 0x2c004000 0 0x2000>, <0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 45d08cc37b01..bd107c5a0226 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -131,7 +131,7 @@ #address-cells = <0>; interrupt-controller; reg = <0 0x2c001000 0 0x1000>, - <0 0x2c002000 0 0x1000>, + <0 0x2c002000 0 0x2000>, <0 0x2c004000 0 0x2000>, <0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index 7ea617e47fe4..958b4c42d320 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -153,7 +153,8 @@ switch0phy1: switch1phy0@1 { reg = <1>; interrupt-parent = <&switch0>; - interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; }; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + }; switch0phy2: switch1phy0@2 { reg = <2>; interrupt-parent = <&switch0>; diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 13f1b4c289d4..a8fce93137fb 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -62,35 +62,18 @@ config CRYPTO_SHA512_ARM using optimized ARM assembler and NEON, when available. config CRYPTO_AES_ARM - tristate "AES cipher algorithms (ARM-asm)" - depends on ARM + tristate "Scalar AES cipher for ARM" select CRYPTO_ALGAPI select CRYPTO_AES help Use optimized AES assembler routines for ARM platforms. - AES cipher algorithms (FIPS-197). AES uses the Rijndael - algorithm. - - Rijndael appears to be consistently a very good performer in - both hardware and software across a wide range of computing - environments regardless of its use in feedback or non-feedback - modes. Its key setup time is excellent, and its key agility is - good. Rijndael's very low memory requirements make it very well - suited for restricted-space environments, in which it also - demonstrates excellent performance. Rijndael's operations are - among the easiest to defend against power and timing attacks. - - The AES specifies three key sizes: 128, 192 and 256 bits - - See <http://csrc.nist.gov/encryption/aes/> for more information. - config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_AES_ARM select CRYPTO_BLKCIPHER select CRYPTO_SIMD + select CRYPTO_AES_ARM help Use a faster and more secure NEON based implementation of AES in CBC, CTR and XTS modes @@ -130,4 +113,10 @@ config CRYPTO_CRC32_ARM_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b578a1820ab1..1822c4697278 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -26,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m)) endif endif -aes-arm-y := aes-armv4.o aes_glue.o -aes-arm-bs-y := aesbs-core.o aesbs-glue.o +aes-arm-y := aes-cipher-core.o aes-cipher-glue.o +aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o @@ -40,17 +41,15 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) -$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl - $(call cmd,perl) - $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S +.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S deleted file mode 100644 index ebb9761fb572..000000000000 --- a/arch/arm/crypto/aes-armv4.S +++ /dev/null @@ -1,1089 +0,0 @@ -#define __ARM_ARCH__ __LINUX_ARM_ARCH__ -@ ==================================================================== -@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ AES for ARMv4 - -@ January 2007. -@ -@ Code uses single 1K S-box and is >2 times faster than code generated -@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -@ allows to merge logical or arithmetic operation with shift or rotate -@ in one instruction and emit combined result every cycle. The module -@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit -@ key [on single-issue Xscale PXA250 core]. - -@ May 2007. -@ -@ AES_set_[en|de]crypt_key is added. - -@ July 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 12% improvement on -@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~21.5 cycles per byte. - -@ A little glue here to select the correct code below for the ARM CPU -@ that is being targetted. - -#include <linux/linkage.h> -#include <asm/assembler.h> - -.text - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_encrypt) - adr r3,AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_encrypt-AES_Te @ Te -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_encrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... - mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_encrypt) - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! @ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0 - and r8,lr,r0,lsr#8 - and r9,lr,r0,lsr#16 - mov r0,r0,lsr#24 -.Lenc_loop: - ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] - and r8,lr,r1 - ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] - ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] - ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] - eor r0,r0,r7,ror#8 - ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,ror#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r6,r9,ror#8 - and r9,lr,r2 - ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] - eor r1,r1,r4,ror#24 - ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,ror#8 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r6,r9,ror#16 - and r9,lr,r3,lsr#16 @ i2 - ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] - eor r2,r2,r5,ror#16 - ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] - eor r0,r0,r7,ror#24 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] - eor r2,r2,r9,ror#8 - ldr r4,[r11,#-12] - eor r3,r3,r6,ror#8 - - ldr r5,[r11,#-8] - eor r0,r0,r7 - ldr r6,[r11,#-4] - and r7,lr,r0 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0,lsr#16 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Lenc_loop - - add r10,r10,#2 - - ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] - and r8,lr,r1 - ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] - and r9,lr,r1,lsr#8 - ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] - mov r1,r1,lsr#24 - - ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] - ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] - ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] - eor r0,r7,r0,lsl#8 - ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,lsl#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] - eor r1,r4,r1,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] - mov r2,r2,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] - eor r0,r7,r0,lsl#8 - ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r3,lsr#16 @ i2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] - eor r2,r5,r2,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] - mov r3,r3,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] - eor r0,r7,r0,lsl#8 - ldr r7,[r11,#0] - ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r2,r9,lsl#16 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - sub r10,r10,#2 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.align 5 -ENTRY(private_AES_set_encrypt_key) -_armv4_AES_set_encrypt_key: - adr r3,_armv4_AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov r12,r0 @ inp - mov lr,r1 @ bits - mov r11,r2 @ key - -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - str r0,[r11],#16 - orr r3,r3,r5,lsl#16 - str r1,[r11,#-12] - orr r3,r3,r6,lsl#24 - str r2,[r11,#-8] - str r3,[r11,#-4] -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r11],#16 - str r1,[r11,#-12] - str r2,[r11,#-8] - str r3,[r11,#-4] -#endif - - teq lr,#128 - bne .Lnot128 - mov r12,#10 - str r12,[r11,#240-16] - add r6,r10,#256 @ rcon - mov lr,#255 - -.L128_loop: - and r5,lr,r3,lsr#24 - and r7,lr,r3,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r3 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r5,r5,r4 - eor r0,r0,r5 @ rk[4]=rk[0]^... - eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] - str r0,[r11],#16 - eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] - str r1,[r11,#-12] - eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] - str r2,[r11,#-8] - subs r12,r12,#1 - str r3,[r11,#-4] - bne .L128_loop - sub r2,r11,#176 - b .Ldone - -.Lnot128: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#19] - ldrb r4,[r12,#18] - ldrb r5,[r12,#17] - ldrb r6,[r12,#16] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#23] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#22] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#21] - ldrb r6,[r12,#20] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#16] - ldr r9,[r12,#20] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - teq lr,#192 - bne .Lnot192 - mov r12,#12 - str r12,[r11,#240-24] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#8 - -.L192_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[6]=rk[0]^... - eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] - str r0,[r11],#24 - eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] - str r1,[r11,#-20] - eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] - str r2,[r11,#-16] - subs r12,r12,#1 - str r3,[r11,#-12] - subeq r2,r11,#216 - beq .Ldone - - ldr r7,[r11,#-32] - ldr r8,[r11,#-28] - eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] - eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] - str r7,[r11,#-8] - str r9,[r11,#-4] - b .L192_loop - -.Lnot192: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#27] - ldrb r4,[r12,#26] - ldrb r5,[r12,#25] - ldrb r6,[r12,#24] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#31] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#30] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#29] - ldrb r6,[r12,#28] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#24] - ldr r9,[r12,#28] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - mov r12,#14 - str r12,[r11,#240-32] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#7 - -.L256_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[8]=rk[0]^... - eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] - str r0,[r11],#32 - eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] - str r1,[r11,#-28] - eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] - str r2,[r11,#-24] - subs r12,r12,#1 - str r3,[r11,#-20] - subeq r2,r11,#256 - beq .Ldone - - and r5,lr,r3 - and r7,lr,r3,lsr#8 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#16 - ldrb r7,[r10,r7] - and r9,lr,r3,lsr#24 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#8 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r11,#-48] - orr r5,r5,r9,lsl#24 - - ldr r7,[r11,#-44] - ldr r8,[r11,#-40] - eor r4,r4,r5 @ rk[12]=rk[4]^... - ldr r9,[r11,#-36] - eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] - str r4,[r11,#-16] - eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] - str r7,[r11,#-12] - eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] - str r8,[r11,#-8] - str r9,[r11,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: ret lr -ENDPROC(private_AES_set_encrypt_key) - -.align 5 -ENTRY(private_AES_set_decrypt_key) - str lr,[sp,#-4]! @ push lr -#if 0 - @ kernel does both of these in setkey so optimise this bit out by - @ expecting the key to already have the enc_key work done (see aes_glue.c) - bl _armv4_AES_set_encrypt_key -#else - mov r0,#0 -#endif - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov r11,r2 @ which is AES_KEY *key - mov r7,r2 - add r8,r2,r12,lsl#4 - -.Linv: ldr r0,[r7] - ldr r1,[r7,#4] - ldr r2,[r7,#8] - ldr r3,[r7,#12] - ldr r4,[r8] - ldr r5,[r8,#4] - ldr r6,[r8,#8] - ldr r9,[r8,#12] - str r0,[r8],#-16 - str r1,[r8,#16+4] - str r2,[r8,#16+8] - str r3,[r8,#16+12] - str r4,[r7],#16 - str r5,[r7,#-12] - str r6,[r7,#-8] - str r9,[r7,#-4] - teq r7,r8 - bne .Linv - ldr r0,[r11,#16]! @ prefetch tp1 - mov r7,#0x80 - mov r8,#0x1b - orr r7,r7,#0x8000 - orr r8,r8,#0x1b00 - orr r7,r7,r7,lsl#16 - orr r8,r8,r8,lsl#16 - sub r12,r12,#1 - mvn r9,r7 - mov r12,r12,lsl#2 @ (rounds-1)*4 - -.Lmix: and r4,r0,r7 - and r1,r0,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r1,r4,r1,lsl#1 @ tp2 - - and r4,r1,r7 - and r2,r1,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r2,r4,r2,lsl#1 @ tp4 - - and r4,r2,r7 - and r3,r2,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r3,r4,r3,lsl#1 @ tp8 - - eor r4,r1,r2 - eor r5,r0,r3 @ tp9 - eor r4,r4,r3 @ tpe - eor r4,r4,r1,ror#24 - eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor r4,r4,r2,ror#16 - eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) - - ldr r0,[r11,#4] @ prefetch tp1 - str r4,[r11],#4 - subs r12,r12,#1 - bne .Lmix - - mov r0,#0 - ldmia sp!,{r4-r12,pc} -ENDPROC(private_AES_set_decrypt_key) - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_decrypt) - adr r3,AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_decrypt-AES_Td @ Td -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_decrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... - mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_decrypt) - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0,lsr#16 - and r8,lr,r0,lsr#8 - and r9,lr,r0 - mov r0,r0,lsr#24 -.Ldec_loop: - ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] - and r7,lr,r1 @ i0 - ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] - and r8,lr,r1,lsr#16 - ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] - ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] - ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] - eor r0,r0,r7,ror#24 - ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,ror#8 - and r8,lr,r2 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r2,lsr#16 - ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] - eor r1,r1,r4,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] - and r7,lr,r3,lsr#16 @ i0 - eor r1,r1,r8,ror#24 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r3 @ i2 - ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] - eor r2,r2,r5,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] - eor r0,r0,r7,ror#8 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] - eor r2,r2,r9,ror#24 - - ldr r4,[r11,#-12] - eor r0,r0,r7 - ldr r5,[r11,#-8] - eor r3,r3,r6,ror#8 - ldr r6,[r11,#-4] - and r7,lr,r0,lsr#16 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Ldec_loop - - add r10,r10,#1024 - - ldr r5,[r10,#0] @ prefetch Td4 - ldr r6,[r10,#32] - ldr r4,[r10,#64] - ldr r5,[r10,#96] - ldr r6,[r10,#128] - ldr r4,[r10,#160] - ldr r5,[r10,#192] - ldr r6,[r10,#224] - - ldrb r0,[r10,r0] @ Td4[s0>>24] - ldrb r4,[r10,r7] @ Td4[s0>>16] - and r7,lr,r1 @ i0 - ldrb r5,[r10,r8] @ Td4[s0>>8] - and r8,lr,r1,lsr#16 - ldrb r6,[r10,r9] @ Td4[s0>>0] - and r9,lr,r1,lsr#8 - - ldrb r7,[r10,r7] @ Td4[s1>>0] - ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24] - THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24] - THUMB( ldrb r1,[r1] ) - ldrb r8,[r10,r8] @ Td4[s1>>16] - eor r0,r7,r0,lsl#24 - ldrb r9,[r10,r9] @ Td4[s1>>8] - eor r1,r4,r1,lsl#8 - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,lsl#8 - and r8,lr,r2 @ i1 - ldrb r7,[r10,r7] @ Td4[s2>>8] - eor r6,r6,r9,lsl#8 - ldrb r8,[r10,r8] @ Td4[s2>>0] - and r9,lr,r2,lsr#16 - - ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24] - THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24] - THUMB( ldrb r2,[r2] ) - eor r0,r0,r7,lsl#8 - ldrb r9,[r10,r9] @ Td4[s2>>16] - eor r1,r8,r1,lsl#16 - and r7,lr,r3,lsr#16 @ i0 - eor r2,r5,r2,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - ldrb r7,[r10,r7] @ Td4[s3>>16] - eor r6,r6,r9,lsl#16 - ldrb r8,[r10,r8] @ Td4[s3>>8] - and r9,lr,r3 @ i2 - - ldrb r9,[r10,r9] @ Td4[s3>>0] - ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24] - THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24] - THUMB( ldrb r3,[r3] ) - eor r0,r0,r7,lsl#16 - ldr r7,[r11,#0] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r9,r2,lsl#8 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - sub r10,r10,#1024 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>" -.align 2 diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S new file mode 100644 index 000000000000..c817a86c4ca8 --- /dev/null +++ b/arch/arm/crypto/aes-cipher-core.S @@ -0,0 +1,179 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + + .text + .align 5 + + rk .req r0 + rounds .req r1 + in .req r2 + out .req r3 + ttab .req ip + + t0 .req lr + t1 .req r2 + t2 .req r3 + + .macro __select, out, in, idx + .if __LINUX_ARM_ARCH__ < 7 + and \out, \in, #0xff << (8 * \idx) + .else + ubfx \out, \in, #(8 * \idx), #8 + .endif + .endm + + .macro __load, out, in, idx + .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 + ldr \out, [ttab, \in, lsr #(8 * \idx) - 2] + .else + ldr \out, [ttab, \in, lsl #2] + .endif + .endm + + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc + __select \out0, \in0, 0 + __select t0, \in1, 1 + __load \out0, \out0, 0 + __load t0, t0, 1 + + .if \enc + __select \out1, \in1, 0 + __select t1, \in2, 1 + .else + __select \out1, \in3, 0 + __select t1, \in0, 1 + .endif + __load \out1, \out1, 0 + __select t2, \in2, 2 + __load t1, t1, 1 + __load t2, t2, 2 + + eor \out0, \out0, t0, ror #24 + + __select t0, \in3, 3 + .if \enc + __select \t3, \in3, 2 + __select \t4, \in0, 3 + .else + __select \t3, \in1, 2 + __select \t4, \in2, 3 + .endif + __load \t3, \t3, 2 + __load t0, t0, 3 + __load \t4, \t4, 3 + + eor \out1, \out1, t1, ror #24 + eor \out0, \out0, t2, ror #16 + ldm rk!, {t1, t2} + eor \out1, \out1, \t3, ror #16 + eor \out0, \out0, t0, ror #8 + eor \out1, \out1, \t4, ror #8 + eor \out0, \out0, t1 + eor \out1, \out1, t2 + .endm + + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .endm + + .macro __rev, out, in + .if __LINUX_ARM_ARCH__ < 6 + lsl t0, \in, #24 + and t1, \in, #0xff00 + and t2, \in, #0xff0000 + orr \out, t0, \in, lsr #24 + orr \out, \out, t1, lsl #8 + orr \out, \out, t2, lsr #8 + .else + rev \out, \in + .endif + .endm + + .macro __adrl, out, sym, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \out, =\sym + .else + movw\c \out, #:lower16:\sym + movt\c \out, #:upper16:\sym + .endif + .endm + + .macro do_crypt, round, ttab, ltab + push {r3-r11, lr} + + ldr r4, [in] + ldr r5, [in, #4] + ldr r6, [in, #8] + ldr r7, [in, #12] + + ldm rk!, {r8-r11} + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + + __adrl ttab, \ttab + + tst rounds, #2 + bne 1f + +0: \round r8, r9, r10, r11, r4, r5, r6, r7 + \round r4, r5, r6, r7, r8, r9, r10, r11 + +1: subs rounds, rounds, #4 + \round r8, r9, r10, r11, r4, r5, r6, r7 + __adrl ttab, \ltab, ls + \round r4, r5, r6, r7, r8, r9, r10, r11 + bhi 0b + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + ldr out, [sp] + + str r4, [out] + str r5, [out, #4] + str r6, [out, #8] + str r7, [out, #12] + + pop {r3-r11, pc} + + .align 3 + .ltorg + .endm + +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab, crypto_fl_tab +ENDPROC(__aes_arm_encrypt) + +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, crypto_il_tab +ENDPROC(__aes_arm_decrypt) diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c new file mode 100644 index 000000000000..c222f6e072ad --- /dev/null +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -0,0 +1,74 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <linux/crypto.h> +#include <linux/module.h> + +asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); +EXPORT_SYMBOL(__aes_arm_encrypt); + +asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out); +EXPORT_SYMBOL(__aes_arm_decrypt); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm_encrypt(ctx->key_enc, rounds, in, out); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm_decrypt(ctx->key_dec, rounds, in, out); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-arm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = crypto_aes_set_key, + .cra_cipher.cia_encrypt = aes_encrypt, + .cra_cipher.cia_decrypt = aes_decrypt, + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + .cra_alignmask = 3, +#endif +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Scalar AES cipher for ARM"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S new file mode 100644 index 000000000000..12da247164d1 --- /dev/null +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -0,0 +1,1021 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov <appro@openssl.org> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu neon + + rounds .req ip + bskey .req r4 + + q0l .req d0 + q0h .req d1 + q1l .req d2 + q1h .req d3 + q2l .req d4 + q2h .req d5 + q3l .req d6 + q3h .req d7 + q4l .req d8 + q4h .req d9 + q5l .req d10 + q5h .req d11 + q6l .req d12 + q6h .req d13 + q7l .req d14 + q7h .req d15 + q8l .req d16 + q8h .req d17 + q9l .req d18 + q9h .req d19 + q10l .req d20 + q10h .req d21 + q11l .req d22 + q11h .req d23 + q12l .req d24 + q12h .req d25 + q13l .req d26 + q13h .req d27 + q14l .req d28 + q14h .req d29 + q15l .req d30 + q15h .req d31 + + .macro __tbl, out, tbl, in, tmp + .ifc \out, \tbl + .ifb \tmp + .error __tbl needs temp register if out == tbl + .endif + vmov \tmp, \out + .endif + vtbl.8 \out\()l, {\tbl}, \in\()l + .ifc \out, \tbl + vtbl.8 \out\()h, {\tmp}, \in\()h + .else + vtbl.8 \out\()h, {\tbl}, \in\()h + .endif + .endm + + .macro __ldr, out, sym + vldr \out\()l, \sym + vldr \out\()h, \sym + 8 + .endm + + .macro __adr, reg, lbl + adr \reg, \lbl +THUMB( orr \reg, \reg, #1 ) + .endm + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b2, \b2, \b1 + veor \b5, \b5, \b6 + veor \b3, \b3, \b0 + veor \b6, \b6, \b2 + veor \b5, \b5, \b0 + veor \b6, \b6, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b4, \b4, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + veor \b4, \b4, \b6 + veor \b2, \b2, \b0 + veor \b6, \b6, \b1 + veor \b1, \b1, \b5 + veor \b5, \b5, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b2, \b2, \b5 + veor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + veor \b1, \b1, \b7 + veor \b4, \b4, \b7 + veor \b7, \b7, \b5 + veor \b1, \b1, \b3 + veor \b2, \b2, \b5 + veor \b3, \b3, \b7 + veor \b6, \b6, \b1 + veor \b2, \b2, \b0 + veor \b5, \b5, \b3 + veor \b4, \b4, \b6 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + veor \b1, \b1, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b4, \b4, \b5 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b5, \b5, \b0 + veor \b3, \b3, \b7 + veor \b6, \b6, \b2 + veor \b2, \b2, \b1 + veor \b6, \b6, \b3 + veor \b3, \b3, \b0 + veor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + veor \t0, \y0, \y1 + vand \t0, \t0, \x0 + veor \x0, \x0, \x1 + vand \t1, \x1, \y0 + vand \x0, \x0, \y1 + veor \x1, \t1, \t0 + veor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + veor \t0, \y0, \y1 + veor \t1, \y2, \y3 + vand \t0, \t0, \x0 + vand \t1, \t1, \x2 + veor \x0, \x0, \x1 + veor \x2, \x2, \x3 + vand \x1, \x1, \y0 + vand \x3, \x3, \y2 + vand \x0, \x0, \y1 + vand \x2, \x2, \y3 + veor \x1, \x1, \x0 + veor \x2, \x2, \x3 + veor \x0, \x0, \t0 + veor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + veor \t0, \x0, \x2 + veor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + veor \x0, \x0, \t0 + veor \x2, \x2, \t0 + veor \x1, \x1, \t1 + veor \x3, \x3, \t1 + veor \t0, \x4, \x6 + veor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + veor \x4, \x4, \t0 + veor \x6, \x6, \t0 + veor \x5, \x5, \t1 + veor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + veor \t3, \x4, \x6 + veor \t0, \x5, \x7 + veor \t1, \x1, \x3 + veor \s1, \x7, \x6 + veor \s0, \x0, \x2 + veor \s3, \t3, \t0 + vorr \t2, \t0, \t1 + vand \s2, \t3, \s0 + vorr \t3, \t3, \s0 + veor \s0, \s0, \t1 + vand \t0, \t0, \t1 + veor \t1, \x3, \x2 + vand \s3, \s3, \s0 + vand \s1, \s1, \t1 + veor \t1, \x4, \x5 + veor \s0, \x1, \x0 + veor \t3, \t3, \s1 + veor \t2, \t2, \s1 + vand \s1, \t1, \s0 + vorr \t1, \t1, \s0 + veor \t3, \t3, \s3 + veor \t0, \t0, \s1 + veor \t2, \t2, \s2 + veor \t1, \t1, \s3 + veor \t0, \t0, \s2 + vand \s0, \x7, \x3 + veor \t1, \t1, \s2 + vand \s1, \x6, \x2 + vand \s2, \x5, \x1 + vorr \s3, \x4, \x0 + veor \t3, \t3, \s0 + veor \t1, \t1, \s2 + veor \s0, \t0, \s3 + veor \t2, \t2, \s1 + vand \s2, \t3, \t1 + veor \s1, \t2, \s2 + veor \s3, \s0, \s2 + vbsl \s1, \t1, \s0 + vmvn \t0, \s0 + vbsl \s0, \s1, \s3 + vbsl \t0, \s1, \s3 + vbsl \s3, \t3, \t2 + veor \t3, \t3, \t2 + vand \s2, \s0, \s3 + veor \t1, \t1, \t0 + veor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \ + \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + out_bs_ch \b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3 + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \ + \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + inv_out_bs_ch \b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6 + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t0, \t0, \x0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \t1, \t1, \x1 + __tbl \x0, \t0, \mask + veor \t2, \t2, \x2 + __tbl \x1, \t1, \mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t3, \t3, \x3 + __tbl \x2, \t2, \mask + __tbl \x3, \t3, \mask + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \t0, \t0, \x4 + veor \t1, \t1, \x5 + __tbl \x4, \t0, \mask + veor \t2, \t2, \x6 + __tbl \x5, \t1, \mask + veor \t3, \t3, \x7 + __tbl \x6, \t2, \mask + __tbl \x7, \t3, \mask + .endm + + .macro inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + __tbl \x0, \x0, \mask, \t0 + __tbl \x1, \x1, \mask, \t1 + __tbl \x2, \x2, \mask, \t2 + __tbl \x3, \x3, \mask, \t3 + __tbl \x4, \x4, \mask, \t0 + __tbl \x5, \x5, \mask, \t1 + __tbl \x6, \x6, \mask, \t2 + __tbl \x7, \x7, \mask, \t3 + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + vext.8 \t0, \x0, \x0, #12 + vext.8 \t1, \x1, \x1, #12 + veor \x0, \x0, \t0 + vext.8 \t2, \x2, \x2, #12 + veor \x1, \x1, \t1 + vext.8 \t3, \x3, \x3, #12 + veor \x2, \x2, \t2 + vext.8 \t4, \x4, \x4, #12 + veor \x3, \x3, \t3 + vext.8 \t5, \x5, \x5, #12 + veor \x4, \x4, \t4 + vext.8 \t6, \x6, \x6, #12 + veor \x5, \x5, \t5 + vext.8 \t7, \x7, \x7, #12 + veor \x6, \x6, \t6 + veor \t1, \t1, \x0 + veor.8 \x7, \x7, \t7 + vext.8 \x0, \x0, \x0, #8 + veor \t2, \t2, \x1 + veor \t0, \t0, \x7 + veor \t1, \t1, \x7 + vext.8 \x1, \x1, \x1, #8 + veor \t5, \t5, \x4 + veor \x0, \x0, \t0 + veor \t6, \t6, \x5 + veor \x1, \x1, \t1 + vext.8 \t0, \x4, \x4, #8 + veor \t4, \t4, \x3 + vext.8 \t1, \x5, \x5, #8 + veor \t7, \t7, \x6 + vext.8 \x4, \x3, \x3, #8 + veor \t3, \t3, \x2 + vext.8 \x5, \x7, \x7, #8 + veor \t4, \t4, \x7 + vext.8 \x3, \x6, \x6, #8 + veor \t3, \t3, \x7 + vext.8 \x6, \x2, \x2, #8 + veor \x7, \t1, \t5 + .ifb \inv + veor \x2, \t0, \t4 + veor \x4, \x4, \t3 + veor \x5, \x5, \t7 + veor \x3, \x3, \t6 + veor \x6, \x6, \t2 + .else + veor \t3, \t3, \x4 + veor \x5, \x5, \t7 + veor \x2, \x3, \t6 + veor \x3, \t0, \t4 + veor \x4, \x6, \t2 + vmov \x6, \t3 + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \x0, \x0, \t0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \x1, \x1, \t1 + vld1.8 {\t4-\t5}, [bskey, :256]! + veor \x2, \x2, \t2 + vld1.8 {\t6-\t7}, [bskey, :256] + sub bskey, bskey, #224 + veor \x3, \x3, \t3 + veor \x4, \x4, \t4 + veor \x5, \x5, \t5 + veor \x6, \x6, \t6 + veor \x7, \x7, \t7 + vext.8 \t0, \x0, \x0, #8 + vext.8 \t6, \x6, \x6, #8 + vext.8 \t7, \x7, \x7, #8 + veor \t0, \t0, \x0 + vext.8 \t1, \x1, \x1, #8 + veor \t6, \t6, \x6 + vext.8 \t2, \x2, \x2, #8 + veor \t7, \t7, \x7 + vext.8 \t3, \x3, \x3, #8 + veor \t1, \t1, \x1 + vext.8 \t4, \x4, \x4, #8 + veor \t2, \t2, \x2 + vext.8 \t5, \x5, \x5, #8 + veor \t3, \t3, \x3 + veor \t4, \t4, \x4 + veor \t5, \t5, \x5 + veor \x0, \x0, \t6 + veor \x1, \x1, \t6 + veor \x2, \x2, \t0 + veor \x4, \x4, \t2 + veor \x3, \x3, \t1 + veor \x1, \x1, \t7 + veor \x2, \x2, \t7 + veor \x4, \x4, \t6 + veor \x5, \x5, \t3 + veor \x3, \x3, \t6 + veor \x6, \x6, \t4 + veor \x4, \x4, \t7 + veor \x5, \x5, \t7 + veor \x7, \x7, \t5 + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + vshr.u64 \t0, \b0, #\n + vshr.u64 \t1, \b1, #\n + veor \t0, \t0, \a0 + veor \t1, \t1, \a1 + vand \t0, \t0, \mask + vand \t1, \t1, \mask + veor \a0, \a0, \t0 + vshl.s64 \t0, \t0, #\n + veor \a1, \a1, \t1 + vshl.s64 \t1, \t1, #\n + veor \b0, \b0, \t0 + veor \b1, \b1, \t1 + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + vmov.i8 \t0, #0x55 + vmov.i8 \t1, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 + vmov.i8 \t0, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + .align 4 +M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + vld1.32 {q7}, [r1]! // load round 0 key + vld1.32 {q15}, [r1]! // load round 1 key + + vmov.i8 q8, #0x01 // bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + __ldr q14, M0 + + sub r2, r2, #1 + vst1.8 {q7}, [r0, :128]! // save round 0 key + +.Lkey_loop: + __tbl q7, q15, q14 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.32 {q15}, [r1]! // load next round key + vmvn q0, q0 + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 + + subs r2, r2, #1 + vst1.8 {q0-q1}, [r0, :256]! + vst1.8 {q2-q3}, [r0, :256]! + vst1.8 {q4-q5}, [r0, :256]! + vst1.8 {q6-q7}, [r0, :256]! + bne .Lkey_loop + + vmov.i8 q7, #0x63 // compose .L63 + veor q15, q15, q7 + vst1.8 {q15}, [r0, :128] + bx lr +ENDPROC(aesbs_convert_key) + + .align 4 +M0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 + +aesbs_encrypt8: + vld1.8 {q9}, [bskey, :128]! // round 0 key + __ldr q8, M0SR + + veor q10, q0, q9 // xor with round0 key + veor q11, q1, q9 + __tbl q0, q10, q8 + veor q12, q2, q9 + __tbl q1, q11, q8 + veor q13, q3, q9 + __tbl q2, q12, q8 + veor q14, q4, q9 + __tbl q3, q13, q8 + veor q15, q5, q9 + __tbl q4, q14, q8 + veor q10, q6, q9 + __tbl q5, q15, q8 + veor q11, q7, q9 + __tbl q6, q10, q8 + __tbl q7, q11, q8 + + bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 + + sub rounds, rounds, #1 + b .Lenc_sbox + + .align 5 +SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +SRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d + +.Lenc_last: + __ldr q12, SRM0 +.Lenc_loop: + shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 +.Lenc_sbox: + sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ + q13, q14, q15 + subs rounds, rounds, #1 + bcc .Lenc_done + + mix_cols q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \ + q13, q14, q15 + + beq .Lenc_last + __ldr q12, SR + b .Lenc_loop + +.Lenc_done: + vld1.8 {q12}, [bskey, :128] // last round key + + bitslice q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11 + + veor q0, q0, q12 + veor q1, q1, q12 + veor q4, q4, q12 + veor q6, q6, q12 + veor q3, q3, q12 + veor q7, q7, q12 + veor q2, q2, q12 + veor q5, q5, q12 + bx lr +ENDPROC(aesbs_encrypt8) + + .align 4 +M0ISR: .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 + +aesbs_decrypt8: + add bskey, bskey, rounds, lsl #7 + sub bskey, bskey, #112 + vld1.8 {q9}, [bskey, :128] // round 0 key + sub bskey, bskey, #128 + __ldr q8, M0ISR + + veor q10, q0, q9 // xor with round0 key + veor q11, q1, q9 + __tbl q0, q10, q8 + veor q12, q2, q9 + __tbl q1, q11, q8 + veor q13, q3, q9 + __tbl q2, q12, q8 + veor q14, q4, q9 + __tbl q3, q13, q8 + veor q15, q5, q9 + __tbl q4, q14, q8 + veor q10, q6, q9 + __tbl q5, q15, q8 + veor q11, q7, q9 + __tbl q6, q10, q8 + __tbl q7, q11, q8 + + bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 + + sub rounds, rounds, #1 + b .Ldec_sbox + + .align 5 +ISR: .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +ISRM0: .quad 0x01040b0e0205080f, 0x0306090c00070a0d + +.Ldec_last: + __ldr q12, ISRM0 +.Ldec_loop: + inv_shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 +.Ldec_sbox: + inv_sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ + q13, q14, q15 + subs rounds, rounds, #1 + bcc .Ldec_done + + inv_mix_cols q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \ + q13, q14, q15 + + beq .Ldec_last + __ldr q12, ISR + b .Ldec_loop + +.Ldec_done: + add bskey, bskey, #112 + vld1.8 {q12}, [bskey, :128] // last round key + + bitslice q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11 + + veor q0, q0, q12 + veor q1, q1, q12 + veor q6, q6, q12 + veor q4, q4, q12 + veor q2, q2, q12 + veor q7, q7, q12 + veor q3, q3, q12 + veor q5, q5, q12 + bx lr +ENDPROC(aesbs_decrypt8) + + /* + * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ + .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + push {r4-r6, lr} + ldr r5, [sp, #16] // number of blocks + +99: __adr ip, 0f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [r1]! + vld1.8 {q1}, [r1]! + vld1.8 {q2}, [r1]! + vld1.8 {q3}, [r1]! + vld1.8 {q4}, [r1]! + vld1.8 {q5}, [r1]! + vld1.8 {q6}, [r1]! + vld1.8 {q7}, [r1]! + +0: mov bskey, r2 + mov rounds, r3 + bl \do8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vst1.8 {\o0}, [r0]! + vst1.8 {\o1}, [r0]! + vst1.8 {\o2}, [r0]! + vst1.8 {\o3}, [r0]! + vst1.8 {\o4}, [r0]! + vst1.8 {\o5}, [r0]! + vst1.8 {\o6}, [r0]! + vst1.8 {\o7}, [r0]! + +1: subs r5, r5, #8 + bgt 99b + + pop {r4-r6, pc} + .endm + + .align 4 +ENTRY(aesbs_ecb_encrypt) + __ecb_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 +ENDPROC(aesbs_ecb_encrypt) + + .align 4 +ENTRY(aesbs_ecb_decrypt) + __ecb_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 +ENDPROC(aesbs_ecb_decrypt) + + /* + * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 iv[]) + */ + .align 4 +ENTRY(aesbs_cbc_decrypt) + mov ip, sp + push {r4-r6, lr} + ldm ip, {r5-r6} // load args 4-5 + +99: __adr ip, 0f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + mov lr, r1 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [lr]! + vld1.8 {q1}, [lr]! + vld1.8 {q2}, [lr]! + vld1.8 {q3}, [lr]! + vld1.8 {q4}, [lr]! + vld1.8 {q5}, [lr]! + vld1.8 {q6}, [lr]! + vld1.8 {q7}, [lr] + +0: mov bskey, r2 + mov rounds, r3 + bl aesbs_decrypt8 + + vld1.8 {q8}, [r6] + vmov q9, q8 + vmov q10, q8 + vmov q11, q8 + vmov q12, q8 + vmov q13, q8 + vmov q14, q8 + vmov q15, q8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q9}, [r1]! + vld1.8 {q10}, [r1]! + vld1.8 {q11}, [r1]! + vld1.8 {q12}, [r1]! + vld1.8 {q13}, [r1]! + vld1.8 {q14}, [r1]! + vld1.8 {q15}, [r1]! + W(nop) + +1: __adr ip, 2f + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor q0, q0, q8 + vst1.8 {q0}, [r0]! + veor q1, q1, q9 + vst1.8 {q1}, [r0]! + veor q6, q6, q10 + vst1.8 {q6}, [r0]! + veor q4, q4, q11 + vst1.8 {q4}, [r0]! + veor q2, q2, q12 + vst1.8 {q2}, [r0]! + veor q7, q7, q13 + vst1.8 {q7}, [r0]! + veor q3, q3, q14 + vst1.8 {q3}, [r0]! + veor q5, q5, q15 + vld1.8 {q8}, [r1]! // load next round's iv +2: vst1.8 {q5}, [r0]! + + subs r5, r5, #8 + vst1.8 {q8}, [r6] // store next round's iv + bgt 99b + + pop {r4-r6, pc} +ENDPROC(aesbs_cbc_decrypt) + + .macro next_ctr, q + vmov.32 \q\()h[1], r10 + adds r10, r10, #1 + vmov.32 \q\()h[0], r9 + adcs r9, r9, #0 + vmov.32 \q\()l[1], r8 + adcs r8, r8, #0 + vmov.32 \q\()l[0], r7 + adc r7, r7, #0 + vrev32.8 \q, \q + .endm + + /* + * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 ctr[], bool final) + */ +ENTRY(aesbs_ctr_encrypt) + mov ip, sp + push {r4-r10, lr} + + ldm ip, {r5-r7} // load args 4-6 + add r5, r5, r7 // one extra block if final == 1 + + vld1.8 {q0}, [r6] // load counter + vrev32.8 q1, q0 + vmov r9, r10, d3 + vmov r7, r8, d2 + + adds r10, r10, #1 + adcs r9, r9, #0 + adcs r8, r8, #0 + adc r7, r7, #0 + +99: vmov q1, q0 + vmov q2, q0 + vmov q3, q0 + vmov q4, q0 + vmov q5, q0 + vmov q6, q0 + vmov q7, q0 + + __adr ip, 0f + sub lr, r5, #1 + and lr, lr, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #5 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + next_ctr q1 + next_ctr q2 + next_ctr q3 + next_ctr q4 + next_ctr q5 + next_ctr q6 + next_ctr q7 + +0: mov bskey, r2 + mov rounds, r3 + bl aesbs_encrypt8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + movgt r4, #0 + ldrle r4, [sp, #40] // load final in the last round + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q8}, [r1]! + vld1.8 {q9}, [r1]! + vld1.8 {q10}, [r1]! + vld1.8 {q11}, [r1]! + vld1.8 {q12}, [r1]! + vld1.8 {q13}, [r1]! + vld1.8 {q14}, [r1]! + teq r4, #0 // skip last block if 'final' +1: bne 2f + vld1.8 {q15}, [r1]! + +2: __adr ip, 3f + cmp r5, #8 + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor q0, q0, q8 + vst1.8 {q0}, [r0]! + veor q1, q1, q9 + vst1.8 {q1}, [r0]! + veor q4, q4, q10 + vst1.8 {q4}, [r0]! + veor q6, q6, q11 + vst1.8 {q6}, [r0]! + veor q3, q3, q12 + vst1.8 {q3}, [r0]! + veor q7, q7, q13 + vst1.8 {q7}, [r0]! + veor q2, q2, q14 + vst1.8 {q2}, [r0]! + teq r4, #0 // skip last block if 'final' + W(bne) 4f +3: veor q5, q5, q15 + vst1.8 {q5}, [r0]! + + next_ctr q0 + + subs r5, r5, #8 + bgt 99b + + vmov q5, q0 + +4: vst1.8 {q5}, [r6] + pop {r4-r10, pc} +ENDPROC(aesbs_ctr_encrypt) + + .macro next_tweak, out, in, const, tmp + vshr.s64 \tmp, \in, #63 + vand \tmp, \tmp, \const + vadd.u64 \out, \in, \in + vext.8 \tmp, \tmp, \tmp, #8 + veor \out, \out, \tmp + .endm + + .align 4 +.Lxts_mul_x: + .quad 1, 0x87 + + /* + * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +__xts_prepare8: + vld1.8 {q14}, [r7] // load iv + __ldr q15, .Lxts_mul_x // load tweak mask + vmov q12, q14 + + __adr ip, 0f + and r4, r6, #7 + cmp r6, #8 + sub ip, ip, r4, lsl #5 + mov r4, sp + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [r1]! + next_tweak q12, q14, q15, q13 + veor q0, q0, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q1}, [r1]! + next_tweak q14, q12, q15, q13 + veor q1, q1, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q2}, [r1]! + next_tweak q12, q14, q15, q13 + veor q2, q2, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q3}, [r1]! + next_tweak q14, q12, q15, q13 + veor q3, q3, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q4}, [r1]! + next_tweak q12, q14, q15, q13 + veor q4, q4, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q5}, [r1]! + next_tweak q14, q12, q15, q13 + veor q5, q5, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q6}, [r1]! + next_tweak q12, q14, q15, q13 + veor q6, q6, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q7}, [r1]! + next_tweak q14, q12, q15, q13 + veor q7, q7, q12 + vst1.8 {q12}, [r4, :128] + +0: vst1.8 {q14}, [r7] // store next iv + bx lr +ENDPROC(__xts_prepare8) + + .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + push {r4-r8, lr} + mov r5, sp // preserve sp + ldrd r6, r7, [sp, #24] // get blocks and iv args + sub ip, sp, #128 // make room for 8x tweak + bic ip, ip, #0xf // align sp to 16 bytes + mov sp, ip + +99: bl __xts_prepare8 + + mov bskey, r2 + mov rounds, r3 + bl \do8 + + __adr ip, 0f + and lr, r6, #7 + cmp r6, #8 + sub ip, ip, lr, lsl #2 + mov r4, sp + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q8}, [r4, :128]! + vld1.8 {q9}, [r4, :128]! + vld1.8 {q10}, [r4, :128]! + vld1.8 {q11}, [r4, :128]! + vld1.8 {q12}, [r4, :128]! + vld1.8 {q13}, [r4, :128]! + vld1.8 {q14}, [r4, :128]! + vld1.8 {q15}, [r4, :128] + +0: __adr ip, 1f + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor \o0, \o0, q8 + vst1.8 {\o0}, [r0]! + veor \o1, \o1, q9 + vst1.8 {\o1}, [r0]! + veor \o2, \o2, q10 + vst1.8 {\o2}, [r0]! + veor \o3, \o3, q11 + vst1.8 {\o3}, [r0]! + veor \o4, \o4, q12 + vst1.8 {\o4}, [r0]! + veor \o5, \o5, q13 + vst1.8 {\o5}, [r0]! + veor \o6, \o6, q14 + vst1.8 {\o6}, [r0]! + veor \o7, \o7, q15 + vst1.8 {\o7}, [r0]! + +1: subs r6, r6, #8 + bgt 99b + + mov sp, r5 + pop {r4-r8, pc} + .endm + +ENTRY(aesbs_xts_encrypt) + __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 +ENDPROC(aesbs_xts_encrypt) + +ENTRY(aesbs_xts_decrypt) + __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 +ENDPROC(aesbs_xts_decrypt) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c new file mode 100644 index 000000000000..e262f99a44d3 --- /dev/null +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -0,0 +1,405 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/cbc.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/xts.h> +#include <linux/module.h> + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); + +asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); + +asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], bool final); + +asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[], + u8 out[]); + +struct aesbs_ctx { + int rounds; + u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32] __aligned(AES_BLOCK_SIZE); +}; + +struct aesbs_cbc_ctx { + struct aesbs_ctx key; + u32 enc[AES_MAX_KEYLENGTH_U32]; +}; + +struct aesbs_xts_ctx { + struct aesbs_ctx key; + u32 twkey[AES_MAX_KEYLENGTH_U32]; +}; + +static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); + kernel_neon_end(); + + return 0; +} + +static int __ecb_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_encrypt); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_decrypt); +} + +static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + +static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + __aes_arm_encrypt(ctx->enc, ctx->key.rounds, src, dst); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + return crypto_cbc_encrypt_walk(req, cbc_encrypt_one); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + bool final = (walk.total % AES_BLOCK_SIZE) != 0; + + if (walk.nbytes < walk.total) { + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + final = false; + } + + aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->rk, ctx->rounds, blocks, walk.iv, final); + + if (final) { + u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + + if (dst != src) + memcpy(dst, src, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, 0); + break; + } + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = xts_verify_key(tfm, in_key, key_len); + if (err) + return err; + + key_len /= 2; + err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + if (err) + return err; + + memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); + + return aesbs_setkey(tfm, in_key, key_len); +} + +static int __xts_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + __aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, + ctx->key.rounds, blocks, walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_encrypt); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_decrypt); +} + +static struct skcipher_alg aes_algs[] = { { + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, +}, { + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, +} }; + +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static int __init aes_init(void) +{ + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + return 0; + +unregister_simds: + aes_exit(); + return err; +} + +module_init(aes_init); +module_exit(aes_exit); diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c deleted file mode 100644 index 0409b8f89782..000000000000 --- a/arch/arm/crypto/aes_glue.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - */ - -#include <linux/module.h> -#include <linux/crypto.h> -#include <crypto/aes.h> - -#include "aes_glue.h" - -EXPORT_SYMBOL(AES_encrypt); -EXPORT_SYMBOL(AES_decrypt); -EXPORT_SYMBOL(private_AES_set_encrypt_key); -EXPORT_SYMBOL(private_AES_set_decrypt_key); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_encrypt(src, dst, &ctx->enc_key); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_decrypt(src, dst, &ctx->dec_key); -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - - switch (key_len) { - case AES_KEYSIZE_128: - key_len = 128; - break; - case AES_KEYSIZE_192: - key_len = 192; - break; - case AES_KEYSIZE_256: - key_len = 256; - break; - default: - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* private_AES_set_decrypt_key expects an encryption key as input */ - ctx->dec_key = ctx->enc_key; - if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - return 0; -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct AES_CTX), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("aes"); -MODULE_ALIAS_CRYPTO("aes-asm"); -MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h deleted file mode 100644 index cca3e51eb606..000000000000 --- a/arch/arm/crypto/aes_glue.h +++ /dev/null @@ -1,19 +0,0 @@ - -#define AES_MAXNR 14 - -struct AES_KEY { - unsigned int rd_key[4 * (AES_MAXNR + 1)]; - int rounds; -}; - -struct AES_CTX { - struct AES_KEY enc_key; - struct AES_KEY dec_key; -}; - -asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); -asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); -asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, - const int bits, struct AES_KEY *key); -asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, - const int bits, struct AES_KEY *key); diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped deleted file mode 100644 index 1d1800f71c5b..000000000000 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ /dev/null @@ -1,2548 +0,0 @@ - -@ ==================================================================== -@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ -@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel -@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is -@ granted. -@ ==================================================================== - -@ Bit-sliced AES for ARM NEON -@ -@ February 2012. -@ -@ This implementation is direct adaptation of bsaes-x86_64 module for -@ ARM NEON. Except that this module is endian-neutral [in sense that -@ it can be compiled for either endianness] by courtesy of vld1.8's -@ neutrality. Initial version doesn't implement interface to OpenSSL, -@ only low-level primitives and unsupported entry points, just enough -@ to collect performance results, which for Cortex-A8 core are: -@ -@ encrypt 19.5 cycles per byte processed with 128-bit key -@ decrypt 22.1 cycles per byte processed with 128-bit key -@ key conv. 440 cycles per 128-bit key/0.18 of 8x block -@ -@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -@ which is [much] worse than anticipated (for further details see -@ http://www.openssl.org/~appro/Snapdragon-S4.html). -@ -@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -@ manages in 20.0 cycles]. -@ -@ When comparing to x86_64 results keep in mind that NEON unit is -@ [mostly] single-issue and thus can't [fully] benefit from -@ instruction-level parallelism. And when comparing to aes-armv4 -@ results keep in mind key schedule conversion overhead (see -@ bsaes-x86_64.pl for further details)... -@ -@ <appro@openssl.org> - -@ April-August 2013 -@ -@ Add CBC, CTR and XTS subroutines, adapt for kernel use. -@ -@ <ard.biesheuvel@linaro.org> - -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr r6,_bsaes_decrypt8 - vldmia r4!, {q9} @ round 0 key - add r6,r6,#.LM0ISR-_bsaes_decrypt8 - - vldmia r6!, {q8} @ .LM0ISR - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 - vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Ldec_sbox: - veor q1, q1, q4 - veor q3, q3, q4 - - veor q4, q4, q7 - veor q1, q1, q6 - veor q2, q2, q7 - veor q6, q6, q4 - - veor q0, q0, q1 - veor q2, q2, q5 - veor q7, q7, q6 - veor q3, q3, q0 - veor q5, q5, q0 - veor q1, q1, q3 - veor q11, q3, q0 - veor q10, q7, q4 - veor q9, q1, q6 - veor q13, q4, q0 - vmov q8, q10 - veor q12, q5, q2 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q6, q2 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q3, q7 - veor q12, q1, q5 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q4, q6 - veor q9, q9, q14 - vand q13, q0, q2 - vand q14, q7, q1 - vorr q15, q3, q5 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q5, q2 - veor q8, q1, q6 - veor q10, q15, q14 - vand q10, q10, q5 - veor q5, q5, q1 - vand q11, q1, q15 - vand q5, q5, q14 - veor q1, q11, q10 - veor q5, q5, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q2 - veor q12, q12, q8 - veor q2, q2, q6 - vand q8, q8, q15 - vand q6, q6, q13 - vand q12, q12, q14 - vand q2, q2, q9 - veor q8, q8, q12 - veor q2, q2, q6 - veor q12, q12, q11 - veor q6, q6, q10 - veor q5, q5, q12 - veor q2, q2, q12 - veor q1, q1, q8 - veor q6, q6, q8 - - veor q12, q3, q0 - veor q8, q7, q4 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q4 - vand q8, q8, q15 - vand q4, q4, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q4 - veor q12, q12, q11 - veor q4, q4, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - vand q10, q10, q3 - veor q3, q3, q7 - vand q11, q7, q15 - vand q3, q3, q14 - veor q7, q11, q10 - veor q3, q3, q11 - veor q3, q3, q12 - veor q0, q0, q12 - veor q7, q7, q8 - veor q4, q4, q8 - veor q1, q1, q7 - veor q6, q6, q5 - - veor q4, q4, q1 - veor q2, q2, q7 - veor q5, q5, q7 - veor q4, q4, q2 - veor q7, q7, q0 - veor q4, q4, q5 - veor q3, q3, q6 - veor q6, q6, q1 - veor q3, q3, q4 - - veor q4, q4, q0 - veor q7, q7, q3 - subs r5,r5,#1 - bcc .Ldec_done - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 q8, q0, q0, #8 - vext.8 q14, q3, q3, #8 - vext.8 q15, q5, q5, #8 - veor q8, q8, q0 - vext.8 q9, q1, q1, #8 - veor q14, q14, q3 - vext.8 q10, q6, q6, #8 - veor q15, q15, q5 - vext.8 q11, q4, q4, #8 - veor q9, q9, q1 - vext.8 q12, q2, q2, #8 - veor q10, q10, q6 - vext.8 q13, q7, q7, #8 - veor q11, q11, q4 - veor q12, q12, q2 - veor q13, q13, q7 - - veor q0, q0, q14 - veor q1, q1, q14 - veor q6, q6, q8 - veor q2, q2, q10 - veor q4, q4, q9 - veor q1, q1, q15 - veor q6, q6, q15 - veor q2, q2, q14 - veor q7, q7, q11 - veor q4, q4, q14 - veor q3, q3, q12 - veor q2, q2, q15 - veor q7, q7, q15 - veor q5, q5, q13 - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q6, q6, #12 - veor q1, q1, q9 - vext.8 q11, q4, q4, #12 - veor q6, q6, q10 - vext.8 q12, q2, q2, #12 - veor q4, q4, q11 - vext.8 q13, q7, q7, #12 - veor q2, q2, q12 - vext.8 q14, q3, q3, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q3, q3, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q2 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q2, q2, #8 - veor q12, q12, q4 - vext.8 q9, q7, q7, #8 - veor q15, q15, q3 - vext.8 q2, q4, q4, #8 - veor q11, q11, q6 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q4, q3, q3, #8 - veor q11, q11, q5 - vext.8 q3, q6, q6, #8 - veor q5, q9, q13 - veor q11, q11, q2 - veor q7, q7, q15 - veor q6, q4, q14 - veor q4, q8, q12 - veor q2, q3, q10 - vmov q3, q11 - @ vmov q5, q9 - vldmia r6, {q12} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq r6,r6,#0x10 - bne .Ldec_loop - vldmia r6, {q12} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q3, #1 - vshr.u64 q11, q2, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q4 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q4, q4, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q2, #2 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q3, q3, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q4 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q4, q4, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q4, #4 - vshr.u64 q11, q6, #4 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q4, q4, q10 - veor q6, q6, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q6, q6, q8 - veor q4, q4, q8 - veor q2, q2, q8 - veor q7, q7, q8 - veor q3, q3, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>" -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr r6,_bsaes_encrypt8 - vldmia r4!, {q9} @ round 0 key - sub r6,r6,#_bsaes_encrypt8-.LM0SR - - vldmia r6!, {q8} @ .LM0SR -_bsaes_encrypt8_alt: - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 -_bsaes_encrypt8_bitslice: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 - vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Lenc_sbox: - veor q2, q2, q1 - veor q5, q5, q6 - veor q3, q3, q0 - veor q6, q6, q2 - veor q5, q5, q0 - - veor q6, q6, q3 - veor q3, q3, q7 - veor q7, q7, q5 - veor q3, q3, q4 - veor q4, q4, q5 - - veor q2, q2, q7 - veor q3, q3, q1 - veor q1, q1, q5 - veor q11, q7, q4 - veor q10, q1, q2 - veor q9, q5, q3 - veor q13, q2, q4 - vmov q8, q10 - veor q12, q6, q0 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q3, q0 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q7, q1 - veor q12, q5, q6 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q2, q3 - veor q9, q9, q14 - vand q13, q4, q0 - vand q14, q1, q5 - vorr q15, q7, q6 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q6, q0 - veor q8, q5, q3 - veor q10, q15, q14 - vand q10, q10, q6 - veor q6, q6, q5 - vand q11, q5, q15 - vand q6, q6, q14 - veor q5, q11, q10 - veor q6, q6, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q3 - vand q8, q8, q15 - vand q3, q3, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q3 - veor q12, q12, q11 - veor q3, q3, q10 - veor q6, q6, q12 - veor q0, q0, q12 - veor q5, q5, q8 - veor q3, q3, q8 - - veor q12, q7, q4 - veor q8, q1, q2 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q4 - veor q12, q12, q8 - veor q4, q4, q2 - vand q8, q8, q15 - vand q2, q2, q13 - vand q12, q12, q14 - vand q4, q4, q9 - veor q8, q8, q12 - veor q4, q4, q2 - veor q12, q12, q11 - veor q2, q2, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - vand q10, q10, q7 - veor q7, q7, q1 - vand q11, q1, q15 - vand q7, q7, q14 - veor q1, q11, q10 - veor q7, q7, q11 - veor q7, q7, q12 - veor q4, q4, q12 - veor q1, q1, q8 - veor q2, q2, q8 - veor q7, q7, q0 - veor q1, q1, q6 - veor q6, q6, q0 - veor q4, q4, q7 - veor q0, q0, q1 - - veor q1, q1, q5 - veor q5, q5, q2 - veor q2, q2, q3 - veor q3, q3, q5 - veor q4, q4, q5 - - veor q6, q6, q3 - subs r5,r5,#1 - bcc .Lenc_done - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q4, q4, #12 - veor q1, q1, q9 - vext.8 q11, q6, q6, #12 - veor q4, q4, q10 - vext.8 q12, q3, q3, #12 - veor q6, q6, q11 - vext.8 q13, q7, q7, #12 - veor q3, q3, q12 - vext.8 q14, q2, q2, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q2, q2, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q3 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q3, q3, #8 - veor q12, q12, q6 - vext.8 q9, q7, q7, #8 - veor q15, q15, q2 - vext.8 q3, q6, q6, #8 - veor q11, q11, q4 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q6, q2, q2, #8 - veor q11, q11, q5 - vext.8 q2, q4, q4, #8 - veor q5, q9, q13 - veor q4, q8, q12 - veor q3, q3, q11 - veor q7, q7, q15 - veor q6, q6, q14 - @ vmov q4, q8 - veor q2, q2, q10 - @ vmov q5, q9 - vldmia r6, {q12} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq r6,r6,#0x10 - bne .Lenc_loop - vldmia r6, {q12} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q3, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q3, q3, q11 - vshr.u64 q10, q4, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q6 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q6, q6, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q4, q4, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q3, #2 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q3, q3, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q6 - veor q11, q11, q4 - vand q10, q10, q9 - vand q11, q11, q9 - veor q6, q6, q10 - vshl.u64 q10, q10, #2 - veor q4, q4, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q6, #4 - vshr.u64 q11, q4, #4 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q4, q4, q8 - veor q6, q6, q8 - veor q3, q3, q8 - veor q7, q7, q8 - veor q2, q2, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr r6,_bsaes_key_convert - vld1.8 {q7}, [r4]! @ load round 0 key - sub r6,r6,#_bsaes_key_convert-.LM0 - vld1.8 {q15}, [r4]! @ load round 1 key - - vmov.i8 q8, #0x01 @ bit masks - vmov.i8 q9, #0x02 - vmov.i8 q10, #0x04 - vmov.i8 q11, #0x08 - vmov.i8 q12, #0x10 - vmov.i8 q13, #0x20 - vldmia r6, {q14} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 q7, q7 - vrev32.8 q15, q15 -#endif - sub r5,r5,#1 - vstmia r12!, {q7} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 d14,{q15},d28 - vtbl.8 d15,{q15},d29 - vmov.i8 q6, #0x40 - vmov.i8 q15, #0x80 - - vtst.8 q0, q7, q8 - vtst.8 q1, q7, q9 - vtst.8 q2, q7, q10 - vtst.8 q3, q7, q11 - vtst.8 q4, q7, q12 - vtst.8 q5, q7, q13 - vtst.8 q6, q7, q6 - vtst.8 q7, q7, q15 - vld1.8 {q15}, [r4]! @ load next round key - vmvn q0, q0 @ "pnot" - vmvn q1, q1 - vmvn q5, q5 - vmvn q6, q6 -#ifdef __ARMEL__ - vrev32.8 q15, q15 -#endif - subs r5,r5,#1 - vstmia r12!,{q0-q7} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 q7,#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp r2, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ IV is 1st arg on the stack - mov r2, r2, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - vldmia sp, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia sp, {q7} -#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r3, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: -#endif - - vld1.8 {q15}, [r8] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs r2, r2, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {q0-q1}, [r0]! @ load input - vld1.8 {q2-q3}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - vld1.8 {q4-q5}, [r0]! - mov r5, r10 - vld1.8 {q6-q7}, [r0] - sub r0, r0, #0x60 - vstmia r9, {q15} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q14-q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - veor q5, q5, q14 - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - vst1.8 {q5}, [r1]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds r2, r2, #8 - beq .Lcbc_dec_done - - vld1.8 {q0}, [r0]! @ load input - cmp r2, #2 - blo .Lcbc_dec_one - vld1.8 {q1}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - mov r5, r10 - vstmia r9, {q15} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {q2}, [r0]! - cmp r2, #4 - blo .Lcbc_dec_three - vld1.8 {q3}, [r0]! - beq .Lcbc_dec_four - vld1.8 {q4}, [r0]! - cmp r2, #6 - blo .Lcbc_dec_five - vld1.8 {q5}, [r0]! - beq .Lcbc_dec_six - vld1.8 {q6}, [r0]! - sub r0, r0, #0x70 - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub r0, r0, #0x60 - bl _bsaes_decrypt8 - vldmia r9,{q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub r0, r0, #0x50 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - veor q2, q2, q11 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub r0, r0, #0x40 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub r0, r0, #0x30 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub r0, r0, #0x20 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! @ reload input - veor q1, q1, q8 - vst1.8 {q0-q1}, [r1]! @ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub r0, r0, #0x10 - mov r10, r1 @ save original out pointer - mov r1, r9 @ use the iv scratch space as out buffer - mov r2, r3 - vmov q4,q15 @ just in case ensure that IV - vmov q5,q0 @ and input are preserved - bl AES_decrypt - vld1.8 {q0}, [r9,:64] @ load result - veor q0, q0, q4 @ ^= IV - vmov q15, q5 @ q5 holds input - vst1.8 {q0}, [r10] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lcbc_dec_bzero -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - vst1.8 {q15}, [r8] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 -bsaes_ctr32_encrypt_blocks: - cmp r2, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - - vld1.8 {q0}, [r8] @ load counter - add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 - vldmia sp, {q4} @ load round0 key -#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - -.align 2 -0: add r12, r3, #248 - vld1.8 {q0}, [r8] @ load counter - adrl r8, .LREVM0SR @ borrow r8 - vldmia r12, {q4} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 q8,#1 @ compose 1<<96 - veor q9,q9,q9 - vrev32.8 q0,q0 - vext.8 q8,q9,q8,#4 - vrev32.8 q4,q4 - vadd.u32 q9,q8,q8 @ compose 2<<96 - vstmia sp, {q4} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 q10, q8, q9 @ compose 3<<96 - vadd.u32 q1, q0, q8 @ +1 - vadd.u32 q2, q0, q9 @ +2 - vadd.u32 q3, q0, q10 @ +3 - vadd.u32 q4, q1, q10 - vadd.u32 q5, q2, q10 - vadd.u32 q6, q3, q10 - vadd.u32 q7, q4, q10 - vadd.u32 q10, q5, q10 @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia sp, {q9} @ load round0 key -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x10 @ pass next round key -#else - add r4, r3, #264 -#endif - vldmia r8, {q8} @ .LREVM0SR - mov r5, r10 @ pass rounds - vstmia r9, {q10} @ save next counter - sub r6, r8, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs r2, r2, #8 - blo .Lctr_enc_loop_done - - vld1.8 {q8-q9}, [r0]! @ load input - vld1.8 {q10-q11}, [r0]! - veor q0, q8 - veor q1, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q10 - veor q6, q11 - vld1.8 {q14-q15}, [r0]! - veor q3, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q7, q13 - veor q2, q14 - vst1.8 {q4}, [r1]! - veor q5, q15 - vst1.8 {q6}, [r1]! - vmov.i32 q8, #1 @ compose 1<<96 - vst1.8 {q3}, [r1]! - veor q9, q9, q9 - vst1.8 {q7}, [r1]! - vext.8 q8, q9, q8, #4 - vst1.8 {q2}, [r1]! - vadd.u32 q9,q8,q8 @ compose 2<<96 - vst1.8 {q5}, [r1]! - vldmia r9, {q0} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add r2, r2, #8 - vld1.8 {q8}, [r0]! @ load input - veor q0, q8 - vst1.8 {q0}, [r1]! @ write output - cmp r2, #2 - blo .Lctr_enc_done - vld1.8 {q9}, [r0]! - veor q1, q9 - vst1.8 {q1}, [r1]! - beq .Lctr_enc_done - vld1.8 {q10}, [r0]! - veor q4, q10 - vst1.8 {q4}, [r1]! - cmp r2, #4 - blo .Lctr_enc_done - vld1.8 {q11}, [r0]! - veor q6, q11 - vst1.8 {q6}, [r1]! - beq .Lctr_enc_done - vld1.8 {q12}, [r0]! - veor q3, q12 - vst1.8 {q3}, [r1]! - cmp r2, #6 - blo .Lctr_enc_done - vld1.8 {q13}, [r0]! - veor q7, q13 - vst1.8 {q7}, [r1]! - beq .Lctr_enc_done - vld1.8 {q14}, [r0] - veor q2, q14 - vst1.8 {q2}, [r1]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lctr_enc_bzero -#else - vstmia sp, {q0-q1} -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, r0 @ copy arguments - mov r5, r1 - mov r6, r2 - mov r7, r3 - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {q1}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {q1}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {q0}, [r4]! @ load input - vld1.8 {q1}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor q0,q0,q1 - vst1.8 {q0}, [r5]! @ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} @ save last round key -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - - subs r9, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! - veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds r9, #0x70 - bmi .Lxts_enc_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! - mov r3, r4 - - vmov q8, q9 @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_enc_ret - sub r6, r8, #0x10 - -.Lxts_enc_steal: - ldrb r0, [r7], #1 - ldrb r1, [r8, #-0x10] - strb r0, [r8, #-0x10] - strb r1, [r8], #1 - - subs r9, #1 - bhi .Lxts_enc_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_enc_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r10, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst r9, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne r9, #0x10 @ subtract another 16 bytes -#endif - subs r9, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! - veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds r9, #0x70 - bmi .Lxts_dec_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - mov r5, r2 @ preserve magic - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! - mov r3, r4 - mov r2, r5 - - vmov q8, q9 @ next round tweak - -.Lxts_dec_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_dec_ret - - @ calculate one round of extra tweak for the stolen ciphertext - vldmia r2, {q5} - vshr.s64 q6, q8, #63 - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vswp d13,d12 - veor q9, q9, q6 - - @ perform the final decryption with the last tweak value - vld1.8 {q0}, [r7]! - mov r0, sp - veor q0, q0, q9 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q9 - vst1.8 {q0}, [r8] - - mov r6, r8 -.Lxts_dec_steal: - ldrb r1, [r8] - ldrb r0, [r7], #1 - strb r1, [r8, #0x10] - strb r0, [r8], #1 - - subs r9, #1 - bhi .Lxts_dec_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_dec_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_dec_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c deleted file mode 100644 index d8e06de72ef3..000000000000 --- a/arch/arm/crypto/aesbs-glue.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES - * - * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <asm/neon.h> -#include <crypto/aes.h> -#include <crypto/cbc.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <linux/module.h> -#include <crypto/xts.h> - -#include "aes_glue.h" - -#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) - -struct BS_KEY { - struct AES_KEY rk; - int converted; - u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; -} __aligned(8); - -asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); -asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); - -asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 iv[]); - -asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, - struct BS_KEY *key, u8 const iv[]); - -asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -struct aesbs_cbc_ctx { - struct AES_KEY enc; - struct BS_KEY dec; -}; - -struct aesbs_ctr_ctx { - struct BS_KEY enc; -}; - -struct aesbs_xts_ctx { - struct BS_KEY enc; - struct BS_KEY dec; - struct AES_KEY twkey; -}; - -static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - ctx->dec.converted = 0; - return 0; -} - -static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->enc.converted = 0; - return 0; -} - -static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 4; - int err; - - err = xts_verify_key(tfm, in_key, key_len); - if (err) - return err; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc.rk; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); - ctx->enc.converted = ctx->dec.converted = 0; - return 0; -} - -static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_encrypt(src, dst, &ctx->enc); -} - -static int aesbs_cbc_encrypt(struct skcipher_request *req) -{ - return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one); -} - -static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_decrypt(src, dst, &ctx->dec.rk); -} - -static int aesbs_cbc_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - for (err = skcipher_walk_virt(&walk, req, false); - (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) { - u32 blocks = nbytes / AES_BLOCK_SIZE; - u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; - u8 *iv = walk.iv; - - if (blocks >= 8) { - kernel_neon_begin(); - bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv); - kernel_neon_end(); - nbytes %= AES_BLOCK_SIZE; - continue; - } - - nbytes = crypto_cbc_decrypt_blocks(&walk, tfm, - aesbs_decrypt_one); - } - return err; -} - -static void inc_be128_ctr(__be32 ctr[], u32 addend) -{ - int i; - - for (i = 3; i >= 0; i--, addend = 1) { - u32 n = be32_to_cpu(ctr[i]) + addend; - - ctr[i] = cpu_to_be32(n); - if (n >= addend) - break; - } -} - -static int aesbs_ctr_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 blocks; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - __be32 *ctr = (__be32 *)walk.iv; - u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); - - /* avoid 32 bit counter overflow in the NEON code */ - if (unlikely(headroom < blocks)) { - blocks = headroom + 1; - tail = walk.nbytes - blocks * AES_BLOCK_SIZE; - } - kernel_neon_begin(); - bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, - walk.dst.virt.addr, blocks, - &ctx->enc, walk.iv); - kernel_neon_end(); - inc_be128_ctr(ctr, blocks); - - err = skcipher_walk_done(&walk, tail); - } - if (walk.nbytes) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - u8 ks[AES_BLOCK_SIZE]; - - AES_encrypt(walk.iv, ks, &ctx->enc.rk); - if (tdst != tsrc) - memcpy(tdst, tsrc, walk.nbytes); - crypto_xor(tdst, ks, walk.nbytes); - err = skcipher_walk_done(&walk, 0); - } - return err; -} - -static int aesbs_xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->enc, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static int aesbs_xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->dec, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static struct skcipher_alg aesbs_algs[] = { { - .base = { - .cra_name = "__cbc(aes)", - .cra_driver_name = "__cbc-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_set_key, - .encrypt = aesbs_cbc_encrypt, - .decrypt = aesbs_cbc_decrypt, -}, { - .base = { - .cra_name = "__ctr(aes)", - .cra_driver_name = "__ctr-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_set_key, - .encrypt = aesbs_ctr_encrypt, - .decrypt = aesbs_ctr_encrypt, -}, { - .base = { - .cra_name = "__xts(aes)", - .cra_driver_name = "__xts-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_xts_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_xts_set_key, - .encrypt = aesbs_xts_encrypt, - .decrypt = aesbs_xts_decrypt, -} }; - -struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)]; - -static void aesbs_mod_exit(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++) - simd_skcipher_free(aesbs_simd_algs[i]); - - crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); -} - -static int __init aesbs_mod_init(void) -{ - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - - if (!cpu_has_neon()) - return -ENODEV; - - err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) { - algname = aesbs_algs[i].base.cra_name + 2; - drvname = aesbs_algs[i].base.cra_driver_name + 2; - basename = aesbs_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesbs_simd_algs[i] = simd; - } - - return 0; - -unregister_simds: - aesbs_mod_exit(); - return err; -} - -module_init(aesbs_mod_init); -module_exit(aesbs_mod_exit); - -MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl deleted file mode 100644 index a4d3856e7d24..000000000000 --- a/arch/arm/crypto/bsaes-armv7.pl +++ /dev/null @@ -1,2471 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# -# Specific modes and adaptation for Linux kernel by Ard Biesheuvel -# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is -# granted. -# ==================================================================== - -# Bit-sliced AES for ARM NEON -# -# February 2012. -# -# This implementation is direct adaptation of bsaes-x86_64 module for -# ARM NEON. Except that this module is endian-neutral [in sense that -# it can be compiled for either endianness] by courtesy of vld1.8's -# neutrality. Initial version doesn't implement interface to OpenSSL, -# only low-level primitives and unsupported entry points, just enough -# to collect performance results, which for Cortex-A8 core are: -# -# encrypt 19.5 cycles per byte processed with 128-bit key -# decrypt 22.1 cycles per byte processed with 128-bit key -# key conv. 440 cycles per 128-bit key/0.18 of 8x block -# -# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -# which is [much] worse than anticipated (for further details see -# http://www.openssl.org/~appro/Snapdragon-S4.html). -# -# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -# manages in 20.0 cycles]. -# -# When comparing to x86_64 results keep in mind that NEON unit is -# [mostly] single-issue and thus can't [fully] benefit from -# instruction-level parallelism. And when comparing to aes-armv4 -# results keep in mind key schedule conversion overhead (see -# bsaes-x86_64.pl for further details)... -# -# <appro@openssl.org> - -# April-August 2013 -# -# Add CBC, CTR and XTS subroutines, adapt for kernel use. -# -# <ard.biesheuvel@linaro.org> - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); -my @XMM=map("q$_",(0..15)); - -{ -my ($key,$rounds,$const)=("r4","r5","r6"); - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - -sub Sbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InBasisChange (@b); - &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); - &OutBasisChange (@b[7,1,4,2,6,5,0,3]); -} - -sub InBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[2], @b[2], @b[1] - veor @b[5], @b[5], @b[6] - veor @b[3], @b[3], @b[0] - veor @b[6], @b[6], @b[2] - veor @b[5], @b[5], @b[0] - - veor @b[6], @b[6], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[4], @b[4], @b[5] - - veor @b[2], @b[2], @b[7] - veor @b[3], @b[3], @b[1] - veor @b[1], @b[1], @b[5] -___ -} - -sub OutBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] - veor @b[4], @b[4], @b[6] - veor @b[2], @b[2], @b[0] - veor @b[6], @b[6], @b[1] - - veor @b[1], @b[1], @b[5] - veor @b[5], @b[5], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[2], @b[2], @b[5] - - veor @b[4], @b[4], @b[7] -___ -} - -sub InvSbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InvInBasisChange (@b); - &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); - &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); -} - -sub InvInBasisChange { # OutBasisChange in reverse (with twist) -my @b=@_[5,1,2,6,3,7,0,4]; -$code.=<<___ - veor @b[1], @b[1], @b[7] - veor @b[4], @b[4], @b[7] - - veor @b[7], @b[7], @b[5] - veor @b[1], @b[1], @b[3] - veor @b[2], @b[2], @b[5] - veor @b[3], @b[3], @b[7] - - veor @b[6], @b[6], @b[1] - veor @b[2], @b[2], @b[0] - veor @b[5], @b[5], @b[3] - veor @b[4], @b[4], @b[6] - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] -___ -} - -sub InvOutBasisChange { # InBasisChange in reverse -my @b=@_[2,5,7,3,6,1,0,4]; -$code.=<<___; - veor @b[1], @b[1], @b[5] - veor @b[2], @b[2], @b[7] - - veor @b[3], @b[3], @b[1] - veor @b[4], @b[4], @b[5] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[5], @b[5], @b[0] - veor @b[3], @b[3], @b[7] - veor @b[6], @b[6], @b[2] - veor @b[2], @b[2], @b[1] - veor @b[6], @b[6], @b[3] - - veor @b[3], @b[3], @b[0] - veor @b[5], @b[5], @b[6] -___ -} - -sub Mul_GF4 { -#;************************************************************* -#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * -#;************************************************************* -my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $t1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $t1, $t0 - veor $x0, $x0, $t1 -___ -} - -sub Mul_GF4_N { # not used, see next subroutine -# multiply and scale by N -my ($x0,$x1,$y0,$y1,$t0)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $x1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $x1, $x0 - veor $x0, $x0, $t0 -___ -} - -sub Mul_GF4_N_GF4 { -# interleaved Mul_GF4_N and Mul_GF4 -my ($x0,$x1,$y0,$y1,$t0, - $x2,$x3,$y2,$y3,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - veor $t1, $y2, $y3 - vand $t0, $t0, $x0 - vand $t1, $t1, $x2 - veor $x0, $x0, $x1 - veor $x2, $x2, $x3 - vand $x1, $x1, $y0 - vand $x3, $x3, $y2 - vand $x0, $x0, $y1 - vand $x2, $x2, $y3 - veor $x1, $x1, $x0 - veor $x2, $x2, $x3 - veor $x0, $x0, $t0 - veor $x3, $x3, $t1 -___ -} -sub Mul_GF16_2 { -my @x=@_[0..7]; -my @y=@_[8..11]; -my @t=@_[12..15]; -$code.=<<___; - veor @t[0], @x[0], @x[2] - veor @t[1], @x[1], @x[3] -___ - &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[2], @x[3], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @x[0], @x[0], @t[0] - veor @x[2], @x[2], @t[0] - veor @x[1], @x[1], @t[1] - veor @x[3], @x[3], @t[1] - - veor @t[0], @x[4], @x[6] - veor @t[1], @x[5], @x[7] -___ - &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[6], @x[7], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @x[4], @x[4], @t[0] - veor @x[6], @x[6], @t[0] - veor @x[5], @x[5], @t[1] - veor @x[7], @x[7], @t[1] -___ -} -sub Inv_GF256 { -#;******************************************************************** -#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * -#;******************************************************************** -my @x=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; -# direct optimizations from hardware -$code.=<<___; - veor @t[3], @x[4], @x[6] - veor @t[2], @x[5], @x[7] - veor @t[1], @x[1], @x[3] - veor @s[1], @x[7], @x[6] - vmov @t[0], @t[2] - veor @s[0], @x[0], @x[2] - - vorr @t[2], @t[2], @t[1] - veor @s[3], @t[3], @t[0] - vand @s[2], @t[3], @s[0] - vorr @t[3], @t[3], @s[0] - veor @s[0], @s[0], @t[1] - vand @t[0], @t[0], @t[1] - veor @t[1], @x[3], @x[2] - vand @s[3], @s[3], @s[0] - vand @s[1], @s[1], @t[1] - veor @t[1], @x[4], @x[5] - veor @s[0], @x[1], @x[0] - veor @t[3], @t[3], @s[1] - veor @t[2], @t[2], @s[1] - vand @s[1], @t[1], @s[0] - vorr @t[1], @t[1], @s[0] - veor @t[3], @t[3], @s[3] - veor @t[0], @t[0], @s[1] - veor @t[2], @t[2], @s[2] - veor @t[1], @t[1], @s[3] - veor @t[0], @t[0], @s[2] - vand @s[0], @x[7], @x[3] - veor @t[1], @t[1], @s[2] - vand @s[1], @x[6], @x[2] - vand @s[2], @x[5], @x[1] - vorr @s[3], @x[4], @x[0] - veor @t[3], @t[3], @s[0] - veor @t[1], @t[1], @s[2] - veor @t[0], @t[0], @s[3] - veor @t[2], @t[2], @s[1] - - @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - - @ new smaller inversion - - vand @s[2], @t[3], @t[1] - vmov @s[0], @t[0] - - veor @s[1], @t[2], @s[2] - veor @s[3], @t[0], @s[2] - veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] - - vbsl @s[1], @t[1], @t[0] - vbsl @s[3], @t[3], @t[2] - veor @t[3], @t[3], @t[2] - - vbsl @s[0], @s[1], @s[2] - vbsl @t[0], @s[2], @s[1] - - vand @s[2], @s[0], @s[3] - veor @t[1], @t[1], @t[0] - - veor @s[2], @s[2], @t[3] -___ -# output in s3, s2, s1, t1 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); - -### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb -} - -# AES linear components - -sub ShiftRows { -my @x=@_[0..7]; -my @t=@_[8..11]; -my $mask=pop; -$code.=<<___; - vldmia $key!, {@t[0]-@t[3]} - veor @t[0], @t[0], @x[0] - veor @t[1], @t[1], @x[1] - vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` - vldmia $key!, {@t[0]} - veor @t[2], @t[2], @x[2] - vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` - vldmia $key!, {@t[1]} - veor @t[3], @t[3], @x[3] - vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` - vldmia $key!, {@t[2]} - vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` - vldmia $key!, {@t[3]} - veor @t[0], @t[0], @x[4] - veor @t[1], @t[1], @x[5] - vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` - veor @t[2], @t[2], @x[6] - vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` - veor @t[3], @t[3], @x[7] - vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` - vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` -___ -} - -sub MixColumns { -# modified to emit output in order suitable for feeding back to aesenc[last] -my @x=@_[0..7]; -my @t=@_[8..15]; -my $inv=@_[16]; # optional -$code.=<<___; - vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) - vext.8 @t[2], @x[2], @x[2], #12 - veor @x[1], @x[1], @t[1] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[2], @x[2], @t[2] - vext.8 @t[4], @x[4], @x[4], #12 - veor @x[3], @x[3], @t[3] - vext.8 @t[5], @x[5], @x[5], #12 - veor @x[4], @x[4], @t[4] - vext.8 @t[6], @x[6], @x[6], #12 - veor @x[5], @x[5], @t[5] - vext.8 @t[7], @x[7], @x[7], #12 - veor @x[6], @x[6], @t[6] - - veor @t[1], @t[1], @x[0] - veor @x[7], @x[7], @t[7] - vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor @t[2], @t[2], @x[1] - veor @t[0], @t[0], @x[7] - veor @t[1], @t[1], @x[7] - vext.8 @x[1], @x[1], @x[1], #8 - veor @t[5], @t[5], @x[4] - veor @x[0], @x[0], @t[0] - veor @t[6], @t[6], @x[5] - veor @x[1], @x[1], @t[1] - vext.8 @t[0], @x[4], @x[4], #8 - veor @t[4], @t[4], @x[3] - vext.8 @t[1], @x[5], @x[5], #8 - veor @t[7], @t[7], @x[6] - vext.8 @x[4], @x[3], @x[3], #8 - veor @t[3], @t[3], @x[2] - vext.8 @x[5], @x[7], @x[7], #8 - veor @t[4], @t[4], @x[7] - vext.8 @x[3], @x[6], @x[6], #8 - veor @t[3], @t[3], @x[7] - vext.8 @x[6], @x[2], @x[2], #8 - veor @x[7], @t[1], @t[5] -___ -$code.=<<___ if (!$inv); - veor @x[2], @t[0], @t[4] - veor @x[4], @x[4], @t[3] - veor @x[5], @x[5], @t[7] - veor @x[3], @x[3], @t[6] - @ vmov @x[2], @t[0] - veor @x[6], @x[6], @t[2] - @ vmov @x[7], @t[1] -___ -$code.=<<___ if ($inv); - veor @t[3], @t[3], @x[4] - veor @x[5], @x[5], @t[7] - veor @x[2], @x[3], @t[6] - veor @x[3], @t[0], @t[4] - veor @x[4], @x[6], @t[2] - vmov @x[6], @t[3] - @ vmov @x[7], @t[1] -___ -} - -sub InvMixColumns_orig { -my @x=@_[0..7]; -my @t=@_[8..15]; - -$code.=<<___; - @ multiplication by 0x0e - vext.8 @t[7], @x[7], @x[7], #12 - vmov @t[2], @x[2] - veor @x[2], @x[2], @x[5] @ 2 5 - veor @x[7], @x[7], @x[5] @ 7 5 - vext.8 @t[0], @x[0], @x[0], #12 - vmov @t[5], @x[5] - veor @x[5], @x[5], @x[0] @ 5 0 [1] - veor @x[0], @x[0], @x[1] @ 0 1 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[1], @x[1], @x[2] @ 1 25 - veor @x[0], @x[0], @x[6] @ 01 6 [2] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[1], @x[1], @x[3] @ 125 3 [4] - veor @x[2], @x[2], @x[0] @ 25 016 [3] - veor @x[3], @x[3], @x[7] @ 3 75 - veor @x[7], @x[7], @x[6] @ 75 6 [0] - vext.8 @t[6], @x[6], @x[6], #12 - vmov @t[4], @x[4] - veor @x[6], @x[6], @x[4] @ 6 4 - veor @x[4], @x[4], @x[3] @ 4 375 [6] - veor @x[3], @x[3], @x[7] @ 375 756=36 - veor @x[6], @x[6], @t[5] @ 64 5 [7] - veor @x[3], @x[3], @t[2] @ 36 2 - vext.8 @t[5], @t[5], @t[5], #12 - veor @x[3], @x[3], @t[4] @ 362 4 [5] -___ - my @y = @x[7,5,0,2,1,3,4,6]; -$code.=<<___; - @ multiplication by 0x0b - veor @y[1], @y[1], @y[0] - veor @y[0], @y[0], @t[0] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[5] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[1], @y[1], @t[6] - veor @y[0], @y[0], @t[7] - veor @t[7], @t[7], @t[6] @ clobber t[7] - - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @y[0] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[2], @y[2], @t[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - veor @y[2], @y[2], @t[2] - veor @y[3], @y[3], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[7] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[3], @y[3], @t[3] - veor @y[6], @y[6], @t[3] - veor @y[4], @y[4], @t[3] - veor @y[7], @y[7], @t[4] - vext.8 @t[3], @t[3], @t[3], #12 - veor @y[5], @y[5], @t[4] - veor @y[7], @y[7], @t[7] - veor @t[7], @t[7], @t[5] @ clobber t[7] even more - veor @y[3], @y[3], @t[5] - veor @y[4], @y[4], @t[4] - - veor @y[5], @y[5], @t[7] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[6], @y[6], @t[7] - veor @y[4], @y[4], @t[7] - - veor @t[7], @t[7], @t[5] - vext.8 @t[5], @t[5], @t[5], #12 - - @ multiplication by 0x0d - veor @y[4], @y[4], @y[7] - veor @t[7], @t[7], @t[6] @ restore t[7] - veor @y[7], @y[7], @t[4] - vext.8 @t[6], @t[6], @t[6], #12 - veor @y[2], @y[2], @t[0] - veor @y[7], @y[7], @t[5] - vext.8 @t[7], @t[7], @t[7], #12 - veor @y[2], @y[2], @t[2] - - veor @y[3], @y[3], @y[1] - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[0] - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @t[5] - veor @y[0], @y[0], @t[5] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[1], @y[1], @t[7] - veor @y[0], @y[0], @t[6] - veor @y[3], @y[3], @y[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - - veor @y[7], @y[7], @t[7] - veor @y[4], @y[4], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[6] - veor @t[6], @t[6], @t[3] @ clobber t[6] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[4], @y[4], @y[7] - veor @y[3], @y[3], @t[6] - - veor @y[6], @y[6], @t[6] - veor @y[5], @y[5], @t[5] - vext.8 @t[5], @t[5], @t[5], #12 - veor @y[6], @y[6], @t[4] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[5], @y[5], @t[6] - veor @y[6], @y[6], @t[7] - vext.8 @t[7], @t[7], @t[7], #12 - veor @t[6], @t[6], @t[3] @ restore t[6] - vext.8 @t[3], @t[3], @t[3], #12 - - @ multiplication by 0x09 - veor @y[4], @y[4], @y[1] - veor @t[1], @t[1], @y[1] @ t[1]=y[1] - veor @t[0], @t[0], @t[5] @ clobber t[0] - vext.8 @t[6], @t[6], @t[6], #12 - veor @t[1], @t[1], @t[5] - veor @y[3], @y[3], @t[0] - veor @t[0], @t[0], @y[0] @ t[0]=y[0] - veor @t[1], @t[1], @t[6] - veor @t[6], @t[6], @t[7] @ clobber t[6] - veor @y[4], @y[4], @t[1] - veor @y[7], @y[7], @t[4] - veor @y[6], @y[6], @t[3] - veor @y[5], @y[5], @t[2] - veor @t[4], @t[4], @y[4] @ t[4]=y[4] - veor @t[3], @t[3], @y[3] @ t[3]=y[3] - veor @t[5], @t[5], @y[5] @ t[5]=y[5] - veor @t[2], @t[2], @y[2] @ t[2]=y[2] - veor @t[3], @t[3], @t[7] - veor @XMM[5], @t[5], @t[6] - veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] - veor @XMM[2], @t[2], @t[6] - veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] - - vmov @XMM[0], @t[0] - vmov @XMM[1], @t[1] - @ vmov @XMM[2], @t[2] - vmov @XMM[3], @t[3] - vmov @XMM[4], @t[4] - @ vmov @XMM[5], @t[5] - @ vmov @XMM[6], @t[6] - @ vmov @XMM[7], @t[7] -___ -} - -sub InvMixColumns { -my @x=@_[0..7]; -my @t=@_[8..15]; - -# Thanks to Jussi Kivilinna for providing pointer to -# -# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | -# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | -# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | -# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | - -$code.=<<___; - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 @t[0], @x[0], @x[0], #8 - vext.8 @t[6], @x[6], @x[6], #8 - vext.8 @t[7], @x[7], @x[7], #8 - veor @t[0], @t[0], @x[0] - vext.8 @t[1], @x[1], @x[1], #8 - veor @t[6], @t[6], @x[6] - vext.8 @t[2], @x[2], @x[2], #8 - veor @t[7], @t[7], @x[7] - vext.8 @t[3], @x[3], @x[3], #8 - veor @t[1], @t[1], @x[1] - vext.8 @t[4], @x[4], @x[4], #8 - veor @t[2], @t[2], @x[2] - vext.8 @t[5], @x[5], @x[5], #8 - veor @t[3], @t[3], @x[3] - veor @t[4], @t[4], @x[4] - veor @t[5], @t[5], @x[5] - - veor @x[0], @x[0], @t[6] - veor @x[1], @x[1], @t[6] - veor @x[2], @x[2], @t[0] - veor @x[4], @x[4], @t[2] - veor @x[3], @x[3], @t[1] - veor @x[1], @x[1], @t[7] - veor @x[2], @x[2], @t[7] - veor @x[4], @x[4], @t[6] - veor @x[5], @x[5], @t[3] - veor @x[3], @x[3], @t[6] - veor @x[6], @x[6], @t[4] - veor @x[4], @x[4], @t[7] - veor @x[5], @x[5], @t[7] - veor @x[7], @x[7], @t[5] -___ - &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 -} - -sub swapmove { -my ($a,$b,$n,$mask,$t)=@_; -$code.=<<___; - vshr.u64 $t, $b, #$n - veor $t, $t, $a - vand $t, $t, $mask - veor $a, $a, $t - vshl.u64 $t, $t, #$n - veor $b, $b, $t -___ -} -sub swapmove2x { -my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; -$code.=<<___; - vshr.u64 $t0, $b0, #$n - vshr.u64 $t1, $b1, #$n - veor $t0, $t0, $a0 - veor $t1, $t1, $a1 - vand $t0, $t0, $mask - vand $t1, $t1, $mask - veor $a0, $a0, $t0 - vshl.u64 $t0, $t0, #$n - veor $a1, $a1, $t1 - vshl.u64 $t1, $t1, #$n - veor $b0, $b0, $t0 - veor $b1, $b1, $t1 -___ -} - -sub bitslice { -my @x=reverse(@_[0..7]); -my ($t0,$t1,$t2,$t3)=@_[8..11]; -$code.=<<___; - vmov.i8 $t0,#0x55 @ compose .LBS0 - vmov.i8 $t1,#0x33 @ compose .LBS1 -___ - &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); - &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -$code.=<<___; - vmov.i8 $t0,#0x0f @ compose .LBS2 -___ - &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); - &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - - &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); - &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); -} - -$code.=<<___; -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr $const,_bsaes_decrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - add $const,$const,#.LM0ISR-_bsaes_decrypt8 - - vldmia $const!, {@XMM[8]} @ .LM0ISR - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Ldec_sbox:\n"; - &InvSbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Ldec_done -___ - &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq $const,$const,#0x10 - bne .Ldec_loop - vldmia $const, {@XMM[12]} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: -___ - &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr $const,_bsaes_encrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - sub $const,$const,#_bsaes_encrypt8-.LM0SR - - vldmia $const!, {@XMM[8]} @ .LM0SR -_bsaes_encrypt8_alt: - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -_bsaes_encrypt8_bitslice: -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Lenc_sbox:\n"; - &Sbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Lenc_done -___ - &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq $const,$const,#0x10 - bne .Lenc_loop - vldmia $const, {@XMM[12]} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: -___ - # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb - &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -___ -} -{ -my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); - -sub bitslice_key { -my @x=reverse(@_[0..7]); -my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; - - &swapmove (@x[0,1],1,$bs0,$t2,$t3); -$code.=<<___; - @ &swapmove(@x[2,3],1,$t0,$t2,$t3); - vmov @x[2], @x[0] - vmov @x[3], @x[1] -___ - #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); - - &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); -$code.=<<___; - @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - vmov @x[4], @x[0] - vmov @x[6], @x[2] - vmov @x[5], @x[1] - vmov @x[7], @x[3] -___ - &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); - &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); -} - -$code.=<<___; -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr $const,_bsaes_key_convert - vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key - sub $const,$const,#_bsaes_key_convert-.LM0 - vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key - - vmov.i8 @XMM[8], #0x01 @ bit masks - vmov.i8 @XMM[9], #0x02 - vmov.i8 @XMM[10], #0x04 - vmov.i8 @XMM[11], #0x08 - vmov.i8 @XMM[12], #0x10 - vmov.i8 @XMM[13], #0x20 - vldmia $const, {@XMM[14]} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 @XMM[7], @XMM[7] - vrev32.8 @XMM[15], @XMM[15] -#endif - sub $rounds,$rounds,#1 - vstmia $out!, {@XMM[7]} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` - vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` - vmov.i8 @XMM[6], #0x40 - vmov.i8 @XMM[15], #0x80 - - vtst.8 @XMM[0], @XMM[7], @XMM[8] - vtst.8 @XMM[1], @XMM[7], @XMM[9] - vtst.8 @XMM[2], @XMM[7], @XMM[10] - vtst.8 @XMM[3], @XMM[7], @XMM[11] - vtst.8 @XMM[4], @XMM[7], @XMM[12] - vtst.8 @XMM[5], @XMM[7], @XMM[13] - vtst.8 @XMM[6], @XMM[7], @XMM[6] - vtst.8 @XMM[7], @XMM[7], @XMM[15] - vld1.8 {@XMM[15]}, [$inp]! @ load next round key - vmvn @XMM[0], @XMM[0] @ "pnot" - vmvn @XMM[1], @XMM[1] - vmvn @XMM[5], @XMM[5] - vmvn @XMM[6], @XMM[6] -#ifdef __ARMEL__ - vrev32.8 @XMM[15], @XMM[15] -#endif - subs $rounds,$rounds,#1 - vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 @XMM[7],#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -___ -} - -if (0) { # following four functions are unsupported interface - # used for benchmarking... -$code.=<<___; -.globl bsaes_enc_key_convert -.type bsaes_enc_key_convert,%function -.align 4 -bsaes_enc_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_enc_key_convert,.-bsaes_enc_key_convert - -.globl bsaes_encrypt_128 -.type bsaes_encrypt_128,%function -.align 4 -bsaes_encrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Lenc128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_encrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! - bhi .Lenc128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_encrypt_128,.-bsaes_encrypt_128 - -.globl bsaes_dec_key_convert -.type bsaes_dec_key_convert,%function -.align 4 -bsaes_dec_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - vldmia $out, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $out, {@XMM[7]} - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_dec_key_convert,.-bsaes_dec_key_convert - -.globl bsaes_decrypt_128 -.type bsaes_decrypt_128,%function -.align 4 -bsaes_decrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Ldec128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_decrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! - bhi .Ldec128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_decrypt_128,.-bsaes_decrypt_128 -___ -} -{ -my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); -my ($keysched)=("sp"); - -$code.=<<___; -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp $len, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ivp, [ip] @ IV is 1st arg on the stack - mov $len, $len, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - vldmia $keysched, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $keysched, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: -#endif - - vld1.8 {@XMM[15]}, [$ivp] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs $len, $len, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5, $rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp] - sub $inp, $inp, #0x60 - vstmia $fp, {@XMM[15]} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - veor @XMM[5], @XMM[5], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[5]}, [$out]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds $len, $len, #8 - beq .Lcbc_dec_done - - vld1.8 {@XMM[0]}, [$inp]! @ load input - cmp $len, #2 - blo .Lcbc_dec_one - vld1.8 {@XMM[1]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - mov r5, $rounds - vstmia $fp, {@XMM[15]} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {@XMM[2]}, [$inp]! - cmp $len, #4 - blo .Lcbc_dec_three - vld1.8 {@XMM[3]}, [$inp]! - beq .Lcbc_dec_four - vld1.8 {@XMM[4]}, [$inp]! - cmp $len, #6 - blo .Lcbc_dec_five - vld1.8 {@XMM[5]}, [$inp]! - beq .Lcbc_dec_six - vld1.8 {@XMM[6]}, [$inp]! - sub $inp, $inp, #0x70 - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub $inp, $inp, #0x60 - bl _bsaes_decrypt8 - vldmia $fp,{@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub $inp, $inp, #0x50 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[2], @XMM[2], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub $inp, $inp, #0x40 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub $inp, $inp, #0x30 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub $inp, $inp, #0x20 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! @ reload input - veor @XMM[1], @XMM[1], @XMM[8] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub $inp, $inp, #0x10 - mov $rounds, $out @ save original out pointer - mov $out, $fp @ use the iv scratch space as out buffer - mov r2, $key - vmov @XMM[4],@XMM[15] @ just in case ensure that IV - vmov @XMM[5],@XMM[0] @ and input are preserved - bl AES_decrypt - vld1.8 {@XMM[0]}, [$fp,:64] @ load result - veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV - vmov @XMM[15], @XMM[5] @ @XMM[5] holds input - vst1.8 {@XMM[0]}, [$rounds] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lcbc_dec_bzero -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - vst1.8 {@XMM[15]}, [$ivp] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -___ -} -{ -my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); -my $const = "r6"; # shared with _bsaes_encrypt8_alt -my $keysched = "sp"; - -$code.=<<___; -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 -bsaes_ctr32_encrypt_blocks: - cmp $len, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ctr, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vld1.8 {@XMM[0]}, [$ctr] @ load counter - add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr - vldmia $keysched, {@XMM[4]} @ load round0 key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - -.align 2 -0: add r12, $key, #248 - vld1.8 {@XMM[0]}, [$ctr] @ load counter - adrl $ctr, .LREVM0SR @ borrow $ctr - vldmia r12, {@XMM[4]} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 @XMM[8],#1 @ compose 1<<96 - veor @XMM[9],@XMM[9],@XMM[9] - vrev32.8 @XMM[0],@XMM[0] - vext.8 @XMM[8],@XMM[9],@XMM[8],#4 - vrev32.8 @XMM[4],@XMM[4] - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vstmia $keysched, {@XMM[4]} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 - vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 - vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 - vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 - vadd.u32 @XMM[4], @XMM[1], @XMM[10] - vadd.u32 @XMM[5], @XMM[2], @XMM[10] - vadd.u32 @XMM[6], @XMM[3], @XMM[10] - vadd.u32 @XMM[7], @XMM[4], @XMM[10] - vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia $keysched, {@XMM[9]} @ load round0 key -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, $keysched, #0x10 @ pass next round key -#else - add r4, $key, #`248+16` -#endif - vldmia $ctr, {@XMM[8]} @ .LREVM0SR - mov r5, $rounds @ pass rounds - vstmia $fp, {@XMM[10]} @ save next counter - sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs $len, $len, #8 - blo .Lctr_enc_loop_done - - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[0], @XMM[8] - veor @XMM[1], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[10] - veor @XMM[6], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[7], @XMM[13] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - veor @XMM[5], @XMM[15] - vst1.8 {@XMM[6]}, [$out]! - vmov.i32 @XMM[8], #1 @ compose 1<<96 - vst1.8 {@XMM[3]}, [$out]! - veor @XMM[9], @XMM[9], @XMM[9] - vst1.8 {@XMM[7]}, [$out]! - vext.8 @XMM[8], @XMM[9], @XMM[8], #4 - vst1.8 {@XMM[2]}, [$out]! - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vst1.8 {@XMM[5]}, [$out]! - vldmia $fp, {@XMM[0]} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add $len, $len, #8 - vld1.8 {@XMM[8]}, [$inp]! @ load input - veor @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! @ write output - cmp $len, #2 - blo .Lctr_enc_done - vld1.8 {@XMM[9]}, [$inp]! - veor @XMM[1], @XMM[9] - vst1.8 {@XMM[1]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[4], @XMM[10] - vst1.8 {@XMM[4]}, [$out]! - cmp $len, #4 - blo .Lctr_enc_done - vld1.8 {@XMM[11]}, [$inp]! - veor @XMM[6], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[3]}, [$out]! - cmp $len, #6 - blo .Lctr_enc_done - vld1.8 {@XMM[13]}, [$inp]! - veor @XMM[7], @XMM[13] - vst1.8 {@XMM[7]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[14]}, [$inp] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[2]}, [$out]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lctr_enc_bzero -#else - vstmia $keysched, {q0-q1} -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, $inp @ copy arguments - mov r5, $out - mov r6, $len - mov r7, $key - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {@XMM[1]}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [r4]! @ load input - vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor @XMM[0],@XMM[0],@XMM[1] - vst1.8 {@XMM[0]}, [r5]! @ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -___ -} -{ -###################################################################### -# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); -my $const="r6"; # returned by _bsaes_key_convert -my $twmask=@XMM[5]; -my @T=@XMM[6..7]; - -$code.=<<___; -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - - subs $len, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds $len, #0x70 - bmi .Lxts_enc_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_enc_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! - mov $fp, r4 - - vmov @XMM[8], @XMM[9] @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds $len, #0x10 - beq .Lxts_enc_ret - sub r6, $out, #0x10 - -.Lxts_enc_steal: - ldrb r0, [$inp], #1 - ldrb r1, [$out, #-0x10] - strb r0, [$out, #-0x10] - strb r1, [$out], #1 - - subs $len, #1 - bhi .Lxts_enc_steal - - vld1.8 {@XMM[0]}, [r6] - mov r0, sp - veor @XMM[0], @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [r6] - mov $fp, r4 -#endif - -.Lxts_enc_ret: - bic r0, $fp, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, $fp -#ifdef XTS_CHAIN_TWEAK - vst1.8 {@XMM[8]}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst $len, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne $len, #0x10 @ subtract another 16 bytes -#endif - subs $len, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds $len, #0x70 - bmi .Lxts_dec_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_dec_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - mov r5, $magic @ preserve magic - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! - mov $fp, r4 - mov $magic, r5 - - vmov @XMM[8], @XMM[9] @ next round tweak - -.Lxts_dec_done: -#ifndef XTS_CHAIN_TWEAK - adds $len, #0x10 - beq .Lxts_dec_ret - - @ calculate one round of extra tweak for the stolen ciphertext - vldmia $magic, {$twmask} - vshr.s64 @XMM[6], @XMM[8], #63 - vand @XMM[6], @XMM[6], $twmask - vadd.u64 @XMM[9], @XMM[8], @XMM[8] - vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` - veor @XMM[9], @XMM[9], @XMM[6] - - @ perform the final decryption with the last tweak value - vld1.8 {@XMM[0]}, [$inp]! - mov r0, sp - veor @XMM[0], @XMM[0], @XMM[9] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[9] - vst1.8 {@XMM[0]}, [$out] - - mov r6, $out -.Lxts_dec_steal: - ldrb r1, [$out] - ldrb r0, [$inp], #1 - strb r1, [$out, #0x10] - strb r0, [$out], #1 - - subs $len, #1 - bhi .Lxts_dec_steal - - vld1.8 {@XMM[0]}, [r6] - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [r6] - mov $fp, r4 -#endif - -.Lxts_dec_ret: - bic r0, $fp, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_dec_bzero - - mov sp, $fp -#ifdef XTS_CHAIN_TWEAK - vst1.8 {@XMM[8]}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -___ -} -$code.=<<___; -#endif -___ - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -open SELF,$0; -while(<SELF>) { - next if (/^#!/); - last if (!s/^#/@/ and !/^$/); - print; -} -close SELF; - -print $code; - -close STDOUT; diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..3fecb2124c35 --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -0,0 +1,523 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> + + .text + .fpu neon + .align 5 + +ENTRY(chacha20_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requireds shuffling to rearrange the words after each + // round. + // + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + mov r3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vext.8 q1, q1, q1, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vext.8 q3, q3, q3, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vext.8 q1, q1, q1, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vext.8 q3, q3, q3, #4 + + subs r3, r3, #1 + bne .Ldoubleround + + add ip, r2, #0x20 + vld1.8 {q4-q5}, [r2] + vld1.8 {q6-q7}, [ip] + + // o0 = i0 ^ (x0 + s0) + vadd.i32 q0, q0, q8 + veor q0, q0, q4 + + // o1 = i1 ^ (x1 + s1) + vadd.i32 q1, q1, q9 + veor q1, q1, q5 + + // o2 = i2 ^ (x2 + s2) + vadd.i32 q2, q2, q10 + veor q2, q2, q6 + + // o3 = i3 ^ (x3 + s3) + vadd.i32 q3, q3, q11 + veor q3, q3, q7 + + add ip, r1, #0x20 + vst1.8 {q0-q1}, [r1] + vst1.8 {q2-q3}, [ip] + + bx lr +ENDPROC(chacha20_block_xor_neon) + + .align 5 +ENTRY(chacha20_4block_xor_neon) + push {r4-r6, lr} + mov ip, sp // preserve the stack pointer + sub r3, sp, #0x20 // allocate a 32 byte buffer + bic r3, r3, #0x1f // aligned to 32 bytes + mov sp, r3 + + // r0: Input state matrix, s + // r1: 4 data blocks output, o + // r2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + + // x0..15[0-3] = s0..3[0..3] + add r3, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [r3] + + adr r3, CTRINC + vdup.32 q15, d7[1] + vdup.32 q14, d7[0] + vld1.32 {q11}, [r3, :128] + vdup.32 q13, d6[1] + vdup.32 q12, d6[0] + vadd.i32 q12, q12, q11 // x12 += counter values 0-3 + vdup.32 q11, d5[1] + vdup.32 q10, d5[0] + vdup.32 q9, d4[1] + vdup.32 q8, d4[0] + vdup.32 q7, d3[1] + vdup.32 q6, d3[0] + vdup.32 q5, d2[1] + vdup.32 q4, d2[0] + vdup.32 q3, d1[1] + vdup.32 q2, d1[0] + vdup.32 q1, d0[1] + vdup.32 q0, d0[0] + + mov r3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + vrev32.16 q15, q15 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #12 + vshl.u32 q5, q9, #12 + vsri.u32 q4, q8, #20 + vsri.u32 q5, q9, #20 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #12 + vshl.u32 q7, q9, #12 + vsri.u32 q6, q8, #20 + vsri.u32 q7, q9, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q8, q12, q0 + veor q9, q13, q1 + vshl.u32 q12, q8, #8 + vshl.u32 q13, q9, #8 + vsri.u32 q12, q8, #24 + vsri.u32 q13, q9, #24 + + veor q8, q14, q2 + veor q9, q15, q3 + vshl.u32 q14, q8, #8 + vshl.u32 q15, q9, #8 + vsri.u32 q14, q8, #24 + vsri.u32 q15, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #7 + vshl.u32 q5, q9, #7 + vsri.u32 q4, q8, #25 + vsri.u32 q5, q9, #25 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #7 + vshl.u32 q7, q9, #7 + vsri.u32 q6, q8, #25 + vsri.u32 q7, q9, #25 + + vld1.32 {q8-q9}, [sp, :256] + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vrev32.16 q15, q15 + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #12 + vshl.u32 q4, q9, #12 + vsri.u32 q7, q8, #20 + vsri.u32 q4, q9, #20 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #12 + vshl.u32 q6, q9, #12 + vsri.u32 q5, q8, #20 + vsri.u32 q6, q9, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q8, q15, q0 + veor q9, q12, q1 + vshl.u32 q15, q8, #8 + vshl.u32 q12, q9, #8 + vsri.u32 q15, q8, #24 + vsri.u32 q12, q9, #24 + + veor q8, q13, q2 + veor q9, q14, q3 + vshl.u32 q13, q8, #8 + vshl.u32 q14, q9, #8 + vsri.u32 q13, q8, #24 + vsri.u32 q14, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #7 + vshl.u32 q4, q9, #7 + vsri.u32 q7, q8, #25 + vsri.u32 q4, q9, #25 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #7 + vshl.u32 q6, q9, #7 + vsri.u32 q5, q8, #25 + vsri.u32 q6, q9, #25 + + subs r3, r3, #1 + beq 0f + + vld1.32 {q8-q9}, [sp, :256] + b .Ldoubleround4 + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] +0: ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q0, q0, q8 + vadd.i32 q1, q1, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q2, q2, q8 + vadd.i32 q3, q3, q9 + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q4, q4, q8 + vadd.i32 q5, q5, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q6, q6, q8 + vadd.i32 q7, q7, q9 + + // interleave 32-bit words in state n, n+1 + vzip.32 q0, q1 + vzip.32 q2, q3 + vzip.32 q4, q5 + vzip.32 q6, q7 + + // interleave 64-bit words in state n, n+2 + vswp d1, d4 + vswp d3, d6 + vswp d9, d12 + vswp d11, d14 + + // xor with corresponding input, write to output + vld1.8 {q8-q9}, [r2]! + veor q8, q8, q0 + veor q9, q9, q4 + vst1.8 {q8-q9}, [r1]! + + vld1.32 {q8-q9}, [sp, :256] + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + vadd.i32 q8, q8, q0 + vadd.i32 q9, q9, q4 + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q10, q10, q0 + vadd.i32 q11, q11, q4 + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + adr r3, CTRINC + vadd.i32 q12, q12, q0 + vld1.32 {q0}, [r3, :128] + vadd.i32 q13, q13, q4 + vadd.i32 q12, q12, q0 // x12 += counter values 0-3 + + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q14, q14, q0 + vadd.i32 q15, q15, q4 + + // interleave 32-bit words in state n, n+1 + vzip.32 q8, q9 + vzip.32 q10, q11 + vzip.32 q12, q13 + vzip.32 q14, q15 + + // interleave 64-bit words in state n, n+2 + vswp d17, d20 + vswp d19, d22 + vswp d25, d28 + vswp d27, d30 + + vmov q4, q1 + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q8 + veor q1, q1, q12 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q2 + veor q1, q1, q6 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q10 + veor q1, q1, q14 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q4 + veor q1, q1, q5 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q9 + veor q1, q1, q13 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q3 + veor q1, q1, q7 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2] + veor q0, q0, q11 + veor q1, q1, q15 + vst1.8 {q0-q1}, [r1] + + mov sp, ip + pop {r4-r6, pc} +ENDPROC(chacha20_4block_xor_neon) + + .align 4 +CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..592f75ae4fa1 --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -0,0 +1,128 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/algapi.h> +#include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index df42c93a93d6..f5dce9b4e617 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -31,10 +31,10 @@ static LIST_HEAD(clocks); static DEFINE_MUTEX(clocks_mutex); static DEFINE_SPINLOCK(clockfw_lock); -static void __clk_enable(struct clk *clk) +void davinci_clk_enable(struct clk *clk) { if (clk->parent) - __clk_enable(clk->parent); + davinci_clk_enable(clk->parent); if (clk->usecount++ == 0) { if (clk->flags & CLK_PSC) davinci_psc_config(clk->domain, clk->gpsc, clk->lpsc, @@ -44,7 +44,7 @@ static void __clk_enable(struct clk *clk) } } -static void __clk_disable(struct clk *clk) +void davinci_clk_disable(struct clk *clk) { if (WARN_ON(clk->usecount == 0)) return; @@ -56,7 +56,7 @@ static void __clk_disable(struct clk *clk) clk->clk_disable(clk); } if (clk->parent) - __clk_disable(clk->parent); + davinci_clk_disable(clk->parent); } int davinci_clk_reset(struct clk *clk, bool reset) @@ -103,7 +103,7 @@ int clk_enable(struct clk *clk) return -EINVAL; spin_lock_irqsave(&clockfw_lock, flags); - __clk_enable(clk); + davinci_clk_enable(clk); spin_unlock_irqrestore(&clockfw_lock, flags); return 0; @@ -118,7 +118,7 @@ void clk_disable(struct clk *clk) return; spin_lock_irqsave(&clockfw_lock, flags); - __clk_disable(clk); + davinci_clk_disable(clk); spin_unlock_irqrestore(&clockfw_lock, flags); } EXPORT_SYMBOL(clk_disable); diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h index e2a5437a1aee..fa2b83752e03 100644 --- a/arch/arm/mach-davinci/clock.h +++ b/arch/arm/mach-davinci/clock.h @@ -132,6 +132,8 @@ int davinci_set_sysclk_rate(struct clk *clk, unsigned long rate); int davinci_set_refclk_rate(unsigned long rate); int davinci_simple_set_rate(struct clk *clk, unsigned long rate); int davinci_clk_reset(struct clk *clk, bool reset); +void davinci_clk_enable(struct clk *clk); +void davinci_clk_disable(struct clk *clk); extern struct platform_device davinci_wdt_device; extern void davinci_watchdog_reset(struct platform_device *); diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index e770c97ea45c..1d873d15b545 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -319,6 +319,16 @@ static struct clk emac_clk = { .gpsc = 1, }; +/* + * In order to avoid adding the emac_clk to the clock lookup table twice (and + * screwing up the linked list in the process) create a separate clock for + * mdio inheriting the rate from emac_clk. + */ +static struct clk mdio_clk = { + .name = "mdio", + .parent = &emac_clk, +}; + static struct clk mcasp_clk = { .name = "mcasp", .parent = &async3_clk, @@ -367,6 +377,16 @@ static struct clk aemif_clk = { .flags = ALWAYS_ENABLED, }; +/* + * In order to avoid adding the aemif_clk to the clock lookup table twice (and + * screwing up the linked list in the process) create a separate clock for + * nand inheriting the rate from aemif_clk. + */ +static struct clk aemif_nand_clk = { + .name = "nand", + .parent = &aemif_clk, +}; + static struct clk usb11_clk = { .name = "usb11", .parent = &pll0_sysclk4, @@ -529,7 +549,7 @@ static struct clk_lookup da850_clks[] = { CLK(NULL, "arm", &arm_clk), CLK(NULL, "rmii", &rmii_clk), CLK("davinci_emac.1", NULL, &emac_clk), - CLK("davinci_mdio.0", "fck", &emac_clk), + CLK("davinci_mdio.0", "fck", &mdio_clk), CLK("davinci-mcasp.0", NULL, &mcasp_clk), CLK("davinci-mcbsp.0", NULL, &mcbsp0_clk), CLK("davinci-mcbsp.1", NULL, &mcbsp1_clk), @@ -537,7 +557,15 @@ static struct clk_lookup da850_clks[] = { CLK("da830-mmc.0", NULL, &mmcsd0_clk), CLK("da830-mmc.1", NULL, &mmcsd1_clk), CLK("ti-aemif", NULL, &aemif_clk), - CLK(NULL, "aemif", &aemif_clk), + /* + * The only user of this clock is davinci_nand and it get's it through + * con_id. The nand node itself is created from within the aemif + * driver to guarantee that it's probed after the aemif timing + * parameters are configured. of_dev_auxdata is not accessible from + * the aemif driver and can't be passed to of_platform_populate(). For + * that reason we're leaving the dev_id here as NULL. + */ + CLK(NULL, "aemif", &aemif_nand_clk), CLK("ohci-da8xx", "usb11", &usb11_clk), CLK("musb-da8xx", "usb20", &usb20_clk), CLK("spi_davinci.0", NULL, &spi0_clk), diff --git a/arch/arm/mach-davinci/usb-da8xx.c b/arch/arm/mach-davinci/usb-da8xx.c index c6feecf7ae24..9a6af0bd5dc3 100644 --- a/arch/arm/mach-davinci/usb-da8xx.c +++ b/arch/arm/mach-davinci/usb-da8xx.c @@ -22,6 +22,8 @@ #define DA8XX_USB0_BASE 0x01e00000 #define DA8XX_USB1_BASE 0x01e25000 +static struct clk *usb20_clk; + static struct platform_device da8xx_usb_phy = { .name = "da8xx-usb-phy", .id = -1, @@ -158,26 +160,13 @@ int __init da8xx_register_usb_refclkin(int rate) static void usb20_phy_clk_enable(struct clk *clk) { - struct clk *usb20_clk; - int err; u32 val; u32 timeout = 500000; /* 500 msec */ val = readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG)); - usb20_clk = clk_get(&da8xx_usb20_dev.dev, "usb20"); - if (IS_ERR(usb20_clk)) { - pr_err("could not get usb20 clk: %ld\n", PTR_ERR(usb20_clk)); - return; - } - /* The USB 2.O PLL requires that the USB 2.O PSC is enabled as well. */ - err = clk_prepare_enable(usb20_clk); - if (err) { - pr_err("failed to enable usb20 clk: %d\n", err); - clk_put(usb20_clk); - return; - } + davinci_clk_enable(usb20_clk); /* * Turn on the USB 2.0 PHY, but just the PLL, and not OTG. The USB 1.1 @@ -197,8 +186,7 @@ static void usb20_phy_clk_enable(struct clk *clk) pr_err("Timeout waiting for USB 2.0 PHY clock good\n"); done: - clk_disable_unprepare(usb20_clk); - clk_put(usb20_clk); + davinci_clk_disable(usb20_clk); } static void usb20_phy_clk_disable(struct clk *clk) @@ -285,11 +273,19 @@ static struct clk_lookup usb20_phy_clk_lookup = int __init da8xx_register_usb20_phy_clk(bool use_usb_refclkin) { struct clk *parent; - int ret = 0; + int ret; + + usb20_clk = clk_get(&da8xx_usb20_dev.dev, "usb20"); + ret = PTR_ERR_OR_ZERO(usb20_clk); + if (ret) + return ret; parent = clk_get(NULL, use_usb_refclkin ? "usb_refclkin" : "pll0_aux"); - if (IS_ERR(parent)) - return PTR_ERR(parent); + ret = PTR_ERR_OR_ZERO(parent); + if (ret) { + clk_put(usb20_clk); + return ret; + } usb20_phy_clk.parent = parent; ret = clk_register(&usb20_phy_clk); diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 98ffe1e62ad5..a5d68411a037 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -385,36 +385,6 @@ fail: return pen_release != -1 ? ret : 0; } -/* - * Initialise the CPU possible map early - this describes the CPUs - * which may be present or become present in the system. - */ - -static void __init exynos_smp_init_cpus(void) -{ - void __iomem *scu_base = scu_base_addr(); - unsigned int i, ncores; - - if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) - ncores = scu_base ? scu_get_core_count(scu_base) : 1; - else - /* - * CPU Nodes are passed thru DT and set_cpu_possible - * is set by "arm_dt_init_cpu_maps". - */ - return; - - /* sanity check */ - if (ncores > nr_cpu_ids) { - pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", - ncores, nr_cpu_ids); - ncores = nr_cpu_ids; - } - - for (i = 0; i < ncores; i++) - set_cpu_possible(i, true); -} - static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) { int i; @@ -479,7 +449,6 @@ static void exynos_cpu_die(unsigned int cpu) #endif /* CONFIG_HOTPLUG_CPU */ const struct smp_operations exynos_smp_ops __initconst = { - .smp_init_cpus = exynos_smp_init_cpus, .smp_prepare_cpus = exynos_smp_prepare_cpus, .smp_secondary_init = exynos_secondary_init, .smp_boot_secondary = exynos_boot_secondary, diff --git a/arch/arm/mach-imx/mach-imx1.c b/arch/arm/mach-imx/mach-imx1.c index de5ab8d88549..3a8406e45b65 100644 --- a/arch/arm/mach-imx/mach-imx1.c +++ b/arch/arm/mach-imx/mach-imx1.c @@ -37,7 +37,6 @@ static const char * const imx1_dt_board_compat[] __initconst = { }; DT_MACHINE_START(IMX1_DT, "Freescale i.MX1 (Device Tree Support)") - .map_io = debug_ll_io_init, .init_early = imx1_init_early, .init_irq = imx1_init_irq, .dt_compat = imx1_dt_board_compat, diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 469894082fea..093458b62c8d 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -7,7 +7,7 @@ ccflags-y := -I$(srctree)/$(src)/include \ # Common support obj-y := id.o io.o control.o devices.o fb.o timer.o pm.o \ - common.o gpio.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \ + common.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \ omap_device.o omap-headsmp.o sram.o drm.o hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 36d9943205ca..dc9e34e670a2 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -304,7 +304,7 @@ DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)") .init_late = am43xx_init_late, .init_irq = omap_gic_of_init, .init_machine = omap_generic_init, - .init_time = omap4_local_timer_init, + .init_time = omap3_gptimer_timer_init, .dt_compat = am43_boards_compat, .restart = omap44xx_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/gpio.c b/arch/arm/mach-omap2/gpio.c deleted file mode 100644 index 7a577145b68b..000000000000 --- a/arch/arm/mach-omap2/gpio.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * OMAP2+ specific gpio initialization - * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ - * - * Author: - * Charulatha V <charu@ti.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include <linux/gpio.h> -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/of.h> -#include <linux/platform_data/gpio-omap.h> - -#include "soc.h" -#include "omap_hwmod.h" -#include "omap_device.h" -#include "omap-pm.h" - -#include "powerdomain.h" - -static int __init omap2_gpio_dev_init(struct omap_hwmod *oh, void *unused) -{ - struct platform_device *pdev; - struct omap_gpio_platform_data *pdata; - struct omap_gpio_dev_attr *dev_attr; - char *name = "omap_gpio"; - int id; - struct powerdomain *pwrdm; - - /* - * extract the device id from name field available in the - * hwmod database and use the same for constructing ids for - * gpio devices. - * CAUTION: Make sure the name in the hwmod database does - * not change. If changed, make corresponding change here - * or make use of static variable mechanism to handle this. - */ - sscanf(oh->name, "gpio%d", &id); - - pdata = kzalloc(sizeof(struct omap_gpio_platform_data), GFP_KERNEL); - if (!pdata) { - pr_err("gpio%d: Memory allocation failed\n", id); - return -ENOMEM; - } - - dev_attr = (struct omap_gpio_dev_attr *)oh->dev_attr; - pdata->bank_width = dev_attr->bank_width; - pdata->dbck_flag = dev_attr->dbck_flag; - pdata->get_context_loss_count = omap_pm_get_dev_context_loss_count; - pdata->regs = kzalloc(sizeof(struct omap_gpio_reg_offs), GFP_KERNEL); - if (!pdata->regs) { - pr_err("gpio%d: Memory allocation failed\n", id); - kfree(pdata); - return -ENOMEM; - } - - switch (oh->class->rev) { - case 0: - if (id == 1) - /* non-wakeup GPIO pins for OMAP2 Bank1 */ - pdata->non_wakeup_gpios = 0xe203ffc0; - else if (id == 2) - /* non-wakeup GPIO pins for OMAP2 Bank2 */ - pdata->non_wakeup_gpios = 0x08700040; - /* fall through */ - - case 1: - pdata->regs->revision = OMAP24XX_GPIO_REVISION; - pdata->regs->direction = OMAP24XX_GPIO_OE; - pdata->regs->datain = OMAP24XX_GPIO_DATAIN; - pdata->regs->dataout = OMAP24XX_GPIO_DATAOUT; - pdata->regs->set_dataout = OMAP24XX_GPIO_SETDATAOUT; - pdata->regs->clr_dataout = OMAP24XX_GPIO_CLEARDATAOUT; - pdata->regs->irqstatus = OMAP24XX_GPIO_IRQSTATUS1; - pdata->regs->irqstatus2 = OMAP24XX_GPIO_IRQSTATUS2; - pdata->regs->irqenable = OMAP24XX_GPIO_IRQENABLE1; - pdata->regs->irqenable2 = OMAP24XX_GPIO_IRQENABLE2; - pdata->regs->set_irqenable = OMAP24XX_GPIO_SETIRQENABLE1; - pdata->regs->clr_irqenable = OMAP24XX_GPIO_CLEARIRQENABLE1; - pdata->regs->debounce = OMAP24XX_GPIO_DEBOUNCE_VAL; - pdata->regs->debounce_en = OMAP24XX_GPIO_DEBOUNCE_EN; - pdata->regs->ctrl = OMAP24XX_GPIO_CTRL; - pdata->regs->wkup_en = OMAP24XX_GPIO_WAKE_EN; - pdata->regs->leveldetect0 = OMAP24XX_GPIO_LEVELDETECT0; - pdata->regs->leveldetect1 = OMAP24XX_GPIO_LEVELDETECT1; - pdata->regs->risingdetect = OMAP24XX_GPIO_RISINGDETECT; - pdata->regs->fallingdetect = OMAP24XX_GPIO_FALLINGDETECT; - break; - case 2: - pdata->regs->revision = OMAP4_GPIO_REVISION; - pdata->regs->direction = OMAP4_GPIO_OE; - pdata->regs->datain = OMAP4_GPIO_DATAIN; - pdata->regs->dataout = OMAP4_GPIO_DATAOUT; - pdata->regs->set_dataout = OMAP4_GPIO_SETDATAOUT; - pdata->regs->clr_dataout = OMAP4_GPIO_CLEARDATAOUT; - pdata->regs->irqstatus_raw0 = OMAP4_GPIO_IRQSTATUSRAW0; - pdata->regs->irqstatus_raw1 = OMAP4_GPIO_IRQSTATUSRAW1; - pdata->regs->irqstatus = OMAP4_GPIO_IRQSTATUS0; - pdata->regs->irqstatus2 = OMAP4_GPIO_IRQSTATUS1; - pdata->regs->irqenable = OMAP4_GPIO_IRQSTATUSSET0; - pdata->regs->irqenable2 = OMAP4_GPIO_IRQSTATUSSET1; - pdata->regs->set_irqenable = OMAP4_GPIO_IRQSTATUSSET0; - pdata->regs->clr_irqenable = OMAP4_GPIO_IRQSTATUSCLR0; - pdata->regs->debounce = OMAP4_GPIO_DEBOUNCINGTIME; - pdata->regs->debounce_en = OMAP4_GPIO_DEBOUNCENABLE; - pdata->regs->ctrl = OMAP4_GPIO_CTRL; - pdata->regs->wkup_en = OMAP4_GPIO_IRQWAKEN0; - pdata->regs->leveldetect0 = OMAP4_GPIO_LEVELDETECT0; - pdata->regs->leveldetect1 = OMAP4_GPIO_LEVELDETECT1; - pdata->regs->risingdetect = OMAP4_GPIO_RISINGDETECT; - pdata->regs->fallingdetect = OMAP4_GPIO_FALLINGDETECT; - break; - default: - WARN(1, "Invalid gpio bank_type\n"); - kfree(pdata->regs); - kfree(pdata); - return -EINVAL; - } - - pwrdm = omap_hwmod_get_pwrdm(oh); - pdata->loses_context = pwrdm_can_ever_lose_context(pwrdm); - - pdev = omap_device_build(name, id - 1, oh, pdata, sizeof(*pdata)); - kfree(pdata); - - if (IS_ERR(pdev)) { - WARN(1, "Can't build omap_device for %s:%s.\n", - name, oh->name); - return PTR_ERR(pdev); - } - - return 0; -} - -/* - * gpio_init needs to be done before - * machine_init functions access gpio APIs. - * Hence gpio_init is a omap_postcore_initcall. - */ -static int __init omap2_gpio_init(void) -{ - /* If dtb is there, the devices will be created dynamically */ - if (of_have_populated_dt()) - return -ENODEV; - - return omap_hwmod_for_each_by_class("gpio", omap2_gpio_dev_init, NULL); -} -omap_postcore_initcall(omap2_gpio_init); diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 759e1d45ba25..e8b988714a09 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -741,14 +741,14 @@ static int _init_main_clk(struct omap_hwmod *oh) int ret = 0; char name[MOD_CLK_MAX_NAME_LEN]; struct clk *clk; + static const char modck[] = "_mod_ck"; - /* +7 magic comes from '_mod_ck' suffix */ - if (strlen(oh->name) + 7 > MOD_CLK_MAX_NAME_LEN) + if (strlen(oh->name) >= MOD_CLK_MAX_NAME_LEN - strlen(modck)) pr_warn("%s: warning: cropping name for %s\n", __func__, oh->name); - strncpy(name, oh->name, MOD_CLK_MAX_NAME_LEN - 7); - strcat(name, "_mod_ck"); + strlcpy(name, oh->name, MOD_CLK_MAX_NAME_LEN - strlen(modck)); + strlcat(name, modck, MOD_CLK_MAX_NAME_LEN); clk = clk_get(NULL, name); if (!IS_ERR(clk)) { diff --git a/arch/arm/mach-omap2/omap_hwmod_common_data.h b/arch/arm/mach-omap2/omap_hwmod_common_data.h index cdfbb44ceb0c..f22e9cb39f4a 100644 --- a/arch/arm/mach-omap2/omap_hwmod_common_data.h +++ b/arch/arm/mach-omap2/omap_hwmod_common_data.h @@ -121,10 +121,6 @@ extern struct omap_hwmod_irq_info omap2_uart3_mpu_irqs[]; extern struct omap_hwmod_irq_info omap2_dispc_irqs[]; extern struct omap_hwmod_irq_info omap2_i2c1_mpu_irqs[]; extern struct omap_hwmod_irq_info omap2_i2c2_mpu_irqs[]; -extern struct omap_hwmod_irq_info omap2_gpio1_irqs[]; -extern struct omap_hwmod_irq_info omap2_gpio2_irqs[]; -extern struct omap_hwmod_irq_info omap2_gpio3_irqs[]; -extern struct omap_hwmod_irq_info omap2_gpio4_irqs[]; extern struct omap_hwmod_irq_info omap2_dma_system_irqs[]; extern struct omap_hwmod_irq_info omap2_mcspi1_mpu_irqs[]; extern struct omap_hwmod_irq_info omap2_mcspi2_mpu_irqs[]; diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 5b2f5138d938..2b138b65129a 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -295,10 +295,8 @@ int omap_prcm_register_chain_handler(struct omap_prcm_irq_setup *irq_setup) GFP_KERNEL); if (!prcm_irq_chips || !prcm_irq_setup->saved_mask || - !prcm_irq_setup->priority_mask) { - pr_err("PRCM: kzalloc failed\n"); + !prcm_irq_setup->priority_mask) goto err; - } memset(mask, 0, sizeof(mask)); diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 56128da23c3a..07dd692c4737 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -510,18 +510,19 @@ void __init omap3_secure_sync32k_timer_init(void) } #endif /* CONFIG_ARCH_OMAP3 */ -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \ + defined(CONFIG_SOC_AM43XX) void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); - - clocksource_probe(); + if (of_have_populated_dt()) + clocksource_probe(); } #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ - defined(CONFIG_SOC_DRA7XX) || defined(CONFIG_SOC_AM43XX) + defined(CONFIG_SOC_DRA7XX) static void __init omap4_sync32k_timer_init(void) { __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", diff --git a/arch/arm/mach-s3c24xx/common.c b/arch/arm/mach-s3c24xx/common.c index f6c3f151d0d4..b59f4f4f256f 100644 --- a/arch/arm/mach-s3c24xx/common.c +++ b/arch/arm/mach-s3c24xx/common.c @@ -345,10 +345,40 @@ static struct s3c24xx_dma_channel s3c2410_dma_channels[DMACH_MAX] = { [DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), }, }; +static const struct dma_slave_map s3c2410_dma_slave_map[] = { + { "s3c2410-sdi", "rx-tx", (void *)DMACH_SDI }, + { "s3c2410-spi.0", "rx", (void *)DMACH_SPI0_RX }, + { "s3c2410-spi.0", "tx", (void *)DMACH_SPI0_TX }, + { "s3c2410-spi.1", "rx", (void *)DMACH_SPI1_RX }, + { "s3c2410-spi.1", "tx", (void *)DMACH_SPI1_TX }, + /* + * The DMA request source[1] (DMACH_UARTx_SRC2) are + * not used in the UART driver. + */ + { "s3c2410-uart.0", "rx", (void *)DMACH_UART0 }, + { "s3c2410-uart.0", "tx", (void *)DMACH_UART0 }, + { "s3c2410-uart.1", "rx", (void *)DMACH_UART1 }, + { "s3c2410-uart.1", "tx", (void *)DMACH_UART1 }, + { "s3c2410-uart.2", "rx", (void *)DMACH_UART2 }, + { "s3c2410-uart.2", "tx", (void *)DMACH_UART2 }, + { "s3c24xx-iis", "rx", (void *)DMACH_I2S_IN }, + { "s3c24xx-iis", "tx", (void *)DMACH_I2S_OUT }, + { "s3c-hsudc", "rx0", (void *)DMACH_USB_EP1 }, + { "s3c-hsudc", "tx0", (void *)DMACH_USB_EP1 }, + { "s3c-hsudc", "rx1", (void *)DMACH_USB_EP2 }, + { "s3c-hsudc", "tx1", (void *)DMACH_USB_EP2 }, + { "s3c-hsudc", "rx2", (void *)DMACH_USB_EP3 }, + { "s3c-hsudc", "tx2", (void *)DMACH_USB_EP3 }, + { "s3c-hsudc", "rx3", (void *)DMACH_USB_EP4 }, + { "s3c-hsudc", "tx3", (void *)DMACH_USB_EP4 } +}; + static struct s3c24xx_dma_platdata s3c2410_dma_platdata = { .num_phy_channels = 4, .channels = s3c2410_dma_channels, .num_channels = DMACH_MAX, + .slave_map = s3c2410_dma_slave_map, + .slavecnt = ARRAY_SIZE(s3c2410_dma_slave_map), }; struct platform_device s3c2410_device_dma = { @@ -388,10 +418,36 @@ static struct s3c24xx_dma_channel s3c2412_dma_channels[DMACH_MAX] = { [DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, 16 }, }; +static const struct dma_slave_map s3c2412_dma_slave_map[] = { + { "s3c2412-sdi", "rx-tx", (void *)DMACH_SDI }, + { "s3c2412-spi.0", "rx", (void *)DMACH_SPI0_RX }, + { "s3c2412-spi.0", "tx", (void *)DMACH_SPI0_TX }, + { "s3c2412-spi.1", "rx", (void *)DMACH_SPI1_RX }, + { "s3c2412-spi.1", "tx", (void *)DMACH_SPI1_TX }, + { "s3c2440-uart.0", "rx", (void *)DMACH_UART0 }, + { "s3c2440-uart.0", "tx", (void *)DMACH_UART0 }, + { "s3c2440-uart.1", "rx", (void *)DMACH_UART1 }, + { "s3c2440-uart.1", "tx", (void *)DMACH_UART1 }, + { "s3c2440-uart.2", "rx", (void *)DMACH_UART2 }, + { "s3c2440-uart.2", "tx", (void *)DMACH_UART2 }, + { "s3c2412-iis", "rx", (void *)DMACH_I2S_IN }, + { "s3c2412-iis", "tx", (void *)DMACH_I2S_OUT }, + { "s3c-hsudc", "rx0", (void *)DMACH_USB_EP1 }, + { "s3c-hsudc", "tx0", (void *)DMACH_USB_EP1 }, + { "s3c-hsudc", "rx1", (void *)DMACH_USB_EP2 }, + { "s3c-hsudc", "tx1", (void *)DMACH_USB_EP2 }, + { "s3c-hsudc", "rx2", (void *)DMACH_USB_EP3 }, + { "s3c-hsudc", "tx2", (void *)DMACH_USB_EP3 }, + { "s3c-hsudc", "rx3", (void *)DMACH_USB_EP4 }, + { "s3c-hsudc", "tx3", (void *)DMACH_USB_EP4 } +}; + static struct s3c24xx_dma_platdata s3c2412_dma_platdata = { .num_phy_channels = 4, .channels = s3c2412_dma_channels, .num_channels = DMACH_MAX, + .slave_map = s3c2412_dma_slave_map, + .slavecnt = ARRAY_SIZE(s3c2412_dma_slave_map), }; struct platform_device s3c2412_device_dma = { @@ -534,10 +590,30 @@ static struct s3c24xx_dma_channel s3c2443_dma_channels[DMACH_MAX] = { [DMACH_MIC_IN] = { S3C24XX_DMA_APB, true, 29 }, }; +static const struct dma_slave_map s3c2443_dma_slave_map[] = { + { "s3c2440-sdi", "rx-tx", (void *)DMACH_SDI }, + { "s3c2443-spi.0", "rx", (void *)DMACH_SPI0_RX }, + { "s3c2443-spi.0", "tx", (void *)DMACH_SPI0_TX }, + { "s3c2443-spi.1", "rx", (void *)DMACH_SPI1_RX }, + { "s3c2443-spi.1", "tx", (void *)DMACH_SPI1_TX }, + { "s3c2440-uart.0", "rx", (void *)DMACH_UART0 }, + { "s3c2440-uart.0", "tx", (void *)DMACH_UART0 }, + { "s3c2440-uart.1", "rx", (void *)DMACH_UART1 }, + { "s3c2440-uart.1", "tx", (void *)DMACH_UART1 }, + { "s3c2440-uart.2", "rx", (void *)DMACH_UART2 }, + { "s3c2440-uart.2", "tx", (void *)DMACH_UART2 }, + { "s3c2440-uart.3", "rx", (void *)DMACH_UART3 }, + { "s3c2440-uart.3", "tx", (void *)DMACH_UART3 }, + { "s3c24xx-iis", "rx", (void *)DMACH_I2S_IN }, + { "s3c24xx-iis", "tx", (void *)DMACH_I2S_OUT }, +}; + static struct s3c24xx_dma_platdata s3c2443_dma_platdata = { .num_phy_channels = 6, .channels = s3c2443_dma_channels, .num_channels = DMACH_MAX, + .slave_map = s3c2443_dma_slave_map, + .slavecnt = ARRAY_SIZE(s3c2443_dma_slave_map), }; struct platform_device s3c2443_device_dma = { diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index fc033c0d2a0f..eada0b58ba1c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -356,5 +356,21 @@ status = "disabled"; }; }; + + vpu: vpu@d0100000 { + compatible = "amlogic,meson-gx-vpu"; + reg = <0x0 0xd0100000 0x0 0x100000>, + <0x0 0xc883c000 0x0 0x1000>, + <0x0 0xc8838000 0x0 0x1000>; + reg-names = "vpu", "hhi", "dmc"; + interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>; + #address-cells = <1>; + #size-cells = <0>; + + /* CVBS VDAC output port */ + cvbs_vdac_port: port@0 { + reg = <0>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts index 969682092e0f..4cbd626a9e88 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts @@ -142,6 +142,16 @@ clocks = <&wifi32k>; clock-names = "ext_clock"; }; + + cvbs-connector { + compatible = "composite-video-connector"; + + port { + cvbs_connector_in: endpoint { + remote-endpoint = <&cvbs_vdac_out>; + }; + }; + }; }; &uart_AO { @@ -229,3 +239,9 @@ clocks = <&clkc CLKID_FCLK_DIV4>; clock-names = "clkin0"; }; + +&cvbs_vdac_port { + cvbs_vdac_out: endpoint { + remote-endpoint = <&cvbs_connector_in>; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index 203be28978d5..4a96e0f6f926 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -125,6 +125,16 @@ clocks = <&wifi32k>; clock-names = "ext_clock"; }; + + cvbs-connector { + compatible = "composite-video-connector"; + + port { + cvbs_connector_in: endpoint { + remote-endpoint = <&cvbs_vdac_out>; + }; + }; + }; }; /* This UART is brought out to the DB9 connector */ @@ -234,3 +244,9 @@ clocks = <&clkc CLKID_FCLK_DIV4>; clock-names = "clkin0"; }; + +&cvbs_vdac_port { + cvbs_vdac_out: endpoint { + remote-endpoint = <&cvbs_connector_in>; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 51edd5b5c460..596240c38a9c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -506,3 +506,7 @@ <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; + +&vpu { + compatible = "amlogic,meson-gxbb-vpu", "amlogic,meson-gx-vpu"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts index e99101ae9664..cea4a3eded9b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts @@ -117,6 +117,16 @@ clocks = <&wifi32k>; clock-names = "ext_clock"; }; + + cvbs-connector { + compatible = "composite-video-connector"; + + port { + cvbs_connector_in: endpoint { + remote-endpoint = <&cvbs_vdac_out>; + }; + }; + }; }; &uart_AO { @@ -203,3 +213,9 @@ clocks = <&clkc CLKID_FCLK_DIV4>; clock-names = "clkin0"; }; + +&cvbs_vdac_port { + cvbs_vdac_out: endpoint { + remote-endpoint = <&cvbs_connector_in>; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 9f89b99c4806..69216246275d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -43,7 +43,7 @@ #include "meson-gx.dtsi" #include <dt-bindings/clock/gxbb-clkc.h> -#include <dt-bindings/gpio/meson-gxbb-gpio.h> +#include <dt-bindings/gpio/meson-gxl-gpio.h> / { compatible = "amlogic,meson-gxl"; @@ -299,3 +299,7 @@ <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; + +&vpu { + compatible = "amlogic,meson-gxl-vpu", "amlogic,meson-gx-vpu"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index f859d75db8bd..5a337d339df1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -90,6 +90,16 @@ compatible = "mmc-pwrseq-emmc"; reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>; }; + + cvbs-connector { + compatible = "composite-video-connector"; + + port { + cvbs_connector_in: endpoint { + remote-endpoint = <&cvbs_vdac_out>; + }; + }; + }; }; /* This UART is brought out to the DB9 connector */ @@ -167,3 +177,9 @@ max-speed = <1000>; }; }; + +&cvbs_vdac_port { + cvbs_vdac_out: endpoint { + remote-endpoint = <&cvbs_connector_in>; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index c1974bbbddea..eb2f0c3e5e53 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -112,3 +112,7 @@ }; }; }; + +&vpu { + compatible = "amlogic,meson-gxm-vpu", "amlogic,meson-gx-vpu"; +}; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts index a852e28a40e1..a83ed2c6bbf7 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts +++ b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts @@ -81,7 +81,7 @@ #address-cells = <0>; interrupt-controller; reg = <0x0 0x2c001000 0 0x1000>, - <0x0 0x2c002000 0 0x1000>, + <0x0 0x2c002000 0 0x2000>, <0x0 0x2c004000 0 0x2000>, <0x0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 9d1d7ad9b075..29ed6b61c737 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -64,6 +64,16 @@ reg = <0x0 0x86000000 0x0 0x200000>; no-map; }; + + memory@85800000 { + reg = <0x0 0x85800000 0x0 0x800000>; + no-map; + }; + + memory@86200000 { + reg = <0x0 0x86200000 0x0 0x2600000>; + no-map; + }; }; cpus { diff --git a/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts b/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts index 6ffb0517421a..dbea2c3d8f0c 100644 --- a/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts @@ -169,7 +169,7 @@ power-source = <3300>; }; - sdhi0_pins_uhs: sd0 { + sdhi0_pins_uhs: sd0_uhs { groups = "sdhi0_data4", "sdhi0_ctrl"; function = "sdhi0"; power-source = <1800>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 869dded0f09f..33b744d54739 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -331,6 +331,7 @@ CONFIG_DRM_VC4=m CONFIG_DRM_PANEL_SIMPLE=m CONFIG_DRM_I2C_ADV7511=m CONFIG_DRM_HISI_KIRIN=m +CONFIG_DRM_MESON=m CONFIG_FB=y CONFIG_FB_ARMCLCD=y CONFIG_BACKLIGHT_GENERIC=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 450a85df041a..5de75c3dcbd4 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -41,6 +41,10 @@ config CRYPTO_CRC32_ARM64_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_AES_ARM64 + tristate "AES core cipher using scalar instructions" + select CRYPTO_AES + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON @@ -72,4 +76,17 @@ config CRYPTO_CRC32_ARM64 depends on ARM64 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + +config CRYPTO_AES_ARM64_BS + tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES_ARM64 + select CRYPTO_SIMD + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index aa8888d7b744..d1ae1b9cbe70 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -41,6 +41,15 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o + +obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o +aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o +aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S new file mode 100644 index 000000000000..37590ab8121a --- /dev/null +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -0,0 +1,127 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + rk .req x0 + out .req x1 + in .req x2 + rounds .req x3 + tt .req x4 + lt .req x2 + + .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc + ldp \out0, \out1, [rk], #8 + + ubfx w13, \in0, #0, #8 + ubfx w14, \in1, #8, #8 + ldr w13, [tt, w13, uxtw #2] + ldr w14, [tt, w14, uxtw #2] + + .if \enc + ubfx w17, \in1, #0, #8 + ubfx w18, \in2, #8, #8 + .else + ubfx w17, \in3, #0, #8 + ubfx w18, \in0, #8, #8 + .endif + ldr w17, [tt, w17, uxtw #2] + ldr w18, [tt, w18, uxtw #2] + + ubfx w15, \in2, #16, #8 + ubfx w16, \in3, #24, #8 + ldr w15, [tt, w15, uxtw #2] + ldr w16, [tt, w16, uxtw #2] + + .if \enc + ubfx \t0, \in3, #16, #8 + ubfx \t1, \in0, #24, #8 + .else + ubfx \t0, \in1, #16, #8 + ubfx \t1, \in2, #24, #8 + .endif + ldr \t0, [tt, \t0, uxtw #2] + ldr \t1, [tt, \t1, uxtw #2] + + eor \out0, \out0, w13 + eor \out1, \out1, w17 + eor \out0, \out0, w14, ror #24 + eor \out1, \out1, w18, ror #24 + eor \out0, \out0, w15, ror #16 + eor \out1, \out1, \t0, ror #16 + eor \out0, \out0, w16, ror #8 + eor \out1, \out1, \t1, ror #8 + .endm + + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .endm + + .macro do_crypt, round, ttab, ltab + ldp w5, w6, [in] + ldp w7, w8, [in, #8] + ldp w9, w10, [rk], #16 + ldp w11, w12, [rk, #-8] + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) + + eor w5, w5, w9 + eor w6, w6, w10 + eor w7, w7, w11 + eor w8, w8, w12 + + ldr tt, =\ttab + ldr lt, =\ltab + + tbnz rounds, #1, 1f + +0: \round w9, w10, w11, w12, w5, w6, w7, w8 + \round w5, w6, w7, w8, w9, w10, w11, w12 + +1: subs rounds, rounds, #4 + \round w9, w10, w11, w12, w5, w6, w7, w8 + csel tt, tt, lt, hi + \round w5, w6, w7, w8, w9, w10, w11, w12 + b.hi 0b + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) + + stp w5, w6, [out] + stp w7, w8, [out, #8] + ret + + .align 4 + .ltorg + .endm + + .align 5 +ENTRY(__aes_arm64_encrypt) + do_crypt fround, crypto_ft_tab, crypto_fl_tab +ENDPROC(__aes_arm64_encrypt) + + .align 5 +ENTRY(__aes_arm64_decrypt) + do_crypt iround, crypto_it_tab, crypto_il_tab +ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c new file mode 100644 index 000000000000..7288e7cbebff --- /dev/null +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -0,0 +1,69 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <linux/crypto.h> +#include <linux/module.h> + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_encrypt); + +asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_decrypt); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm64_encrypt(ctx->key_enc, out, in, rounds); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm64_decrypt(ctx->key_dec, out, in, rounds); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-arm64", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = crypto_aes_set_key, + .cra_cipher.cia_encrypt = aes_encrypt, + .cra_cipher.cia_decrypt = aes_decrypt +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Scalar AES cipher for arm64"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 4e3f8adb1793..5164aaf82c6a 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -327,6 +327,23 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = ctr_encrypt, }, { .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO - 1, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base = { .cra_name = "__xts(aes)", .cra_driver_name = "__xts-aes-" MODE, .cra_priority = PRIO, @@ -350,8 +367,9 @@ static void aes_exit(void) { int i; - for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) - simd_skcipher_free(aes_simd_algs[i]); + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } @@ -370,6 +388,9 @@ static int __init aes_init(void) return err; for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + algname = aes_algs[i].base.cra_name + 2; drvname = aes_algs[i].base.cra_driver_name + 2; basename = aes_algs[i].base.cra_driver_name; diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S new file mode 100644 index 000000000000..8d0cdaa2768d --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -0,0 +1,963 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov <appro@openssl.org> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + rounds .req x11 + bskey .req x12 + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b2, \b2, \b1 + eor \b5, \b5, \b6 + eor \b3, \b3, \b0 + eor \b6, \b6, \b2 + eor \b5, \b5, \b0 + eor \b6, \b6, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b4, \b4, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + eor \b4, \b4, \b6 + eor \b2, \b2, \b0 + eor \b6, \b6, \b1 + eor \b1, \b1, \b5 + eor \b5, \b5, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b2, \b2, \b5 + eor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + eor \b1, \b1, \b7 + eor \b4, \b4, \b7 + eor \b7, \b7, \b5 + eor \b1, \b1, \b3 + eor \b2, \b2, \b5 + eor \b3, \b3, \b7 + eor \b6, \b6, \b1 + eor \b2, \b2, \b0 + eor \b5, \b5, \b3 + eor \b4, \b4, \b6 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + eor \b1, \b1, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b4, \b4, \b5 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b5, \b5, \b0 + eor \b3, \b3, \b7 + eor \b6, \b6, \b2 + eor \b2, \b2, \b1 + eor \b6, \b6, \b3 + eor \b3, \b3, \b0 + eor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + eor \t0, \y0, \y1 + and \t0, \t0, \x0 + eor \x0, \x0, \x1 + and \t1, \x1, \y0 + and \x0, \x0, \y1 + eor \x1, \t1, \t0 + eor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + eor \t0, \y0, \y1 + eor \t1, \y2, \y3 + and \t0, \t0, \x0 + and \t1, \t1, \x2 + eor \x0, \x0, \x1 + eor \x2, \x2, \x3 + and \x1, \x1, \y0 + and \x3, \x3, \y2 + and \x0, \x0, \y1 + and \x2, \x2, \y3 + eor \x1, \x1, \x0 + eor \x2, \x2, \x3 + eor \x0, \x0, \t0 + eor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + eor \t0, \x0, \x2 + eor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + eor \x0, \x0, \t0 + eor \x2, \x2, \t0 + eor \x1, \x1, \t1 + eor \x3, \x3, \t1 + eor \t0, \x4, \x6 + eor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + eor \x4, \x4, \t0 + eor \x6, \x6, \t0 + eor \x5, \x5, \t1 + eor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + eor \t3, \x4, \x6 + eor \t0, \x5, \x7 + eor \t1, \x1, \x3 + eor \s1, \x7, \x6 + eor \s0, \x0, \x2 + eor \s3, \t3, \t0 + orr \t2, \t0, \t1 + and \s2, \t3, \s0 + orr \t3, \t3, \s0 + eor \s0, \s0, \t1 + and \t0, \t0, \t1 + eor \t1, \x3, \x2 + and \s3, \s3, \s0 + and \s1, \s1, \t1 + eor \t1, \x4, \x5 + eor \s0, \x1, \x0 + eor \t3, \t3, \s1 + eor \t2, \t2, \s1 + and \s1, \t1, \s0 + orr \t1, \t1, \s0 + eor \t3, \t3, \s3 + eor \t0, \t0, \s1 + eor \t2, \t2, \s2 + eor \t1, \t1, \s3 + eor \t0, \t0, \s2 + and \s0, \x7, \x3 + eor \t1, \t1, \s2 + and \s1, \x6, \x2 + and \s2, \x5, \x1 + orr \s3, \x4, \x0 + eor \t3, \t3, \s0 + eor \t1, \t1, \s2 + eor \s0, \t0, \s3 + eor \t2, \t2, \s1 + and \s2, \t3, \t1 + eor \s1, \t2, \s2 + eor \s3, \s0, \s2 + bsl \s1, \t1, \s0 + not \t0, \s0 + bsl \s0, \s1, \s3 + bsl \t0, \s1, \s3 + bsl \s3, \t3, \t2 + eor \t3, \t3, \t2 + and \s2, \s0, \s3 + eor \t1, \t1, \t0 + eor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ + \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \ + \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b + .endm + + .macro enc_next_rk + ldp q16, q17, [bskey], #128 + ldp q18, q19, [bskey, #-96] + ldp q20, q21, [bskey, #-64] + ldp q22, q23, [bskey, #-32] + .endm + + .macro dec_next_rk + ldp q16, q17, [bskey, #-128]! + ldp q18, q19, [bskey, #32] + ldp q20, q21, [bskey, #64] + ldp q22, q23, [bskey, #96] + .endm + + .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 + eor \x0\().16b, \x0\().16b, v16.16b + eor \x1\().16b, \x1\().16b, v17.16b + eor \x2\().16b, \x2\().16b, v18.16b + eor \x3\().16b, \x3\().16b, v19.16b + eor \x4\().16b, \x4\().16b, v20.16b + eor \x5\().16b, \x5\().16b, v21.16b + eor \x6\().16b, \x6\().16b, v22.16b + eor \x7\().16b, \x7\().16b, v23.16b + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask + tbl \x0\().16b, {\x0\().16b}, \mask\().16b + tbl \x1\().16b, {\x1\().16b}, \mask\().16b + tbl \x2\().16b, {\x2\().16b}, \mask\().16b + tbl \x3\().16b, {\x3\().16b}, \mask\().16b + tbl \x4\().16b, {\x4\().16b}, \mask\().16b + tbl \x5\().16b, {\x5\().16b}, \mask\().16b + tbl \x6\().16b, {\x6\().16b}, \mask\().16b + tbl \x7\().16b, {\x7\().16b}, \mask\().16b + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + ext \t0\().16b, \x0\().16b, \x0\().16b, #12 + ext \t1\().16b, \x1\().16b, \x1\().16b, #12 + eor \x0\().16b, \x0\().16b, \t0\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #12 + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t3\().16b, \x3\().16b, \x3\().16b, #12 + eor \x2\().16b, \x2\().16b, \t2\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #12 + eor \x3\().16b, \x3\().16b, \t3\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #12 + eor \x4\().16b, \x4\().16b, \t4\().16b + ext \t6\().16b, \x6\().16b, \x6\().16b, #12 + eor \x5\().16b, \x5\().16b, \t5\().16b + ext \t7\().16b, \x7\().16b, \x7\().16b, #12 + eor \x6\().16b, \x6\().16b, \t6\().16b + eor \t1\().16b, \t1\().16b, \x0\().16b + eor \x7\().16b, \x7\().16b, \t7\().16b + ext \x0\().16b, \x0\().16b, \x0\().16b, #8 + eor \t2\().16b, \t2\().16b, \x1\().16b + eor \t0\().16b, \t0\().16b, \x7\().16b + eor \t1\().16b, \t1\().16b, \x7\().16b + ext \x1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t5\().16b, \t5\().16b, \x4\().16b + eor \x0\().16b, \x0\().16b, \t0\().16b + eor \t6\().16b, \t6\().16b, \x5\().16b + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t0\().16b, \x4\().16b, \x4\().16b, #8 + eor \t4\().16b, \t4\().16b, \x3\().16b + ext \t1\().16b, \x5\().16b, \x5\().16b, #8 + eor \t7\().16b, \t7\().16b, \x6\().16b + ext \x4\().16b, \x3\().16b, \x3\().16b, #8 + eor \t3\().16b, \t3\().16b, \x2\().16b + ext \x5\().16b, \x7\().16b, \x7\().16b, #8 + eor \t4\().16b, \t4\().16b, \x7\().16b + ext \x3\().16b, \x6\().16b, \x6\().16b, #8 + eor \t3\().16b, \t3\().16b, \x7\().16b + ext \x6\().16b, \x2\().16b, \x2\().16b, #8 + eor \x7\().16b, \t1\().16b, \t5\().16b + .ifb \inv + eor \x2\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t3\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t2\().16b + .else + eor \t3\().16b, \t3\().16b, \x4\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x2\().16b, \x3\().16b, \t6\().16b + eor \x3\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x6\().16b, \t2\().16b + mov \x6\().16b, \t3\().16b + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + ext \t0\().16b, \x0\().16b, \x0\().16b, #8 + ext \t6\().16b, \x6\().16b, \x6\().16b, #8 + ext \t7\().16b, \x7\().16b, \x7\().16b, #8 + eor \t0\().16b, \t0\().16b, \x0\().16b + ext \t1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t6\().16b, \t6\().16b, \x6\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #8 + eor \t7\().16b, \t7\().16b, \x7\().16b + ext \t3\().16b, \x3\().16b, \x3\().16b, #8 + eor \t1\().16b, \t1\().16b, \x1\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #8 + eor \t2\().16b, \t2\().16b, \x2\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #8 + eor \t3\().16b, \t3\().16b, \x3\().16b + eor \t4\().16b, \t4\().16b, \x4\().16b + eor \t5\().16b, \t5\().16b, \x5\().16b + eor \x0\().16b, \x0\().16b, \t6\().16b + eor \x1\().16b, \x1\().16b, \t6\().16b + eor \x2\().16b, \x2\().16b, \t0\().16b + eor \x4\().16b, \x4\().16b, \t2\().16b + eor \x3\().16b, \x3\().16b, \t1\().16b + eor \x1\().16b, \x1\().16b, \t7\().16b + eor \x2\().16b, \x2\().16b, \t7\().16b + eor \x4\().16b, \x4\().16b, \t6\().16b + eor \x5\().16b, \x5\().16b, \t3\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t7\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x7\().16b, \x7\().16b, \t5\().16b + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + ushr \t0\().2d, \b0\().2d, #\n + ushr \t1\().2d, \b1\().2d, #\n + eor \t0\().16b, \t0\().16b, \a0\().16b + eor \t1\().16b, \t1\().16b, \a1\().16b + and \t0\().16b, \t0\().16b, \mask\().16b + and \t1\().16b, \t1\().16b, \mask\().16b + eor \a0\().16b, \a0\().16b, \t0\().16b + shl \t0\().2d, \t0\().2d, #\n + eor \a1\().16b, \a1\().16b, \t1\().16b + shl \t1\().2d, \t1\().2d, #\n + eor \b0\().16b, \b0\().16b, \t0\().16b + eor \b1\().16b, \b1\().16b, \t1\().16b + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + movi \t0\().16b, #0x55 + movi \t1\().16b, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 + movi \t0\().16b, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + + .align 6 +M0: .octa 0x0004080c0105090d02060a0e03070b0f + +M0SR: .octa 0x0004080c05090d010a0e02060f03070b +SR: .octa 0x0f0e0d0c0a09080b0504070600030201 +SRM0: .octa 0x01060b0c0207080d0304090e00050a0f + +M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 +ISR: .octa 0x0f0e0d0c080b0a090504070602010003 +ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + ld1 {v7.4s}, [x1], #16 // load round 0 key + ld1 {v17.4s}, [x1], #16 // load round 1 key + + movi v8.16b, #0x01 // bit masks + movi v9.16b, #0x02 + movi v10.16b, #0x04 + movi v11.16b, #0x08 + movi v12.16b, #0x10 + movi v13.16b, #0x20 + movi v14.16b, #0x40 + movi v15.16b, #0x80 + ldr q16, M0 + + sub x2, x2, #1 + str q7, [x0], #16 // save round 0 key + +.Lkey_loop: + tbl v7.16b ,{v17.16b}, v16.16b + ld1 {v17.4s}, [x1], #16 // load next round key + + cmtst v0.16b, v7.16b, v8.16b + cmtst v1.16b, v7.16b, v9.16b + cmtst v2.16b, v7.16b, v10.16b + cmtst v3.16b, v7.16b, v11.16b + cmtst v4.16b, v7.16b, v12.16b + cmtst v5.16b, v7.16b, v13.16b + cmtst v6.16b, v7.16b, v14.16b + cmtst v7.16b, v7.16b, v15.16b + not v0.16b, v0.16b + not v1.16b, v1.16b + not v5.16b, v5.16b + not v6.16b, v6.16b + + subs x2, x2, #1 + stp q0, q1, [x0], #128 + stp q2, q3, [x0, #-96] + stp q4, q5, [x0, #-64] + stp q6, q7, [x0, #-32] + b.ne .Lkey_loop + + movi v7.16b, #0x63 // compose .L63 + eor v17.16b, v17.16b, v7.16b + str q17, [x0] + ret +ENDPROC(aesbs_convert_key) + + .align 4 +aesbs_encrypt8: + ldr q9, [bskey], #16 // round 0 key + ldr q8, M0SR + ldr q24, SR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Lenc_sbox + +.Lenc_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Lenc_sbox: + sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Lenc_done + + enc_next_rk + + mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + add_round_key v0, v1, v2, v3, v4, v5, v6, v7 + + b.ne .Lenc_loop + ldr q24, SRM0 + b .Lenc_loop + +.Lenc_done: + ldr q12, [bskey] // last round key + + bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v2.16b, v2.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_encrypt8) + + .align 4 +aesbs_decrypt8: + lsl x9, rounds, #7 + add bskey, bskey, x9 + + ldr q9, [bskey, #-112]! // round 0 key + ldr q8, M0ISR + ldr q24, ISR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Ldec_sbox + +.Ldec_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Ldec_sbox: + inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Ldec_done + + dec_next_rk + + add_round_key v0, v1, v6, v4, v2, v7, v3, v5 + + inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + b.ne .Ldec_loop + ldr q24, ISRM0 + b .Ldec_loop +.Ldec_done: + ldr q12, [bskey, #-16] // last round key + + bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v2.16b, v2.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_decrypt8) + + /* + * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ + .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-16]! + mov x29, sp + +99: mov x5, #1 + lsl x5, x5, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x5, x5, xzr, mi + + ld1 {v0.16b}, [x1], #16 + tbnz x5, #1, 0f + ld1 {v1.16b}, [x1], #16 + tbnz x5, #2, 0f + ld1 {v2.16b}, [x1], #16 + tbnz x5, #3, 0f + ld1 {v3.16b}, [x1], #16 + tbnz x5, #4, 0f + ld1 {v4.16b}, [x1], #16 + tbnz x5, #5, 0f + ld1 {v5.16b}, [x1], #16 + tbnz x5, #6, 0f + ld1 {v6.16b}, [x1], #16 + tbnz x5, #7, 0f + ld1 {v7.16b}, [x1], #16 + +0: mov bskey, x2 + mov rounds, x3 + bl \do8 + + st1 {\o0\().16b}, [x0], #16 + tbnz x5, #1, 1f + st1 {\o1\().16b}, [x0], #16 + tbnz x5, #2, 1f + st1 {\o2\().16b}, [x0], #16 + tbnz x5, #3, 1f + st1 {\o3\().16b}, [x0], #16 + tbnz x5, #4, 1f + st1 {\o4\().16b}, [x0], #16 + tbnz x5, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x5, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x5, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: ldp x29, x30, [sp], #16 + ret + .endm + + .align 4 +ENTRY(aesbs_ecb_encrypt) + __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_ecb_encrypt) + + .align 4 +ENTRY(aesbs_ecb_decrypt) + __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_ecb_decrypt) + + /* + * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ + .align 4 +ENTRY(aesbs_cbc_decrypt) + stp x29, x30, [sp, #-16]! + mov x29, sp + +99: mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + mov v25.16b, v0.16b + tbnz x6, #1, 0f + ld1 {v1.16b}, [x1], #16 + mov v26.16b, v1.16b + tbnz x6, #2, 0f + ld1 {v2.16b}, [x1], #16 + mov v27.16b, v2.16b + tbnz x6, #3, 0f + ld1 {v3.16b}, [x1], #16 + mov v28.16b, v3.16b + tbnz x6, #4, 0f + ld1 {v4.16b}, [x1], #16 + mov v29.16b, v4.16b + tbnz x6, #5, 0f + ld1 {v5.16b}, [x1], #16 + mov v30.16b, v5.16b + tbnz x6, #6, 0f + ld1 {v6.16b}, [x1], #16 + mov v31.16b, v6.16b + tbnz x6, #7, 0f + ld1 {v7.16b}, [x1] + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_decrypt8 + + ld1 {v24.16b}, [x5] // load IV + + eor v1.16b, v1.16b, v25.16b + eor v6.16b, v6.16b, v26.16b + eor v4.16b, v4.16b, v27.16b + eor v2.16b, v2.16b, v28.16b + eor v7.16b, v7.16b, v29.16b + eor v0.16b, v0.16b, v24.16b + eor v3.16b, v3.16b, v30.16b + eor v5.16b, v5.16b, v31.16b + + st1 {v0.16b}, [x0], #16 + mov v24.16b, v25.16b + tbnz x6, #1, 1f + st1 {v1.16b}, [x0], #16 + mov v24.16b, v26.16b + tbnz x6, #2, 1f + st1 {v6.16b}, [x0], #16 + mov v24.16b, v27.16b + tbnz x6, #3, 1f + st1 {v4.16b}, [x0], #16 + mov v24.16b, v28.16b + tbnz x6, #4, 1f + st1 {v2.16b}, [x0], #16 + mov v24.16b, v29.16b + tbnz x6, #5, 1f + st1 {v7.16b}, [x0], #16 + mov v24.16b, v30.16b + tbnz x6, #6, 1f + st1 {v3.16b}, [x0], #16 + mov v24.16b, v31.16b + tbnz x6, #7, 1f + ld1 {v24.16b}, [x1], #16 + st1 {v5.16b}, [x0], #16 +1: st1 {v24.16b}, [x5] // store IV + + cbnz x4, 99b + + ldp x29, x30, [sp], #16 + ret +ENDPROC(aesbs_cbc_decrypt) + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + + .align 4 +.Lxts_mul_x: +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) + + /* + * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +__xts_crypt8: + mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + next_tweak v26, v25, v30, v31 + eor v0.16b, v0.16b, v25.16b + tbnz x6, #1, 0f + + ld1 {v1.16b}, [x1], #16 + next_tweak v27, v26, v30, v31 + eor v1.16b, v1.16b, v26.16b + tbnz x6, #2, 0f + + ld1 {v2.16b}, [x1], #16 + next_tweak v28, v27, v30, v31 + eor v2.16b, v2.16b, v27.16b + tbnz x6, #3, 0f + + ld1 {v3.16b}, [x1], #16 + next_tweak v29, v28, v30, v31 + eor v3.16b, v3.16b, v28.16b + tbnz x6, #4, 0f + + ld1 {v4.16b}, [x1], #16 + str q29, [sp, #16] + eor v4.16b, v4.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #5, 0f + + ld1 {v5.16b}, [x1], #16 + str q29, [sp, #32] + eor v5.16b, v5.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #6, 0f + + ld1 {v6.16b}, [x1], #16 + str q29, [sp, #48] + eor v6.16b, v6.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #7, 0f + + ld1 {v7.16b}, [x1], #16 + str q29, [sp, #64] + eor v7.16b, v7.16b, v29.16b + next_tweak v29, v29, v30, v31 + +0: mov bskey, x2 + mov rounds, x3 + br x7 +ENDPROC(__xts_crypt8) + + .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-80]! + mov x29, sp + + ldr q30, .Lxts_mul_x + ld1 {v25.16b}, [x5] + +99: adr x7, \do8 + bl __xts_crypt8 + + ldp q16, q17, [sp, #16] + ldp q18, q19, [sp, #48] + + eor \o0\().16b, \o0\().16b, v25.16b + eor \o1\().16b, \o1\().16b, v26.16b + eor \o2\().16b, \o2\().16b, v27.16b + eor \o3\().16b, \o3\().16b, v28.16b + + st1 {\o0\().16b}, [x0], #16 + mov v25.16b, v26.16b + tbnz x6, #1, 1f + st1 {\o1\().16b}, [x0], #16 + mov v25.16b, v27.16b + tbnz x6, #2, 1f + st1 {\o2\().16b}, [x0], #16 + mov v25.16b, v28.16b + tbnz x6, #3, 1f + st1 {\o3\().16b}, [x0], #16 + mov v25.16b, v29.16b + tbnz x6, #4, 1f + + eor \o4\().16b, \o4\().16b, v16.16b + eor \o5\().16b, \o5\().16b, v17.16b + eor \o6\().16b, \o6\().16b, v18.16b + eor \o7\().16b, \o7\().16b, v19.16b + + st1 {\o4\().16b}, [x0], #16 + tbnz x6, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x6, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x6, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: st1 {v25.16b}, [x5] + ldp x29, x30, [sp], #80 + ret + .endm + +ENTRY(aesbs_xts_encrypt) + __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_xts_encrypt) + +ENTRY(aesbs_xts_decrypt) + __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_xts_decrypt) + + .macro next_ctr, v + mov \v\().d[1], x8 + adds x8, x8, #1 + mov \v\().d[0], x7 + adc x7, x7, xzr + rev64 \v\().16b, \v\().16b + .endm + + /* + * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 iv[], bool final) + */ +ENTRY(aesbs_ctr_encrypt) + stp x29, x30, [sp, #-16]! + mov x29, sp + + add x4, x4, x6 // do one extra block if final + + ldp x7, x8, [x5] + ld1 {v0.16b}, [x5] +CPU_LE( rev x7, x7 ) +CPU_LE( rev x8, x8 ) + adds x8, x8, #1 + adc x7, x7, xzr + +99: mov x9, #1 + lsl x9, x9, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x9, x9, xzr, le + + next_ctr v1 + next_ctr v2 + next_ctr v3 + next_ctr v4 + next_ctr v5 + next_ctr v6 + next_ctr v7 + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_encrypt8 + + lsr x9, x9, x6 // disregard the extra block + tbnz x9, #0, 0f + + ld1 {v8.16b}, [x1], #16 + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x0], #16 + tbnz x9, #1, 1f + + ld1 {v9.16b}, [x1], #16 + eor v1.16b, v1.16b, v9.16b + st1 {v1.16b}, [x0], #16 + tbnz x9, #2, 2f + + ld1 {v10.16b}, [x1], #16 + eor v4.16b, v4.16b, v10.16b + st1 {v4.16b}, [x0], #16 + tbnz x9, #3, 3f + + ld1 {v11.16b}, [x1], #16 + eor v6.16b, v6.16b, v11.16b + st1 {v6.16b}, [x0], #16 + tbnz x9, #4, 4f + + ld1 {v12.16b}, [x1], #16 + eor v3.16b, v3.16b, v12.16b + st1 {v3.16b}, [x0], #16 + tbnz x9, #5, 5f + + ld1 {v13.16b}, [x1], #16 + eor v7.16b, v7.16b, v13.16b + st1 {v7.16b}, [x0], #16 + tbnz x9, #6, 6f + + ld1 {v14.16b}, [x1], #16 + eor v2.16b, v2.16b, v14.16b + st1 {v2.16b}, [x0], #16 + tbnz x9, #7, 7f + + ld1 {v15.16b}, [x1], #16 + eor v5.16b, v5.16b, v15.16b + st1 {v5.16b}, [x0], #16 + + next_ctr v0 + cbnz x4, 99b + +0: st1 {v0.16b}, [x5] +8: ldp x29, x30, [sp], #16 + ret + + /* + * If we are handling the tail of the input (x6 == 1), return the + * final keystream block back to the caller via the IV buffer. + */ +1: cbz x6, 8b + st1 {v1.16b}, [x5] + b 8b +2: cbz x6, 8b + st1 {v4.16b}, [x5] + b 8b +3: cbz x6, 8b + st1 {v6.16b}, [x5] + b 8b +4: cbz x6, 8b + st1 {v3.16b}, [x5] + b 8b +5: cbz x6, 8b + st1 {v7.16b}, [x5] + b 8b +6: cbz x6, 8b + st1 {v2.16b}, [x5] + b 8b +7: cbz x6, 8b + st1 {v5.16b}, [x5] + b 8b +ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c new file mode 100644 index 000000000000..323dd76ae5f0 --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -0,0 +1,420 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/cbc.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/xts.h> +#include <linux/module.h> + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); + +asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); + +asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], bool final); + +asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); + +struct aesbs_ctx { + u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; + int rounds; +} __aligned(AES_BLOCK_SIZE); + +struct aesbs_cbc_ctx { + struct aesbs_ctx key; + u32 enc[AES_MAX_KEYLENGTH_U32]; +}; + +struct aesbs_xts_ctx { + struct aesbs_ctx key; + u32 twkey[AES_MAX_KEYLENGTH_U32]; +}; + +static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); + kernel_neon_end(); + + return 0; +} + +static int __ecb_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_encrypt); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_decrypt); +} + +static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + +static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + __aes_arm64_encrypt(ctx->enc, dst, src, ctx->key.rounds); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + return crypto_cbc_encrypt_walk(req, cbc_encrypt_one); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + bool final = (walk.total % AES_BLOCK_SIZE) != 0; + + if (walk.nbytes < walk.total) { + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + final = false; + } + + aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->rk, ctx->rounds, blocks, walk.iv, final); + + if (final) { + u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + + if (dst != src) + memcpy(dst, src, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, 0); + break; + } + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = xts_verify_key(tfm, in_key, key_len); + if (err) + return err; + + key_len /= 2; + err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + if (err) + return err; + + memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); + + return aesbs_setkey(tfm, in_key, key_len); +} + +static int __xts_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + __aes_arm64_encrypt(ctx->twkey, walk.iv, walk.iv, ctx->key.rounds); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, + ctx->key.rounds, blocks, walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_encrypt); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_decrypt); +} + +static struct skcipher_alg aes_algs[] = { { + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, +}, { + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs", + .base.cra_priority = 250 - 1, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, +} }; + +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static int __init aes_init(void) +{ + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + return 0; + +unregister_simds: + aes_exit(); + return err; +} + +module_init(aes_init); +module_exit(aes_exit); diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..13c85e272c2a --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-core.S @@ -0,0 +1,450 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> + + .text + .align 6 + +ENTRY(chacha20_block_xor_neon) + // x0: Input state matrix, s + // x1: 1 data block output, o + // x2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. + // + + // x0..3 = s0..3 + adr x3, ROT8 + ld1 {v0.4s-v3.4s}, [x0] + ld1 {v8.4s-v11.4s}, [x0] + ld1 {v12.4s}, [x3] + + mov x3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + ext v1.16b, v1.16b, v1.16b, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + ext v3.16b, v3.16b, v3.16b, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + ext v1.16b, v1.16b, v1.16b, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + ext v3.16b, v3.16b, v3.16b, #4 + + subs x3, x3, #1 + b.ne .Ldoubleround + + ld1 {v4.16b-v7.16b}, [x2] + + // o0 = i0 ^ (x0 + s0) + add v0.4s, v0.4s, v8.4s + eor v0.16b, v0.16b, v4.16b + + // o1 = i1 ^ (x1 + s1) + add v1.4s, v1.4s, v9.4s + eor v1.16b, v1.16b, v5.16b + + // o2 = i2 ^ (x2 + s2) + add v2.4s, v2.4s, v10.4s + eor v2.16b, v2.16b, v6.16b + + // o3 = i3 ^ (x3 + s3) + add v3.4s, v3.4s, v11.4s + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1] + + ret +ENDPROC(chacha20_block_xor_neon) + + .align 6 +ENTRY(chacha20_4block_xor_neon) + // x0: Input state matrix, s + // x1: 4 data blocks output, o + // x2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + adr x3, CTRINC // ... and ROT8 + ld1 {v30.4s-v31.4s}, [x3] + + // x0..15[0-3] = s0..3[0..3] + mov x4, x0 + ld4r { v0.4s- v3.4s}, [x4], #16 + ld4r { v4.4s- v7.4s}, [x4], #16 + ld4r { v8.4s-v11.4s}, [x4], #16 + ld4r {v12.4s-v15.4s}, [x4] + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + mov x3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + rev32 v15.8h, v15.8h + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #12 + shl v5.4s, v17.4s, #12 + shl v6.4s, v18.4s, #12 + shl v7.4s, v19.4s, #12 + + sri v4.4s, v16.4s, #20 + sri v5.4s, v17.4s, #20 + sri v6.4s, v18.4s, #20 + sri v7.4s, v19.4s, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + tbl v15.16b, {v15.16b}, v31.16b + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #7 + shl v5.4s, v17.4s, #7 + shl v6.4s, v18.4s, #7 + shl v7.4s, v19.4s, #7 + + sri v4.4s, v16.4s, #25 + sri v5.4s, v17.4s, #25 + sri v6.4s, v18.4s, #25 + sri v7.4s, v19.4s, #25 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + rev32 v15.8h, v15.8h + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #12 + shl v6.4s, v17.4s, #12 + shl v7.4s, v18.4s, #12 + shl v4.4s, v19.4s, #12 + + sri v5.4s, v16.4s, #20 + sri v6.4s, v17.4s, #20 + sri v7.4s, v18.4s, #20 + sri v4.4s, v19.4s, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + tbl v15.16b, {v15.16b}, v31.16b + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #7 + shl v6.4s, v17.4s, #7 + shl v7.4s, v18.4s, #7 + shl v4.4s, v19.4s, #7 + + sri v5.4s, v16.4s, #25 + sri v6.4s, v17.4s, #25 + sri v7.4s, v18.4s, #25 + sri v4.4s, v19.4s, #25 + + subs x3, x3, #1 + b.ne .Ldoubleround4 + + ld4r {v16.4s-v19.4s}, [x0], #16 + ld4r {v20.4s-v23.4s}, [x0], #16 + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] + add v0.4s, v0.4s, v16.4s + add v1.4s, v1.4s, v17.4s + add v2.4s, v2.4s, v18.4s + add v3.4s, v3.4s, v19.4s + + ld4r {v24.4s-v27.4s}, [x0], #16 + ld4r {v28.4s-v31.4s}, [x0] + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + add v4.4s, v4.4s, v20.4s + add v5.4s, v5.4s, v21.4s + add v6.4s, v6.4s, v22.4s + add v7.4s, v7.4s, v23.4s + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + add v8.4s, v8.4s, v24.4s + add v9.4s, v9.4s, v25.4s + add v10.4s, v10.4s, v26.4s + add v11.4s, v11.4s, v27.4s + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + add v12.4s, v12.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v14.4s, v14.4s, v30.4s + add v15.4s, v15.4s, v31.4s + + // interleave 32-bit words in state n, n+1 + zip1 v16.4s, v0.4s, v1.4s + zip2 v17.4s, v0.4s, v1.4s + zip1 v18.4s, v2.4s, v3.4s + zip2 v19.4s, v2.4s, v3.4s + zip1 v20.4s, v4.4s, v5.4s + zip2 v21.4s, v4.4s, v5.4s + zip1 v22.4s, v6.4s, v7.4s + zip2 v23.4s, v6.4s, v7.4s + zip1 v24.4s, v8.4s, v9.4s + zip2 v25.4s, v8.4s, v9.4s + zip1 v26.4s, v10.4s, v11.4s + zip2 v27.4s, v10.4s, v11.4s + zip1 v28.4s, v12.4s, v13.4s + zip2 v29.4s, v12.4s, v13.4s + zip1 v30.4s, v14.4s, v15.4s + zip2 v31.4s, v14.4s, v15.4s + + // interleave 64-bit words in state n, n+2 + zip1 v0.2d, v16.2d, v18.2d + zip2 v4.2d, v16.2d, v18.2d + zip1 v8.2d, v17.2d, v19.2d + zip2 v12.2d, v17.2d, v19.2d + ld1 {v16.16b-v19.16b}, [x2], #64 + + zip1 v1.2d, v20.2d, v22.2d + zip2 v5.2d, v20.2d, v22.2d + zip1 v9.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + ld1 {v20.16b-v23.16b}, [x2], #64 + + zip1 v2.2d, v24.2d, v26.2d + zip2 v6.2d, v24.2d, v26.2d + zip1 v10.2d, v25.2d, v27.2d + zip2 v14.2d, v25.2d, v27.2d + ld1 {v24.16b-v27.16b}, [x2], #64 + + zip1 v3.2d, v28.2d, v30.2d + zip2 v7.2d, v28.2d, v30.2d + zip1 v11.2d, v29.2d, v31.2d + zip2 v15.2d, v29.2d, v31.2d + ld1 {v28.16b-v31.16b}, [x2] + + // xor with corresponding input, write to output + eor v16.16b, v16.16b, v0.16b + eor v17.16b, v17.16b, v1.16b + eor v18.16b, v18.16b, v2.16b + eor v19.16b, v19.16b, v3.16b + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v5.16b + st1 {v16.16b-v19.16b}, [x1], #64 + eor v22.16b, v22.16b, v6.16b + eor v23.16b, v23.16b, v7.16b + eor v24.16b, v24.16b, v8.16b + eor v25.16b, v25.16b, v9.16b + st1 {v20.16b-v23.16b}, [x1], #64 + eor v26.16b, v26.16b, v10.16b + eor v27.16b, v27.16b, v11.16b + eor v28.16b, v28.16b, v12.16b + st1 {v24.16b-v27.16b}, [x1], #64 + eor v29.16b, v29.16b, v13.16b + eor v30.16b, v30.16b, v14.16b + eor v31.16b, v31.16b, v15.16b + st1 {v28.16b-v31.16b}, [x1] + + ret +ENDPROC(chacha20_4block_xor_neon) + +CTRINC: .word 0, 1, 2, 3 +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..a7f2337d46cf --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -0,0 +1,127 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/algapi.h> +#include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h new file mode 100644 index 000000000000..df411f3e083c --- /dev/null +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -0,0 +1,65 @@ +#ifndef __ASM_ASM_UACCESS_H +#define __ASM_ASM_UACCESS_H + +#include <asm/alternative.h> +#include <asm/kernel-pgtable.h> +#include <asm/sysreg.h> +#include <asm/assembler.h> + +/* + * User access enabling/disabling macros. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + .macro __uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro __uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_disable, tmp1 +alternative_if_not ARM64_HAS_PAN + __uaccess_ttbr0_disable \tmp1 +alternative_else_nop_endif + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + __uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else_nop_endif + .endm +#else + .macro uaccess_ttbr0_disable, tmp1 + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 + .endm +#endif + +/* + * These macros are no-ops when UAO is present. + */ + .macro uaccess_disable_not_uao, tmp1 + uaccess_ttbr0_disable \tmp1 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(1) +alternative_else_nop_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 + uaccess_ttbr0_enable \tmp1, \tmp2 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(0) +alternative_else_nop_endif + .endm + +#endif diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index f2bcbe2d9889..86c404171305 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -9,9 +9,17 @@ struct task_struct; +/* + * We don't use read_sysreg() as we want the compiler to cache the value where + * possible. + */ static __always_inline struct task_struct *get_current(void) { - return (struct task_struct *)read_sysreg(sp_el0); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct task_struct *)sp_el0; } #define current get_current() diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index d26750ca6e06..46da3ea638bb 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -22,8 +22,6 @@ #include <asm/kernel-pgtable.h> #include <asm/sysreg.h> -#ifndef __ASSEMBLY__ - /* * User space memory access functions */ @@ -424,66 +422,4 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#else /* __ASSEMBLY__ */ - -#include <asm/assembler.h> - -/* - * User access enabling/disabling macros. - */ -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - .macro __uaccess_ttbr0_disable, tmp1 - mrs \tmp1, ttbr1_el1 // swapper_pg_dir - add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir - msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb - .endm - - .macro __uaccess_ttbr0_enable, tmp1 - get_thread_info \tmp1 - ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 - msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 - isb - .endm - - .macro uaccess_ttbr0_disable, tmp1 -alternative_if_not ARM64_HAS_PAN - __uaccess_ttbr0_disable \tmp1 -alternative_else_nop_endif - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 -alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 -alternative_else_nop_endif - .endm -#else - .macro uaccess_ttbr0_disable, tmp1 - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 - .endm -#endif - -/* - * These macros are no-ops when UAO is present. - */ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(1) -alternative_else_nop_endif - .endm - - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(0) -alternative_else_nop_endif - .endm - -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7504f40d7ee..923841ffe4a9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -31,7 +31,7 @@ #include <asm/memory.h> #include <asm/ptrace.h> #include <asm/thread_info.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> #include <asm/unistd.h> /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index add4a1334085..e88fb99c1561 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,7 +17,7 @@ */ #include <linux/linkage.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> .text diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index fd6cd05593f9..4b5d826895ff 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -17,7 +17,7 @@ #include <linux/linkage.h> #include <asm/cache.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index d828540ded6f..47184c3a97da 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -19,7 +19,7 @@ #include <linux/linkage.h> #include <asm/cache.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> /* * Copy from user space to user space (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 3e6ae2663b82..351f0766f7a6 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -17,7 +17,7 @@ #include <linux/linkage.h> #include <asm/cache.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 17f422a4dc55..83c27b6e6dca 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -23,7 +23,7 @@ #include <asm/assembler.h> #include <asm/cpufeature.h> #include <asm/alternative.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> /* * flush_icache_range(start,end) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 290a84f3351f..e04082700bb1 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -524,7 +524,8 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; return atomic_pool_init(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a78a5c401806..156169c6981b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -88,21 +88,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); - printk(", *pud=%016llx", pud_val(*pud)); + pr_cont(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; pmd = pmd_offset(pud, addr); - printk(", *pmd=%016llx", pmd_val(*pmd)); + pr_cont(", *pmd=%016llx", pmd_val(*pmd)); if (pmd_none(*pmd) || pmd_bad(*pmd)) break; pte = pte_offset_map(pmd, addr); - printk(", *pte=%016llx", pte_val(*pte)); + pr_cont(", *pte=%016llx", pte_val(*pte)); pte_unmap(pte); } while(0); - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_ARM64_HW_AFDBM diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 212c4d1e2f26..716d1226ba69 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -401,7 +401,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 47cf3f9d89ff..947830a459d2 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,7 +49,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> -#include <linux/uaccess.h> +#include <asm/asm-uaccess.h> #include <xen/interface/xen.h> diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index 6a02b3a3fa65..e92fb190e2d6 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -521,6 +521,9 @@ void *kvm_mips_build_exit(void *addr) uasm_i_and(&p, V0, V0, AT); uasm_i_lui(&p, AT, ST0_CU0 >> 16); uasm_i_or(&p, V0, V0, AT); +#ifdef CONFIG_64BIT + uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX); +#endif uasm_i_mtc0(&p, V0, C0_STATUS); uasm_i_ehb(&p); @@ -643,7 +646,7 @@ static void *kvm_mips_build_ret_to_guest(void *addr) /* Setup status register for running guest in UM */ uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); - UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX)); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX)); uasm_i_and(&p, V1, V1, AT); uasm_i_mtc0(&p, V1, C0_STATUS); uasm_i_ehb(&p); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 06a60b19acfb..29ec9ab3fd55 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -360,8 +360,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); /* Invalidate the icache for these ranges */ - local_flush_icache_range((unsigned long)gebase, - (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); + flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); /* * Allocate comm page for guest kernel, a TLB will be reserved for diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index ef31fc24344e..552544616b9d 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -44,6 +44,8 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = LOAD_BASE ; + _text = .; + /* _s_kernel_ro must be page aligned */ . = ALIGN(PAGE_SIZE); _s_kernel_ro = .; diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 7581330ea35b..88fe0aad4390 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -49,7 +49,6 @@ struct thread_info { #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? */ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index da0d9cb63403..1e22f981cd81 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -235,9 +235,26 @@ void __init time_init(void) cr16_hz = 100 * PAGE0->mem_10msec; /* Hz */ - /* register at clocksource framework */ - clocksource_register_hz(&clocksource_cr16, cr16_hz); - /* register as sched_clock source */ sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz); } + +static int __init init_cr16_clocksource(void) +{ + /* + * The cr16 interval timers are not syncronized across CPUs, so mark + * them unstable and lower rating on SMP systems. + */ + if (num_online_cpus() > 1) { + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + } + + /* register at clocksource framework */ + clocksource_register_hz(&clocksource_cr16, + 100 * PAGE0->mem_10msec); + + return 0; +} + +device_initcall(init_cr16_clocksource); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 8ff9253930af..1a0b4f63f0e9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -234,7 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, tsk->comm, code, address); print_vma_addr(KERN_CONT " in ", regs->iaoq[0]); - pr_cont(" trap #%lu: %s%c", code, trap_name(code), + pr_cont("\ntrap #%lu: %s%c", code, trap_name(code), vma ? ',':'\n'); if (vma) diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..2c3413b0ca52 --- /dev/null +++ b/arch/s390/include/asm/asm-prototypes.h @@ -0,0 +1,8 @@ +#ifndef _ASM_S390_PROTOTYPES_H + +#include <linux/kvm_host.h> +#include <linux/ftrace.h> +#include <asm/fpu/api.h> +#include <asm-generic/asm-prototypes.h> + +#endif /* _ASM_S390_PROTOTYPES_H */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 6b246aadf311..1b5c5ee9fc1b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -94,7 +94,7 @@ static void update_mt_scaling(void) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) +static int do_account_vtime(struct task_struct *tsk) { u64 timer, clock, user, system, steal; u64 user_scaled, system_scaled; @@ -138,7 +138,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) } account_user_time(tsk, user); tsk->utimescaled += user_scaled; - account_system_time(tsk, hardirq_offset, system); + account_system_time(tsk, 0, system); tsk->stimescaled += system_scaled; steal = S390_lowcore.steal_timer; @@ -152,7 +152,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) void vtime_task_switch(struct task_struct *prev) { - do_account_vtime(prev, 0); + do_account_vtime(prev); prev->thread.user_timer = S390_lowcore.user_timer; prev->thread.system_timer = S390_lowcore.system_timer; S390_lowcore.user_timer = current->thread.user_timer; @@ -166,7 +166,7 @@ void vtime_task_switch(struct task_struct *prev) */ void vtime_account_user(struct task_struct *tsk) { - if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + if (do_account_vtime(tsk)) virt_timer_expire(); } diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..3c465184ff8a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -46,28 +46,49 @@ #ifdef __x86_64__ -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 .align 16 .Lgf128mul_x_ble_mask: .octa 0x00000000000000010000000000000087 +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 POLY: .octa 0xC2000000000000000000000000000001 +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, -# and ZERO should follow ALL_F - +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst16.MASK1, "aM", @progbits, 16 +.align 16 MASK1: .octa 0x0000000000000000ffffffffffffffff +.section .rodata.cst16.MASK2, "aM", @progbits, 16 +.align 16 MASK2: .octa 0xffffffffffffffff0000000000000000 -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 +.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 +.align 16 F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +.section .rodata.cst16.dec, "aM", @progbits, 16 +.align 16 dec: .octa 0x1 +.section .rodata.cst16.enc, "aM", @progbits, 16 +.align 16 enc: .octa 0x2 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 522ab68d1c88..d664382c6e56 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -122,23 +122,39 @@ #include <linux/linkage.h> #include <asm/inst.h> -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 - POLY: .octa 0xC2000000000000000000000000000001 + +.section .rodata.cst16.POLY2, "aM", @progbits, 16 +.align 16 POLY2: .octa 0xC20000000000000000000001C2000000 -TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 +TWOONE: .octa 0x00000001000000000000000000000001 +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 + +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 + +.section .rodata.cst16.ONEf, "aM", @progbits, 16 +.align 16 ONEf: .octa 0x01000000000000000000000000000000 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 7ff1b0c86a8e..93de8ea51548 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); @@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..f7c495e2863c 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 16(rio); \ vmovdqu y7, 15 * 16(rio); -.data + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 .align 16 #define SHUFB_BYTES(idx) \ @@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 /* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 .L0f0f0f0f: .long 0x0f0f0f0f diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..eee5b3982cfd 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 32(rio); \ vmovdqu y7, 15 * 32(rio); -.data -.align 32 +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 #define SHUFB_BYTES(idx) \ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) - .Lshufb_16x16b: .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) +.section .rodata.cst32.pack_bswap, "aM", @progbits, 32 +.align 32 .Lpack_bswap: .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + /* For CTR-mode IV byteswap */ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 /* 4-bit mask */ .L0f0f0f0f: diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 14fa1966bf01..b4a8806234ea 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -195,19 +195,29 @@ vpshufb rmask, x0, x0; \ vpshufb rmask, x1, x1; -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16 +.align 16 .Lbswap_iv_mask: .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 +.section .rodata.cst4.32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index c419389889cd..952d3156a933 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -225,8 +225,7 @@ vpshufb rmask, x2, x2; \ vpshufb rmask, x3, x3; -.data - +.section .rodata.cst16, "aM", @progbits, 16 .align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 @@ -244,10 +243,19 @@ .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 .Lrkr_dec_QBAR_QBAR_QBAR_QBAR: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.L16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 + +.section .rodata.cst4.L32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 + +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S index 16694e625f77..3a2dc3dc6cac 100644 --- a/arch/x86/crypto/chacha20-avx2-x86_64.S +++ b/arch/x86/crypto/chacha20-avx2-x86_64.S @@ -11,13 +11,18 @@ #include <linux/linkage.h> -.data +.section .rodata.cst32.ROT8, "aM", @progbits, 32 .align 32 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section .rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 CTRINC: .octa 0x00000003000000020000000100000000 .octa 0x00000007000000060000000500000004 diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S index 3a33124e9112..3f511a7d73b8 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S @@ -11,11 +11,14 @@ #include <linux/linkage.h> -.data +.section .rodata.cst16.ROT8, "aM", @progbits, 16 .align 16 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 +.section .rodata.cst16.CTRINC, "aM", @progbits, 16 +.align 16 CTRINC: .octa 0x00000003000000020000000100000000 .text diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index f910d1d449f0..1e6af1b35f7b 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -11,7 +11,7 @@ #include <crypto/algapi.h> #include <crypto/chacha20.h> -#include <linux/crypto.h> +#include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -63,36 +63,37 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int chacha20_simd(struct skcipher_request *req) { - u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *state, state_buf[16 + 2] __aligned(8); + struct skcipher_walk walk; int err; - if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(desc, dst, src, nbytes); + BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); + state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); - state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + crypto_chacha20_init(state, ctx, walk.iv); kernel_fpu_begin(); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + err = skcipher_walk_done(&walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); @@ -100,27 +101,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-simd", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, }; static int __init chacha20_simd_mod_init(void) @@ -133,12 +129,12 @@ static int __init chacha20_simd_mod_init(void) boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_simd_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_simd_mod_init); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..7a7de27c6f41 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -312,7 +312,7 @@ do_return: ret ENDPROC(crc_pcl) -.section .rodata, "a", %progbits +.section .rodata, "a", @progbits ################################################################ ## jump table Table is 129 entries x 2 bytes each ################################################################ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 35e97569d05f..de04d3e98d8d 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -554,12 +554,11 @@ _only_less_than_2: ENDPROC(crc_t10dif_pcl) -.data - +.section .rodata, "a", @progbits +.align 16 # precomputed constants # these constants are precomputed from the poly: # 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 # Q = 0x18BB70000 # rk1 = 2^(32*3) mod Q << 32 # rk2 = 2^(32*5) mod Q << 32 @@ -613,14 +612,23 @@ rk20: +.section .rodata.cst16.mask1, "aM", @progbits, 16 +.align 16 mask1: .octa 0x80808080808080808080808080808080 + +.section .rodata.cst16.mask2, "aM", @progbits, 16 +.align 16 mask2: .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32 +.align 32 pshufb_shf_table: # use these values for shift constants for the pshufb instruction # different alignments result in values as shown: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 038f6ae87c5e..f3e91647ca27 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) -.data +.section .rodata, "a", @progbits .align 16 .L_s1: .quad 0x0010100001010400, 0x0000000000000000 diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index eed55c8cca4f..f94375a8dcd1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -20,8 +20,7 @@ #include <asm/inst.h> #include <asm/frame.h> -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index eff2f414e22b..3b6e70d085da 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -11,11 +11,13 @@ #include <linux/linkage.h> -.data +.section .rodata.cst32.ANMASK, "aM", @progbits, 32 .align 32 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst32.ORMASK, "aM", @progbits, 32 +.align 32 ORMASK: .octa 0x00000000010000000000000001000000 .octa 0x00000000010000000000000001000000 diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 338c748054ed..c88c670cb5fc 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -11,10 +11,12 @@ #include <linux/linkage.h> -.data +.section .rodata.cst16.ANMASK, "aM", @progbits, 16 .align 16 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst16.ORMASK, "aM", @progbits, 16 +.align 16 ORMASK: .octa 0x00000000010000000000000001000000 .text diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 8be571808342..2925077f8c6a 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -29,11 +29,12 @@ .file "serpent-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 97c48add33ed..d67888f2a52a 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,13 +20,18 @@ .file "serpent-avx2-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_0: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_1: .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 96df6a39d7e2..93b945597ecf 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha1_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S index 63a0d9c8e31f..7a93b1c0d69a 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S @@ -203,8 +203,7 @@ return_null: ENDPROC(sha1_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S index c9dae1cd2919..20f77aa633de 100644 --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S @@ -461,21 +461,32 @@ lloop: ENDPROC(sha1_x8_avx2) -.data - +.section .rodata.cst32.K00_19, "aM", @progbits, 32 .align 32 K00_19: .octa 0x5A8279995A8279995A8279995A827999 .octa 0x5A8279995A8279995A8279995A827999 + +.section .rodata.cst32.K20_39, "aM", @progbits, 32 +.align 32 K20_39: .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 + +.section .rodata.cst32.K40_59, "aM", @progbits, 32 +.align 32 K40_59: .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC + +.section .rodata.cst32.K60_79, "aM", @progbits, 32 +.align 32 K60_79: .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 874a651b9e7d..ebbdba72ae07 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform) ret ENDPROC(sha1_ni_transform) -.data - -.align 64 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 UPPER_WORD_MASK: .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 92b3b5d75ba9..e08888a1a5f2 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -463,7 +463,7 @@ done_hash: ret ENDPROC(sha256_transform_avx) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -483,14 +483,21 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 570ec5ec62d7..89c8f09787d2 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -723,7 +723,7 @@ done_hash: ret ENDPROC(sha256_transform_rorx) -.data +.section .rodata.cst512.K256, "aM", @progbits, 512 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -759,14 +759,21 @@ K256: .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 # shuffle xBxA -> 00BA +.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32 +.align 32 _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 # shuffle xDxC -> DC00 +.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32 +.align 32 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index a78a0694ddef..8fe6338bcc84 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha256_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S index 7ea670e25acc..b36ae7454084 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -208,8 +208,7 @@ return_null: ENDPROC(sha256_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S index aa21aea4c722..1687c80c5995 100644 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -437,7 +437,8 @@ Lrounds_16_xx: ret ENDPROC(sha256_x8_avx2) -.data + +.section .rodata.K256_8, "a", @progbits .align 64 K256_8: .octa 0x428a2f98428a2f98428a2f98428a2f98 @@ -568,10 +569,14 @@ K256_8: .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 .octa 0xc67178f2c67178f2c67178f2c67178f2 .octa 0xc67178f2c67178f2c67178f2c67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 .global K256 K256: diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 2cedc44e8121..39b83c93e7fd 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -474,7 +474,7 @@ done_hash: ret ENDPROC(sha256_transform_ssse3) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -494,13 +494,19 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 748cdf21a938..fb58f58ecfbc 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform) ret ENDPROC(sha256_ni_transform) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -349,5 +349,7 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 565274d6a641..39235fefe6f7 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 1f20b35d8573..7f5f6c6ec72e 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx) ######################################################################## ### Binary Data -.data +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 .align 64 # K[t] used in SHA512 hashing K512: @@ -730,14 +733,17 @@ K512: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 .align 32 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 +.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32 +.align 32 MASK_YMM_LO: .octa 0x00000000000000000000000000000000 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S index 3ddba19a0db6..7c629caebc05 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S @@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2) pop %rbx ret ENDPROC(sha512_mb_mgr_get_comp_job_avx2) -.data -.align 16 +.section .rodata.cst8.one, "aM", @progbits, 8 +.align 8 one: .quad 1 + +.section .rodata.cst8.two, "aM", @progbits, 8 +.align 8 two: .quad 2 + +.section .rodata.cst8.three, "aM", @progbits, 8 +.align 8 three: .quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S index 815f07bdd1f8..4ba709ba78e5 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -209,8 +209,9 @@ return_null: xor job_rax, job_rax jmp return ENDPROC(sha512_mb_mgr_submit_avx2) -.data +/* UNUSED? +.section .rodata.cst16, "aM", @progbits, 16 .align 16 H0: .int 0x6a09e667 H1: .int 0xbb67ae85 @@ -220,3 +221,4 @@ H4: .int 0x510e527f H5: .int 0x9b05688c H6: .int 0x1f83d9ab H7: .int 0x5be0cd19 +*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S index 31ab1eff6413..e22e907643a6 100644 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -361,7 +361,7 @@ Lrounds_16_xx: ret ENDPROC(sha512_x4_avx2) -.data +.section .rodata.K512_4, "a", @progbits .align 64 K512_4: .octa 0x428a2f98d728ae22428a2f98d728ae22,\ @@ -525,5 +525,7 @@ K512_4: .octa 0x6c44198c4a4758176c44198c4a475817,\ 0x6c44198c4a4758176c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index e610e29cbc81..66bbd9058a90 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index dc66273e610d..b3f49d286348 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -29,11 +29,13 @@ .file "twofish-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 68557f52b961..854022772c5b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } +static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1\n\t" + CC_SET(s) + : CC_OUT(s) (negative), ADDR + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; +} + +// Let everybody know we have it +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte + /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ffacfdcacb85..a5fd137417a2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1182,6 +1182,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = get_name(bank, NULL); int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index b47edb8f5256..410efb2c7b80 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -68,12 +68,10 @@ static struct dma_map_ops swiotlb_dma_ops = { */ int __init pci_swiotlb_detect_override(void) { - int use_swiotlb = swiotlb | swiotlb_force; - - if (swiotlb_force) + if (swiotlb_force == SWIOTLB_FORCE) swiotlb = 1; - return use_swiotlb; + return swiotlb; } IOMMU_INIT_FINISH(pci_swiotlb_detect_override, pci_xen_swiotlb_detect, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 24db5fb6f575..a236decb81e4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -132,12 +132,6 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 -#define VMX_VPID_EXTENT_SUPPORTED_MASK \ - (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ - VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ - VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ - VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) - /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -10473,12 +10467,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) { nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); - goto out; + return 1; } if (vmcs12->vmcs_link_pointer != -1ull) { nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); - goto out; + return 1; } /* @@ -10498,7 +10492,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); - goto out; + return 1; } } @@ -10516,7 +10510,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); - goto out; + return 1; } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51ccfe08e32f..2f22810a7e0c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3070,6 +3070,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } +static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); + static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -3106,10 +3108,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { + u32 hflags = vcpu->arch.hflags; if (events->smi.smm) - vcpu->arch.hflags |= HF_SMM_MASK; + hflags |= HF_SMM_MASK; else - vcpu->arch.hflags &= ~HF_SMM_MASK; + hflags &= ~HF_SMM_MASK; + kvm_set_hflags(vcpu, hflags); + vcpu->arch.smi_pending = events->smi.pending; if (events->smi.smm_inside_nmi) vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index a9fafb5c8738..a0b36a9d5df1 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -48,7 +48,7 @@ int __init pci_xen_swiotlb_detect(void) * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force)) + if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8c394e30e5fe..f3f7b41116f7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void) size = PFN_PHYS(xen_start_info->nr_p2m_frames); } - if (!xen_is_e820_reserved(start, size)) { - memblock_reserve(start, size); + memblock_reserve(start, size); + if (!xen_is_e820_reserved(start, size)) return; - } #ifdef CONFIG_X86_32 /* @@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void) BUG(); #else xen_relocate_p2m(); + memblock_free(start, size); #endif } diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6e82769f4042..f0a9c07b4c7a 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -544,6 +544,8 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, * the timer to kick off queuing again. */ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) + __releases(lock) + __acquires(lock) { struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd()); DEFINE_WAIT(wait); @@ -558,13 +560,12 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) if (may_queue(rwb, rqw, &wait, rw)) break; - if (lock) + if (lock) { spin_unlock_irq(lock); - - io_schedule(); - - if (lock) + io_schedule(); spin_lock_irq(lock); + } else + io_schedule(); } while (1); finish_wait(&rqw->wait, &wait); @@ -595,7 +596,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. */ -unsigned int wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) +enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) { unsigned int ret = 0; diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index d676fc59521a..d880a4897159 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/scatterwalk.h> @@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; @@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; diff --git a/crypto/acompress.c b/crypto/acompress.c index 887783d8e9a9..47d11627cd20 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -20,6 +20,7 @@ #include <linux/crypto.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/internal/acompress.h> #include <crypto/internal/scompress.h> @@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/aead.c b/crypto/aead.c index 3f5c5ff004ab..f794b30a9407 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) { struct aead_alg *aead = container_of(alg, struct aead_alg, base); diff --git a/crypto/ahash.c b/crypto/ahash.c index 2ce8bcb9049c..e58c4970c22b 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : ahash\n"); diff --git a/crypto/akcipher.c b/crypto/akcipher.c index def301ed1288..cfbdb06d8ca8 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/crypto.h> +#include <linux/compiler.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> #include <net/netlink.h> @@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index d19b09cdf284..54fc90e8339c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a832426820e8..6c43a0a17a55 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -1,6 +1,6 @@ /* * Block chaining cipher operations. - * + * * Generic encrypt/decrypt wrapper for ciphers, handles operations across * multiple page boundaries by using temporary blocks. In user context, * the kernel is given a chance to schedule us once per page. @@ -9,7 +9,7 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : blkcipher\n"); diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 1cab83146e33..8b3c04d625c3 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -10,10 +10,9 @@ */ #include <crypto/algapi.h> -#include <linux/crypto.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/module.h> static inline u32 le32_to_cpuvp(const void *p) { @@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) } EXPORT_SYMBOL_GPL(crypto_chacha20_init); -int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize) { - struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); int i; if (keysize != CHACHA20_KEY_SIZE) @@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); -int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int crypto_chacha20_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; u32 state[16]; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + err = skcipher_walk_virt(&walk, req, true); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } + crypto_chacha20_init(state, ctx, walk.iv); - if (walk.nbytes) { + while (walk.nbytes > 0) { chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } return err; } EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha20_crypt, + .decrypt = crypto_chacha20_crypt, }; static int __init chacha20_generic_mod_init(void) { - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_generic_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_generic_mod_init); diff --git a/crypto/cts.c b/crypto/cts.c index 00254d76b21b..a1335d6c35fb 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -49,6 +49,7 @@ #include <linux/scatterlist.h> #include <crypto/scatterwalk.h> #include <linux/slab.h> +#include <linux/compiler.h> struct crypto_cts_ctx { struct crypto_skcipher *child; @@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; @@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; diff --git a/crypto/kpp.c b/crypto/kpp.c index d36ce05eee43..a90edc27af77 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -19,6 +19,7 @@ #include <linux/crypto.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/kpp.h> #include <crypto/internal/kpp.h> @@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/pcbc.c b/crypto/pcbc.c index e4538e07f7ca..11d248673ad4 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/compiler.h> struct crypto_pcbc_ctx { struct crypto_cipher *child; @@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *iv = walk->iv; - u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32)))); + u8 tmpbuf[bsize] __aligned(__alignof__(u32)); do { memcpy(tmpbuf, src, bsize); diff --git a/crypto/rng.c b/crypto/rng.c index b81cffb13bab..f46dac5288b9 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : rng\n"); diff --git a/crypto/scompress.c b/crypto/scompress.c index 35e396d154b7..6b048b36312d 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/crypto.h> +#include <linux/compiler.h> #include <linux/vmalloc.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> @@ -57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/shash.c b/crypto/shash.c index a051541a4a17..5e31c8d776df 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -19,6 +19,7 @@ #include <linux/seq_file.h> #include <linux/cryptouser.h> #include <net/netlink.h> +#include <linux/compiler.h> #include "internal.h" @@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey); static inline unsigned int shash_align_buffer_size(unsigned len, unsigned long mask) { - typedef u8 __attribute__ ((aligned)) u8_aligned; + typedef u8 __aligned_largest u8_aligned; return len + (mask & ~(__alignof__(u8_aligned) - 1)); } @@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned int unaligned_len = alignmask + 1 - ((unsigned long)data & alignmask); u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) struct shash_alg *shash = crypto_shash_alg(tfm); unsigned int ds = crypto_shash_digestsize(tfm); u8 ubuf[shash_align_buffer_size(ds, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) { struct shash_alg *salg = __crypto_shash_alg(alg); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 0e1e6c35188e..014af741fc6a 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -19,6 +19,7 @@ #include <crypto/scatterwalk.h> #include <linux/bug.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <linux/list.h> #include <linux/module.h> #include <linux/rtnetlink.h> @@ -185,12 +186,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err) data = p->data; if (!data) { data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); - data = skcipher_get_spot(data, walk->chunksize); + data = skcipher_get_spot(data, walk->stride); } scatterwalk_copychunks(data, &p->dst, p->len, 1); - if (offset_in_page(p->data) + p->len + walk->chunksize > + if (offset_in_page(p->data) + p->len + walk->stride > PAGE_SIZE) free_page((unsigned long)p->data); @@ -299,7 +300,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk) p->len = walk->nbytes; skcipher_queue_write(walk, p); - if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize > + if (offset_in_page(walk->page) + walk->nbytes + walk->stride > PAGE_SIZE) walk->page = NULL; else @@ -344,7 +345,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk) SKCIPHER_WALK_DIFF); n = walk->total; - bsize = min(walk->chunksize, max(n, walk->blocksize)); + bsize = min(walk->stride, max(n, walk->blocksize)); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); @@ -393,7 +394,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) unsigned a = crypto_tfm_ctx_alignment() - 1; unsigned alignmask = walk->alignmask; unsigned ivsize = walk->ivsize; - unsigned bs = walk->chunksize; + unsigned bs = walk->stride; unsigned aligned_bs; unsigned size; u8 *iv; @@ -463,7 +464,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, SKCIPHER_WALK_SLEEP : 0; walk->blocksize = crypto_skcipher_blocksize(tfm); - walk->chunksize = crypto_skcipher_chunksize(tfm); + walk->stride = crypto_skcipher_walksize(tfm); walk->ivsize = crypto_skcipher_ivsize(tfm); walk->alignmask = crypto_skcipher_alignmask(tfm); @@ -525,7 +526,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->blocksize = crypto_aead_blocksize(tfm); - walk->chunksize = crypto_aead_chunksize(tfm); + walk->stride = crypto_aead_chunksize(tfm); walk->ivsize = crypto_aead_ivsize(tfm); walk->alignmask = crypto_aead_alignmask(tfm); @@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst) } static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, @@ -821,6 +822,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); seq_printf(m, "ivsize : %u\n", skcipher->ivsize); seq_printf(m, "chunksize : %u\n", skcipher->chunksize); + seq_printf(m, "walksize : %u\n", skcipher->walksize); } #ifdef CONFIG_NET @@ -893,11 +895,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; - if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || + alg->walksize > PAGE_SIZE / 8) return -EINVAL; if (!alg->chunksize) alg->chunksize = base->cra_blocksize; + if (!alg->walksize) + alg->walksize = alg->chunksize; base->cra_type = &crypto_skcipher_type2; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ae22f05d5936..9a11f3c2bf98 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -22,6 +22,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <crypto/aead.h> #include <crypto/hash.h> #include <crypto/skcipher.h> @@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg) { int ret; + pr_debug("testing %s\n", alg); + ret = alg_test(alg, alg, 0, 0); /* non-fips algs return -EINVAL in fips mode */ if (fips_enabled && ret == -EINVAL) @@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void) if (err) { printk(KERN_ERR "tcrypt: one or more tests failed!\n"); goto err_free_tv; + } else { + pr_debug("all tests passed\n"); } /* We intentionaly return -EAGAIN to prevent keeping the module, diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 44e888b0b041..98eb09782db8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -265,6 +265,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); + size_t digest_size = crypto_ahash_digestsize(tfm); unsigned int i, j, k, temp; struct scatterlist sg[8]; char *result; @@ -275,7 +276,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, char *xbuf[XBUFSIZE]; int ret = -ENOMEM; - result = kmalloc(MAX_DIGEST_SIZE, GFP_KERNEL); + result = kmalloc(digest_size, GFP_KERNEL); if (!result) return ret; key = kmalloc(MAX_KEYLEN, GFP_KERNEL); @@ -305,7 +306,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, goto out; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); hash_buff = xbuf[0]; hash_buff += align_offset; @@ -380,7 +381,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); temp = 0; sg_init_table(sg, template[i].np); @@ -458,7 +459,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); ret = -EINVAL; hash_buff = xbuf[0]; @@ -1463,13 +1464,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = ctemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -1525,13 +1525,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = dtemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(dtemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -2251,30 +2250,23 @@ static int alg_test_null(const struct alg_test_desc *desc, return 0; } +#define __VECS(tv) { .vecs = tv, .count = ARRAY_SIZE(tv) } + /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { - .cprng = { - .vecs = ansi_cprng_aes_tv_template, - .count = ANSI_CPRNG_AES_TEST_VECTORS - } + .cprng = __VECS(ansi_cprng_aes_tv_template) } }, { .alg = "authenc(hmac(md5),ecb(cipher_null))", .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = hmac_md5_ecb_cipher_null_enc_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS - }, - .dec = { - .vecs = hmac_md5_ecb_cipher_null_dec_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS - } + .enc = __VECS(hmac_md5_ecb_cipher_null_enc_tv_template), + .dec = __VECS(hmac_md5_ecb_cipher_null_dec_tv_template) } } }, { @@ -2282,12 +2274,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA1_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp) } } }, { @@ -2295,12 +2282,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des_cbc_enc_tv_temp) } } }, { @@ -2309,12 +2291,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2326,18 +2303,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_ecb_cipher_null_enc_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC - }, - .dec = { - .vecs = - hmac_sha1_ecb_cipher_null_dec_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC - } + .enc = __VECS(hmac_sha1_ecb_cipher_null_enc_tv_temp), + .dec = __VECS(hmac_sha1_ecb_cipher_null_dec_tv_temp) } } }, { @@ -2349,12 +2316,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des_cbc_enc_tv_temp) } } }, { @@ -2363,12 +2325,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2377,12 +2334,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA256_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_aes_cbc_enc_tv_temp) } } }, { @@ -2390,12 +2342,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des_cbc_enc_tv_temp) } } }, { @@ -2404,12 +2351,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2425,12 +2367,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des_cbc_enc_tv_temp, - .count = - HMAC_SHA384_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des_cbc_enc_tv_temp) } } }, { @@ -2439,12 +2376,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2461,12 +2393,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA512_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_aes_cbc_enc_tv_temp) } } }, { @@ -2474,12 +2401,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des_cbc_enc_tv_temp) } } }, { @@ -2488,12 +2410,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2510,14 +2427,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_cbc_enc_tv_template, - .count = AES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_cbc_dec_tv_template, - .count = AES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(aes_cbc_enc_tv_template), + .dec = __VECS(aes_cbc_dec_tv_template) } } }, { @@ -2525,14 +2436,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_cbc_enc_tv_template, - .count = ANUBIS_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_cbc_dec_tv_template, - .count = ANUBIS_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_cbc_enc_tv_template), + .dec = __VECS(anubis_cbc_dec_tv_template) } } }, { @@ -2540,14 +2445,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_cbc_enc_tv_template, - .count = BF_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_cbc_dec_tv_template, - .count = BF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(bf_cbc_enc_tv_template), + .dec = __VECS(bf_cbc_dec_tv_template) } } }, { @@ -2555,14 +2454,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_cbc_enc_tv_template, - .count = CAMELLIA_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_cbc_dec_tv_template, - .count = CAMELLIA_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_cbc_enc_tv_template), + .dec = __VECS(camellia_cbc_dec_tv_template) } } }, { @@ -2570,14 +2463,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_cbc_enc_tv_template, - .count = CAST5_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_cbc_dec_tv_template, - .count = CAST5_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_cbc_enc_tv_template), + .dec = __VECS(cast5_cbc_dec_tv_template) } } }, { @@ -2585,14 +2472,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_cbc_enc_tv_template, - .count = CAST6_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_cbc_dec_tv_template, - .count = CAST6_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_cbc_enc_tv_template), + .dec = __VECS(cast6_cbc_dec_tv_template) } } }, { @@ -2600,14 +2481,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_cbc_enc_tv_template, - .count = DES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_cbc_dec_tv_template, - .count = DES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des_cbc_enc_tv_template), + .dec = __VECS(des_cbc_dec_tv_template) } } }, { @@ -2616,14 +2491,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_cbc_enc_tv_template, - .count = DES3_EDE_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_cbc_dec_tv_template, - .count = DES3_EDE_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_cbc_enc_tv_template), + .dec = __VECS(des3_ede_cbc_dec_tv_template) } } }, { @@ -2631,14 +2500,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_cbc_enc_tv_template, - .count = SERPENT_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_cbc_dec_tv_template, - .count = SERPENT_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_cbc_enc_tv_template), + .dec = __VECS(serpent_cbc_dec_tv_template) } } }, { @@ -2646,14 +2509,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_cbc_enc_tv_template, - .count = TF_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_cbc_dec_tv_template, - .count = TF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(tf_cbc_enc_tv_template), + .dec = __VECS(tf_cbc_dec_tv_template) } } }, { @@ -2662,14 +2519,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_enc_tv_template, - .count = AES_CCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_dec_tv_template, - .count = AES_CCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_enc_tv_template), + .dec = __VECS(aes_ccm_dec_tv_template) } } }, { @@ -2677,14 +2528,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, - .dec = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, + .enc = __VECS(chacha20_enc_tv_template), + .dec = __VECS(chacha20_enc_tv_template), } } }, { @@ -2692,20 +2537,14 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_cmac128_tv_template, - .count = CMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_cmac128_tv_template) } }, { .alg = "cmac(des3_ede)", .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = des3_ede_cmac64_tv_template, - .count = CMAC_DES3_EDE_TEST_VECTORS - } + .hash = __VECS(des3_ede_cmac64_tv_template) } }, { .alg = "compress_null", @@ -2714,30 +2553,21 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "crc32", .test = alg_test_hash, .suite = { - .hash = { - .vecs = crc32_tv_template, - .count = CRC32_TEST_VECTORS - } + .hash = __VECS(crc32_tv_template) } }, { .alg = "crc32c", .test = alg_test_crc32c, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crc32c_tv_template, - .count = CRC32C_TEST_VECTORS - } + .hash = __VECS(crc32c_tv_template) } }, { .alg = "crct10dif", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crct10dif_tv_template, - .count = CRCT10DIF_TEST_VECTORS - } + .hash = __VECS(crct10dif_tv_template) } }, { .alg = "ctr(aes)", @@ -2745,14 +2575,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_enc_tv_template, - .count = AES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_dec_tv_template, - .count = AES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_enc_tv_template), + .dec = __VECS(aes_ctr_dec_tv_template) } } }, { @@ -2760,14 +2584,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_ctr_enc_tv_template, - .count = BF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_ctr_dec_tv_template, - .count = BF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(bf_ctr_enc_tv_template), + .dec = __VECS(bf_ctr_dec_tv_template) } } }, { @@ -2775,14 +2593,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_ctr_enc_tv_template, - .count = CAMELLIA_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_ctr_dec_tv_template, - .count = CAMELLIA_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_ctr_enc_tv_template), + .dec = __VECS(camellia_ctr_dec_tv_template) } } }, { @@ -2790,14 +2602,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_ctr_enc_tv_template, - .count = CAST5_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_ctr_dec_tv_template, - .count = CAST5_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_ctr_enc_tv_template), + .dec = __VECS(cast5_ctr_dec_tv_template) } } }, { @@ -2805,14 +2611,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_ctr_enc_tv_template, - .count = CAST6_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_ctr_dec_tv_template, - .count = CAST6_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_ctr_enc_tv_template), + .dec = __VECS(cast6_ctr_dec_tv_template) } } }, { @@ -2820,14 +2620,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_ctr_enc_tv_template, - .count = DES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_ctr_dec_tv_template, - .count = DES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des_ctr_enc_tv_template), + .dec = __VECS(des_ctr_dec_tv_template) } } }, { @@ -2835,14 +2629,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_ctr_enc_tv_template, - .count = DES3_EDE_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_ctr_dec_tv_template, - .count = DES3_EDE_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_ctr_enc_tv_template), + .dec = __VECS(des3_ede_ctr_dec_tv_template) } } }, { @@ -2850,14 +2638,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_ctr_enc_tv_template, - .count = SERPENT_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_ctr_dec_tv_template, - .count = SERPENT_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_ctr_enc_tv_template), + .dec = __VECS(serpent_ctr_dec_tv_template) } } }, { @@ -2865,14 +2647,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_ctr_enc_tv_template, - .count = TF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_ctr_dec_tv_template, - .count = TF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(tf_ctr_enc_tv_template), + .dec = __VECS(tf_ctr_dec_tv_template) } } }, { @@ -2880,14 +2656,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cts_mode_enc_tv_template, - .count = CTS_MODE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cts_mode_dec_tv_template, - .count = CTS_MODE_DEC_TEST_VECTORS - } + .enc = __VECS(cts_mode_enc_tv_template), + .dec = __VECS(cts_mode_dec_tv_template) } } }, { @@ -2896,14 +2666,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = deflate_comp_tv_template, - .count = DEFLATE_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = deflate_decomp_tv_template, - .count = DEFLATE_DECOMP_TEST_VECTORS - } + .comp = __VECS(deflate_comp_tv_template), + .decomp = __VECS(deflate_decomp_tv_template) } } }, { @@ -2911,10 +2675,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = dh_tv_template, - .count = DH_TEST_VECTORS - } + .kpp = __VECS(dh_tv_template) } }, { .alg = "digest_null", @@ -2924,30 +2685,21 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes128_tv_template) } }, { .alg = "drbg_nopr_ctr_aes192", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes192_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes192_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes192_tv_template) } }, { .alg = "drbg_nopr_ctr_aes256", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes256_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes256_tv_template) } }, { /* @@ -2962,11 +2714,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_hmac_sha256_tv_template, - .count = - ARRAY_SIZE(drbg_nopr_hmac_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_hmac_sha256_tv_template) } }, { /* covered by drbg_nopr_hmac_sha256 test */ @@ -2986,10 +2734,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_sha256_tv_template) } }, { /* covered by drbg_nopr_sha256 test */ @@ -3005,10 +2750,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_pr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_pr_ctr_aes128_tv_template) } }, { /* covered by drbg_pr_ctr_aes128 test */ @@ -3028,10 +2770,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_hmac_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_hmac_sha256_tv_template) - } + .drbg = __VECS(drbg_pr_hmac_sha256_tv_template) } }, { /* covered by drbg_pr_hmac_sha256 test */ @@ -3051,10 +2790,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_sha256_tv_template) - } + .drbg = __VECS(drbg_pr_sha256_tv_template) } }, { /* covered by drbg_pr_sha256 test */ @@ -3071,14 +2807,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_enc_tv_template, - .count = AES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_dec_tv_template, - .count = AES_DEC_TEST_VECTORS - } + .enc = __VECS(aes_enc_tv_template), + .dec = __VECS(aes_dec_tv_template) } } }, { @@ -3086,14 +2816,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_enc_tv_template, - .count = ANUBIS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_dec_tv_template, - .count = ANUBIS_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_enc_tv_template), + .dec = __VECS(anubis_dec_tv_template) } } }, { @@ -3101,14 +2825,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = arc4_enc_tv_template, - .count = ARC4_ENC_TEST_VECTORS - }, - .dec = { - .vecs = arc4_dec_tv_template, - .count = ARC4_DEC_TEST_VECTORS - } + .enc = __VECS(arc4_enc_tv_template), + .dec = __VECS(arc4_dec_tv_template) } } }, { @@ -3116,14 +2834,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_enc_tv_template, - .count = BF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_dec_tv_template, - .count = BF_DEC_TEST_VECTORS - } + .enc = __VECS(bf_enc_tv_template), + .dec = __VECS(bf_dec_tv_template) } } }, { @@ -3131,14 +2843,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_enc_tv_template, - .count = CAMELLIA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_dec_tv_template, - .count = CAMELLIA_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_enc_tv_template), + .dec = __VECS(camellia_dec_tv_template) } } }, { @@ -3146,14 +2852,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_enc_tv_template, - .count = CAST5_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_dec_tv_template, - .count = CAST5_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_enc_tv_template), + .dec = __VECS(cast5_dec_tv_template) } } }, { @@ -3161,14 +2861,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_enc_tv_template, - .count = CAST6_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_dec_tv_template, - .count = CAST6_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_enc_tv_template), + .dec = __VECS(cast6_dec_tv_template) } } }, { @@ -3179,14 +2873,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_enc_tv_template, - .count = DES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_dec_tv_template, - .count = DES_DEC_TEST_VECTORS - } + .enc = __VECS(des_enc_tv_template), + .dec = __VECS(des_dec_tv_template) } } }, { @@ -3195,14 +2883,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_enc_tv_template, - .count = DES3_EDE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_dec_tv_template, - .count = DES3_EDE_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_enc_tv_template), + .dec = __VECS(des3_ede_dec_tv_template) } } }, { @@ -3225,14 +2907,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = khazad_enc_tv_template, - .count = KHAZAD_ENC_TEST_VECTORS - }, - .dec = { - .vecs = khazad_dec_tv_template, - .count = KHAZAD_DEC_TEST_VECTORS - } + .enc = __VECS(khazad_enc_tv_template), + .dec = __VECS(khazad_dec_tv_template) } } }, { @@ -3240,14 +2916,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = seed_enc_tv_template, - .count = SEED_ENC_TEST_VECTORS - }, - .dec = { - .vecs = seed_dec_tv_template, - .count = SEED_DEC_TEST_VECTORS - } + .enc = __VECS(seed_enc_tv_template), + .dec = __VECS(seed_dec_tv_template) } } }, { @@ -3255,14 +2925,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_enc_tv_template, - .count = SERPENT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_dec_tv_template, - .count = SERPENT_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_enc_tv_template), + .dec = __VECS(serpent_dec_tv_template) } } }, { @@ -3270,14 +2934,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tea_enc_tv_template, - .count = TEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tea_dec_tv_template, - .count = TEA_DEC_TEST_VECTORS - } + .enc = __VECS(tea_enc_tv_template), + .dec = __VECS(tea_dec_tv_template) } } }, { @@ -3285,14 +2943,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tnepres_enc_tv_template, - .count = TNEPRES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tnepres_dec_tv_template, - .count = TNEPRES_DEC_TEST_VECTORS - } + .enc = __VECS(tnepres_enc_tv_template), + .dec = __VECS(tnepres_dec_tv_template) } } }, { @@ -3300,14 +2952,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_enc_tv_template, - .count = TF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_dec_tv_template, - .count = TF_DEC_TEST_VECTORS - } + .enc = __VECS(tf_enc_tv_template), + .dec = __VECS(tf_dec_tv_template) } } }, { @@ -3315,14 +2961,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = xeta_enc_tv_template, - .count = XETA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xeta_dec_tv_template, - .count = XETA_DEC_TEST_VECTORS - } + .enc = __VECS(xeta_enc_tv_template), + .dec = __VECS(xeta_dec_tv_template) } } }, { @@ -3330,14 +2970,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = xtea_enc_tv_template, - .count = XTEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xtea_dec_tv_template, - .count = XTEA_DEC_TEST_VECTORS - } + .enc = __VECS(xtea_enc_tv_template), + .dec = __VECS(xtea_dec_tv_template) } } }, { @@ -3345,10 +2979,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = ecdh_tv_template, - .count = ECDH_TEST_VECTORS - } + .kpp = __VECS(ecdh_tv_template) } }, { .alg = "gcm(aes)", @@ -3356,14 +2987,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_enc_tv_template, - .count = AES_GCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_dec_tv_template, - .count = AES_GCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_enc_tv_template), + .dec = __VECS(aes_gcm_dec_tv_template) } } }, { @@ -3371,136 +2996,94 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = ghash_tv_template, - .count = GHASH_TEST_VECTORS - } + .hash = __VECS(ghash_tv_template) } }, { .alg = "hmac(crc32)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = bfin_crc_tv_template, - .count = BFIN_CRC_TEST_VECTORS - } + .hash = __VECS(bfin_crc_tv_template) } }, { .alg = "hmac(md5)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_md5_tv_template, - .count = HMAC_MD5_TEST_VECTORS - } + .hash = __VECS(hmac_md5_tv_template) } }, { .alg = "hmac(rmd128)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_rmd128_tv_template, - .count = HMAC_RMD128_TEST_VECTORS - } + .hash = __VECS(hmac_rmd128_tv_template) } }, { .alg = "hmac(rmd160)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_rmd160_tv_template, - .count = HMAC_RMD160_TEST_VECTORS - } + .hash = __VECS(hmac_rmd160_tv_template) } }, { .alg = "hmac(sha1)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha1_tv_template, - .count = HMAC_SHA1_TEST_VECTORS - } + .hash = __VECS(hmac_sha1_tv_template) } }, { .alg = "hmac(sha224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha224_tv_template, - .count = HMAC_SHA224_TEST_VECTORS - } + .hash = __VECS(hmac_sha224_tv_template) } }, { .alg = "hmac(sha256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha256_tv_template, - .count = HMAC_SHA256_TEST_VECTORS - } + .hash = __VECS(hmac_sha256_tv_template) } }, { .alg = "hmac(sha3-224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_224_tv_template, - .count = HMAC_SHA3_224_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_224_tv_template) } }, { .alg = "hmac(sha3-256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_256_tv_template, - .count = HMAC_SHA3_256_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_256_tv_template) } }, { .alg = "hmac(sha3-384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_384_tv_template, - .count = HMAC_SHA3_384_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_384_tv_template) } }, { .alg = "hmac(sha3-512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_512_tv_template, - .count = HMAC_SHA3_512_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_512_tv_template) } }, { .alg = "hmac(sha384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha384_tv_template, - .count = HMAC_SHA384_TEST_VECTORS - } + .hash = __VECS(hmac_sha384_tv_template) } }, { .alg = "hmac(sha512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha512_tv_template, - .count = HMAC_SHA512_TEST_VECTORS - } + .hash = __VECS(hmac_sha512_tv_template) } }, { .alg = "jitterentropy_rng", @@ -3512,14 +3095,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_kw_enc_tv_template, - .count = ARRAY_SIZE(aes_kw_enc_tv_template) - }, - .dec = { - .vecs = aes_kw_dec_tv_template, - .count = ARRAY_SIZE(aes_kw_dec_tv_template) - } + .enc = __VECS(aes_kw_enc_tv_template), + .dec = __VECS(aes_kw_dec_tv_template) } } }, { @@ -3527,14 +3104,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = aes_lrw_enc_tv_template, - .count = AES_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_lrw_dec_tv_template, - .count = AES_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(aes_lrw_enc_tv_template), + .dec = __VECS(aes_lrw_dec_tv_template) } } }, { @@ -3542,14 +3113,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_lrw_enc_tv_template, - .count = CAMELLIA_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_lrw_dec_tv_template, - .count = CAMELLIA_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_lrw_enc_tv_template), + .dec = __VECS(camellia_lrw_dec_tv_template) } } }, { @@ -3557,14 +3122,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_lrw_enc_tv_template, - .count = CAST6_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_lrw_dec_tv_template, - .count = CAST6_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_lrw_enc_tv_template), + .dec = __VECS(cast6_lrw_dec_tv_template) } } }, { @@ -3572,14 +3131,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_lrw_enc_tv_template, - .count = SERPENT_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_lrw_dec_tv_template, - .count = SERPENT_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_lrw_enc_tv_template), + .dec = __VECS(serpent_lrw_dec_tv_template) } } }, { @@ -3587,14 +3140,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_lrw_enc_tv_template, - .count = TF_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_lrw_dec_tv_template, - .count = TF_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(tf_lrw_enc_tv_template), + .dec = __VECS(tf_lrw_dec_tv_template) } } }, { @@ -3603,14 +3150,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4_comp_tv_template, - .count = LZ4_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4_decomp_tv_template, - .count = LZ4_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4_comp_tv_template), + .decomp = __VECS(lz4_decomp_tv_template) } } }, { @@ -3619,14 +3160,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4hc_comp_tv_template, - .count = LZ4HC_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4hc_decomp_tv_template, - .count = LZ4HC_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4hc_comp_tv_template), + .decomp = __VECS(lz4hc_decomp_tv_template) } } }, { @@ -3635,42 +3170,27 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lzo_comp_tv_template, - .count = LZO_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lzo_decomp_tv_template, - .count = LZO_DECOMP_TEST_VECTORS - } + .comp = __VECS(lzo_comp_tv_template), + .decomp = __VECS(lzo_decomp_tv_template) } } }, { .alg = "md4", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md4_tv_template, - .count = MD4_TEST_VECTORS - } + .hash = __VECS(md4_tv_template) } }, { .alg = "md5", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md5_tv_template, - .count = MD5_TEST_VECTORS - } + .hash = __VECS(md5_tv_template) } }, { .alg = "michael_mic", .test = alg_test_hash, .suite = { - .hash = { - .vecs = michael_mic_tv_template, - .count = MICHAEL_MIC_TEST_VECTORS - } + .hash = __VECS(michael_mic_tv_template) } }, { .alg = "ofb(aes)", @@ -3678,14 +3198,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ofb_enc_tv_template, - .count = AES_OFB_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ofb_dec_tv_template, - .count = AES_OFB_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ofb_enc_tv_template), + .dec = __VECS(aes_ofb_dec_tv_template) } } }, { @@ -3693,24 +3207,15 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = fcrypt_pcbc_enc_tv_template, - .count = FCRYPT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = fcrypt_pcbc_dec_tv_template, - .count = FCRYPT_DEC_TEST_VECTORS - } + .enc = __VECS(fcrypt_pcbc_enc_tv_template), + .dec = __VECS(fcrypt_pcbc_dec_tv_template) } } }, { .alg = "poly1305", .test = alg_test_hash, .suite = { - .hash = { - .vecs = poly1305_tv_template, - .count = POLY1305_TEST_VECTORS - } + .hash = __VECS(poly1305_tv_template) } }, { .alg = "rfc3686(ctr(aes))", @@ -3718,14 +3223,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_rfc3686_enc_tv_template, - .count = AES_CTR_3686_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_rfc3686_dec_tv_template, - .count = AES_CTR_3686_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_rfc3686_enc_tv_template), + .dec = __VECS(aes_ctr_rfc3686_dec_tv_template) } } }, { @@ -3734,14 +3233,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_rfc4106_enc_tv_template, - .count = AES_GCM_4106_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4106_dec_tv_template, - .count = AES_GCM_4106_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_rfc4106_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4106_dec_tv_template) } } }, { @@ -3750,14 +3243,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_rfc4309_enc_tv_template, - .count = AES_CCM_4309_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_rfc4309_dec_tv_template, - .count = AES_CCM_4309_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_rfc4309_enc_tv_template), + .dec = __VECS(aes_ccm_rfc4309_dec_tv_template) } } }, { @@ -3765,14 +3252,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_rfc4543_enc_tv_template, - .count = AES_GCM_4543_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4543_dec_tv_template, - .count = AES_GCM_4543_DEC_TEST_VECTORS - }, + .enc = __VECS(aes_gcm_rfc4543_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4543_dec_tv_template), } } }, { @@ -3780,14 +3261,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539_enc_tv_template, - .count = RFC7539_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539_dec_tv_template, - .count = RFC7539_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539_enc_tv_template), + .dec = __VECS(rfc7539_dec_tv_template), } } }, { @@ -3795,71 +3270,47 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539esp_enc_tv_template, - .count = RFC7539ESP_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539esp_dec_tv_template, - .count = RFC7539ESP_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539esp_enc_tv_template), + .dec = __VECS(rfc7539esp_dec_tv_template), } } }, { .alg = "rmd128", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd128_tv_template, - .count = RMD128_TEST_VECTORS - } + .hash = __VECS(rmd128_tv_template) } }, { .alg = "rmd160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd160_tv_template, - .count = RMD160_TEST_VECTORS - } + .hash = __VECS(rmd160_tv_template) } }, { .alg = "rmd256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd256_tv_template, - .count = RMD256_TEST_VECTORS - } + .hash = __VECS(rmd256_tv_template) } }, { .alg = "rmd320", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd320_tv_template, - .count = RMD320_TEST_VECTORS - } + .hash = __VECS(rmd320_tv_template) } }, { .alg = "rsa", .test = alg_test_akcipher, .fips_allowed = 1, .suite = { - .akcipher = { - .vecs = rsa_tv_template, - .count = RSA_TEST_VECTORS - } + .akcipher = __VECS(rsa_tv_template) } }, { .alg = "salsa20", .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = salsa20_stream_enc_tv_template, - .count = SALSA20_STREAM_ENC_TEST_VECTORS - } + .enc = __VECS(salsa20_stream_enc_tv_template) } } }, { @@ -3867,162 +3318,111 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha1_tv_template, - .count = SHA1_TEST_VECTORS - } + .hash = __VECS(sha1_tv_template) } }, { .alg = "sha224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha224_tv_template, - .count = SHA224_TEST_VECTORS - } + .hash = __VECS(sha224_tv_template) } }, { .alg = "sha256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha256_tv_template, - .count = SHA256_TEST_VECTORS - } + .hash = __VECS(sha256_tv_template) } }, { .alg = "sha3-224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_224_tv_template, - .count = SHA3_224_TEST_VECTORS - } + .hash = __VECS(sha3_224_tv_template) } }, { .alg = "sha3-256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_256_tv_template, - .count = SHA3_256_TEST_VECTORS - } + .hash = __VECS(sha3_256_tv_template) } }, { .alg = "sha3-384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_384_tv_template, - .count = SHA3_384_TEST_VECTORS - } + .hash = __VECS(sha3_384_tv_template) } }, { .alg = "sha3-512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_512_tv_template, - .count = SHA3_512_TEST_VECTORS - } + .hash = __VECS(sha3_512_tv_template) } }, { .alg = "sha384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha384_tv_template, - .count = SHA384_TEST_VECTORS - } + .hash = __VECS(sha384_tv_template) } }, { .alg = "sha512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha512_tv_template, - .count = SHA512_TEST_VECTORS - } + .hash = __VECS(sha512_tv_template) } }, { .alg = "tgr128", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr128_tv_template, - .count = TGR128_TEST_VECTORS - } + .hash = __VECS(tgr128_tv_template) } }, { .alg = "tgr160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr160_tv_template, - .count = TGR160_TEST_VECTORS - } + .hash = __VECS(tgr160_tv_template) } }, { .alg = "tgr192", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr192_tv_template, - .count = TGR192_TEST_VECTORS - } + .hash = __VECS(tgr192_tv_template) } }, { .alg = "vmac(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_vmac128_tv_template, - .count = VMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_vmac128_tv_template) } }, { .alg = "wp256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp256_tv_template, - .count = WP256_TEST_VECTORS - } + .hash = __VECS(wp256_tv_template) } }, { .alg = "wp384", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp384_tv_template, - .count = WP384_TEST_VECTORS - } + .hash = __VECS(wp384_tv_template) } }, { .alg = "wp512", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp512_tv_template, - .count = WP512_TEST_VECTORS - } + .hash = __VECS(wp512_tv_template) } }, { .alg = "xcbc(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_xcbc128_tv_template, - .count = XCBC_AES_TEST_VECTORS - } + .hash = __VECS(aes_xcbc128_tv_template) } }, { .alg = "xts(aes)", @@ -4030,14 +3430,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_xts_enc_tv_template, - .count = AES_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_xts_dec_tv_template, - .count = AES_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(aes_xts_enc_tv_template), + .dec = __VECS(aes_xts_dec_tv_template) } } }, { @@ -4045,14 +3439,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_xts_enc_tv_template, - .count = CAMELLIA_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_xts_dec_tv_template, - .count = CAMELLIA_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_xts_enc_tv_template), + .dec = __VECS(camellia_xts_dec_tv_template) } } }, { @@ -4060,14 +3448,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_xts_enc_tv_template, - .count = CAST6_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_xts_dec_tv_template, - .count = CAST6_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_xts_enc_tv_template), + .dec = __VECS(cast6_xts_dec_tv_template) } } }, { @@ -4075,14 +3457,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_xts_enc_tv_template, - .count = SERPENT_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_xts_dec_tv_template, - .count = SERPENT_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_xts_enc_tv_template), + .dec = __VECS(serpent_xts_dec_tv_template) } } }, { @@ -4090,14 +3466,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_xts_enc_tv_template, - .count = TF_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_xts_dec_tv_template, - .count = TF_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(tf_xts_enc_tv_template), + .dec = __VECS(tf_xts_dec_tv_template) } } } diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9b656be7f52f..64595f067d72 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -151,11 +151,6 @@ static char zeroed_string[48]; /* * RSA test vectors. Borrowed from openSSL. */ -#ifdef CONFIG_CRYPTO_FIPS -#define RSA_TEST_VECTORS 2 -#else -#define RSA_TEST_VECTORS 5 -#endif static struct akcipher_testvec rsa_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -340,6 +335,7 @@ static struct akcipher_testvec rsa_tv_template[] = { .m_size = 8, .c_size = 256, .public_key_vec = true, +#ifndef CONFIG_CRYPTO_FIPS }, { .key = "\x30\x82\x09\x29" /* sequence of 2345 bytes */ @@ -538,11 +534,10 @@ static struct akcipher_testvec rsa_tv_template[] = { .key_len = 2349, .m_size = 8, .c_size = 512, +#endif } }; -#define DH_TEST_VECTORS 2 - struct kpp_testvec dh_tv_template[] = { { .secret = @@ -760,11 +755,6 @@ struct kpp_testvec dh_tv_template[] = { } }; -#ifdef CONFIG_CRYPTO_FIPS -#define ECDH_TEST_VECTORS 1 -#else -#define ECDH_TEST_VECTORS 2 -#endif struct kpp_testvec ecdh_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -856,8 +846,6 @@ struct kpp_testvec ecdh_tv_template[] = { /* * MD4 test vectors from RFC1320 */ -#define MD4_TEST_VECTORS 7 - static struct hash_testvec md4_tv_template [] = { { .plaintext = "", @@ -899,7 +887,6 @@ static struct hash_testvec md4_tv_template [] = { }, }; -#define SHA3_224_TEST_VECTORS 3 static struct hash_testvec sha3_224_tv_template[] = { { .plaintext = "", @@ -925,7 +912,6 @@ static struct hash_testvec sha3_224_tv_template[] = { }, }; -#define SHA3_256_TEST_VECTORS 3 static struct hash_testvec sha3_256_tv_template[] = { { .plaintext = "", @@ -952,7 +938,6 @@ static struct hash_testvec sha3_256_tv_template[] = { }; -#define SHA3_384_TEST_VECTORS 3 static struct hash_testvec sha3_384_tv_template[] = { { .plaintext = "", @@ -985,7 +970,6 @@ static struct hash_testvec sha3_384_tv_template[] = { }; -#define SHA3_512_TEST_VECTORS 3 static struct hash_testvec sha3_512_tv_template[] = { { .plaintext = "", @@ -1027,8 +1011,6 @@ static struct hash_testvec sha3_512_tv_template[] = { /* * MD5 test vectors from RFC1321 */ -#define MD5_TEST_VECTORS 7 - static struct hash_testvec md5_tv_template[] = { { .digest = "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04" @@ -1073,8 +1055,6 @@ static struct hash_testvec md5_tv_template[] = { /* * RIPEMD-128 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD128_TEST_VECTORS 10 - static struct hash_testvec rmd128_tv_template[] = { { .digest = "\xcd\xf2\x62\x13\xa1\x50\xdc\x3e" @@ -1137,8 +1117,6 @@ static struct hash_testvec rmd128_tv_template[] = { /* * RIPEMD-160 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD160_TEST_VECTORS 10 - static struct hash_testvec rmd160_tv_template[] = { { .digest = "\x9c\x11\x85\xa5\xc5\xe9\xfc\x54\x61\x28" @@ -1201,8 +1179,6 @@ static struct hash_testvec rmd160_tv_template[] = { /* * RIPEMD-256 test vectors */ -#define RMD256_TEST_VECTORS 8 - static struct hash_testvec rmd256_tv_template[] = { { .digest = "\x02\xba\x4c\x4e\x5f\x8e\xcd\x18" @@ -1269,8 +1245,6 @@ static struct hash_testvec rmd256_tv_template[] = { /* * RIPEMD-320 test vectors */ -#define RMD320_TEST_VECTORS 8 - static struct hash_testvec rmd320_tv_template[] = { { .digest = "\x22\xd6\x5d\x56\x61\x53\x6c\xdc\x75\xc1" @@ -1334,7 +1308,6 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template) static struct hash_testvec crct10dif_tv_template[] = { { .plaintext = "abc", @@ -1385,8 +1358,6 @@ static struct hash_testvec crct10dif_tv_template[] = { * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 */ -#define SHA1_TEST_VECTORS 6 - static struct hash_testvec sha1_tv_template[] = { { .plaintext = "", @@ -1577,8 +1548,6 @@ static struct hash_testvec sha1_tv_template[] = { /* * SHA224 test vectors from from FIPS PUB 180-2 */ -#define SHA224_TEST_VECTORS 5 - static struct hash_testvec sha224_tv_template[] = { { .plaintext = "", @@ -1751,8 +1720,6 @@ static struct hash_testvec sha224_tv_template[] = { /* * SHA256 test vectors from from NIST */ -#define SHA256_TEST_VECTORS 5 - static struct hash_testvec sha256_tv_template[] = { { .plaintext = "", @@ -1924,8 +1891,6 @@ static struct hash_testvec sha256_tv_template[] = { /* * SHA384 test vectors from from NIST and kerneli */ -#define SHA384_TEST_VECTORS 6 - static struct hash_testvec sha384_tv_template[] = { { .plaintext = "", @@ -2118,8 +2083,6 @@ static struct hash_testvec sha384_tv_template[] = { /* * SHA512 test vectors from from NIST and kerneli */ -#define SHA512_TEST_VECTORS 6 - static struct hash_testvec sha512_tv_template[] = { { .plaintext = "", @@ -2327,8 +2290,6 @@ static struct hash_testvec sha512_tv_template[] = { * by Vincent Rijmen and Paulo S. L. M. Barreto as part of the NESSIE * submission */ -#define WP512_TEST_VECTORS 8 - static struct hash_testvec wp512_tv_template[] = { { .plaintext = "", @@ -2425,8 +2386,6 @@ static struct hash_testvec wp512_tv_template[] = { }, }; -#define WP384_TEST_VECTORS 8 - static struct hash_testvec wp384_tv_template[] = { { .plaintext = "", @@ -2507,8 +2466,6 @@ static struct hash_testvec wp384_tv_template[] = { }, }; -#define WP256_TEST_VECTORS 8 - static struct hash_testvec wp256_tv_template[] = { { .plaintext = "", @@ -2576,8 +2533,6 @@ static struct hash_testvec wp256_tv_template[] = { /* * TIGER test vectors from Tiger website */ -#define TGR192_TEST_VECTORS 6 - static struct hash_testvec tgr192_tv_template[] = { { .plaintext = "", @@ -2621,8 +2576,6 @@ static struct hash_testvec tgr192_tv_template[] = { }, }; -#define TGR160_TEST_VECTORS 6 - static struct hash_testvec tgr160_tv_template[] = { { .plaintext = "", @@ -2666,8 +2619,6 @@ static struct hash_testvec tgr160_tv_template[] = { }, }; -#define TGR128_TEST_VECTORS 6 - static struct hash_testvec tgr128_tv_template[] = { { .plaintext = "", @@ -2705,8 +2656,6 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; -#define GHASH_TEST_VECTORS 6 - static struct hash_testvec ghash_tv_template[] = { { @@ -2822,8 +2771,6 @@ static struct hash_testvec ghash_tv_template[] = * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). */ -#define HMAC_MD5_TEST_VECTORS 7 - static struct hash_testvec hmac_md5_tv_template[] = { { @@ -2904,8 +2851,6 @@ static struct hash_testvec hmac_md5_tv_template[] = /* * HMAC-RIPEMD128 test vectors from RFC2286 */ -#define HMAC_RMD128_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd128_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -2985,8 +2930,6 @@ static struct hash_testvec hmac_rmd128_tv_template[] = { /* * HMAC-RIPEMD160 test vectors from RFC2286 */ -#define HMAC_RMD160_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd160_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3066,8 +3009,6 @@ static struct hash_testvec hmac_rmd160_tv_template[] = { /* * HMAC-SHA1 test vectors from RFC2202 */ -#define HMAC_SHA1_TEST_VECTORS 7 - static struct hash_testvec hmac_sha1_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3149,8 +3090,6 @@ static struct hash_testvec hmac_sha1_tv_template[] = { /* * SHA224 HMAC test vectors from RFC4231 */ -#define HMAC_SHA224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3264,8 +3203,6 @@ static struct hash_testvec hmac_sha224_tv_template[] = { * HMAC-SHA256 test vectors from * draft-ietf-ipsec-ciph-sha-256-01.txt */ -#define HMAC_SHA256_TEST_VECTORS 10 - static struct hash_testvec hmac_sha256_tv_template[] = { { .key = "\x01\x02\x03\x04\x05\x06\x07\x08" @@ -3401,8 +3338,6 @@ static struct hash_testvec hmac_sha256_tv_template[] = { }, }; -#define CMAC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_cmac128_tv_template[] = { { /* From NIST Special Publication 800-38B, AES-128 */ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" @@ -3478,8 +3413,6 @@ static struct hash_testvec aes_cmac128_tv_template[] = { } }; -#define CMAC_DES3_EDE_TEST_VECTORS 4 - static struct hash_testvec des3_ede_cmac64_tv_template[] = { /* * From NIST Special Publication 800-38B, Three Key TDEA @@ -3526,8 +3459,6 @@ static struct hash_testvec des3_ede_cmac64_tv_template[] = { } }; -#define XCBC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_xcbc128_tv_template[] = { { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -3594,7 +3525,6 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 11 static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', @@ -3701,8 +3631,6 @@ static struct hash_testvec aes_vmac128_tv_template[] = { * SHA384 HMAC test vectors from RFC4231 */ -#define HMAC_SHA384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3801,8 +3729,6 @@ static struct hash_testvec hmac_sha384_tv_template[] = { * SHA512 HMAC test vectors from RFC4231 */ -#define HMAC_SHA512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha512_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3908,8 +3834,6 @@ static struct hash_testvec hmac_sha512_tv_template[] = { }, }; -#define HMAC_SHA3_224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3999,8 +3923,6 @@ static struct hash_testvec hmac_sha3_224_tv_template[] = { }, }; -#define HMAC_SHA3_256_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_256_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4090,8 +4012,6 @@ static struct hash_testvec hmac_sha3_256_tv_template[] = { }, }; -#define HMAC_SHA3_384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4189,8 +4109,6 @@ static struct hash_testvec hmac_sha3_384_tv_template[] = { }, }; -#define HMAC_SHA3_512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_512_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4300,8 +4218,6 @@ static struct hash_testvec hmac_sha3_512_tv_template[] = { * Poly1305 test vectors from RFC7539 A.3. */ -#define POLY1305_TEST_VECTORS 11 - static struct hash_testvec poly1305_tv_template[] = { { /* Test Vector #1 */ .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -4547,19 +4463,6 @@ static struct hash_testvec poly1305_tv_template[] = { /* * DES test vectors. */ -#define DES_ENC_TEST_VECTORS 11 -#define DES_DEC_TEST_VECTORS 5 -#define DES_CBC_ENC_TEST_VECTORS 6 -#define DES_CBC_DEC_TEST_VECTORS 5 -#define DES_CTR_ENC_TEST_VECTORS 2 -#define DES_CTR_DEC_TEST_VECTORS 2 -#define DES3_EDE_ENC_TEST_VECTORS 4 -#define DES3_EDE_DEC_TEST_VECTORS 4 -#define DES3_EDE_CBC_ENC_TEST_VECTORS 2 -#define DES3_EDE_CBC_DEC_TEST_VECTORS 2 -#define DES3_EDE_CTR_ENC_TEST_VECTORS 2 -#define DES3_EDE_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec des_enc_tv_template[] = { { /* From Applied Cryptography */ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -6620,13 +6523,6 @@ static struct cipher_testvec des3_ede_ctr_dec_tv_template[] = { /* * Blowfish test vectors. */ -#define BF_ENC_TEST_VECTORS 7 -#define BF_DEC_TEST_VECTORS 7 -#define BF_CBC_ENC_TEST_VECTORS 2 -#define BF_CBC_DEC_TEST_VECTORS 2 -#define BF_CTR_ENC_TEST_VECTORS 2 -#define BF_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec bf_enc_tv_template[] = { { /* DES test vectors from OpenSSL */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -8152,17 +8048,6 @@ static struct cipher_testvec bf_ctr_dec_tv_template[] = { /* * Twofish test vectors. */ -#define TF_ENC_TEST_VECTORS 4 -#define TF_DEC_TEST_VECTORS 4 -#define TF_CBC_ENC_TEST_VECTORS 5 -#define TF_CBC_DEC_TEST_VECTORS 5 -#define TF_CTR_ENC_TEST_VECTORS 2 -#define TF_CTR_DEC_TEST_VECTORS 2 -#define TF_LRW_ENC_TEST_VECTORS 8 -#define TF_LRW_DEC_TEST_VECTORS 8 -#define TF_XTS_ENC_TEST_VECTORS 5 -#define TF_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec tf_enc_tv_template[] = { { .key = zeroed_string, @@ -10881,24 +10766,6 @@ static struct cipher_testvec tf_xts_dec_tv_template[] = { * Serpent test vectors. These are backwards because Serpent writes * octet sequences in right-to-left mode. */ -#define SERPENT_ENC_TEST_VECTORS 5 -#define SERPENT_DEC_TEST_VECTORS 5 - -#define TNEPRES_ENC_TEST_VECTORS 4 -#define TNEPRES_DEC_TEST_VECTORS 4 - -#define SERPENT_CBC_ENC_TEST_VECTORS 1 -#define SERPENT_CBC_DEC_TEST_VECTORS 1 - -#define SERPENT_CTR_ENC_TEST_VECTORS 2 -#define SERPENT_CTR_DEC_TEST_VECTORS 2 - -#define SERPENT_LRW_ENC_TEST_VECTORS 8 -#define SERPENT_LRW_DEC_TEST_VECTORS 8 - -#define SERPENT_XTS_ENC_TEST_VECTORS 5 -#define SERPENT_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec serpent_enc_tv_template[] = { { .input = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -13637,17 +13504,6 @@ static struct cipher_testvec serpent_xts_dec_tv_template[] = { }; /* Cast6 test vectors from RFC 2612 */ -#define CAST6_ENC_TEST_VECTORS 4 -#define CAST6_DEC_TEST_VECTORS 4 -#define CAST6_CBC_ENC_TEST_VECTORS 1 -#define CAST6_CBC_DEC_TEST_VECTORS 1 -#define CAST6_CTR_ENC_TEST_VECTORS 2 -#define CAST6_CTR_DEC_TEST_VECTORS 2 -#define CAST6_LRW_ENC_TEST_VECTORS 1 -#define CAST6_LRW_DEC_TEST_VECTORS 1 -#define CAST6_XTS_ENC_TEST_VECTORS 1 -#define CAST6_XTS_DEC_TEST_VECTORS 1 - static struct cipher_testvec cast6_enc_tv_template[] = { { .key = "\x23\x42\xbb\x9e\xfa\x38\x54\x2c" @@ -15182,38 +15038,6 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { /* * AES test vectors. */ -#define AES_ENC_TEST_VECTORS 4 -#define AES_DEC_TEST_VECTORS 4 -#define AES_CBC_ENC_TEST_VECTORS 5 -#define AES_CBC_DEC_TEST_VECTORS 5 -#define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 -#define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC 2 -#define HMAC_SHA1_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA256_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA512_AES_CBC_ENC_TEST_VEC 7 -#define AES_LRW_ENC_TEST_VECTORS 8 -#define AES_LRW_DEC_TEST_VECTORS 8 -#define AES_XTS_ENC_TEST_VECTORS 5 -#define AES_XTS_DEC_TEST_VECTORS 5 -#define AES_CTR_ENC_TEST_VECTORS 5 -#define AES_CTR_DEC_TEST_VECTORS 5 -#define AES_OFB_ENC_TEST_VECTORS 1 -#define AES_OFB_DEC_TEST_VECTORS 1 -#define AES_CTR_3686_ENC_TEST_VECTORS 7 -#define AES_CTR_3686_DEC_TEST_VECTORS 6 -#define AES_GCM_ENC_TEST_VECTORS 9 -#define AES_GCM_DEC_TEST_VECTORS 8 -#define AES_GCM_4106_ENC_TEST_VECTORS 23 -#define AES_GCM_4106_DEC_TEST_VECTORS 23 -#define AES_GCM_4543_ENC_TEST_VECTORS 1 -#define AES_GCM_4543_DEC_TEST_VECTORS 2 -#define AES_CCM_ENC_TEST_VECTORS 8 -#define AES_CCM_DEC_TEST_VECTORS 7 -#define AES_CCM_4309_ENC_TEST_VECTORS 7 -#define AES_CCM_4309_DEC_TEST_VECTORS 10 - static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -17069,8 +16893,6 @@ static struct aead_testvec hmac_sha512_aes_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17130,8 +16952,6 @@ static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17191,8 +17011,6 @@ static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA256_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17254,8 +17072,6 @@ static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA384_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17321,8 +17137,6 @@ static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17392,8 +17206,6 @@ static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17455,8 +17267,6 @@ static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17518,8 +17328,6 @@ static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17583,8 +17391,6 @@ static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17652,8 +17458,6 @@ static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -24434,8 +24238,6 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = { /* * ChaCha20-Poly1305 AEAD test vectors from RFC7539 2.8.2./A.5. */ -#define RFC7539_ENC_TEST_VECTORS 2 -#define RFC7539_DEC_TEST_VECTORS 2 static struct aead_testvec rfc7539_enc_tv_template[] = { { .key = "\x80\x81\x82\x83\x84\x85\x86\x87" @@ -24703,8 +24505,6 @@ static struct aead_testvec rfc7539_dec_tv_template[] = { /* * draft-irtf-cfrg-chacha20-poly1305 */ -#define RFC7539ESP_DEC_TEST_VECTORS 1 -#define RFC7539ESP_ENC_TEST_VECTORS 1 static struct aead_testvec rfc7539esp_enc_tv_template[] = { { .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" @@ -24927,8 +24727,6 @@ static struct cipher_testvec aes_kw_dec_tv_template[] = { * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf * Only AES-128 is supported at this time. */ -#define ANSI_CPRNG_AES_TEST_VECTORS 6 - static struct cprng_testvec ansi_cprng_aes_tv_template[] = { { .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" @@ -25846,13 +25644,6 @@ static struct drbg_testvec drbg_nopr_ctr_aes128_tv_template[] = { }; /* Cast5 test vectors from RFC 2144 */ -#define CAST5_ENC_TEST_VECTORS 4 -#define CAST5_DEC_TEST_VECTORS 4 -#define CAST5_CBC_ENC_TEST_VECTORS 1 -#define CAST5_CBC_DEC_TEST_VECTORS 1 -#define CAST5_CTR_ENC_TEST_VECTORS 2 -#define CAST5_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec cast5_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x12\x34\x56\x78" @@ -26756,9 +26547,6 @@ static struct cipher_testvec cast5_ctr_dec_tv_template[] = { /* * ARC4 test vectors from OpenSSL */ -#define ARC4_ENC_TEST_VECTORS 7 -#define ARC4_DEC_TEST_VECTORS 7 - static struct cipher_testvec arc4_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -26894,9 +26682,6 @@ static struct cipher_testvec arc4_dec_tv_template[] = { /* * TEA test vectors */ -#define TEA_ENC_TEST_VECTORS 4 -#define TEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec tea_enc_tv_template[] = { { .key = zeroed_string, @@ -26986,9 +26771,6 @@ static struct cipher_testvec tea_dec_tv_template[] = { /* * XTEA test vectors */ -#define XTEA_ENC_TEST_VECTORS 4 -#define XTEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xtea_enc_tv_template[] = { { .key = zeroed_string, @@ -27078,9 +26860,6 @@ static struct cipher_testvec xtea_dec_tv_template[] = { /* * KHAZAD test vectors. */ -#define KHAZAD_ENC_TEST_VECTORS 5 -#define KHAZAD_DEC_TEST_VECTORS 5 - static struct cipher_testvec khazad_enc_tv_template[] = { { .key = "\x80\x00\x00\x00\x00\x00\x00\x00" @@ -27177,11 +26956,6 @@ static struct cipher_testvec khazad_dec_tv_template[] = { * Anubis test vectors. */ -#define ANUBIS_ENC_TEST_VECTORS 5 -#define ANUBIS_DEC_TEST_VECTORS 5 -#define ANUBIS_CBC_ENC_TEST_VECTORS 2 -#define ANUBIS_CBC_DEC_TEST_VECTORS 2 - static struct cipher_testvec anubis_enc_tv_template[] = { { .key = "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" @@ -27381,9 +27155,6 @@ static struct cipher_testvec anubis_cbc_dec_tv_template[] = { /* * XETA test vectors */ -#define XETA_ENC_TEST_VECTORS 4 -#define XETA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xeta_enc_tv_template[] = { { .key = zeroed_string, @@ -27473,9 +27244,6 @@ static struct cipher_testvec xeta_dec_tv_template[] = { /* * FCrypt test vectors */ -#define FCRYPT_ENC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_enc_tv_template) -#define FCRYPT_DEC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_dec_tv_template) - static struct cipher_testvec fcrypt_pcbc_enc_tv_template[] = { { /* http://www.openafs.org/pipermail/openafs-devel/2000-December/005320.html */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -27601,17 +27369,6 @@ static struct cipher_testvec fcrypt_pcbc_dec_tv_template[] = { /* * CAMELLIA test vectors. */ -#define CAMELLIA_ENC_TEST_VECTORS 4 -#define CAMELLIA_DEC_TEST_VECTORS 4 -#define CAMELLIA_CBC_ENC_TEST_VECTORS 3 -#define CAMELLIA_CBC_DEC_TEST_VECTORS 3 -#define CAMELLIA_CTR_ENC_TEST_VECTORS 2 -#define CAMELLIA_CTR_DEC_TEST_VECTORS 2 -#define CAMELLIA_LRW_ENC_TEST_VECTORS 8 -#define CAMELLIA_LRW_DEC_TEST_VECTORS 8 -#define CAMELLIA_XTS_ENC_TEST_VECTORS 5 -#define CAMELLIA_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec camellia_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" @@ -31331,9 +31088,6 @@ static struct cipher_testvec camellia_xts_dec_tv_template[] = { /* * SEED test vectors */ -#define SEED_ENC_TEST_VECTORS 4 -#define SEED_DEC_TEST_VECTORS 4 - static struct cipher_testvec seed_enc_tv_template[] = { { .key = zeroed_string, @@ -31418,7 +31172,6 @@ static struct cipher_testvec seed_dec_tv_template[] = { } }; -#define SALSA20_STREAM_ENC_TEST_VECTORS 5 static struct cipher_testvec salsa20_stream_enc_tv_template[] = { /* * Testvectors from verified.test-vectors submitted to ECRYPT. @@ -32588,7 +32341,6 @@ static struct cipher_testvec salsa20_stream_enc_tv_template[] = { }, }; -#define CHACHA20_ENC_TEST_VECTORS 4 static struct cipher_testvec chacha20_enc_tv_template[] = { { /* RFC7539 A.2. Test Vector #1 */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -33100,8 +32852,6 @@ static struct cipher_testvec chacha20_enc_tv_template[] = { /* * CTS (Cipher Text Stealing) mode tests */ -#define CTS_MODE_ENC_TEST_VECTORS 6 -#define CTS_MODE_DEC_TEST_VECTORS 6 static struct cipher_testvec cts_mode_enc_tv_template[] = { { /* from rfc3962 */ .klen = 16, @@ -33322,9 +33072,6 @@ struct comp_testvec { * Params: winbits=-11, Z_DEFAULT_COMPRESSION, MAX_MEM_LEVEL. */ -#define DEFLATE_COMP_TEST_VECTORS 2 -#define DEFLATE_DECOMP_TEST_VECTORS 2 - static struct comp_testvec deflate_comp_tv_template[] = { { .inlen = 70, @@ -33400,9 +33147,6 @@ static struct comp_testvec deflate_decomp_tv_template[] = { /* * LZO test vectors (null-terminated strings). */ -#define LZO_COMP_TEST_VECTORS 2 -#define LZO_DECOMP_TEST_VECTORS 2 - static struct comp_testvec lzo_comp_tv_template[] = { { .inlen = 70, @@ -33534,8 +33278,6 @@ static struct hash_testvec michael_mic_tv_template[] = { /* * CRC32 test vectors */ -#define CRC32_TEST_VECTORS 14 - static struct hash_testvec crc32_tv_template[] = { { .key = "\x87\xa9\xcb\xed", @@ -33968,8 +33710,6 @@ static struct hash_testvec crc32_tv_template[] = { /* * CRC32C test vectors */ -#define CRC32C_TEST_VECTORS 15 - static struct hash_testvec crc32c_tv_template[] = { { .psize = 0, @@ -34406,8 +34146,6 @@ static struct hash_testvec crc32c_tv_template[] = { /* * Blakcifn CRC test vectors */ -#define BFIN_CRC_TEST_VECTORS 6 - static struct hash_testvec bfin_crc_tv_template[] = { { .psize = 0, @@ -34493,9 +34231,6 @@ static struct hash_testvec bfin_crc_tv_template[] = { }; -#define LZ4_COMP_TEST_VECTORS 1 -#define LZ4_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4_comp_tv_template[] = { { .inlen = 70, @@ -34526,9 +34261,6 @@ static struct comp_testvec lz4_decomp_tv_template[] = { }, }; -#define LZ4HC_COMP_TEST_VECTORS 1 -#define LZ4HC_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4hc_comp_tv_template[] = { { .inlen = 70, diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 13caebd679f5..8c4e0a18460a 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -114,7 +114,7 @@ void __init acpi_watchdog_init(void) pdev = platform_device_register_simple("wdat_wdt", PLATFORM_DEVID_NONE, resources, nresources); if (IS_ERR(pdev)) - pr_err("Failed to create platform device\n"); + pr_err("Device creation failed: %ld\n", PTR_ERR(pdev)); kfree(resources); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index f8d65647ea79..fb19e1cdb641 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -98,7 +98,15 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) if (check_children && list_empty(&adev->children)) return -ENODEV; - return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; + /* + * If the device has a _HID (or _CID) returning a valid ACPI/PNP + * device ID, it is better to make it look less attractive here, so that + * the other device with the same _ADR value (that may not have a valid + * device ID) can be matched going forward. [This means a second spec + * violation in a row, so whatever we do here is best effort anyway.] + */ + return sta_present && list_empty(&adev->pnp.ids) ? + FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, @@ -250,7 +258,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) return 0; err: - acpi_dma_deconfigure(dev); ACPI_COMPANION_SET(dev, NULL); put_device(dev); put_device(&acpi_dev->dev); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1b41a2739dac..0c452265c111 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -37,6 +37,7 @@ void acpi_amba_init(void); static inline void acpi_amba_init(void) {} #endif int acpi_sysfs_init(void); +void acpi_gpe_apply_masked_gpes(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 45dec874ea55..192691880d55 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2074,6 +2074,7 @@ int __init acpi_scan_init(void) } } + acpi_gpe_apply_masked_gpes(); acpi_update_all_gpes(); acpi_ec_ecdt_start(); diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 703c26e7022c..cf05ae973381 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -708,6 +708,62 @@ end: return result ? result : size; } +/* + * A Quirk Mechanism for GPE Flooding Prevention: + * + * Quirks may be needed to prevent GPE flooding on a specific GPE. The + * flooding typically cannot be detected and automatically prevented by + * ACPI_GPE_DISPATCH_NONE check because there is a _Lxx/_Exx prepared in + * the AML tables. This normally indicates a feature gap in Linux, thus + * instead of providing endless quirk tables, we provide a boot parameter + * for those who want this quirk. For example, if the users want to prevent + * the GPE flooding for GPE 00, they need to specify the following boot + * parameter: + * acpi_mask_gpe=0x00 + * The masking status can be modified by the following runtime controlling + * interface: + * echo unmask > /sys/firmware/acpi/interrupts/gpe00 + */ + +/* + * Currently, the GPE flooding prevention only supports to mask the GPEs + * numbered from 00 to 7f. + */ +#define ACPI_MASKABLE_GPE_MAX 0x80 + +static u64 __initdata acpi_masked_gpes; + +static int __init acpi_gpe_set_masked_gpes(char *val) +{ + u8 gpe; + + if (kstrtou8(val, 0, &gpe) || gpe > ACPI_MASKABLE_GPE_MAX) + return -EINVAL; + acpi_masked_gpes |= ((u64)1<<gpe); + + return 1; +} +__setup("acpi_mask_gpe=", acpi_gpe_set_masked_gpes); + +void __init acpi_gpe_apply_masked_gpes(void) +{ + acpi_handle handle; + acpi_status status; + u8 gpe; + + for (gpe = 0; + gpe < min_t(u8, ACPI_MASKABLE_GPE_MAX, acpi_current_gpe_count); + gpe++) { + if (acpi_masked_gpes & ((u64)1<<gpe)) { + status = acpi_get_gpe_device(gpe, &handle); + if (ACPI_SUCCESS(status)) { + pr_info("Masking GPE 0x%x.\n", gpe); + (void)acpi_mask_gpe(handle, gpe, TRUE); + } + } + } +} + void acpi_irq_stats_init(void) { acpi_status status; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a5e1262b964b..2997026b4dfb 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -626,6 +626,7 @@ static int genpd_runtime_resume(struct device *dev) out: /* Measure resume latency. */ + time_start = 0; if (timed && runtime_pm) time_start = ktime_get(); diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 6ce5ce8be2f2..68fdc60ddf50 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -1,55 +1,30 @@ /* - Added support for the AMD Geode LX RNG - (c) Copyright 2004-2005 Advanced Micro Devices, Inc. - - derived from - - Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG) - (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com> - - derived from - - Hardware driver for the AMD 768 Random Number Generator (RNG) - (c) Copyright 2001 Red Hat Inc <alan@redhat.com> - - derived from - - Hardware driver for Intel i810 Random Number Generator (RNG) - Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com> - Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com> - - Added generic RNG API - Copyright 2006 Michael Buesch <m@bues.ch> - Copyright 2005 (c) MontaVista Software, Inc. - - Please read Documentation/hw_random.txt for details on use. - - ---------------------------------------------------------- - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - + * hw_random/core.c: HWRNG core API + * + * Copyright 2006 Michael Buesch <m@bues.ch> + * Copyright 2005 (c) MontaVista Software, Inc. + * + * Please read Documentation/hw_random.txt for details on use. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. */ - +#include <linux/delay.h> #include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> #include <linux/hw_random.h> -#include <linux/module.h> #include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/miscdevice.h> #include <linux/kthread.h> -#include <linux/delay.h> -#include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/module.h> #include <linux/random.h> -#include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> #include <linux/uaccess.h> - #define RNG_MODULE_NAME "hw_random" -#define PFX RNG_MODULE_NAME ": " -#define RNG_MISCDEV_MINOR 183 /* official */ - static struct hwrng *current_rng; static struct task_struct *hwrng_fill; @@ -298,7 +273,6 @@ out_put: goto out; } - static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, @@ -309,14 +283,13 @@ static const struct file_operations rng_chrdev_ops = { static const struct attribute_group *rng_dev_groups[]; static struct miscdevice rng_miscdev = { - .minor = RNG_MISCDEV_MINOR, + .minor = HWRNG_MINOR, .name = RNG_MODULE_NAME, .nodename = "hwrng", .fops = &rng_chrdev_ops, .groups = rng_dev_groups, }; - static ssize_t hwrng_attr_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -447,8 +420,7 @@ int hwrng_register(struct hwrng *rng) int err = -EINVAL; struct hwrng *old_rng, *tmp; - if (rng->name == NULL || - (rng->data_read == NULL && rng->read == NULL)) + if (!rng->name || (!rng->data_read && !rng->read)) goto out; mutex_lock(&rng_mutex); diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 3b06c1d6cfb2..31cbdbbaebfc 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -21,11 +21,11 @@ #define DRV_MODULE_NAME "n2rng" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.2" -#define DRV_MODULE_RELDATE "July 27, 2011" +#define DRV_MODULE_VERSION "0.3" +#define DRV_MODULE_RELDATE "Jan 7, 2017" static char version[] = - DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_NAME " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); MODULE_DESCRIPTION("Niagara2 RNG driver"); @@ -302,26 +302,57 @@ static int n2rng_try_read_ctl(struct n2rng *np) return n2rng_hv_err_trans(hv_err); } -#define CONTROL_DEFAULT_BASE \ - ((2 << RNG_CTL_ASEL_SHIFT) | \ - (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_CTL_WAIT_SHIFT) | \ - RNG_CTL_LFSR) - -#define CONTROL_DEFAULT_0 \ - (CONTROL_DEFAULT_BASE | \ - (1 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES1) -#define CONTROL_DEFAULT_1 \ - (CONTROL_DEFAULT_BASE | \ - (2 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES2) -#define CONTROL_DEFAULT_2 \ - (CONTROL_DEFAULT_BASE | \ - (3 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES3) -#define CONTROL_DEFAULT_3 \ - (CONTROL_DEFAULT_BASE | \ - RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3) +static u64 n2rng_control_default(struct n2rng *np, int ctl) +{ + u64 val = 0; + + if (np->data->chip_version == 1) { + val = ((2 << RNG_v1_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v1_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + + } else { + val = ((2 << RNG_v2_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v2_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + } + + return val; +} static void n2rng_control_swstate_init(struct n2rng *np) { @@ -336,10 +367,10 @@ static void n2rng_control_swstate_init(struct n2rng *np) for (i = 0; i < np->num_units; i++) { struct n2rng_unit *up = &np->units[i]; - up->control[0] = CONTROL_DEFAULT_0; - up->control[1] = CONTROL_DEFAULT_1; - up->control[2] = CONTROL_DEFAULT_2; - up->control[3] = CONTROL_DEFAULT_3; + up->control[0] = n2rng_control_default(np, 0); + up->control[1] = n2rng_control_default(np, 1); + up->control[2] = n2rng_control_default(np, 2); + up->control[3] = n2rng_control_default(np, 3); } np->hv_state = HV_RNG_STATE_UNCONFIGURED; @@ -399,6 +430,7 @@ static int n2rng_data_read(struct hwrng *rng, u32 *data) } else { int err = n2rng_generic_read_data(ra); if (!err) { + np->flags |= N2RNG_FLAG_BUFFER_VALID; np->buffer = np->test_data >> 32; *data = np->test_data & 0xffffffff; len = 4; @@ -487,9 +519,21 @@ static void n2rng_dump_test_buffer(struct n2rng *np) static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) { - u64 val = SELFTEST_VAL; + u64 val; int err, matches, limit; + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + case N2_m4_rng: /* yes, m4 uses the old value */ + val = RNG_v1_SELFTEST_VAL; + break; + default: + val = RNG_v2_SELFTEST_VAL; + break; + } + matches = 0; for (limit = 0; limit < SELFTEST_LOOPS_MAX; limit++) { matches += n2rng_test_buffer_find(np, val); @@ -512,14 +556,32 @@ static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) static int n2rng_control_selftest(struct n2rng *np, unsigned long unit) { int err; + u64 base, base3; + + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + base = RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + ((RNG_v1_SELFTEST_TICKS - 2) << RNG_v1_CTL_WAIT_SHIFT); + break; + case N2_m4_rng: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + ((RNG_v1_SELFTEST_TICKS - 2) << RNG_v2_CTL_WAIT_SHIFT); + break; + default: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + (RNG_v2_SELFTEST_TICKS << RNG_v2_CTL_WAIT_SHIFT); + break; + } - np->test_control[0] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[1] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[2] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[3] = ((0x2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR | - ((SELFTEST_TICKS - 2) << RNG_CTL_WAIT_SHIFT)); - + np->test_control[0] = base; + np->test_control[1] = base; + np->test_control[2] = base; + np->test_control[3] = base3; err = n2rng_entropy_diag_read(np, unit, np->test_control, HV_RNG_STATE_HEALTHCHECK, @@ -557,11 +619,19 @@ static int n2rng_control_configure_units(struct n2rng *np) struct n2rng_unit *up = &np->units[unit]; unsigned long ctl_ra = __pa(&up->control[0]); int esrc; - u64 base; + u64 base, shift; - base = ((np->accum_cycles << RNG_CTL_WAIT_SHIFT) | - (2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR); + if (np->data->chip_version == 1) { + base = ((np->accum_cycles << RNG_v1_CTL_WAIT_SHIFT) | + (RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v1_CTL_VCO_SHIFT; + } else { + base = ((np->accum_cycles << RNG_v2_CTL_WAIT_SHIFT) | + (RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v2_CTL_VCO_SHIFT; + } /* XXX This isn't the best. We should fetch a bunch * XXX of words using each entropy source combined XXX @@ -570,7 +640,7 @@ static int n2rng_control_configure_units(struct n2rng *np) */ for (esrc = 0; esrc < 3; esrc++) up->control[esrc] = base | - (esrc << RNG_CTL_VCO_SHIFT) | + (esrc << shift) | (RNG_CTL_ES1 << esrc); up->control[3] = base | @@ -589,6 +659,7 @@ static void n2rng_work(struct work_struct *work) { struct n2rng *np = container_of(work, struct n2rng, work.work); int err = 0; + static int retries = 4; if (!(np->flags & N2RNG_FLAG_CONTROL)) { err = n2rng_guest_check(np); @@ -606,7 +677,9 @@ static void n2rng_work(struct work_struct *work) dev_info(&np->op->dev, "RNG ready\n"); } - if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) + if (--retries == 0) + dev_err(&np->op->dev, "Self-test retries failed, RNG not ready\n"); + else if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) schedule_delayed_work(&np->work, HZ * 2); } @@ -622,24 +695,23 @@ static const struct of_device_id n2rng_match[]; static int n2rng_probe(struct platform_device *op) { const struct of_device_id *match; - int multi_capable; int err = -ENOMEM; struct n2rng *np; match = of_match_device(n2rng_match, &op->dev); if (!match) return -EINVAL; - multi_capable = (match->data != NULL); n2rng_driver_version(); np = devm_kzalloc(&op->dev, sizeof(*np), GFP_KERNEL); if (!np) goto out; np->op = op; + np->data = (struct n2rng_template *)match->data; INIT_DELAYED_WORK(&np->work, n2rng_work); - if (multi_capable) + if (np->data->multi_capable) np->flags |= N2RNG_FLAG_MULTI; err = -ENODEV; @@ -670,8 +742,9 @@ static int n2rng_probe(struct platform_device *op) dev_err(&op->dev, "VF RNG lacks rng-#units property\n"); goto out_hvapi_unregister; } - } else + } else { np->num_units = 1; + } dev_info(&op->dev, "Registered RNG HVAPI major %lu minor %lu\n", np->hvapi_major, np->hvapi_minor); @@ -692,7 +765,7 @@ static int n2rng_probe(struct platform_device *op) "multi-unit-capable" : "single-unit"), np->num_units); - np->hwrng.name = "n2rng"; + np->hwrng.name = DRV_MODULE_NAME; np->hwrng.data_read = n2rng_data_read; np->hwrng.priv = (unsigned long) np; @@ -728,30 +801,61 @@ static int n2rng_remove(struct platform_device *op) return 0; } +static struct n2rng_template n2_template = { + .id = N2_n2_rng, + .multi_capable = 0, + .chip_version = 1, +}; + +static struct n2rng_template vf_template = { + .id = N2_vf_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template kt_template = { + .id = N2_kt_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template m4_template = { + .id = N2_m4_rng, + .multi_capable = 1, + .chip_version = 2, +}; + +static struct n2rng_template m7_template = { + .id = N2_m7_rng, + .multi_capable = 1, + .chip_version = 2, +}; + static const struct of_device_id n2rng_match[] = { { .name = "random-number-generator", .compatible = "SUNW,n2-rng", + .data = &n2_template, }, { .name = "random-number-generator", .compatible = "SUNW,vf-rng", - .data = (void *) 1, + .data = &vf_template, }, { .name = "random-number-generator", .compatible = "SUNW,kt-rng", - .data = (void *) 1, + .data = &kt_template, }, { .name = "random-number-generator", .compatible = "ORCL,m4-rng", - .data = (void *) 1, + .data = &m4_template, }, { .name = "random-number-generator", .compatible = "ORCL,m7-rng", - .data = (void *) 1, + .data = &m7_template, }, {}, }; diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index f244ac89087f..6bad6cc634e8 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -6,18 +6,34 @@ #ifndef _N2RNG_H #define _N2RNG_H -#define RNG_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ -#define RNG_CTL_WAIT_SHIFT 9 -#define RNG_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ -#define RNG_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ -#define RNG_CTL_VCO_SHIFT 6 -#define RNG_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ -#define RNG_CTL_ASEL_SHIFT 4 +/* ver1 devices - n2-rng, vf-rng, kt-rng */ +#define RNG_v1_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ +#define RNG_v1_CTL_WAIT_SHIFT 9 +#define RNG_v1_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ +#define RNG_v1_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ +#define RNG_v1_CTL_VCO_SHIFT 6 +#define RNG_v1_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ +#define RNG_v1_CTL_ASEL_SHIFT 4 +#define RNG_v1_CTL_ASEL_NOOUT 2 + +/* these are the same in v2 as in v1 */ #define RNG_CTL_LFSR 0x0000000000000008ULL /* Use LFSR or plain shift */ #define RNG_CTL_ES3 0x0000000000000004ULL /* Enable entropy source 3 */ #define RNG_CTL_ES2 0x0000000000000002ULL /* Enable entropy source 2 */ #define RNG_CTL_ES1 0x0000000000000001ULL /* Enable entropy source 1 */ +/* ver2 devices - m4-rng, m7-rng */ +#define RNG_v2_CTL_WAIT 0x0000000007fff800ULL /* Minimum wait time */ +#define RNG_v2_CTL_WAIT_SHIFT 12 +#define RNG_v2_CTL_BYPASS 0x0000000000000400ULL /* VCO voltage source */ +#define RNG_v2_CTL_VCO 0x0000000000000300ULL /* VCO rate control */ +#define RNG_v2_CTL_VCO_SHIFT 9 +#define RNG_v2_CTL_PERF 0x0000000000000180ULL /* Perf */ +#define RNG_v2_CTL_ASEL 0x0000000000000070ULL /* Analog MUX select */ +#define RNG_v2_CTL_ASEL_SHIFT 4 +#define RNG_v2_CTL_ASEL_NOOUT 7 + + #define HV_FAST_RNG_GET_DIAG_CTL 0x130 #define HV_FAST_RNG_CTL_READ 0x131 #define HV_FAST_RNG_CTL_WRITE 0x132 @@ -60,6 +76,20 @@ extern unsigned long sun4v_rng_data_read_diag_v2(unsigned long data_ra, extern unsigned long sun4v_rng_data_read(unsigned long data_ra, unsigned long *tick_delta); +enum n2rng_compat_id { + N2_n2_rng, + N2_vf_rng, + N2_kt_rng, + N2_m4_rng, + N2_m7_rng, +}; + +struct n2rng_template { + enum n2rng_compat_id id; + int multi_capable; + int chip_version; +}; + struct n2rng_unit { u64 control[HV_RNG_NUM_CONTROL]; }; @@ -74,6 +104,7 @@ struct n2rng { #define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */ #define N2RNG_FLAG_BUFFER_VALID 0x00000020 /* u32 buffer holds valid data */ + struct n2rng_template *data; int num_units; struct n2rng_unit *units; @@ -97,8 +128,10 @@ struct n2rng { u64 scratch_control[HV_RNG_NUM_CONTROL]; -#define SELFTEST_TICKS 38859 -#define SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v1_SELFTEST_TICKS 38859 +#define RNG_v1_SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v2_SELFTEST_TICKS 64 +#define RNG_v2_SELFTEST_VAL ((u64)0xffffffffffffffff) #define SELFTEST_POLY ((u64)0x231DCEE91262B8A3) #define SELFTEST_MATCH_GOAL 6 #define SELFTEST_LOOPS_MAX 40000 diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 5eb05dbf59b8..fc585f370549 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -768,5 +768,5 @@ fail: kfree(clks); iounmap(base); } -CLK_OF_DECLARE(stm32f42xx_rcc, "st,stm32f42xx-rcc", stm32f4_rcc_init); -CLK_OF_DECLARE(stm32f46xx_rcc, "st,stm32f469-rcc", stm32f4_rcc_init); +CLK_OF_DECLARE_DRIVER(stm32f42xx_rcc, "st,stm32f42xx-rcc", stm32f4_rcc_init); +CLK_OF_DECLARE_DRIVER(stm32f46xx_rcc, "st,stm32f469-rcc", stm32f4_rcc_init); diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 9375777776d9..b533f99550e1 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -37,12 +37,14 @@ * @smstpcr: module stop control register * @mstpsr: module stop status register (optional) * @lock: protects writes to SMSTPCR + * @width_8bit: registers are 8-bit, not 32-bit */ struct mstp_clock_group { struct clk_onecell_data data; void __iomem *smstpcr; void __iomem *mstpsr; spinlock_t lock; + bool width_8bit; }; /** @@ -59,6 +61,18 @@ struct mstp_clock { #define to_mstp_clock(_hw) container_of(_hw, struct mstp_clock, hw) +static inline u32 cpg_mstp_read(struct mstp_clock_group *group, + u32 __iomem *reg) +{ + return group->width_8bit ? readb(reg) : clk_readl(reg); +} + +static inline void cpg_mstp_write(struct mstp_clock_group *group, u32 val, + u32 __iomem *reg) +{ + group->width_8bit ? writeb(val, reg) : clk_writel(val, reg); +} + static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) { struct mstp_clock *clock = to_mstp_clock(hw); @@ -70,12 +84,12 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) spin_lock_irqsave(&group->lock, flags); - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); if (enable) value &= ~bitmask; else value |= bitmask; - clk_writel(value, group->smstpcr); + cpg_mstp_write(group, value, group->smstpcr); spin_unlock_irqrestore(&group->lock, flags); @@ -83,7 +97,7 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) return 0; for (i = 1000; i > 0; --i) { - if (!(clk_readl(group->mstpsr) & bitmask)) + if (!(cpg_mstp_read(group, group->mstpsr) & bitmask)) break; cpu_relax(); } @@ -114,9 +128,9 @@ static int cpg_mstp_clock_is_enabled(struct clk_hw *hw) u32 value; if (group->mstpsr) - value = clk_readl(group->mstpsr); + value = cpg_mstp_read(group, group->mstpsr); else - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); return !(value & BIT(clock->bit_index)); } @@ -188,6 +202,9 @@ static void __init cpg_mstp_clocks_init(struct device_node *np) return; } + if (of_device_is_compatible(np, "renesas,r7s72100-mstp-clocks")) + group->width_8bit = true; + for (i = 0; i < MSTP_MAX_CLOCKS; ++i) clks[i] = ERR_PTR(-ENOENT); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bc97b6a4b1cf..7fcaf26e8f81 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -26,6 +26,8 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "allwinner,sun8i-a83t", }, { .compatible = "allwinner,sun8i-h3", }, + { .compatible = "apm,xgene-shadowcat", }, + { .compatible = "arm,integrator-ap", }, { .compatible = "arm,integrator-cp", }, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6acbd4af632e..f91c25718d16 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -857,13 +857,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_hwp_set(const struct cpumask *cpumask) +static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { int min, hw_min, max, hw_max, cpu, range, adj_range; struct perf_limits *perf_limits = limits; u64 value, cap; - for_each_cpu(cpu, cpumask) { + for_each_cpu(cpu, policy->cpus) { int max_perf_pct, min_perf_pct; struct cpudata *cpu_data = all_cpu_data[cpu]; s16 epp; @@ -949,7 +949,7 @@ skip_epp: static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) { if (hwp_active) - intel_pstate_hwp_set(policy->cpus); + intel_pstate_hwp_set(policy); return 0; } @@ -968,19 +968,28 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) static int intel_pstate_resume(struct cpufreq_policy *policy) { + int ret; + if (!hwp_active) return 0; + mutex_lock(&intel_pstate_limits_lock); + all_cpu_data[policy->cpu]->epp_policy = 0; - return intel_pstate_hwp_set_policy(policy); + ret = intel_pstate_hwp_set_policy(policy); + + mutex_unlock(&intel_pstate_limits_lock); + + return ret; } -static void intel_pstate_hwp_set_online_cpus(void) +static void intel_pstate_update_policies(void) { - get_online_cpus(); - intel_pstate_hwp_set(cpu_online_mask); - put_online_cpus(); + int cpu; + + for_each_possible_cpu(cpu) + cpufreq_update_policy(cpu); } /************************** debugfs begin ************************/ @@ -1018,10 +1027,6 @@ static void __init intel_pstate_debug_expose_params(void) struct dentry *debugfs_parent; int i = 0; - if (hwp_active || - pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) - return; - debugfs_parent = debugfs_create_dir("pstate_snb", NULL); if (IS_ERR_OR_NULL(debugfs_parent)) return; @@ -1105,11 +1110,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); - if (hwp_active) - intel_pstate_hwp_set_online_cpus(); - mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + return count; } @@ -1134,11 +1138,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); - if (hwp_active) - intel_pstate_hwp_set_online_cpus(); - mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + return count; } @@ -1163,11 +1166,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - if (hwp_active) - intel_pstate_hwp_set_online_cpus(); - mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + return count; } @@ -2153,8 +2155,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) if (per_cpu_limits) perf_limits = cpu->perf_limits; + mutex_lock(&intel_pstate_limits_lock); + intel_pstate_update_perf_limits(policy, perf_limits); + mutex_unlock(&intel_pstate_limits_lock); + return 0; } @@ -2487,7 +2493,10 @@ hwp_cpu_matched: if (rc) goto out; - intel_pstate_debug_expose_params(); + if (intel_pstate_driver == &intel_pstate && !hwp_active && + pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + intel_pstate_debug_expose_params(); + intel_pstate_sysfs_expose_params(); if (hwp_active) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 79564785ae30..bf7da55cffe6 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -339,7 +339,7 @@ config CRYPTO_DEV_OMAP_DES config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" - depends on ARCH_PICOXCELL && HAVE_CLK + depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_AUTHENC @@ -553,6 +553,22 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_MEDIATEK + tristate "MediaTek's EIP97 Cryptographic Engine driver" + depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_AEAD + select CRYPTO_BLKCIPHER + select CRYPTO_CTR + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_HMAC + help + This driver allows you to utilize the hardware crypto accelerator + EIP97 which can be found on the MT7623 MT2701, MT8521p, etc .... + Select this if you want to use it for AES/SHA1/SHA2 algorithms. + source "drivers/crypto/chelsio/Kconfig" source "drivers/crypto/virtio/Kconfig" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index bc53cb833a06..8891ccc5844c 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o @@ -10,7 +11,9 @@ obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o +obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ @@ -21,15 +24,13 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ +obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ +obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ +obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o -obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o +obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ -obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ -obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ -obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ -obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ -obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ -obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 10db7df366c8..a118b9bed669 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -203,7 +203,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -233,7 +233,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = dma_count; crc->sg_cpu[i].x_modify = dma_mod; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -257,7 +257,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); diff --git a/drivers/crypto/bfin_crc.h b/drivers/crypto/bfin_crc.h index 75cef4dc85a1..786ef746d109 100644 --- a/drivers/crypto/bfin_crc.h +++ b/drivers/crypto/bfin_crc.h @@ -55,7 +55,6 @@ struct crc_info { #include <linux/types.h> #include <linux/spinlock.h> -#include <linux/miscdevice.h> struct crc_register { u32 control; diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index a2e77b87485b..9b07f3d88feb 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -226,7 +226,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) struct dma_async_tx_descriptor *desc; struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); - ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); if (ctx->dma_ct == 0) { dev_err(hdev->dev, "Invalid DMA sg\n"); hdev->err = -EINVAL; @@ -241,7 +241,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) if (!desc) { dev_err(hdev->dev, "Null DMA descriptor\n"); hdev->err = -EINVAL; - dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); return -EINVAL; } desc->callback = img_hash_dma_callback; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index a768da7138a1..b7872f62f674 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -273,7 +273,8 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_SRC_IN_SRAM BIT(30) #define CESA_TDMA_END_OF_REQ BIT(29) #define CESA_TDMA_BREAK_CHAIN BIT(28) -#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 317cf029c0cf..77c0fb936f47 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -280,13 +280,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) sreq->offset = 0; } +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. */ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + static void mv_cesa_ahash_step(struct crypto_async_request *req) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->base); + mv_cesa_ahash_dma_step(ahashreq); else mv_cesa_ahash_std_step(ahashreq); } @@ -584,12 +603,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; + bool set_state = false; int ret; u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); @@ -683,6 +706,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (type != CESA_TDMA_RESULT) basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. + */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4416b88eca70..c76375ff376d 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, last->next = dreq->chain.first; engine->chain.last = dreq->chain.last; - if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) last->next_dma = dreq->chain.first->cur_dma; } } diff --git a/drivers/crypto/mediatek/Makefile b/drivers/crypto/mediatek/Makefile new file mode 100644 index 000000000000..187be79c7f3e --- /dev/null +++ b/drivers/crypto/mediatek/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mtk-crypto.o +mtk-crypto-objs:= mtk-platform.o mtk-aes.o mtk-sha.o diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c new file mode 100644 index 000000000000..3a47cdb8f0c8 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -0,0 +1,1299 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 AES acceleration. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-aes.c drivers. + */ + +#include <crypto/aes.h> +#include "mtk-platform.h" + +#define AES_QUEUE_SIZE 512 +#define AES_BUF_ORDER 2 +#define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \ + & ~(AES_BLOCK_SIZE - 1)) + +/* AES command token size */ +#define AES_CT_SIZE_ECB 2 +#define AES_CT_SIZE_CBC 3 +#define AES_CT_SIZE_CTR 3 +#define AES_CT_SIZE_GCM_OUT 5 +#define AES_CT_SIZE_GCM_IN 6 +#define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) + +/* AES-CBC/ECB/CTR command token */ +#define AES_CMD0 cpu_to_le32(0x05000000) +#define AES_CMD1 cpu_to_le32(0x2d060000) +#define AES_CMD2 cpu_to_le32(0xe4a63806) +/* AES-GCM command token */ +#define AES_GCM_CMD0 cpu_to_le32(0x0b000000) +#define AES_GCM_CMD1 cpu_to_le32(0xa0800000) +#define AES_GCM_CMD2 cpu_to_le32(0x25000010) +#define AES_GCM_CMD3 cpu_to_le32(0x0f020000) +#define AES_GCM_CMD4 cpu_to_le32(0x21e60000) +#define AES_GCM_CMD5 cpu_to_le32(0x40e60000) +#define AES_GCM_CMD6 cpu_to_le32(0xd0070000) + +/* AES transform information word 0 fields */ +#define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0) +#define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0) +#define AES_TFM_GCM_OUT cpu_to_le32(0x6 << 0) +#define AES_TFM_GCM_IN cpu_to_le32(0xf << 0) +#define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define AES_TFM_128BITS cpu_to_le32(0xb << 16) +#define AES_TFM_192BITS cpu_to_le32(0xd << 16) +#define AES_TFM_256BITS cpu_to_le32(0xf << 16) +/* AES transform information word 1 fields */ +#define AES_TFM_ECB cpu_to_le32(0x0 << 0) +#define AES_TFM_CBC cpu_to_le32(0x1 << 0) +#define AES_TFM_CTR_INIT cpu_to_le32(0x2 << 0) /* init counter to 1 */ +#define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */ +#define AES_TFM_3IV cpu_to_le32(0x7 << 5) /* using IV 0-2 */ +#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */ +#define AES_TFM_IV_CTR_MODE cpu_to_le32(0x1 << 10) +#define AES_TFM_ENC_HASH cpu_to_le32(0x1 << 17) +#define AES_TFM_GHASH_DIG cpu_to_le32(0x2 << 21) +#define AES_TFM_GHASH cpu_to_le32(0x4 << 23) + +/* AES flags */ +#define AES_FLAGS_ECB BIT(0) +#define AES_FLAGS_CBC BIT(1) +#define AES_FLAGS_CTR BIT(2) +#define AES_FLAGS_GCM BIT(3) +#define AES_FLAGS_ENCRYPT BIT(4) +#define AES_FLAGS_BUSY BIT(5) + +/** + * Command token(CT) is a set of hardware instructions that + * are used to control engine's processing flow of AES. + * + * Transform information(TFM) is used to define AES state and + * contains all keys and initial vectors. + * + * The engine requires CT and TFM to do: + * - Commands decoding and control of the engine's data path. + * - Coordinating hardware data fetch and store operations. + * - Result token construction and output. + * + * Memory map of GCM's TFM: + * /-----------\ + * | AES KEY | 128/196/256 bits + * |-----------| + * | HASH KEY | a string 128 zero bits encrypted using the block cipher + * |-----------| + * | IVs | 4 * 4 bytes + * \-----------/ + */ +struct mtk_aes_ct { + __le32 cmd[AES_CT_SIZE_GCM_IN]; +}; + +struct mtk_aes_tfm { + __le32 ctrl[2]; + __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)]; +}; + +struct mtk_aes_reqctx { + u64 mode; +}; + +struct mtk_aes_base_ctx { + struct mtk_cryp *cryp; + u32 keylen; + mtk_aes_fn start; + + struct mtk_aes_ct ct; + dma_addr_t ct_dma; + struct mtk_aes_tfm tfm; + dma_addr_t tfm_dma; + + __le32 ct_hdr; + u32 ct_size; +}; + +struct mtk_aes_ctx { + struct mtk_aes_base_ctx base; +}; + +struct mtk_aes_ctr_ctx { + struct mtk_aes_base_ctx base; + + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + size_t offset; + struct scatterlist src[2]; + struct scatterlist dst[2]; +}; + +struct mtk_aes_gcm_ctx { + struct mtk_aes_base_ctx base; + + u32 authsize; + size_t textlen; + + struct crypto_skcipher *ctr; +}; + +struct mtk_aes_gcm_setkey_result { + int err; + struct completion completion; +}; + +struct mtk_aes_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_aes_drv mtk_aes = { + .dev_list = LIST_HEAD_INIT(mtk_aes.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_aes.lock), +}; + +static inline u32 mtk_aes_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_aes_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_base_ctx *ctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_aes.lock); + if (!ctx->cryp) { + list_for_each_entry(tmp, &mtk_aes.dev_list, aes_list) { + cryp = tmp; + break; + } + ctx->cryp = cryp; + } else { + cryp = ctx->cryp; + } + spin_unlock_bh(&mtk_aes.lock); + + return cryp; +} + +static inline size_t mtk_aes_padlen(size_t len) +{ + len &= AES_BLOCK_SIZE - 1; + return len ? AES_BLOCK_SIZE - len : 0; +} + +static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len, + struct mtk_aes_dma *dma) +{ + int nents; + + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + if (len <= sg->length) { + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + dma->nents = nents + 1; + dma->remainder = sg->length - len; + sg->length = len; + return true; + } + + if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) + return false; + + len -= sg->length; + } + + return false; +} + +static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes, + const struct mtk_aes_reqctx *rctx) +{ + /* Clear all but persistent flags and set request flags. */ + aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode; +} + +static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) +{ + struct scatterlist *sg = dma->sg; + int nents = dma->nents; + + if (!dma->remainder) + return; + + while (--nents > 0 && sg) + sg = sg_next(sg); + + if (!sg) + return; + + sg->length += dma->remainder; +} + +/* + * Write descriptors for processing. This will configure the engine, load + * the transform information and then start the packet processing. + */ +static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_ring *ring = cryp->ring[aes->id]; + struct mtk_desc *cmd = NULL, *res = NULL; + struct scatterlist *ssg = aes->src.sg, *dsg = aes->dst.sg; + u32 slen = aes->src.sg_len, dlen = aes->dst.sg_len; + int nents; + + /* Write command descriptors */ + for (nents = 0; nents < slen; ++nents, ssg = sg_next(ssg)) { + cmd = ring->cmd_base + ring->cmd_pos; + cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); + cmd->buf = cpu_to_le32(sg_dma_address(ssg)); + + if (nents == 0) { + cmd->hdr |= MTK_DESC_FIRST | + MTK_DESC_CT_LEN(aes->ctx->ct_size); + cmd->ct = cpu_to_le32(aes->ctx->ct_dma); + cmd->ct_hdr = aes->ctx->ct_hdr; + cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); + } + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + } + cmd->hdr |= MTK_DESC_LAST; + + /* Prepare result descriptors */ + for (nents = 0; nents < dlen; ++nents, dsg = sg_next(dsg)) { + res = ring->res_base + ring->res_pos; + res->hdr = MTK_DESC_BUF_LEN(dsg->length); + res->buf = cpu_to_le32(sg_dma_address(dsg)); + + if (nents == 0) + res->hdr |= MTK_DESC_FIRST; + + if (++ring->res_pos == MTK_DESC_NUM) + ring->res_pos = 0; + } + res->hdr |= MTK_DESC_LAST; + + /* Prepare enough space for authenticated tag */ + if (aes->flags & AES_FLAGS_GCM) + res->hdr += AES_BLOCK_SIZE; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(dlen)); + mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(slen)); + + return -EINPROGRESS; +} + +static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); + + if (aes->src.sg == aes->dst.sg) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_BIDIRECTIONAL); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } else { + dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents, + DMA_FROM_DEVICE); + + if (aes->dst.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->dst); + + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } + + if (aes->dst.sg == &aes->aligned_sg) + sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst), + aes->buf, aes->total); +} + +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) + return -EINVAL; + + ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) + goto tfm_map_err; + + if (aes->src.sg == aes->dst.sg) { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, + DMA_BIDIRECTIONAL); + aes->dst.sg_len = aes->src.sg_len; + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + } else { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + + aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + if (unlikely(!aes->dst.sg_len)) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + goto sg_map_err; + } + } + + return mtk_aes_xmit(cryp, aes); + +sg_map_err: + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); +tfm_map_err: + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + + return -EINVAL; +} + +/* Initialize transform information of CBC/ECB/CTR mode */ +static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + size_t len) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); + ctx->ct.cmd[1] = AES_CMD1; + + if (aes->flags & AES_FLAGS_ENCRYPT) + ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; + else + ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + if (aes->flags & AES_FLAGS_CBC) { + const u32 *iv = (const u32 *)req->info; + u32 *iv_state = ctx->tfm.state + ctx->keylen; + int i; + + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; + + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(iv[i]); + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CBC; + } else if (aes->flags & AES_FLAGS_ECB) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); + ctx->tfm.ctrl[1] = AES_TFM_ECB; + + ctx->ct_size = AES_CT_SIZE_ECB; + } else if (aes->flags & AES_FLAGS_CTR) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_LOAD | AES_TFM_FULL_IV; + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CTR; + } +} + +static int mtk_aes_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + size_t padlen = 0; + bool src_aligned, dst_aligned; + + aes->total = len; + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + padlen = mtk_aes_padlen(len); + + if (len + padlen > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); + } + + mtk_aes_info_init(cryp, aes, len + padlen); + + return mtk_aes_map(cryp, aes); +} + +static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, + struct crypto_async_request *new_areq) +{ + struct mtk_aes_rec *aes = cryp->aes[id]; + struct crypto_async_request *areq, *backlog; + struct mtk_aes_base_ctx *ctx; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&aes->lock, flags); + if (new_areq) + ret = crypto_enqueue_request(&aes->queue, new_areq); + if (aes->flags & AES_FLAGS_BUSY) { + spin_unlock_irqrestore(&aes->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&aes->queue); + areq = crypto_dequeue_request(&aes->queue); + if (areq) + aes->flags |= AES_FLAGS_BUSY; + spin_unlock_irqrestore(&aes->lock, flags); + + if (!areq) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + ctx = crypto_tfm_ctx(areq->tfm); + + aes->areq = areq; + aes->ctx = ctx; + + return ctx->start(cryp, aes); +} + +static int mtk_aes_complete(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + aes->flags &= ~AES_FLAGS_BUSY; + aes->areq->complete(aes->areq, 0); + + /* Handle new request */ + return mtk_aes_handle_queue(cryp, aes->id, NULL); +} + +static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + aes->resume = mtk_aes_complete; + + return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes); +} + +static inline struct mtk_aes_ctr_ctx * +mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_ctr_ctx, base); +} + +static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct scatterlist *src, *dst; + int i; + u32 start, end, ctr, blocks, *iv_state; + size_t datalen; + bool fragmented = false; + + /* Check for transfer completion. */ + cctx->offset += aes->total; + if (cctx->offset >= req->nbytes) + return mtk_aes_complete(cryp, aes); + + /* Compute data length. */ + datalen = req->nbytes - cctx->offset; + blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctr = be32_to_cpu(cctx->iv[3]); + + /* Check 32bit counter overflow. */ + start = ctr; + end = start + blocks - 1; + if (end < start) { + ctr |= 0xffffffff; + datalen = AES_BLOCK_SIZE * -start; + fragmented = true; + } + + /* Jump to offset. */ + src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset); + dst = ((req->src == req->dst) ? src : + scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset)); + + /* Write IVs into transform state buffer. */ + iv_state = ctx->tfm.state + ctx->keylen; + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(cctx->iv[i]); + + if (unlikely(fragmented)) { + /* + * Increment the counter manually to cope with the hardware + * counter overflow. + */ + cctx->iv[3] = cpu_to_be32(ctr); + crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE); + } + aes->resume = mtk_aes_ctr_transfer; + + return mtk_aes_dma(cryp, aes, src, dst, datalen); +} + +static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + + memcpy(cctx->iv, req->info, AES_BLOCK_SIZE); + cctx->offset = 0; + aes->total = 0; + + return mtk_aes_ctr_transfer(cryp, aes); +} + +/* Check and set the AES key to transform state buffer */ +static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, + const u8 *key, u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm); + const u32 *aes_key = (const u32 *)key; + u32 *key_state = ctx->tfm.state; + int i; + + if (keylen != AES_KEYSIZE_128 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_256) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + key_state[i] = cpu_to_le32(aes_key[i]); + + return 0; +} + +static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_reqctx *rctx; + + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx = ablkcipher_request_ctx(req); + rctx->mode = mode; + + return mtk_aes_handle_queue(ctx->cryp, !(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static int mtk_aes_ecb_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); +} + +static int mtk_aes_ecb_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ECB); +} + +static int mtk_aes_cbc_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); +} + +static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CBC); +} + +static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); +} + +static int mtk_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CTR); +} + +static int mtk_aes_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_start; + return 0; +} + +static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_ctr_start; + return 0; +} + +static struct crypto_alg aes_algs[] = { +{ + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_cbc_encrypt, + .decrypt = mtk_aes_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, + } +}, +{ + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ecb_encrypt, + .decrypt = mtk_aes_ecb_decrypt, + } +}, +{ + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_ctr_cra_init, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_ctr_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ctr_encrypt, + .decrypt = mtk_aes_ctr_decrypt, + } +}, +}; + +static inline struct mtk_aes_gcm_ctx * +mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_gcm_ctx, base); +} + +/* Initialize transform information of GCM mode */ +static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes, + size_t len) +{ + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + const u32 *iv = (const u32 *)req->iv; + u32 *iv_state = ctx->tfm.state + ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE); + u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req)); + int i; + + ctx->ct_hdr = AES_CT_CTRL_HDR | len; + + ctx->ct.cmd[0] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[1] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[2] = AES_GCM_CMD2; + ctx->ct.cmd[3] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + ctx->ct.cmd[4] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_OUT; + ctx->tfm.ctrl[0] = AES_TFM_GCM_OUT; + } else { + ctx->ct.cmd[4] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize); + ctx->ct.cmd[5] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_IN; + ctx->tfm.ctrl[0] = AES_TFM_GCM_IN; + } + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + ctx->tfm.ctrl[0] |= AES_TFM_GHASH_DIG | AES_TFM_GHASH | + AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS( + AES_BLOCK_SIZE + ivsize)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE | + AES_TFM_3IV | AES_TFM_ENC_HASH; + + for (i = 0; i < SIZE_IN_WORDS(ivsize); i++) + iv_state[i] = cpu_to_le32(iv[i]); +} + +static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + bool src_aligned, dst_aligned; + + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + if (aes->total > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, aes->total); + } + + mtk_aes_gcm_info_init(cryp, aes, len); + + return mtk_aes_map(cryp, aes); +} + +/* Todo: GMAC */ +static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx); + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + u32 len = req->assoclen + req->cryptlen; + + mtk_aes_set_mode(aes, rctx); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + u32 tag[4]; + /* Compute total process length. */ + aes->total = len + gctx->authsize; + /* Compute text length. */ + gctx->textlen = req->cryptlen; + /* Hardware will append authenticated tag to output buffer */ + scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1); + } else { + aes->total = len; + gctx->textlen = req->cryptlen - gctx->authsize; + } + aes->resume = mtk_aes_complete; + + return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len); +} + +static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + + rctx->mode = AES_FLAGS_GCM | mode; + + return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static void mtk_gcm_setkey_done(struct crypto_async_request *req, int err) +{ + struct mtk_aes_gcm_setkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + + result->err = err; + complete(&result->completion); +} + +/* + * Because of the hardware limitation, we need to pre-calculate key(H) + * for the GHASH operation. The result of the encryption operation + * need to be stored in the transform state buffer. + */ +static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, + u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + struct crypto_skcipher *ctr = gctx->ctr; + struct { + u32 hash[4]; + u8 iv[8]; + + struct mtk_aes_gcm_setkey_result result; + + struct scatterlist sg[1]; + struct skcipher_request req; + } *data; + const u32 *aes_key; + u32 *key_state, *hash_state; + int err, i; + + if (keylen != AES_KEYSIZE_256 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_128) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + key_state = ctx->tfm.state; + aes_key = (u32 *)key; + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + ctx->tfm.state[i] = cpu_to_le32(aes_key[i]); + + /* Same as crypto_gcm_setkey() from crypto/gcm.c */ + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + init_completion(&data->result.completion); + sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + mtk_gcm_setkey_done, &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + AES_BLOCK_SIZE, data->iv); + + err = crypto_skcipher_encrypt(&data->req); + if (err == -EINPROGRESS || err == -EBUSY) { + err = wait_for_completion_interruptible( + &data->result.completion); + if (!err) + err = data->result.err; + } + if (err) + goto out; + + hash_state = key_state + ctx->keylen; + + for (i = 0; i < 4; i++) + hash_state[i] = cpu_to_be32(data->hash[i]); +out: + kzfree(data); + return err; +} + +static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead, + u32 authsize) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + + /* Same as crypto_gcm_authsize() from crypto/gcm.c */ + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + + gctx->authsize = authsize; + return 0; +} + +static int mtk_aes_gcm_encrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT); +} + +static int mtk_aes_gcm_decrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, 0); +} + +static int mtk_aes_gcm_init(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->ctr)) { + pr_err("Error allocating ctr(aes)\n"); + return PTR_ERR(ctx->ctr); + } + + crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx)); + ctx->base.start = mtk_aes_gcm_start; + return 0; +} + +static void mtk_aes_gcm_exit(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + + crypto_free_skcipher(ctx->ctr); +} + +static struct aead_alg aes_gcm_alg = { + .setkey = mtk_aes_gcm_setkey, + .setauthsize = mtk_aes_gcm_setauthsize, + .encrypt = mtk_aes_gcm_encrypt, + .decrypt = mtk_aes_gcm_decrypt, + .init = mtk_aes_gcm_init, + .exit = mtk_aes_gcm_exit, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_gcm_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}; + +static void mtk_aes_enc_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[0]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static void mtk_aes_dec_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[1]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static irqreturn_t mtk_aes_enc_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[0]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING0)); + + mtk_aes_write(cryp, RDR_STAT(RING0), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING0), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING0), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_aes_dec_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[1]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING1)); + + mtk_aes_write(cryp, RDR_STAT(RING1), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING1), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING1), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of creating encryption and decryption records is + * to process outbound/inbound data in parallel, it can improve + * performance in most use cases, such as IPSec VPN, especially + * under heavy network traffic. + */ +static int mtk_aes_record_init(struct mtk_cryp *cryp) +{ + struct mtk_aes_rec **aes = cryp->aes; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + aes[i] = kzalloc(sizeof(**aes), GFP_KERNEL); + if (!aes[i]) + goto err_cleanup; + + aes[i]->buf = (void *)__get_free_pages(GFP_KERNEL, + AES_BUF_ORDER); + if (!aes[i]->buf) + goto err_cleanup; + + aes[i]->id = i; + + spin_lock_init(&aes[i]->lock); + crypto_init_queue(&aes[i]->queue, AES_QUEUE_SIZE); + } + + tasklet_init(&aes[0]->task, mtk_aes_enc_task, (unsigned long)cryp); + tasklet_init(&aes[1]->task, mtk_aes_dec_task, (unsigned long)cryp); + + return 0; + +err_cleanup: + for (; i--; ) { + free_page((unsigned long)aes[i]->buf); + kfree(aes[i]); + } + + return err; +} + +static void mtk_aes_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->aes[i]->task); + free_page((unsigned long)cryp->aes[i]->buf); + kfree(cryp->aes[i]); + } +} + +static void mtk_aes_unregister_algs(void) +{ + int i; + + crypto_unregister_aead(&aes_gcm_alg); + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) + crypto_unregister_alg(&aes_algs[i]); +} + +static int mtk_aes_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + err = crypto_register_alg(&aes_algs[i]); + if (err) + goto err_aes_algs; + } + + err = crypto_register_aead(&aes_gcm_alg); + if (err) + goto err_aes_algs; + + return 0; + +err_aes_algs: + for (; i--; ) + crypto_unregister_alg(&aes_algs[i]); + + return err; +} + +int mtk_cipher_alg_register(struct mtk_cryp *cryp) +{ + int ret; + + INIT_LIST_HEAD(&cryp->aes_list); + + /* Initialize two cipher records */ + ret = mtk_aes_record_init(cryp); + if (ret) + goto err_record; + + /* Ring0 is use by encryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING0], mtk_aes_enc_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES encryption irq.\n"); + goto err_res; + } + + /* Ring1 is use by decryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING1], mtk_aes_dec_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES decryption irq.\n"); + goto err_res; + } + + /* Enable ring0 and ring1 interrupt */ + mtk_aes_write(cryp, AIC_ENABLE_SET(RING0), MTK_IRQ_RDR0); + mtk_aes_write(cryp, AIC_ENABLE_SET(RING1), MTK_IRQ_RDR1); + + spin_lock(&mtk_aes.lock); + list_add_tail(&cryp->aes_list, &mtk_aes.dev_list); + spin_unlock(&mtk_aes.lock); + + ret = mtk_aes_register_algs(); + if (ret) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); +err_res: + mtk_aes_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-aes initialization failed.\n"); + return ret; +} + +void mtk_cipher_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); + + mtk_aes_unregister_algs(); + mtk_aes_record_free(cryp); +} diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c new file mode 100644 index 000000000000..a9c713d4c733 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.c @@ -0,0 +1,604 @@ +/* + * Driver for EIP97 cryptographic accelerator. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include "mtk-platform.h" + +#define MTK_BURST_SIZE_MSK GENMASK(7, 4) +#define MTK_BURST_SIZE(x) ((x) << 4) +#define MTK_DESC_SIZE(x) ((x) << 0) +#define MTK_DESC_OFFSET(x) ((x) << 16) +#define MTK_DESC_FETCH_SIZE(x) ((x) << 0) +#define MTK_DESC_FETCH_THRESH(x) ((x) << 16) +#define MTK_DESC_OVL_IRQ_EN BIT(25) +#define MTK_DESC_ATP_PRESENT BIT(30) + +#define MTK_DFSE_IDLE GENMASK(3, 0) +#define MTK_DFSE_THR_CTRL_EN BIT(30) +#define MTK_DFSE_THR_CTRL_RESET BIT(31) +#define MTK_DFSE_RING_ID(x) (((x) >> 12) & GENMASK(3, 0)) +#define MTK_DFSE_MIN_DATA(x) ((x) << 0) +#define MTK_DFSE_MAX_DATA(x) ((x) << 8) +#define MTK_DFE_MIN_CTRL(x) ((x) << 16) +#define MTK_DFE_MAX_CTRL(x) ((x) << 24) + +#define MTK_IN_BUF_MIN_THRESH(x) ((x) << 8) +#define MTK_IN_BUF_MAX_THRESH(x) ((x) << 12) +#define MTK_OUT_BUF_MIN_THRESH(x) ((x) << 0) +#define MTK_OUT_BUF_MAX_THRESH(x) ((x) << 4) +#define MTK_IN_TBUF_SIZE(x) (((x) >> 4) & GENMASK(3, 0)) +#define MTK_IN_DBUF_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_OUT_DBUF_SIZE(x) (((x) >> 16) & GENMASK(3, 0)) +#define MTK_CMD_FIFO_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_RES_FIFO_SIZE(x) (((x) >> 12) & GENMASK(3, 0)) + +#define MTK_PE_TK_LOC_AVL BIT(2) +#define MTK_PE_PROC_HELD BIT(14) +#define MTK_PE_TK_TIMEOUT_EN BIT(22) +#define MTK_PE_INPUT_DMA_ERR BIT(0) +#define MTK_PE_OUTPUT_DMA_ERR BIT(1) +#define MTK_PE_PKT_PORC_ERR BIT(2) +#define MTK_PE_PKT_TIMEOUT BIT(3) +#define MTK_PE_FATAL_ERR BIT(14) +#define MTK_PE_INPUT_DMA_ERR_EN BIT(16) +#define MTK_PE_OUTPUT_DMA_ERR_EN BIT(17) +#define MTK_PE_PKT_PORC_ERR_EN BIT(18) +#define MTK_PE_PKT_TIMEOUT_EN BIT(19) +#define MTK_PE_FATAL_ERR_EN BIT(30) +#define MTK_PE_INT_OUT_EN BIT(31) + +#define MTK_HIA_SIGNATURE ((u16)0x35ca) +#define MTK_HIA_DATA_WIDTH(x) (((x) >> 25) & GENMASK(1, 0)) +#define MTK_HIA_DMA_LENGTH(x) (((x) >> 20) & GENMASK(4, 0)) +#define MTK_CDR_STAT_CLR GENMASK(4, 0) +#define MTK_RDR_STAT_CLR GENMASK(7, 0) + +#define MTK_AIC_INT_MSK GENMASK(5, 0) +#define MTK_AIC_VER_MSK (GENMASK(15, 0) | GENMASK(27, 20)) +#define MTK_AIC_VER11 0x011036c9 +#define MTK_AIC_VER12 0x012036c9 +#define MTK_AIC_G_CLR GENMASK(30, 20) + +/** + * EIP97 is an integrated security subsystem to accelerate cryptographic + * functions and protocols to offload the host processor. + * Some important hardware modules are briefly introduced below: + * + * Host Interface Adapter(HIA) - the main interface between the host + * system and the hardware subsystem. It is responsible for attaching + * processing engine to the specific host bus interface and provides a + * standardized software view for off loading tasks to the engine. + * + * Command Descriptor Ring Manager(CDR Manager) - keeps track of how many + * CD the host has prepared in the CDR. It monitors the fill level of its + * CD-FIFO and if there's sufficient space for the next block of descriptors, + * then it fires off a DMA request to fetch a block of CDs. + * + * Data fetch engine(DFE) - It is responsible for parsing the CD and + * setting up the required control and packet data DMA transfers from + * system memory to the processing engine. + * + * Result Descriptor Ring Manager(RDR Manager) - same as CDR Manager, + * but target is result descriptors, Moreover, it also handles the RD + * updates under control of the DSE. For each packet data segment + * processed, the DSE triggers the RDR Manager to write the updated RD. + * If triggered to update, the RDR Manager sets up a DMA operation to + * copy the RD from the DSE to the correct location in the RDR. + * + * Data Store Engine(DSE) - It is responsible for parsing the prepared RD + * and setting up the required control and packet data DMA transfers from + * the processing engine to system memory. + * + * Advanced Interrupt Controllers(AICs) - receive interrupt request signals + * from various sources and combine them into one interrupt output. + * The AICs are used by: + * - One for the HIA global and processing engine interrupts. + * - The others for the descriptor ring interrupts. + */ + +/* Cryptographic engine capabilities */ +struct mtk_sys_cap { + /* host interface adapter */ + u32 hia_ver; + u32 hia_opt; + /* packet engine */ + u32 pkt_eng_opt; + /* global hardware */ + u32 hw_opt; +}; + +static void mtk_desc_ring_link(struct mtk_cryp *cryp, u32 mask) +{ + /* Assign rings to DFE/DSE thread and enable it */ + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DFE_THR_CTRL); + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DSE_THR_CTRL); +} + +static void mtk_dfe_dse_buf_setup(struct mtk_cryp *cryp, + struct mtk_sys_cap *cap) +{ + u32 width = MTK_HIA_DATA_WIDTH(cap->hia_opt) + 2; + u32 len = MTK_HIA_DMA_LENGTH(cap->hia_opt) - 1; + u32 ipbuf = min((u32)MTK_IN_DBUF_SIZE(cap->hw_opt) + width, len); + u32 opbuf = min((u32)MTK_OUT_DBUF_SIZE(cap->hw_opt) + width, len); + u32 itbuf = min((u32)MTK_IN_TBUF_SIZE(cap->hw_opt) + width, len); + + writel(MTK_DFSE_MIN_DATA(ipbuf - 1) | + MTK_DFSE_MAX_DATA(ipbuf) | + MTK_DFE_MIN_CTRL(itbuf - 1) | + MTK_DFE_MAX_CTRL(itbuf), + cryp->base + DFE_CFG); + + writel(MTK_DFSE_MIN_DATA(opbuf - 1) | + MTK_DFSE_MAX_DATA(opbuf), + cryp->base + DSE_CFG); + + writel(MTK_IN_BUF_MIN_THRESH(ipbuf - 1) | + MTK_IN_BUF_MAX_THRESH(ipbuf), + cryp->base + PE_IN_DBUF_THRESH); + + writel(MTK_IN_BUF_MIN_THRESH(itbuf - 1) | + MTK_IN_BUF_MAX_THRESH(itbuf), + cryp->base + PE_IN_TBUF_THRESH); + + writel(MTK_OUT_BUF_MIN_THRESH(opbuf - 1) | + MTK_OUT_BUF_MAX_THRESH(opbuf), + cryp->base + PE_OUT_DBUF_THRESH); + + writel(0, cryp->base + PE_OUT_TBUF_THRESH); + writel(0, cryp->base + PE_OUT_BUF_CTRL); +} + +static int mtk_dfe_dse_state_check(struct mtk_cryp *cryp) +{ + int ret = -EINVAL; + u32 val; + + /* Check for completion of all DMA transfers */ + val = readl(cryp->base + DFE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) { + val = readl(cryp->base + DSE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) + ret = 0; + } + + if (!ret) { + /* Take DFE/DSE thread out of reset */ + writel(0, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DSE_THR_CTRL); + } else { + return -EBUSY; + } + + return 0; +} + +static int mtk_dfe_dse_reset(struct mtk_cryp *cryp) +{ + int err; + + /* Reset DSE/DFE and correct system priorities for all rings. */ + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DFE_PRIO_0); + writel(0, cryp->base + DFE_PRIO_1); + writel(0, cryp->base + DFE_PRIO_2); + writel(0, cryp->base + DFE_PRIO_3); + + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DSE_THR_CTRL); + writel(0, cryp->base + DSE_PRIO_0); + writel(0, cryp->base + DSE_PRIO_1); + writel(0, cryp->base + DSE_PRIO_2); + writel(0, cryp->base + DSE_PRIO_3); + + err = mtk_dfe_dse_state_check(cryp); + if (err) + return err; + + return 0; +} + +static void mtk_cmd_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + /* Full descriptor that fits FIFO minus one */ + u32 count = + ((1 << MTK_CMD_FIFO_SIZE(cap->hia_opt)) / MTK_DESC_SZ) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + CDR_CFG(i)); + + /* Clear CDR count */ + writel(MTK_CNT_RST, cryp->base + CDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + CDR_PROC_COUNT(i)); + + writel(0, cryp->base + CDR_PREP_PNTR(i)); + writel(0, cryp->base + CDR_PROC_PNTR(i)); + writel(0, cryp->base + CDR_DMA_CFG(i)); + + /* Configure CDR host address space */ + writel(0, cryp->base + CDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->cmd_dma, cryp->base + CDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + CDR_RING_SIZE(i)); + + /* Clear and disable all CDR interrupts */ + writel(MTK_CDR_STAT_CLR, cryp->base + CDR_STAT(i)); + + /* + * Set command descriptor offset and enable additional + * token present in descriptor. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | + MTK_DESC_OFFSET(MTK_DESC_OFF) | + MTK_DESC_ATP_PRESENT, + cryp->base + CDR_DESC_SIZE(i)); + + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * MTK_DESC_SZ), + cryp->base + CDR_CFG(i)); +} + +static void mtk_res_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + u32 rndup = 2; + u32 count = ((1 << MTK_RES_FIFO_SIZE(cap->hia_opt)) / rndup) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + RDR_CFG(i)); + + /* Clear RDR count */ + writel(MTK_CNT_RST, cryp->base + RDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + RDR_PROC_COUNT(i)); + + writel(0, cryp->base + RDR_PREP_PNTR(i)); + writel(0, cryp->base + RDR_PROC_PNTR(i)); + writel(0, cryp->base + RDR_DMA_CFG(i)); + + /* Configure RDR host address space */ + writel(0, cryp->base + RDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->res_dma, cryp->base + RDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + RDR_RING_SIZE(i)); + writel(MTK_RDR_STAT_CLR, cryp->base + RDR_STAT(i)); + + /* + * RDR manager generates update interrupts on a per-completed-packet, + * and the rd_proc_thresh_irq interrupt is fired when proc_pkt_count + * for the RDR exceeds the number of packets. + */ + writel(MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE, + cryp->base + RDR_THRESH(i)); + + /* + * Configure a threshold and time-out value for the processed + * result descriptors (or complete packets) that are written to + * the RDR. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | MTK_DESC_OFFSET(MTK_DESC_OFF), + cryp->base + RDR_DESC_SIZE(i)); + + /* + * Configure HIA fetch size and fetch threshold that are used to + * fetch blocks of multiple descriptors. + */ + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * rndup) | + MTK_DESC_OVL_IRQ_EN, + cryp->base + RDR_CFG(i)); +} + +static int mtk_packet_engine_setup(struct mtk_cryp *cryp) +{ + struct mtk_sys_cap cap; + int i, err; + u32 val; + + cap.hia_ver = readl(cryp->base + HIA_VERSION); + cap.hia_opt = readl(cryp->base + HIA_OPTIONS); + cap.hw_opt = readl(cryp->base + EIP97_OPTIONS); + + if (!(((u16)cap.hia_ver) == MTK_HIA_SIGNATURE)) + return -EINVAL; + + /* Configure endianness conversion method for master (DMA) interface */ + writel(0, cryp->base + EIP97_MST_CTRL); + + /* Set HIA burst size */ + val = readl(cryp->base + HIA_MST_CTRL); + val &= ~MTK_BURST_SIZE_MSK; + val |= MTK_BURST_SIZE(5); + writel(val, cryp->base + HIA_MST_CTRL); + + err = mtk_dfe_dse_reset(cryp); + if (err) { + dev_err(cryp->dev, "Failed to reset DFE and DSE.\n"); + return err; + } + + mtk_dfe_dse_buf_setup(cryp, &cap); + + /* Enable the 4 rings for the packet engines. */ + mtk_desc_ring_link(cryp, 0xf); + + for (i = 0; i < RING_MAX; i++) { + mtk_cmd_desc_ring_setup(cryp, i, &cap); + mtk_res_desc_ring_setup(cryp, i, &cap); + } + + writel(MTK_PE_TK_LOC_AVL | MTK_PE_PROC_HELD | MTK_PE_TK_TIMEOUT_EN, + cryp->base + PE_TOKEN_CTRL_STAT); + + /* Clear all pending interrupts */ + writel(MTK_AIC_G_CLR, cryp->base + AIC_G_ACK); + writel(MTK_PE_INPUT_DMA_ERR | MTK_PE_OUTPUT_DMA_ERR | + MTK_PE_PKT_PORC_ERR | MTK_PE_PKT_TIMEOUT | + MTK_PE_FATAL_ERR | MTK_PE_INPUT_DMA_ERR_EN | + MTK_PE_OUTPUT_DMA_ERR_EN | MTK_PE_PKT_PORC_ERR_EN | + MTK_PE_PKT_TIMEOUT_EN | MTK_PE_FATAL_ERR_EN | + MTK_PE_INT_OUT_EN, + cryp->base + PE_INTERRUPT_CTRL_STAT); + + return 0; +} + +static int mtk_aic_cap_check(struct mtk_cryp *cryp, int hw) +{ + u32 val; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_VERSION); + else + val = readl(cryp->base + AIC_VERSION(hw)); + + val &= MTK_AIC_VER_MSK; + if (val != MTK_AIC_VER11 && val != MTK_AIC_VER12) + return -ENXIO; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_OPTIONS); + else + val = readl(cryp->base + AIC_OPTIONS(hw)); + + val &= MTK_AIC_INT_MSK; + if (!val || val > 32) + return -ENXIO; + + return 0; +} + +static int mtk_aic_init(struct mtk_cryp *cryp, int hw) +{ + int err; + + err = mtk_aic_cap_check(cryp, hw); + if (err) + return err; + + /* Disable all interrupts and set initial configuration */ + if (hw == RING_MAX) { + writel(0, cryp->base + AIC_G_ENABLE_CTRL); + writel(0, cryp->base + AIC_G_POL_CTRL); + writel(0, cryp->base + AIC_G_TYPE_CTRL); + writel(0, cryp->base + AIC_G_ENABLE_SET); + } else { + writel(0, cryp->base + AIC_ENABLE_CTRL(hw)); + writel(0, cryp->base + AIC_POL_CTRL(hw)); + writel(0, cryp->base + AIC_TYPE_CTRL(hw)); + writel(0, cryp->base + AIC_ENABLE_SET(hw)); + } + + return 0; +} + +static int mtk_accelerator_init(struct mtk_cryp *cryp) +{ + int i, err; + + /* Initialize advanced interrupt controller(AIC) */ + for (i = 0; i < MTK_IRQ_NUM; i++) { + err = mtk_aic_init(cryp, i); + if (err) { + dev_err(cryp->dev, "Failed to initialize AIC.\n"); + return err; + } + } + + /* Initialize packet engine */ + err = mtk_packet_engine_setup(cryp); + if (err) { + dev_err(cryp->dev, "Failed to configure packet engine.\n"); + return err; + } + + return 0; +} + +static void mtk_desc_dma_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < RING_MAX; i++) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->res_base, + cryp->ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->cmd_base, + cryp->ring[i]->cmd_dma); + kfree(cryp->ring[i]); + } +} + +static int mtk_desc_ring_alloc(struct mtk_cryp *cryp) +{ + struct mtk_ring **ring = cryp->ring; + int i, err = ENOMEM; + + for (i = 0; i < RING_MAX; i++) { + ring[i] = kzalloc(sizeof(**ring), GFP_KERNEL); + if (!ring[i]) + goto err_cleanup; + + ring[i]->cmd_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->cmd_dma, + GFP_KERNEL); + if (!ring[i]->cmd_base) + goto err_cleanup; + + ring[i]->res_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->res_dma, + GFP_KERNEL); + if (!ring[i]->res_base) + goto err_cleanup; + } + return 0; + +err_cleanup: + for (; i--; ) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->res_base, ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->cmd_base, ring[i]->cmd_dma); + kfree(ring[i]); + } + return err; +} + +static int mtk_crypto_probe(struct platform_device *pdev) +{ + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct mtk_cryp *cryp; + int i, err; + + cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); + if (!cryp) + return -ENOMEM; + + cryp->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cryp->base)) + return PTR_ERR(cryp->base); + + for (i = 0; i < MTK_IRQ_NUM; i++) { + cryp->irq[i] = platform_get_irq(pdev, i); + if (cryp->irq[i] < 0) { + dev_err(cryp->dev, "no IRQ:%d resource info\n", i); + return -ENXIO; + } + } + + cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif"); + cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp"); + if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp)) + return -EPROBE_DEFER; + + cryp->dev = &pdev->dev; + pm_runtime_enable(cryp->dev); + pm_runtime_get_sync(cryp->dev); + + err = clk_prepare_enable(cryp->clk_ethif); + if (err) + goto err_clk_ethif; + + err = clk_prepare_enable(cryp->clk_cryp); + if (err) + goto err_clk_cryp; + + /* Allocate four command/result descriptor rings */ + err = mtk_desc_ring_alloc(cryp); + if (err) { + dev_err(cryp->dev, "Unable to allocate descriptor rings.\n"); + goto err_resource; + } + + /* Initialize hardware modules */ + err = mtk_accelerator_init(cryp); + if (err) { + dev_err(cryp->dev, "Failed to initialize cryptographic engine.\n"); + goto err_engine; + } + + err = mtk_cipher_alg_register(cryp); + if (err) { + dev_err(cryp->dev, "Unable to register cipher algorithm.\n"); + goto err_cipher; + } + + err = mtk_hash_alg_register(cryp); + if (err) { + dev_err(cryp->dev, "Unable to register hash algorithm.\n"); + goto err_hash; + } + + platform_set_drvdata(pdev, cryp); + return 0; + +err_hash: + mtk_cipher_alg_release(cryp); +err_cipher: + mtk_dfe_dse_reset(cryp); +err_engine: + mtk_desc_dma_free(cryp); +err_resource: + clk_disable_unprepare(cryp->clk_cryp); +err_clk_cryp: + clk_disable_unprepare(cryp->clk_ethif); +err_clk_ethif: + pm_runtime_put_sync(cryp->dev); + pm_runtime_disable(cryp->dev); + + return err; +} + +static int mtk_crypto_remove(struct platform_device *pdev) +{ + struct mtk_cryp *cryp = platform_get_drvdata(pdev); + + mtk_hash_alg_release(cryp); + mtk_cipher_alg_release(cryp); + mtk_desc_dma_free(cryp); + + clk_disable_unprepare(cryp->clk_cryp); + clk_disable_unprepare(cryp->clk_ethif); + + pm_runtime_put_sync(cryp->dev); + pm_runtime_disable(cryp->dev); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const struct of_device_id of_crypto_id[] = { + { .compatible = "mediatek,eip97-crypto" }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_crypto_id); + +static struct platform_driver mtk_crypto_driver = { + .probe = mtk_crypto_probe, + .remove = mtk_crypto_remove, + .driver = { + .name = "mtk-crypto", + .owner = THIS_MODULE, + .of_match_table = of_crypto_id, + }, +}; +module_platform_driver(mtk_crypto_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ryder Lee <ryder.lee@mediatek.com>"); +MODULE_DESCRIPTION("Cryptographic accelerator driver for EIP97"); diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h new file mode 100644 index 000000000000..ed6d8717f7f4 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -0,0 +1,231 @@ +/* + * Driver for EIP97 cryptographic accelerator. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __MTK_PLATFORM_H_ +#define __MTK_PLATFORM_H_ + +#include <crypto/algapi.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> +#include <linux/crypto.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/scatterlist.h> +#include "mtk-regs.h" + +#define MTK_RDR_PROC_THRESH BIT(0) +#define MTK_RDR_PROC_MODE BIT(23) +#define MTK_CNT_RST BIT(31) +#define MTK_IRQ_RDR0 BIT(1) +#define MTK_IRQ_RDR1 BIT(3) +#define MTK_IRQ_RDR2 BIT(5) +#define MTK_IRQ_RDR3 BIT(7) + +#define SIZE_IN_WORDS(x) ((x) >> 2) + +/** + * Ring 0/1 are used by AES encrypt and decrypt. + * Ring 2/3 are used by SHA. + */ +enum { + RING0 = 0, + RING1, + RING2, + RING3, + RING_MAX, +}; + +#define MTK_REC_NUM (RING_MAX / 2) +#define MTK_IRQ_NUM 5 + +/** + * struct mtk_desc - DMA descriptor + * @hdr: the descriptor control header + * @buf: DMA address of input buffer segment + * @ct: DMA address of command token that control operation flow + * @ct_hdr: the command token control header + * @tag: the user-defined field + * @tfm: DMA address of transform state + * @bound: align descriptors offset boundary + * + * Structure passed to the crypto engine to describe where source + * data needs to be fetched and how it needs to be processed. + */ +struct mtk_desc { + __le32 hdr; + __le32 buf; + __le32 ct; + __le32 ct_hdr; + __le32 tag; + __le32 tfm; + __le32 bound[2]; +}; + +#define MTK_DESC_NUM 512 +#define MTK_DESC_OFF SIZE_IN_WORDS(sizeof(struct mtk_desc)) +#define MTK_DESC_SZ (MTK_DESC_OFF - 2) +#define MTK_DESC_RING_SZ ((sizeof(struct mtk_desc) * MTK_DESC_NUM)) +#define MTK_DESC_CNT(x) ((MTK_DESC_OFF * (x)) << 2) +#define MTK_DESC_LAST cpu_to_le32(BIT(22)) +#define MTK_DESC_FIRST cpu_to_le32(BIT(23)) +#define MTK_DESC_BUF_LEN(x) cpu_to_le32(x) +#define MTK_DESC_CT_LEN(x) cpu_to_le32((x) << 24) + +/** + * struct mtk_ring - Descriptor ring + * @cmd_base: pointer to command descriptor ring base + * @cmd_dma: DMA address of command descriptor ring + * @cmd_pos: current position in the command descriptor ring + * @res_base: pointer to result descriptor ring base + * @res_dma: DMA address of result descriptor ring + * @res_pos: current position in the result descriptor ring + * + * A descriptor ring is a circular buffer that is used to manage + * one or more descriptors. There are two type of descriptor rings; + * the command descriptor ring and result descriptor ring. + */ +struct mtk_ring { + struct mtk_desc *cmd_base; + dma_addr_t cmd_dma; + u32 cmd_pos; + struct mtk_desc *res_base; + dma_addr_t res_dma; + u32 res_pos; +}; + +/** + * struct mtk_aes_dma - Structure that holds sg list info + * @sg: pointer to scatter-gather list + * @nents: number of entries in the sg list + * @remainder: remainder of sg list + * @sg_len: number of entries in the sg mapped list + */ +struct mtk_aes_dma { + struct scatterlist *sg; + int nents; + u32 remainder; + u32 sg_len; +}; + +struct mtk_aes_base_ctx; +struct mtk_aes_rec; +struct mtk_cryp; + +typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); + +/** + * struct mtk_aes_rec - AES operation record + * @queue: crypto request queue + * @areq: pointer to async request + * @task: the tasklet is use in AES interrupt + * @ctx: pointer to current context + * @src: the structure that holds source sg list info + * @dst: the structure that holds destination sg list info + * @aligned_sg: the scatter list is use to alignment + * @real_dst: pointer to the destination sg list + * @resume: pointer to resume function + * @total: request buffer length + * @buf: pointer to page buffer + * @id: record identification + * @flags: it's describing AES operation state + * @lock: the async queue lock + * + * Structure used to record AES execution state. + */ +struct mtk_aes_rec { + struct crypto_queue queue; + struct crypto_async_request *areq; + struct tasklet_struct task; + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_dma src; + struct mtk_aes_dma dst; + + struct scatterlist aligned_sg; + struct scatterlist *real_dst; + + mtk_aes_fn resume; + + size_t total; + void *buf; + + u8 id; + unsigned long flags; + /* queue lock */ + spinlock_t lock; +}; + +/** + * struct mtk_sha_rec - SHA operation record + * @queue: crypto request queue + * @req: pointer to ahash request + * @task: the tasklet is use in SHA interrupt + * @id: record identification + * @flags: it's describing SHA operation state + * @lock: the ablkcipher queue lock + * + * Structure used to record SHA execution state. + */ +struct mtk_sha_rec { + struct crypto_queue queue; + struct ahash_request *req; + struct tasklet_struct task; + + u8 id; + unsigned long flags; + /* queue lock */ + spinlock_t lock; +}; + +/** + * struct mtk_cryp - Cryptographic device + * @base: pointer to mapped register I/O base + * @dev: pointer to device + * @clk_ethif: pointer to ethif clock + * @clk_cryp: pointer to crypto clock + * @irq: global system and rings IRQ + * @ring: pointer to execution state of AES + * @aes: pointer to execution state of SHA + * @sha: each execution record map to a ring + * @aes_list: device list of AES + * @sha_list: device list of SHA + * @tmp: pointer to temporary buffer for internal use + * @tmp_dma: DMA address of temporary buffer + * @rec: it's used to select SHA record for tfm + * + * Structure storing cryptographic device information. + */ +struct mtk_cryp { + void __iomem *base; + struct device *dev; + struct clk *clk_ethif; + struct clk *clk_cryp; + int irq[MTK_IRQ_NUM]; + + struct mtk_ring *ring[RING_MAX]; + struct mtk_aes_rec *aes[MTK_REC_NUM]; + struct mtk_sha_rec *sha[MTK_REC_NUM]; + + struct list_head aes_list; + struct list_head sha_list; + + void *tmp; + dma_addr_t tmp_dma; + bool rec; +}; + +int mtk_cipher_alg_register(struct mtk_cryp *cryp); +void mtk_cipher_alg_release(struct mtk_cryp *cryp); +int mtk_hash_alg_register(struct mtk_cryp *cryp); +void mtk_hash_alg_release(struct mtk_cryp *cryp); + +#endif /* __MTK_PLATFORM_H_ */ diff --git a/drivers/crypto/mediatek/mtk-regs.h b/drivers/crypto/mediatek/mtk-regs.h new file mode 100644 index 000000000000..94f4eb85be3f --- /dev/null +++ b/drivers/crypto/mediatek/mtk-regs.h @@ -0,0 +1,194 @@ +/* + * Support for MediaTek cryptographic accelerator. + * + * Copyright (c) 2016 MediaTek Inc. + * Author: Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + */ + +#ifndef __MTK_REGS_H__ +#define __MTK_REGS_H__ + +/* HIA, Command Descriptor Ring Manager */ +#define CDR_BASE_ADDR_LO(x) (0x0 + ((x) << 12)) +#define CDR_BASE_ADDR_HI(x) (0x4 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_LO(x) (0x8 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_HI(x) (0xC + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_LO(x) (0x10 + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_HI(x) (0x14 + ((x) << 12)) +#define CDR_RING_SIZE(x) (0x18 + ((x) << 12)) +#define CDR_DESC_SIZE(x) (0x1C + ((x) << 12)) +#define CDR_CFG(x) (0x20 + ((x) << 12)) +#define CDR_DMA_CFG(x) (0x24 + ((x) << 12)) +#define CDR_THRESH(x) (0x28 + ((x) << 12)) +#define CDR_PREP_COUNT(x) (0x2C + ((x) << 12)) +#define CDR_PROC_COUNT(x) (0x30 + ((x) << 12)) +#define CDR_PREP_PNTR(x) (0x34 + ((x) << 12)) +#define CDR_PROC_PNTR(x) (0x38 + ((x) << 12)) +#define CDR_STAT(x) (0x3C + ((x) << 12)) + +/* HIA, Result Descriptor Ring Manager */ +#define RDR_BASE_ADDR_LO(x) (0x800 + ((x) << 12)) +#define RDR_BASE_ADDR_HI(x) (0x804 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_LO(x) (0x808 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_HI(x) (0x80C + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_LO(x) (0x810 + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_HI(x) (0x814 + ((x) << 12)) +#define RDR_RING_SIZE(x) (0x818 + ((x) << 12)) +#define RDR_DESC_SIZE(x) (0x81C + ((x) << 12)) +#define RDR_CFG(x) (0x820 + ((x) << 12)) +#define RDR_DMA_CFG(x) (0x824 + ((x) << 12)) +#define RDR_THRESH(x) (0x828 + ((x) << 12)) +#define RDR_PREP_COUNT(x) (0x82C + ((x) << 12)) +#define RDR_PROC_COUNT(x) (0x830 + ((x) << 12)) +#define RDR_PREP_PNTR(x) (0x834 + ((x) << 12)) +#define RDR_PROC_PNTR(x) (0x838 + ((x) << 12)) +#define RDR_STAT(x) (0x83C + ((x) << 12)) + +/* HIA, Ring AIC */ +#define AIC_POL_CTRL(x) (0xE000 - ((x) << 12)) +#define AIC_TYPE_CTRL(x) (0xE004 - ((x) << 12)) +#define AIC_ENABLE_CTRL(x) (0xE008 - ((x) << 12)) +#define AIC_RAW_STAL(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLE_SET(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLED_STAT(x) (0xE010 - ((x) << 12)) +#define AIC_ACK(x) (0xE010 - ((x) << 12)) +#define AIC_ENABLE_CLR(x) (0xE014 - ((x) << 12)) +#define AIC_OPTIONS(x) (0xE018 - ((x) << 12)) +#define AIC_VERSION(x) (0xE01C - ((x) << 12)) + +/* HIA, Global AIC */ +#define AIC_G_POL_CTRL 0xF800 +#define AIC_G_TYPE_CTRL 0xF804 +#define AIC_G_ENABLE_CTRL 0xF808 +#define AIC_G_RAW_STAT 0xF80C +#define AIC_G_ENABLE_SET 0xF80C +#define AIC_G_ENABLED_STAT 0xF810 +#define AIC_G_ACK 0xF810 +#define AIC_G_ENABLE_CLR 0xF814 +#define AIC_G_OPTIONS 0xF818 +#define AIC_G_VERSION 0xF81C + +/* HIA, Data Fetch Engine */ +#define DFE_CFG 0xF000 +#define DFE_PRIO_0 0xF010 +#define DFE_PRIO_1 0xF014 +#define DFE_PRIO_2 0xF018 +#define DFE_PRIO_3 0xF01C + +/* HIA, Data Fetch Engine access monitoring for CDR */ +#define DFE_RING_REGION_LO(x) (0xF080 + ((x) << 3)) +#define DFE_RING_REGION_HI(x) (0xF084 + ((x) << 3)) + +/* HIA, Data Fetch Engine thread control and status for thread */ +#define DFE_THR_CTRL 0xF200 +#define DFE_THR_STAT 0xF204 +#define DFE_THR_DESC_CTRL 0xF208 +#define DFE_THR_DESC_DPTR_LO 0xF210 +#define DFE_THR_DESC_DPTR_HI 0xF214 +#define DFE_THR_DESC_ACDPTR_LO 0xF218 +#define DFE_THR_DESC_ACDPTR_HI 0xF21C + +/* HIA, Data Store Engine */ +#define DSE_CFG 0xF400 +#define DSE_PRIO_0 0xF410 +#define DSE_PRIO_1 0xF414 +#define DSE_PRIO_2 0xF418 +#define DSE_PRIO_3 0xF41C + +/* HIA, Data Store Engine access monitoring for RDR */ +#define DSE_RING_REGION_LO(x) (0xF480 + ((x) << 3)) +#define DSE_RING_REGION_HI(x) (0xF484 + ((x) << 3)) + +/* HIA, Data Store Engine thread control and status for thread */ +#define DSE_THR_CTRL 0xF600 +#define DSE_THR_STAT 0xF604 +#define DSE_THR_DESC_CTRL 0xF608 +#define DSE_THR_DESC_DPTR_LO 0xF610 +#define DSE_THR_DESC_DPTR_HI 0xF614 +#define DSE_THR_DESC_S_DPTR_LO 0xF618 +#define DSE_THR_DESC_S_DPTR_HI 0xF61C +#define DSE_THR_ERROR_STAT 0xF620 + +/* HIA Global */ +#define HIA_MST_CTRL 0xFFF4 +#define HIA_OPTIONS 0xFFF8 +#define HIA_VERSION 0xFFFC + +/* Processing Engine Input Side, Processing Engine */ +#define PE_IN_DBUF_THRESH 0x10000 +#define PE_IN_TBUF_THRESH 0x10100 + +/* Packet Engine Configuration / Status Registers */ +#define PE_TOKEN_CTRL_STAT 0x11000 +#define PE_FUNCTION_EN 0x11004 +#define PE_CONTEXT_CTRL 0x11008 +#define PE_INTERRUPT_CTRL_STAT 0x11010 +#define PE_CONTEXT_STAT 0x1100C +#define PE_OUT_TRANS_CTRL_STAT 0x11018 +#define PE_OUT_BUF_CTRL 0x1101C + +/* Packet Engine PRNG Registers */ +#define PE_PRNG_STAT 0x11040 +#define PE_PRNG_CTRL 0x11044 +#define PE_PRNG_SEED_L 0x11048 +#define PE_PRNG_SEED_H 0x1104C +#define PE_PRNG_KEY_0_L 0x11050 +#define PE_PRNG_KEY_0_H 0x11054 +#define PE_PRNG_KEY_1_L 0x11058 +#define PE_PRNG_KEY_1_H 0x1105C +#define PE_PRNG_RES_0 0x11060 +#define PE_PRNG_RES_1 0x11064 +#define PE_PRNG_RES_2 0x11068 +#define PE_PRNG_RES_3 0x1106C +#define PE_PRNG_LFSR_L 0x11070 +#define PE_PRNG_LFSR_H 0x11074 + +/* Packet Engine AIC */ +#define PE_EIP96_AIC_POL_CTRL 0x113C0 +#define PE_EIP96_AIC_TYPE_CTRL 0x113C4 +#define PE_EIP96_AIC_ENABLE_CTRL 0x113C8 +#define PE_EIP96_AIC_RAW_STAT 0x113CC +#define PE_EIP96_AIC_ENABLE_SET 0x113CC +#define PE_EIP96_AIC_ENABLED_STAT 0x113D0 +#define PE_EIP96_AIC_ACK 0x113D0 +#define PE_EIP96_AIC_ENABLE_CLR 0x113D4 +#define PE_EIP96_AIC_OPTIONS 0x113D8 +#define PE_EIP96_AIC_VERSION 0x113DC + +/* Packet Engine Options & Version Registers */ +#define PE_EIP96_OPTIONS 0x113F8 +#define PE_EIP96_VERSION 0x113FC + +/* Processing Engine Output Side */ +#define PE_OUT_DBUF_THRESH 0x11C00 +#define PE_OUT_TBUF_THRESH 0x11D00 + +/* Processing Engine Local AIC */ +#define PE_AIC_POL_CTRL 0x11F00 +#define PE_AIC_TYPE_CTRL 0x11F04 +#define PE_AIC_ENABLE_CTRL 0x11F08 +#define PE_AIC_RAW_STAT 0x11F0C +#define PE_AIC_ENABLE_SET 0x11F0C +#define PE_AIC_ENABLED_STAT 0x11F10 +#define PE_AIC_ENABLE_CLR 0x11F14 +#define PE_AIC_OPTIONS 0x11F18 +#define PE_AIC_VERSION 0x11F1C + +/* Processing Engine General Configuration and Version */ +#define PE_IN_FLIGHT 0x11FF0 +#define PE_OPTIONS 0x11FF8 +#define PE_VERSION 0x11FFC + +/* EIP-97 - Global */ +#define EIP97_CLOCK_STATE 0x1FFE4 +#define EIP97_FORCE_CLOCK_ON 0x1FFE8 +#define EIP97_FORCE_CLOCK_OFF 0x1FFEC +#define EIP97_MST_CTRL 0x1FFF4 +#define EIP97_OPTIONS 0x1FFF8 +#define EIP97_VERSION 0x1FFFC +#endif /* __MTK_REGS_H__ */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c new file mode 100644 index 000000000000..55e3805fba07 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -0,0 +1,1435 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 SHA1/SHA2(HMAC) acceleration. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-sha.c and omap-sham.c drivers. + */ + +#include <crypto/sha.h> +#include "mtk-platform.h" + +#define SHA_ALIGN_MSK (sizeof(u32) - 1) +#define SHA_QUEUE_SIZE 512 +#define SHA_TMP_BUF_SIZE 512 +#define SHA_BUF_SIZE ((u32)PAGE_SIZE) + +#define SHA_OP_UPDATE 1 +#define SHA_OP_FINAL 2 + +#define SHA_DATA_LEN_MSK cpu_to_le32(GENMASK(16, 0)) + +/* SHA command token */ +#define SHA_CT_SIZE 5 +#define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) +#define SHA_CMD0 cpu_to_le32(0x03020000) +#define SHA_CMD1 cpu_to_le32(0x21060000) +#define SHA_CMD2 cpu_to_le32(0xe0e63802) + +/* SHA transform information */ +#define SHA_TFM_HASH cpu_to_le32(0x2 << 0) +#define SHA_TFM_INNER_DIG cpu_to_le32(0x1 << 21) +#define SHA_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define SHA_TFM_START cpu_to_le32(0x1 << 4) +#define SHA_TFM_CONTINUE cpu_to_le32(0x1 << 5) +#define SHA_TFM_HASH_STORE cpu_to_le32(0x1 << 19) +#define SHA_TFM_SHA1 cpu_to_le32(0x2 << 23) +#define SHA_TFM_SHA256 cpu_to_le32(0x3 << 23) +#define SHA_TFM_SHA224 cpu_to_le32(0x4 << 23) +#define SHA_TFM_SHA512 cpu_to_le32(0x5 << 23) +#define SHA_TFM_SHA384 cpu_to_le32(0x6 << 23) +#define SHA_TFM_DIGEST(x) cpu_to_le32(((x) & GENMASK(3, 0)) << 24) + +/* SHA flags */ +#define SHA_FLAGS_BUSY BIT(0) +#define SHA_FLAGS_FINAL BIT(1) +#define SHA_FLAGS_FINUP BIT(2) +#define SHA_FLAGS_SG BIT(3) +#define SHA_FLAGS_ALGO_MSK GENMASK(8, 4) +#define SHA_FLAGS_SHA1 BIT(4) +#define SHA_FLAGS_SHA224 BIT(5) +#define SHA_FLAGS_SHA256 BIT(6) +#define SHA_FLAGS_SHA384 BIT(7) +#define SHA_FLAGS_SHA512 BIT(8) +#define SHA_FLAGS_HMAC BIT(9) +#define SHA_FLAGS_PAD BIT(10) + +/** + * mtk_sha_ct is a set of hardware instructions(command token) + * that are used to control engine's processing flow of SHA, + * and it contains the first two words of transform state. + */ +struct mtk_sha_ct { + __le32 ctrl[2]; + __le32 cmd[3]; +}; + +/** + * mtk_sha_tfm is used to define SHA transform state + * and store result digest that produced by engine. + */ +struct mtk_sha_tfm { + __le32 ctrl[2]; + __le32 digest[SIZE_IN_WORDS(SHA512_DIGEST_SIZE)]; +}; + +/** + * mtk_sha_info consists of command token and transform state + * of SHA, its role is similar to mtk_aes_info. + */ +struct mtk_sha_info { + struct mtk_sha_ct ct; + struct mtk_sha_tfm tfm; +}; + +struct mtk_sha_reqctx { + struct mtk_sha_info info; + unsigned long flags; + unsigned long op; + + u64 digcnt; + bool start; + size_t bufcnt; + dma_addr_t dma_addr; + + __le32 ct_hdr; + u32 ct_size; + dma_addr_t ct_dma; + dma_addr_t tfm_dma; + + /* Walk state */ + struct scatterlist *sg; + u32 offset; /* Offset in current sg */ + u32 total; /* Total request */ + size_t ds; + size_t bs; + + u8 *buffer; +}; + +struct mtk_sha_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); + u8 opad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); +}; + +struct mtk_sha_ctx { + struct mtk_cryp *cryp; + unsigned long flags; + u8 id; + u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); + + struct mtk_sha_hmac_ctx base[0]; +}; + +struct mtk_sha_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_sha_drv mtk_sha = { + .dev_list = LIST_HEAD_INIT(mtk_sha.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_sha.lock), +}; + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req); + +static inline u32 mtk_sha_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_sha_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_sha_find_dev(struct mtk_sha_ctx *tctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_sha.lock); + if (!tctx->cryp) { + list_for_each_entry(tmp, &mtk_sha.dev_list, sha_list) { + cryp = tmp; + break; + } + tctx->cryp = cryp; + } else { + cryp = tctx->cryp; + } + + /* + * Assign record id to tfm in round-robin fashion, and this + * will help tfm to bind to corresponding descriptor rings. + */ + tctx->id = cryp->rec; + cryp->rec = !cryp->rec; + + spin_unlock_bh(&mtk_sha.lock); + + return cryp; +} + +static int mtk_sha_append_sg(struct mtk_sha_reqctx *ctx) +{ + size_t count; + + while ((ctx->bufcnt < SHA_BUF_SIZE) && ctx->total) { + count = min(ctx->sg->length - ctx->offset, ctx->total); + count = min(count, SHA_BUF_SIZE - ctx->bufcnt); + + if (count <= 0) { + /* + * Check if count <= 0 because the buffer is full or + * because the sg length is 0. In the latest case, + * check if there is another sg in the list, a 0 length + * sg doesn't necessarily mean the end of the sg list. + */ + if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { + ctx->sg = sg_next(ctx->sg); + continue; + } else { + break; + } + } + + scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, + ctx->offset, count, 0); + + ctx->bufcnt += count; + ctx->offset += count; + ctx->total -= count; + + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +/* + * The purpose of this padding is to ensure that the padded message is a + * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). + * The bit "1" is appended at the end of the message followed by + * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or + * 128 bits block (SHA384/SHA512) equals to the message length in bits + * is appended. + * + * For SHA1/SHA224/SHA256, padlen is calculated as followed: + * - if message length < 56 bytes then padlen = 56 - message length + * - else padlen = 64 + 56 - message length + * + * For SHA384/SHA512, padlen is calculated as followed: + * - if message length < 112 bytes then padlen = 112 - message length + * - else padlen = 128 + 112 - message length + */ +static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) +{ + u32 index, padlen; + u64 bits[2]; + u64 size = ctx->digcnt; + + size += ctx->bufcnt; + size += len; + + bits[1] = cpu_to_be64(size << 3); + bits[0] = cpu_to_be64(size >> 61); + + if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + index = ctx->bufcnt & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128 + 112) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); + ctx->bufcnt += padlen + 16; + ctx->flags |= SHA_FLAGS_PAD; + } else { + index = ctx->bufcnt & 0x3f; + padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); + ctx->bufcnt += padlen + 8; + ctx->flags |= SHA_FLAGS_PAD; + } +} + +/* Initialize basic transform information of SHA */ +static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) +{ + struct mtk_sha_ct *ct = &ctx->info.ct; + struct mtk_sha_tfm *tfm = &ctx->info.tfm; + + ctx->ct_hdr = SHA_CT_CTRL_HDR; + ctx->ct_size = SHA_CT_SIZE; + + tfm->ctrl[0] = SHA_TFM_HASH | SHA_TFM_INNER_DIG | + SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); + + switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { + case SHA_FLAGS_SHA1: + tfm->ctrl[0] |= SHA_TFM_SHA1; + break; + case SHA_FLAGS_SHA224: + tfm->ctrl[0] |= SHA_TFM_SHA224; + break; + case SHA_FLAGS_SHA256: + tfm->ctrl[0] |= SHA_TFM_SHA256; + break; + case SHA_FLAGS_SHA384: + tfm->ctrl[0] |= SHA_TFM_SHA384; + break; + case SHA_FLAGS_SHA512: + tfm->ctrl[0] |= SHA_TFM_SHA512; + break; + + default: + /* Should not happen... */ + return; + } + + tfm->ctrl[1] = SHA_TFM_HASH_STORE; + ct->ctrl[0] = tfm->ctrl[0] | SHA_TFM_CONTINUE | SHA_TFM_START; + ct->ctrl[1] = tfm->ctrl[1]; + + ct->cmd[0] = SHA_CMD0; + ct->cmd[1] = SHA_CMD1; + ct->cmd[2] = SHA_CMD2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); +} + +/* + * Update input data length field of transform information and + * map it to DMA region. + */ +static int mtk_sha_info_update(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_sha_info *info = &ctx->info; + struct mtk_sha_ct *ct = &info->ct; + + if (ctx->start) + ctx->start = false; + else + ct->ctrl[0] &= ~SHA_TFM_START; + + ctx->ct_hdr &= ~SHA_DATA_LEN_MSK; + ctx->ct_hdr |= cpu_to_le32(len); + ct->cmd[0] &= ~SHA_DATA_LEN_MSK; + ct->cmd[0] |= cpu_to_le32(len); + + ctx->digcnt += len; + + ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { + dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); + return -EINVAL; + } + ctx->tfm_dma = ctx->ct_dma + sizeof(*ct); + + return 0; +} + +/* + * Because of hardware limitation, we must pre-calculate the inner + * and outer digest that need to be processed firstly by engine, then + * apply the result digest to the input message. These complex hashing + * procedures limits HMAC performance, so we use fallback SW encoding. + */ +static int mtk_sha_finish_hmac(struct ahash_request *req) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(shash) ?: + crypto_shash_update(shash, bctx->opad, ctx->bs) ?: + crypto_shash_finup(shash, req->result, ctx->ds, req->result); +} + +/* Initialize request context */ +static int mtk_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags = 0; + ctx->ds = crypto_ahash_digestsize(tfm); + + switch (ctx->ds) { + case SHA1_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA1; + ctx->bs = SHA1_BLOCK_SIZE; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA224; + ctx->bs = SHA224_BLOCK_SIZE; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA256; + ctx->bs = SHA256_BLOCK_SIZE; + break; + case SHA384_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA384; + ctx->bs = SHA384_BLOCK_SIZE; + break; + case SHA512_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA512; + ctx->bs = SHA512_BLOCK_SIZE; + break; + default: + return -EINVAL; + } + + ctx->bufcnt = 0; + ctx->digcnt = 0; + ctx->buffer = tctx->buf; + ctx->start = true; + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, ctx->bs); + ctx->bufcnt = ctx->bs; + ctx->flags |= SHA_FLAGS_HMAC; + } + + return 0; +} + +static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, + dma_addr_t addr, size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len); + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len) | + MTK_DESC_CT_LEN(ctx->ct_size); + + cmd->buf = cpu_to_le32(addr); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + + return -EINPROGRESS; +} + +static int mtk_sha_xmit2(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t len1, size_t len2) +{ + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len1 + len2); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST | + MTK_DESC_CT_LEN(ctx->ct_size); + cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + cmd = ring->cmd_base + ring->cmd_pos; + res = ring->res_base + ring->res_pos; + + res->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + cmd->buf = cpu_to_le32(ctx->dma_addr); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + + return -EINPROGRESS; +} + +static int mtk_sha_dma_map(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t count) +{ + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map error\n"); + return -EINVAL; + } + + ctx->flags &= ~SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); +} + +static int mtk_sha_update_slow(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + u32 final; + + mtk_sha_append_sg(ctx); + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt); + + if (final) { + sha->flags |= SHA_FLAGS_FINAL; + mtk_sha_fill_padding(ctx, 0); + } + + if (final || (ctx->bufcnt == SHA_BUF_SIZE && ctx->total)) { + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); + } + return 0; +} + +static int mtk_sha_update_start(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + u32 len, final, tail; + struct scatterlist *sg; + + if (!ctx->total) + return 0; + + if (ctx->bufcnt || ctx->offset) + return mtk_sha_update_slow(cryp, sha); + + sg = ctx->sg; + + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return mtk_sha_update_slow(cryp, sha); + + if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->bs)) + /* size is not ctx->bs aligned */ + return mtk_sha_update_slow(cryp, sha); + + len = min(ctx->total, sg->length); + + if (sg_is_last(sg)) { + if (!(ctx->flags & SHA_FLAGS_FINUP)) { + /* not last sg must be ctx->bs aligned */ + tail = len & (ctx->bs - 1); + len -= tail; + } + } + + ctx->total -= len; + ctx->offset = len; /* offset where to start slow */ + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + /* Add padding */ + if (final) { + size_t count; + + tail = len & (ctx->bs - 1); + len -= tail; + ctx->total += tail; + ctx->offset = len; /* offset where to start slow */ + + sg = ctx->sg; + mtk_sha_append_sg(ctx); + mtk_sha_fill_padding(ctx, len); + + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map bytes error\n"); + return -EINVAL; + } + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + if (len == 0) { + ctx->flags &= ~SHA_FLAGS_SG; + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); + + } else { + ctx->sg = sg; + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + return mtk_sha_xmit2(cryp, sha, ctx, len, count); + } + } + + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), len); +} + +static int mtk_sha_final_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + + mtk_sha_fill_padding(ctx, 0); + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); +} + +/* Copy ready hash (+ finalize hmac) */ +static int mtk_sha_finish(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + u32 *digest = ctx->info.tfm.digest; + u32 *result = (u32 *)req->result; + int i; + + /* Get the hash from the digest buffer */ + for (i = 0; i < SIZE_IN_WORDS(ctx->ds); i++) + result[i] = le32_to_cpu(digest[i]); + + if (ctx->flags & SHA_FLAGS_HMAC) + return mtk_sha_finish_hmac(req); + + return 0; +} + +static void mtk_sha_finish_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + int err) +{ + if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) + err = mtk_sha_finish(sha->req); + + sha->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL); + + sha->req->base.complete(&sha->req->base, err); + + /* Handle new request */ + mtk_sha_handle_queue(cryp, sha->id - RING2, NULL); +} + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req) +{ + struct mtk_sha_rec *sha = cryp->sha[id]; + struct crypto_async_request *async_req, *backlog; + struct mtk_sha_reqctx *ctx; + unsigned long flags; + int err = 0, ret = 0; + + spin_lock_irqsave(&sha->lock, flags); + if (req) + ret = ahash_enqueue_request(&sha->queue, req); + + if (SHA_FLAGS_BUSY & sha->flags) { + spin_unlock_irqrestore(&sha->lock, flags); + return ret; + } + + backlog = crypto_get_backlog(&sha->queue); + async_req = crypto_dequeue_request(&sha->queue); + if (async_req) + sha->flags |= SHA_FLAGS_BUSY; + spin_unlock_irqrestore(&sha->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + ctx = ahash_request_ctx(req); + + sha->req = req; + + mtk_sha_info_init(ctx); + + if (ctx->op == SHA_OP_UPDATE) { + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + /* No final() after finup() */ + err = mtk_sha_final_req(cryp, sha); + } else if (ctx->op == SHA_OP_FINAL) { + err = mtk_sha_final_req(cryp, sha); + } + + if (unlikely(err != -EINPROGRESS)) + /* Task will not finish it, so do it here */ + mtk_sha_finish_req(cryp, sha, err); + + return ret; +} + +static int mtk_sha_enqueue(struct ahash_request *req, u32 op) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + + ctx->op = op; + + return mtk_sha_handle_queue(tctx->cryp, tctx->id, req); +} + +static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), + DMA_BIDIRECTIONAL); + + if (ctx->flags & SHA_FLAGS_SG) { + dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + if (ctx->flags & SHA_FLAGS_PAD) { + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); + } + } else + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); +} + +static void mtk_sha_complete(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + int err = 0; + + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS) + mtk_sha_finish_req(cryp, sha, err); +} + +static int mtk_sha_update(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if ((ctx->bufcnt + ctx->total < SHA_BUF_SIZE) && + !(ctx->flags & SHA_FLAGS_FINUP)) + return mtk_sha_append_sg(ctx); + + return mtk_sha_enqueue(req, SHA_OP_UPDATE); +} + +static int mtk_sha_final(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags |= SHA_FLAGS_FINUP; + + if (ctx->flags & SHA_FLAGS_PAD) + return mtk_sha_finish(req); + + return mtk_sha_enqueue(req, SHA_OP_FINAL); +} + +static int mtk_sha_finup(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= SHA_FLAGS_FINUP; + + err1 = mtk_sha_update(req); + if (err1 == -EINPROGRESS || err1 == -EBUSY) + return err1; + /* + * final() has to be always called to cleanup resources + * even if update() failed + */ + err2 = mtk_sha_final(req); + + return err1 ?: err2; +} + +static int mtk_sha_digest(struct ahash_request *req) +{ + return mtk_sha_init(req) ?: mtk_sha_finup(req); +} + +static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + u32 keylen) +{ + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + size_t bs = crypto_shash_blocksize(bctx->shash); + size_t ds = crypto_shash_digestsize(bctx->shash); + int err, i; + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = crypto_shash_get_flags(bctx->shash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + err = crypto_shash_digest(shash, key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return 0; +} + +static int mtk_sha_export(struct ahash_request *req, void *out) +{ + const struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(out, ctx, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_import(struct ahash_request *req, const void *in) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(ctx, in, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_cra_init_alg(struct crypto_tfm *tfm, + const char *alg_base) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_sha_find_dev(tctx); + if (!cryp) + return -ENODEV; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mtk_sha_reqctx)); + + if (alg_base) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + tctx->flags |= SHA_FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("base driver %s could not be loaded.\n", + alg_base); + + return PTR_ERR(bctx->shash); + } + } + return 0; +} + +static int mtk_sha_cra_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, NULL); +} + +static int mtk_sha_cra_sha1_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha1"); +} + +static int mtk_sha_cra_sha224_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha224"); +} + +static int mtk_sha_cra_sha256_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha256"); +} + +static int mtk_sha_cra_sha384_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha384"); +} + +static int mtk_sha_cra_sha512_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha512"); +} + +static void mtk_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs_sha1_sha224_sha256[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "mtk-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "mtk-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "mtk-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mtk-hmac-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha1_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "mtk-hmac-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha224_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mtk-hmac-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha256_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static struct ahash_alg algs_sha384_sha512[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "mtk-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "mtk-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "mtk-hmac-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha384_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "mtk-hmac-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha512_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static void mtk_sha_task0(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[0]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static void mtk_sha_task1(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[1]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static irqreturn_t mtk_sha_ring2_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[0]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING2)); + + mtk_sha_write(cryp, RDR_STAT(RING2), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING2), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING2), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_sha_ring3_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[1]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING3)); + + mtk_sha_write(cryp, RDR_STAT(RING3), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING3), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING3), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of two SHA records is used to get extra performance. + * It is similar to mtk_aes_record_init(). + */ +static int mtk_sha_record_init(struct mtk_cryp *cryp) +{ + struct mtk_sha_rec **sha = cryp->sha; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + sha[i] = kzalloc(sizeof(**sha), GFP_KERNEL); + if (!sha[i]) + goto err_cleanup; + + sha[i]->id = i + RING2; + + spin_lock_init(&sha[i]->lock); + crypto_init_queue(&sha[i]->queue, SHA_QUEUE_SIZE); + } + + tasklet_init(&sha[0]->task, mtk_sha_task0, (unsigned long)cryp); + tasklet_init(&sha[1]->task, mtk_sha_task1, (unsigned long)cryp); + + cryp->rec = 1; + + return 0; + +err_cleanup: + for (; i--; ) + kfree(sha[i]); + return err; +} + +static void mtk_sha_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->sha[i]->task); + kfree(cryp->sha[i]); + } +} + +static void mtk_sha_unregister_algs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) + crypto_unregister_ahash(&algs_sha384_sha512[i]); +} + +static int mtk_sha_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) { + err = crypto_register_ahash(&algs_sha1_sha224_sha256[i]); + if (err) + goto err_sha_224_256_algs; + } + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) { + err = crypto_register_ahash(&algs_sha384_sha512[i]); + if (err) + goto err_sha_384_512_algs; + } + + return 0; + +err_sha_384_512_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha384_sha512[i]); + i = ARRAY_SIZE(algs_sha1_sha224_sha256); +err_sha_224_256_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + return err; +} + +int mtk_hash_alg_register(struct mtk_cryp *cryp) +{ + int err; + + INIT_LIST_HEAD(&cryp->sha_list); + + /* Initialize two hash records */ + err = mtk_sha_record_init(cryp); + if (err) + goto err_record; + + /* Ring2 is use by SHA record0 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING2], + mtk_sha_ring2_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq0.\n"); + goto err_res; + } + + /* Ring3 is use by SHA record1 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING3], + mtk_sha_ring3_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq1.\n"); + goto err_res; + } + + /* Enable ring2 and ring3 interrupt for hash */ + mtk_sha_write(cryp, AIC_ENABLE_SET(RING2), MTK_IRQ_RDR2); + mtk_sha_write(cryp, AIC_ENABLE_SET(RING3), MTK_IRQ_RDR3); + + cryp->tmp = dma_alloc_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + &cryp->tmp_dma, GFP_KERNEL); + if (!cryp->tmp) { + dev_err(cryp->dev, "unable to allocate tmp buffer.\n"); + err = -EINVAL; + goto err_res; + } + + spin_lock(&mtk_sha.lock); + list_add_tail(&cryp->sha_list, &mtk_sha.dev_list); + spin_unlock(&mtk_sha.lock); + + err = mtk_sha_register_algs(); + if (err) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); +err_res: + mtk_sha_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-sha initialization failed.\n"); + return err; +} + +void mtk_hash_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + + mtk_sha_unregister_algs(); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); + mtk_sha_record_free(cryp); +} diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 47576098831f..b6f14844702e 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,32 +1616,17 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ -static bool spacc_is_compatible(struct platform_device *pdev, - const char *spacc_type) -{ - const struct platform_device_id *platid = platform_get_device_id(pdev); - - if (platid && !strcmp(platid->name, spacc_type)) - return true; - -#ifdef CONFIG_OF - if (of_device_is_compatible(pdev->dev.of_node, spacc_type)) - return true; -#endif /* CONFIG_OF */ - - return false; -} - static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; struct resource *mem, *irq; + struct device_node *np = pdev->dev.of_node; struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; - if (spacc_is_compatible(pdev, "picochip,spacc-ipsec")) { + if (of_device_is_compatible(np, "picochip,spacc-ipsec")) { engine->max_ctxs = SPACC_CRYPTO_IPSEC_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_IPSEC_HASH_PG_SZ; @@ -1650,7 +1635,7 @@ static int spacc_probe(struct platform_device *pdev) engine->num_algs = ARRAY_SIZE(ipsec_engine_algs); engine->aeads = ipsec_engine_aeads; engine->num_aeads = ARRAY_SIZE(ipsec_engine_aeads); - } else if (spacc_is_compatible(pdev, "picochip,spacc-l2")) { + } else if (of_device_is_compatible(np, "picochip,spacc-l2")) { engine->max_ctxs = SPACC_CRYPTO_L2_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_L2_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_L2_HASH_PG_SZ; @@ -1803,12 +1788,6 @@ static int spacc_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id spacc_id_table[] = { - { "picochip,spacc-ipsec", }, - { "picochip,spacc-l2", }, - { } -}; - static struct platform_driver spacc_driver = { .probe = spacc_probe, .remove = spacc_remove, @@ -1819,7 +1798,6 @@ static struct platform_driver spacc_driver = { #endif /* CONFIG_PM */ .of_match_table = of_match_ptr(spacc_of_id_table), }, - .id_table = spacc_id_table, }; module_platform_driver(spacc_driver); diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 640c3fc870fd..f172171668ee 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 949d77b79fbe..24ec908eb26c 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 5b2d78a5b5aa..58a984c9c3ec 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 7540ce13b0d0..b9f3e0e4fde9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_common/adf_cfg_common.h b/drivers/crypto/qat/qat_common/adf_cfg_common.h index 8c4f6573ce59..1211261de7c2 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_common.h @@ -61,6 +61,7 @@ #define ADF_CFG_AFFINITY_WHATEVER 0xFF #define MAX_DEVICE_NAME_SIZE 32 #define ADF_MAX_DEVICES (32 * 32) +#define ADF_DEVS_ARRAY_SIZE BITS_TO_LONGS(ADF_MAX_DEVICES) enum adf_cfg_val_type { ADF_DEC, diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 980e07475012..5c4c0a253129 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -87,8 +87,8 @@ enum adf_event { struct service_hndl { int (*event_hld)(struct adf_accel_dev *accel_dev, enum adf_event event); - unsigned long init_status; - unsigned long start_status; + unsigned long init_status[ADF_DEVS_ARRAY_SIZE]; + unsigned long start_status[ADF_DEVS_ARRAY_SIZE]; char *name; struct list_head list; }; diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index b3ebb25f9ca7..8afac52677a6 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -152,7 +152,7 @@ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) ptr->hw_device->instance_id = i++; if (i == class->instances) - break; + break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 888c6675e7e5..26556c713049 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -64,8 +64,8 @@ static void adf_service_add(struct service_hndl *service) int adf_service_register(struct service_hndl *service) { - service->init_status = 0; - service->start_status = 0; + memset(service->init_status, 0, sizeof(service->init_status)); + memset(service->start_status, 0, sizeof(service->start_status)); adf_service_add(service); return 0; } @@ -79,9 +79,13 @@ static void adf_service_remove(struct service_hndl *service) int adf_service_unregister(struct service_hndl *service) { - if (service->init_status || service->start_status) { - pr_err("QAT: Could not remove active service\n"); - return -EFAULT; + int i; + + for (i = 0; i < ARRAY_SIZE(service->init_status); i++) { + if (service->init_status[i] || service->start_status[i]) { + pr_err("QAT: Could not remove active service\n"); + return -EFAULT; + } } adf_service_remove(service); return 0; @@ -163,7 +167,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->init_status); + set_bit(accel_dev->accel_id, service->init_status); } hw_data->enable_error_correction(accel_dev); @@ -210,7 +214,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->start_status); + set_bit(accel_dev->accel_id, service->start_status); } clear_bit(ADF_STATUS_STARTING, &accel_dev->status); @@ -259,14 +263,14 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->start_status)) + if (!test_bit(accel_dev->accel_id, service->start_status)) continue; ret = service->event_hld(accel_dev, ADF_EVENT_STOP); if (!ret) { - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } else if (ret == -EAGAIN) { wait = true; - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } } @@ -317,14 +321,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->init_status)) + if (!test_bit(accel_dev->accel_id, service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) dev_err(&GET_DEV(accel_dev), "Failed to shutdown service %s\n", service->name); else - clear_bit(accel_dev->accel_id, &service->init_status); + clear_bit(accel_dev->accel_id, service->init_status); } hw_data->disable_iov(accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 9320ae1d005b..b36d8653b1ba 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -162,9 +162,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /** * adf_disable_sriov() - Disable SRIOV for the device - * @pdev: Pointer to pci device. + * @accel_dev: Pointer to accel device. * - * Function disables SRIOV for the pci device. + * Function disables SRIOV for the accel device. * * Return: 0 on success, error code otherwise. */ diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index bf99e11a3403..4a73fc70f7a9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -148,7 +148,7 @@ static void adf_pf2vf_bh_handler(void *data) INIT_WORK(&stop_data->work, adf_dev_stop_async); queue_work(adf_vf_stop_wq, &stop_data->work); /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); return; } @@ -168,7 +168,7 @@ static void adf_pf2vf_bh_handler(void *data) } /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); /* Re-enable PF2VF interrupts */ diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 4d2de2838451..2ce01f010c74 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 60df98632fa2..26ab17bfc6da 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index d80f73366ae2..5db07495ddc5 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_VIRTIO select CRYPTO_AEAD select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER + select CRYPTO_ENGINE default m help This driver provides support for virtio crypto device. If you diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index c2374df9abae..49defda4e03d 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -288,8 +288,7 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, static int __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, struct ablkcipher_request *req, - struct data_queue *data_vq, - __u8 op) + struct data_queue *data_vq) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); @@ -329,7 +328,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, vc_req->req_data = req_data; vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER; /* Head of operation */ - if (op) { + if (vc_req->encrypt) { req_data->header.session_id = cpu_to_le64(ctx->enc_sess_info.session_id); req_data->header.opcode = @@ -424,19 +423,15 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 1); - if (ret < 0) { - pr_err("virtio_crypto: Encryption failed!\n"); - return ret; - } + vc_req->encrypt = true; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) @@ -445,20 +440,16 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 0); - if (ret < 0) { - pr_err("virtio_crypto: Decryption failed!\n"); - return ret; - } + vc_req->encrypt = false; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm) @@ -484,10 +475,37 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm) ctx->vcrypto = NULL; } +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct data_queue *data_vq = vc_req->dataq; + int ret; + + ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq); + if (ret < 0) + return ret; + + virtqueue_kick(data_vq->vq); + + return 0; +} + +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err) +{ + crypto_finalize_cipher_request(vc_req->dataq->engine, req, err); + + virtcrypto_clear_request(vc_req); +} + static struct crypto_alg virtio_crypto_algs[] = { { .cra_name = "cbc(aes)", .cra_driver_name = "virtio_crypto_aes_cbc", - .cra_priority = 501, + .cra_priority = 150, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct virtio_crypto_ablkcipher_ctx), diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 3d6566b02876..da6d8c0ea407 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -25,6 +25,7 @@ #include <crypto/aead.h> #include <crypto/aes.h> #include <crypto/authenc.h> +#include <crypto/engine.h> /* Internal representation of a data virtqueue */ @@ -37,6 +38,8 @@ struct data_queue { /* Name of the tx queue: dataq.$index */ char name[32]; + + struct crypto_engine *engine; }; struct virtio_crypto { @@ -97,6 +100,9 @@ struct virtio_crypto_request { struct virtio_crypto_op_data_req *req_data; struct scatterlist **sgs; uint8_t *iv; + /* Encryption? */ + bool encrypt; + struct data_queue *dataq; }; int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); @@ -110,6 +116,16 @@ int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev); struct virtio_crypto *virtcrypto_get_dev_node(int node); int virtcrypto_dev_start(struct virtio_crypto *vcrypto); void virtcrypto_dev_stop(struct virtio_crypto *vcrypto); +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req); +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err); + +void +virtcrypto_clear_request(struct virtio_crypto_request *vc_req); static inline int virtio_crypto_get_current_node(void) { diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..b5b153317376 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -25,7 +25,7 @@ #include "virtio_crypto_common.h" -static void +void virtcrypto_clear_request(struct virtio_crypto_request *vc_req) { if (vc_req) { @@ -66,12 +66,12 @@ static void virtcrypto_dataq_callback(struct virtqueue *vq) break; } ablk_req = vc_req->ablkcipher_req; - virtcrypto_clear_request(vc_req); spin_unlock_irqrestore( &vcrypto->data_vq[qid].lock, flags); /* Finish the encrypt or decrypt process */ - ablk_req->base.complete(&ablk_req->base, error); + virtio_crypto_ablkcipher_finalize_req(vc_req, + ablk_req, error); spin_lock_irqsave( &vcrypto->data_vq[qid].lock, flags); } @@ -87,6 +87,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; + struct device *dev = &vi->vdev->dev; /* * We expect 1 data virtqueue, followed by @@ -128,6 +129,15 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) for (i = 0; i < vi->max_data_queues; i++) { spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; + /* Initialize crypto engine */ + vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + if (!vi->data_vq[i].engine) { + ret = -ENOMEM; + goto err_engine; + } + + vi->data_vq[i].engine->cipher_one_request = + virtio_crypto_ablkcipher_crypt_req; } kfree(names); @@ -136,6 +146,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) return 0; +err_engine: err_find: kfree(names); err_names: @@ -269,6 +280,38 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) return 0; } +static int virtcrypto_start_crypto_engines(struct virtio_crypto *vcrypto) +{ + int32_t i; + int ret; + + for (i = 0; i < vcrypto->max_data_queues; i++) { + if (vcrypto->data_vq[i].engine) { + ret = crypto_engine_start(vcrypto->data_vq[i].engine); + if (ret) + goto err; + } + } + + return 0; + +err: + while (--i >= 0) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); + + return ret; +} + +static void virtcrypto_clear_crypto_engines(struct virtio_crypto *vcrypto) +{ + u32 i; + + for (i = 0; i < vcrypto->max_data_queues; i++) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); +} + static void virtcrypto_del_vqs(struct virtio_crypto *vcrypto) { struct virtio_device *vdev = vcrypto->vdev; @@ -355,14 +398,21 @@ static int virtcrypto_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "Failed to initialize vqs.\n"); goto free_dev; } + + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); err = virtcrypto_update_status(vcrypto); if (err) - goto free_vqs; + goto free_engines; return 0; +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); free_vqs: vcrypto->vdev->config->reset(vdev); virtcrypto_del_vqs(vcrypto); @@ -398,6 +448,7 @@ static void virtcrypto_remove(struct virtio_device *vdev) virtcrypto_dev_stop(vcrypto); vdev->config->reset(vdev); virtcrypto_free_unused_reqs(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); virtcrypto_devmgr_rm_dev(vcrypto); kfree(vcrypto); @@ -420,6 +471,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev) if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); return 0; } @@ -433,14 +485,26 @@ static int virtcrypto_restore(struct virtio_device *vdev) if (err) return err; + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); + err = virtcrypto_dev_start(vcrypto); if (err) { dev_err(&vdev->dev, "Failed to start virtio crypto device.\n"); - return -EFAULT; + goto free_engines; } return 0; + +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); +free_vqs: + vcrypto->vdev->config->reset(vdev); + virtcrypto_del_vqs(vcrypto); + return err; } #endif diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 38ed10d761d0..7cf6d31c1123 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -80,11 +80,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); return ret; @@ -99,11 +101,13 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); crypto_xor(keystream, src, nbytes); memcpy(dst, keystream, nbytes); @@ -132,6 +136,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, @@ -143,6 +148,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, walk.iv); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); /* We need to update IV mostly for last bytes/round */ inc = (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE; diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index a324801d6a66..47206a21bb90 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -593,11 +593,16 @@ struct devfreq *devfreq_add_device(struct device *dev, list_add(&devfreq->node, &devfreq_list); governor = find_devfreq_governor(devfreq->governor_name); - if (!IS_ERR(governor)) - devfreq->governor = governor; - if (devfreq->governor) - err = devfreq->governor->event_handler(devfreq, - DEVFREQ_GOV_START, NULL); + if (IS_ERR(governor)) { + dev_err(dev, "%s: Unable to find governor for the device\n", + __func__); + err = PTR_ERR(governor); + goto err_init; + } + + devfreq->governor = governor; + err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START, + NULL); if (err) { dev_err(dev, "%s: Unable to start governor for the device\n", __func__); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index a8ed7792ece2..9af86f46fbec 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -497,7 +497,7 @@ passive: if (IS_ERR(bus->devfreq)) { dev_err(dev, "failed to add devfreq dev with passive governor\n"); - ret = -EPROBE_DEFER; + ret = PTR_ERR(bus->devfreq); goto err; } diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index 70e13230d8db..9ad0b1934be9 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -721,11 +721,17 @@ static int scpi_sensor_get_value(u16 sensor, u64 *val) ret = scpi_send_message(CMD_SENSOR_VALUE, &id, sizeof(id), &buf, sizeof(buf)); - if (!ret) + if (ret) + return ret; + + if (scpi_info->is_legacy) + /* only 32-bits supported, hi_val can be junk */ + *val = le32_to_cpu(buf.lo_val); + else *val = (u64)le32_to_cpu(buf.hi_val) << 32 | le32_to_cpu(buf.lo_val); - return ret; + return 0; } static int scpi_device_get_power_state(u16 dev_id) diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci_checker.c index 44bdb78f837b..29d58feaf675 100644 --- a/drivers/firmware/psci_checker.c +++ b/drivers/firmware/psci_checker.c @@ -270,8 +270,7 @@ static int suspend_test_thread(void *arg) struct cpuidle_device *dev; struct cpuidle_driver *drv; /* No need for an actual callback, we just want to wake up the CPU. */ - struct timer_list wakeup_timer = - TIMER_INITIALIZER(dummy_callback, 0, 0); + struct timer_list wakeup_timer; /* Wait for the main thread to give the start signal. */ wait_for_completion(&suspend_threads_started); @@ -287,6 +286,7 @@ static int suspend_test_thread(void *arg) pr_info("CPU %d entering suspend cycles, states 1 through %d\n", cpu, drv->state_count - 1); + setup_timer_on_stack(&wakeup_timer, dummy_callback, 0); for (i = 0; i < NUM_SUSPEND_CYCLE; ++i) { int index; /* diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index db516382a4d4..711c31c8d8b4 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -123,6 +123,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, u8 changed = old ^ new; int ret; + memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); if (!(changed & PCI_COMMAND_MEMORY)) return 0; @@ -142,7 +143,6 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, return ret; } - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); return 0; } @@ -240,7 +240,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(bytes > 4)) return -EINVAL; - if (WARN_ON(offset + bytes >= INTEL_GVT_MAX_CFG_SPACE_SZ)) + if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ)) return -EINVAL; /* First check if it's PCI_COMMAND */ diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 7eaaf1c9ed2b..6c5fdf5b2ce2 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1998,6 +1998,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) INIT_LIST_HEAD(>t->oos_page_list_head); INIT_LIST_HEAD(>t->post_shadow_list_head); + intel_vgpu_reset_ggtt(vgpu); + ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, NULL, 1, 0); if (IS_ERR(ggtt_mm)) { @@ -2206,6 +2208,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int intel_gvt_init_gtt(struct intel_gvt *gvt) { int ret; + void *page_addr; gvt_dbg_core("init gtt\n"); @@ -2218,6 +2221,23 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) return -ENODEV; } + gvt->gtt.scratch_ggtt_page = + alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO); + if (!gvt->gtt.scratch_ggtt_page) { + gvt_err("fail to allocate scratch ggtt page\n"); + return -ENOMEM; + } + + page_addr = page_address(gvt->gtt.scratch_ggtt_page); + + gvt->gtt.scratch_ggtt_mfn = + intel_gvt_hypervisor_virt_to_mfn(page_addr); + if (gvt->gtt.scratch_ggtt_mfn == INTEL_GVT_INVALID_ADDR) { + gvt_err("fail to translate scratch ggtt page\n"); + __free_page(gvt->gtt.scratch_ggtt_page); + return -EFAULT; + } + if (enable_out_of_sync) { ret = setup_spt_oos(gvt); if (ret) { @@ -2239,6 +2259,41 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) */ void intel_gvt_clean_gtt(struct intel_gvt *gvt) { + __free_page(gvt->gtt.scratch_ggtt_page); + if (enable_out_of_sync) clean_spt_oos(gvt); } + +/** + * intel_vgpu_reset_ggtt - reset the GGTT entry + * @vgpu: a vGPU + * + * This function is called at the vGPU create stage + * to reset all the GGTT entries. + * + */ +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + u32 index; + u32 offset; + u32 num_entries; + struct intel_gvt_gtt_entry e; + + memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); + e.type = GTT_TYPE_GGTT_PTE; + ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); + e.val64 |= _PAGE_PRESENT; + + index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; + num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; + for (offset = 0; offset < num_entries; offset++) + ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + + index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; + num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; + for (offset = 0; offset < num_entries; offset++) + ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); +} diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index d250013bc37b..b315ab3593ec 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -81,6 +81,9 @@ struct intel_gvt_gtt { struct list_head oos_page_use_list_head; struct list_head oos_page_free_list_head; struct list_head mm_lru_list_head; + + struct page *scratch_ggtt_page; + unsigned long scratch_ggtt_mfn; }; enum { @@ -202,6 +205,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index ad0e9364ee70..0af17016f33f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -175,6 +175,7 @@ struct intel_vgpu { struct notifier_block group_notifier; struct kvm *kvm; struct work_struct release_work; + atomic_t released; } vdev; #endif }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 4dd6722a7339..faaae07ae487 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -114,12 +114,15 @@ out: static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) { struct gvt_dma *entry; + kvm_pfn_t pfn; mutex_lock(&vgpu->vdev.cache_lock); + entry = __gvt_cache_find(vgpu, gfn); - mutex_unlock(&vgpu->vdev.cache_lock); + pfn = (entry == NULL) ? 0 : entry->pfn; - return entry == NULL ? 0 : entry->pfn; + mutex_unlock(&vgpu->vdev.cache_lock); + return pfn; } static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn) @@ -166,7 +169,7 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) { - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); struct gvt_dma *this; unsigned long g1; int rc; @@ -195,7 +198,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); unsigned long gfn; mutex_lock(&vgpu->vdev.cache_lock); @@ -396,7 +399,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) struct device *pdev; void *gvt; - pdev = mdev->parent->dev; + pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); @@ -418,7 +421,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) mdev_set_drvdata(mdev, vgpu); gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", - dev_name(&mdev->dev)); + dev_name(mdev_dev(mdev))); return 0; } @@ -482,7 +485,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, &vgpu->vdev.iommu_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for iommu failed: %d\n", ret); @@ -490,17 +493,26 @@ static int intel_vgpu_open(struct mdev_device *mdev) } events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(&mdev->dev, VFIO_GROUP_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vgpu->vdev.group_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for group failed: %d\n", ret); goto undo_iommu; } - return kvmgt_guest_init(mdev); + ret = kvmgt_guest_init(mdev); + if (ret) + goto undo_group; + + atomic_set(&vgpu->vdev.released, 0); + return ret; + +undo_group: + vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, + &vgpu->vdev.group_notifier); undo_iommu: - vfio_unregister_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); out: return ret; @@ -509,17 +521,26 @@ out: static void __intel_vgpu_release(struct intel_vgpu *vgpu) { struct kvmgt_guest_info *info; + int ret; if (!handle_valid(vgpu->handle)) return; - vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_IOMMU_NOTIFY, + if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) + return; + + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); - vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_GROUP_NOTIFY, + WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); + + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY, &vgpu->vdev.group_notifier); + WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret); info = (struct kvmgt_guest_info *)vgpu->handle; kvmgt_guest_exit(info); + + vgpu->vdev.kvm = NULL; vgpu->handle = 0; } @@ -534,6 +555,7 @@ static void intel_vgpu_release_work(struct work_struct *work) { struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu, vdev.release_work); + __intel_vgpu_release(vgpu); } @@ -1089,7 +1111,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return 0; } -static const struct parent_ops intel_vgpu_ops = { +static const struct mdev_parent_ops intel_vgpu_ops = { .supported_type_groups = intel_vgpu_type_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, @@ -1134,6 +1156,10 @@ static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } spin_lock(&kvm->mmu_lock); @@ -1164,6 +1190,10 @@ static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } spin_lock(&kvm->mmu_lock); @@ -1311,18 +1341,14 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { - struct intel_vgpu *vgpu; - if (!info) { gvt_err("kvmgt_guest_info invalid\n"); return false; } - vgpu = info->vgpu; - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvmgt_protect_table_destroy(info); - gvt_cache_destroy(vgpu); + gvt_cache_destroy(info->vgpu); vfree(info); return true; @@ -1372,7 +1398,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) return pfn; pfn = INTEL_GVT_INVALID_ADDR; - dev = &info->vgpu->vdev.mdev->dev; + dev = mdev_dev(info->vgpu->vdev.mdev); rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (rc != 1) { gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc); diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d2a0fbc896c3..81cd921770c6 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -65,7 +65,7 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) int i, ret; for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) { - mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu) + mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va + i * PAGE_SIZE); if (mfn == INTEL_GVT_INVALID_ADDR) { gvt_err("fail to get MFN from VA\n"); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4a31b7a891ec..3dd7fc662859 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -244,14 +244,16 @@ err_phys: static void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + bool needs_clflush) { GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); if (obj->mm.madv == I915_MADV_DONTNEED) obj->mm.dirty = false; - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && + if (needs_clflush && + (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) drm_clflush_sg(pages); @@ -263,7 +265,7 @@ static void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { - __i915_gem_object_release_shmem(obj, pages); + __i915_gem_object_release_shmem(obj, pages, false); if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; @@ -2231,7 +2233,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - __i915_gem_object_release_shmem(obj, pages); + __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -2304,15 +2306,6 @@ unlock: mutex_unlock(&obj->mm.lock); } -static unsigned int swiotlb_max_size(void) -{ -#if IS_ENABLED(CONFIG_SWIOTLB) - return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE); -#else - return 0; -#endif -} - static void i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; @@ -2322,7 +2315,7 @@ static void i915_sg_trim(struct sg_table *orig_st) if (orig_st->nents == orig_st->orig_nents) return; - if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL)) + if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) return; new_sg = new_st.sgl; @@ -2360,7 +2353,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); - max_segment = swiotlb_max_size(); + max_segment = swiotlb_max_segment(); if (!max_segment) max_segment = rounddown(UINT_MAX, PAGE_SIZE); @@ -2728,6 +2721,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) struct drm_i915_gem_request *request; struct i915_gem_context *incomplete_ctx; struct intel_timeline *timeline; + unsigned long flags; bool ring_hung; if (engine->irq_seqno_barrier) @@ -2763,13 +2757,20 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) if (i915_gem_context_is_default(incomplete_ctx)) return; + timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine); + + spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock(&timeline->lock); + list_for_each_entry_continue(request, &engine->timeline->requests, link) if (request->ctx == incomplete_ctx) reset_request(request); - timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine); list_for_each_entry(request, &timeline->requests, link) reset_request(request); + + spin_unlock(&timeline->lock); + spin_unlock_irqrestore(&engine->timeline->lock, flags); } void i915_gem_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index e2b077df2da0..d229f47d1028 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -413,6 +413,25 @@ i915_gem_active_set(struct i915_gem_active *active, rcu_assign_pointer(active->request, request); } +/** + * i915_gem_active_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_gem_active_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_gem_active_set_retire_fn(struct i915_gem_active *active, + i915_gem_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_gem_retire_noop; +} + static inline struct drm_i915_gem_request * __i915_gem_active_peek(const struct i915_gem_active *active) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6daad8613760..3dc8724df400 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16791,7 +16791,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = crtc->config; - int pixclk = 0; __drm_atomic_helper_crtc_destroy_state(&crtc_state->base); memset(crtc_state, 0, sizeof(*crtc_state)); @@ -16803,23 +16802,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->base.enabled = crtc_state->base.enable; crtc->active = crtc_state->base.active; - if (crtc_state->base.active) { + if (crtc_state->base.active) dev_priv->active_crtcs |= 1 << crtc->pipe; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; - else - WARN_ON(dev_priv->display.modeset_calc_cdclk); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixclk = DIV_ROUND_UP(pixclk * 100, 95); - } - - dev_priv->min_pixclk[crtc->pipe] = pixclk; - readout_plane_state(crtc); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", @@ -16892,6 +16877,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } for_each_intel_crtc(dev, crtc) { + int pixclk = 0; + crtc->base.hwmode = crtc->config->base.adjusted_mode; memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); @@ -16919,10 +16906,23 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc->base.state->mode.private_flags = I915_MODE_FLAG_INHERITED; + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + pixclk = ilk_pipe_pixel_rate(crtc->config); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc->config->base.adjusted_mode.crtc_clock; + else + WARN_ON(dev_priv->display.modeset_calc_cdclk); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc->config->ips_enabled) + pixclk = DIV_ROUND_UP(pixclk * 100, 95); + drm_calc_timestamping_constants(&crtc->base, &crtc->base.hwmode); update_scanline_offset(crtc); } + dev_priv->min_pixclk[crtc->pipe] = pixclk; + intel_pipe_config_sanity_check(dev_priv, crtc->config); } } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d9bc19be855e..0b8e8eb85c19 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -355,7 +355,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, struct intel_dp *intel_dp); static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp); + struct intel_dp *intel_dp, + bool force_disable_vdd); static void intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); @@ -516,7 +517,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); /* * Even vdd force doesn't work until we've made @@ -553,7 +554,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) * Only the HW needs to be reprogrammed, the SW state is fixed and * has been setup during connector init. */ - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); return 0; } @@ -636,7 +637,7 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) port_name(port), pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) @@ -2912,7 +2913,7 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); } static void vlv_pre_enable_dp(struct intel_encoder *encoder, @@ -5055,7 +5056,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp) + struct intel_dp *intel_dp, + bool force_disable_vdd) { struct drm_i915_private *dev_priv = to_i915(dev); u32 pp_on, pp_off, pp_div, port_sel = 0; @@ -5068,6 +5070,31 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, intel_pps_get_registers(dev_priv, intel_dp, ®s); + /* + * On some VLV machines the BIOS can leave the VDD + * enabled even on power seqeuencers which aren't + * hooked up to any port. This would mess up the + * power domain tracking the first time we pick + * one of these power sequencers for use since + * edp_panel_vdd_on() would notice that the VDD was + * already on and therefore wouldn't grab the power + * domain reference. Disable VDD first to avoid this. + * This also avoids spuriously turning the VDD on as + * soon as the new power seqeuencer gets initialized. + */ + if (force_disable_vdd) { + u32 pp = ironlake_get_pp_control(intel_dp); + + WARN(pp & PANEL_POWER_ON, "Panel power already on\n"); + + if (pp & EDP_FORCE_VDD) + DRM_DEBUG_KMS("VDD already on, disabling first\n"); + + pp &= ~EDP_FORCE_VDD; + + I915_WRITE(regs.pp_ctrl, pp); + } + pp_on = (seq->t1_t3 << PANEL_POWER_UP_DELAY_SHIFT) | (seq->t8 << PANEL_LIGHT_ON_DELAY_SHIFT); pp_off = (seq->t9 << PANEL_LIGHT_OFF_DELAY_SHIFT) | @@ -5122,7 +5149,7 @@ static void intel_dp_pps_init(struct drm_device *dev, vlv_initial_power_sequencer_setup(intel_dp); } else { intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index fd0e4dac7cc1..e589e17876dc 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -216,7 +216,8 @@ static void intel_overlay_submit_request(struct intel_overlay *overlay, { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); - overlay->last_flip.retire = retire; + i915_gem_active_set_retire_fn(&overlay->last_flip, retire, + &overlay->i915->drm.struct_mutex); i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); } @@ -839,8 +840,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vma->obj, new_bo, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, + vma->obj, INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; overlay->vma = vma; @@ -1430,6 +1431,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; + init_request_active(&overlay->last_flip, NULL); + regs = intel_overlay_map_regs(overlay); if (!regs) goto out_unpin_bo; diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index d40ed9fdf68d..70b12f89a193 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -64,7 +64,8 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad"); #define QUIRK_SKIP_INPUT_MAPPING BIT(2) #define QUIRK_IS_MULTITOUCH BIT(3) -#define NOTEBOOK_QUIRKS QUIRK_FIX_NOTEBOOK_REPORT +#define KEYBOARD_QUIRKS (QUIRK_FIX_NOTEBOOK_REPORT | \ + QUIRK_NO_INIT_REPORTS) #define TOUCHPAD_QUIRKS (QUIRK_NO_INIT_REPORTS | \ QUIRK_SKIP_INPUT_MAPPING | \ QUIRK_IS_MULTITOUCH) @@ -170,11 +171,11 @@ static int asus_raw_event(struct hid_device *hdev, static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi) { + struct input_dev *input = hi->input; struct asus_drvdata *drvdata = hid_get_drvdata(hdev); if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { int ret; - struct input_dev *input = hi->input; input_set_abs_params(input, ABS_MT_POSITION_X, 0, MAX_X, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, MAX_Y, 0, 0); @@ -191,10 +192,10 @@ static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_err(hdev, "Asus input mt init slots failed: %d\n", ret); return ret; } - - drvdata->input = input; } + drvdata->input = input; + return 0; } @@ -286,7 +287,11 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) goto err_stop_hw; } - drvdata->input->name = "Asus TouchPad"; + if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { + drvdata->input->name = "Asus TouchPad"; + } else { + drvdata->input->name = "Asus Keyboard"; + } if (drvdata->quirks & QUIRK_IS_MULTITOUCH) { ret = asus_start_multitouch(hdev); @@ -315,7 +320,7 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, static const struct hid_device_id asus_devices[] = { { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, - USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), NOTEBOOK_QUIRKS}, + USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), KEYBOARD_QUIRKS}, { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_TOUCHPAD), TOUCHPAD_QUIRKS }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ec277b96eaa1..54bd22dc1411 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -319,6 +319,7 @@ #define USB_VENDOR_ID_DRAGONRISE 0x0079 #define USB_DEVICE_ID_DRAGONRISE_WIIU 0x1800 #define USB_DEVICE_ID_DRAGONRISE_PS3 0x1801 +#define USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR 0x1803 #define USB_DEVICE_ID_DRAGONRISE_GAMECUBE 0x1843 #define USB_VENDOR_ID_DWAV 0x0eef @@ -365,6 +366,9 @@ #define USB_VENDOR_ID_FLATFROG 0x25b5 #define USB_DEVICE_ID_MULTITOUCH_3200 0x0002 +#define USB_VENDOR_ID_FUTABA 0x0547 +#define USB_DEVICE_ID_LED_DISPLAY 0x7000 + #define USB_VENDOR_ID_ESSENTIAL_REALITY 0x0d7f #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100 diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 5c925228847c..4ef73374a8f9 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -212,7 +212,6 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, __s32 value; int ret = 0; - memset(buffer, 0, buffer_size); mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield)) { @@ -256,6 +255,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, int buffer_index = 0; int i; + memset(buffer, 0, buffer_size); + mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield) || diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 7687c0875395..f405b07d0381 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1099,8 +1099,11 @@ struct sony_sc { u8 led_delay_on[MAX_LEDS]; u8 led_delay_off[MAX_LEDS]; u8 led_count; + bool ds4_dongle_connected; }; +static void sony_set_leds(struct sony_sc *sc); + static inline void sony_schedule_work(struct sony_sc *sc) { if (!sc->defer_initialization) @@ -1430,6 +1433,31 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, return -EILSEQ; } } + + /* + * In the case of a DS4 USB dongle, bit[2] of byte 31 indicates + * if a DS4 is actually connected (indicated by '0'). + * For non-dongle, this bit is always 0 (connected). + */ + if (sc->hdev->vendor == USB_VENDOR_ID_SONY && + sc->hdev->product == USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) { + bool connected = (rd[31] & 0x04) ? false : true; + + if (!sc->ds4_dongle_connected && connected) { + hid_info(sc->hdev, "DualShock 4 USB dongle: controller connected\n"); + sony_set_leds(sc); + sc->ds4_dongle_connected = true; + } else if (sc->ds4_dongle_connected && !connected) { + hid_info(sc->hdev, "DualShock 4 USB dongle: controller disconnected\n"); + sc->ds4_dongle_connected = false; + /* Return 0, so hidraw can get the report. */ + return 0; + } else if (!sc->ds4_dongle_connected) { + /* Return 0, so hidraw can get the report. */ + return 0; + } + } + dualshock4_parse_report(sc, rd, size); } @@ -2390,6 +2418,12 @@ static int sony_check_add(struct sony_sc *sc) } memcpy(sc->mac_address, &buf[1], sizeof(sc->mac_address)); + + snprintf(sc->hdev->uniq, sizeof(sc->hdev->uniq), + "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + sc->mac_address[5], sc->mac_address[4], + sc->mac_address[3], sc->mac_address[2], + sc->mac_address[1], sc->mac_address[0]); } else if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { buf = kmalloc(SIXAXIS_REPORT_0xF2_SIZE, GFP_KERNEL); @@ -2548,7 +2582,7 @@ static int sony_input_configured(struct hid_device *hdev, hid_err(sc->hdev, "Unable to initialize multi-touch slots: %d\n", ret); - return ret; + goto err_stop; } sony_init_output_report(sc, dualshock4_send_output_report); diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index b3e01c82af05..e9d6cc7cdfc5 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -83,11 +83,13 @@ static const struct hid_blacklist { { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 322ed9272811..841f2428e84a 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1036,7 +1036,7 @@ static const u8 lm90_temp_emerg_index[3] = { }; static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; -static const u8 lm90_max_alarm_bits[3] = { 0, 4, 12 }; +static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index f6b6d42385e1..784670e2736b 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -353,12 +353,12 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { [0] = { .num = ST_ACCEL_FS_AVL_2G, .value = 0x00, - .gain = IIO_G_TO_M_S_2(1024), + .gain = IIO_G_TO_M_S_2(1000), }, [1] = { .num = ST_ACCEL_FS_AVL_6G, .value = 0x01, - .gain = IIO_G_TO_M_S_2(340), + .gain = IIO_G_TO_M_S_2(3000), }, }, }, @@ -366,6 +366,14 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .addr = 0x21, .mask = 0x40, }, + /* + * Data Alignment Setting - needs to be set to get + * left-justified data like all other sensors. + */ + .das = { + .addr = 0x21, + .mask = 0x01, + }, .drdy_irq = { .addr = 0x21, .mask_int1 = 0x04, diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 38bc319904c4..9c8b558ba19e 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -561,7 +561,7 @@ config TI_ADS8688 config TI_AM335X_ADC tristate "TI's AM335X ADC driver" - depends on MFD_TI_AM335X_TSCADC + depends on MFD_TI_AM335X_TSCADC && HAS_DMA select IIO_BUFFER select IIO_KFIFO_BUF help diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index fe7775bb3740..df4045203a07 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -30,7 +30,9 @@ static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) for_each_set_bit(i, indio_dev->active_scan_mask, num_data_channels) { const struct iio_chan_spec *channel = &indio_dev->channels[i]; - unsigned int bytes_to_read = channel->scan_type.realbits >> 3; + unsigned int bytes_to_read = + DIV_ROUND_UP(channel->scan_type.realbits + + channel->scan_type.shift, 8); unsigned int storage_bytes = channel->scan_type.storagebits >> 3; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 975a1f19f747..79c8c7cd70d5 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -401,6 +401,15 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, return err; } + /* set DAS */ + if (sdata->sensor_settings->das.addr) { + err = st_sensors_write_data_with_mask(indio_dev, + sdata->sensor_settings->das.addr, + sdata->sensor_settings->das.mask, 1); + if (err < 0) + return err; + } + if (sdata->int_pin_open_drain) { dev_info(&indio_dev->dev, "set interrupt line to open drain mode\n"); @@ -483,8 +492,10 @@ static int st_sensors_read_axis_data(struct iio_dev *indio_dev, int err; u8 *outdata; struct st_sensor_data *sdata = iio_priv(indio_dev); - unsigned int byte_for_channel = ch->scan_type.realbits >> 3; + unsigned int byte_for_channel; + byte_for_channel = DIV_ROUND_UP(ch->scan_type.realbits + + ch->scan_type.shift, 8); outdata = kmalloc(byte_for_channel, GFP_KERNEL); if (!outdata) return -ENOMEM; diff --git a/drivers/iio/counter/104-quad-8.c b/drivers/iio/counter/104-quad-8.c index 2d2ee353dde7..a5913e97945e 100644 --- a/drivers/iio/counter/104-quad-8.c +++ b/drivers/iio/counter/104-quad-8.c @@ -153,7 +153,7 @@ static int quad8_write_raw(struct iio_dev *indio_dev, ior_cfg = val | priv->preset_enable[chan->channel] << 1; /* Load I/O control configuration */ - outb(0x40 | ior_cfg, base_offset); + outb(0x40 | ior_cfg, base_offset + 1); return 0; case IIO_CHAN_INFO_SCALE: @@ -233,7 +233,7 @@ static ssize_t quad8_read_set_to_preset_on_index(struct iio_dev *indio_dev, const struct quad8_iio *const priv = iio_priv(indio_dev); return snprintf(buf, PAGE_SIZE, "%u\n", - priv->preset_enable[chan->channel]); + !priv->preset_enable[chan->channel]); } static ssize_t quad8_write_set_to_preset_on_index(struct iio_dev *indio_dev, @@ -241,7 +241,7 @@ static ssize_t quad8_write_set_to_preset_on_index(struct iio_dev *indio_dev, size_t len) { struct quad8_iio *const priv = iio_priv(indio_dev); - const int base_offset = priv->base + 2 * chan->channel; + const int base_offset = priv->base + 2 * chan->channel + 1; bool preset_enable; int ret; unsigned int ior_cfg; @@ -250,6 +250,9 @@ static ssize_t quad8_write_set_to_preset_on_index(struct iio_dev *indio_dev, if (ret) return ret; + /* Preset enable is active low in Input/Output Control register */ + preset_enable = !preset_enable; + priv->preset_enable[chan->channel] = preset_enable; ior_cfg = priv->ab_enable[chan->channel] | @@ -362,7 +365,7 @@ static int quad8_set_synchronous_mode(struct iio_dev *indio_dev, priv->synchronous_mode[chan->channel] = synchronous_mode; /* Load Index Control configuration to Index Control Register */ - outb(0x40 | idr_cfg, base_offset); + outb(0x60 | idr_cfg, base_offset); return 0; } @@ -444,7 +447,7 @@ static int quad8_set_index_polarity(struct iio_dev *indio_dev, priv->index_polarity[chan->channel] = index_polarity; /* Load Index Control configuration to Index Control Register */ - outb(0x40 | idr_cfg, base_offset); + outb(0x60 | idr_cfg, base_offset); return 0; } diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 5355507f8fa1..c9e319bff58b 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -66,10 +66,8 @@ #define BMI160_REG_DUMMY 0x7F -#define BMI160_ACCEL_PMU_MIN_USLEEP 3200 -#define BMI160_ACCEL_PMU_MAX_USLEEP 3800 -#define BMI160_GYRO_PMU_MIN_USLEEP 55000 -#define BMI160_GYRO_PMU_MAX_USLEEP 80000 +#define BMI160_ACCEL_PMU_MIN_USLEEP 3800 +#define BMI160_GYRO_PMU_MIN_USLEEP 80000 #define BMI160_SOFTRESET_USLEEP 1000 #define BMI160_CHANNEL(_type, _axis, _index) { \ @@ -151,20 +149,9 @@ static struct bmi160_regs bmi160_regs[] = { }, }; -struct bmi160_pmu_time { - unsigned long min; - unsigned long max; -}; - -static struct bmi160_pmu_time bmi160_pmu_time[] = { - [BMI160_ACCEL] = { - .min = BMI160_ACCEL_PMU_MIN_USLEEP, - .max = BMI160_ACCEL_PMU_MAX_USLEEP - }, - [BMI160_GYRO] = { - .min = BMI160_GYRO_PMU_MIN_USLEEP, - .max = BMI160_GYRO_PMU_MIN_USLEEP, - }, +static unsigned long bmi160_pmu_time[] = { + [BMI160_ACCEL] = BMI160_ACCEL_PMU_MIN_USLEEP, + [BMI160_GYRO] = BMI160_GYRO_PMU_MIN_USLEEP, }; struct bmi160_scale { @@ -289,7 +276,7 @@ int bmi160_set_mode(struct bmi160_data *data, enum bmi160_sensor_type t, if (ret < 0) return ret; - usleep_range(bmi160_pmu_time[t].min, bmi160_pmu_time[t].max); + usleep_range(bmi160_pmu_time[t], bmi160_pmu_time[t] + 1000); return 0; } diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index a144ca3461fc..81bd8e8da4a6 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -113,7 +113,7 @@ static const char max44000_int_time_avail_str[] = "0.100 " "0.025 " "0.00625 " - "0.001625"; + "0.0015625"; /* Available scales (internal to ulux) with pretty manual alignment: */ static const int max44000_scale_avail_ulux_array[] = { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c8413fc120e6..7031a8dd4d14 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 019e02707cd5..3ef0f42984f2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1023,7 +1023,7 @@ again: next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; left = (head - next_tail) % CMD_BUFFER_SIZE; - if (left <= 2) { + if (left <= 0x20) { struct iommu_cmd sync_cmd; int ret; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index a88576d50740..8ccbd7023194 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -903,8 +903,10 @@ int __init detect_intel_iommu(void) x86_init.iommu.iommu_init = intel_iommu_init; #endif - acpi_put_table(dmar_tbl); - dmar_tbl = NULL; + if (dmar_tbl) { + acpi_put_table(dmar_tbl); + dmar_tbl = NULL; + } up_write(&dmar_global_lock); return ret ? 1 : -ENODEV; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c66c273dfd8a..8a185250ae5a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2037,6 +2037,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context)) goto out_unlock; + /* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ + if (context_copied(context)) { + u16 did_old = context_domain_id(context); + + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + pgd = domain->pgd; context_clear_entry(context); @@ -5185,6 +5204,25 @@ static void intel_iommu_remove_device(struct device *dev) } #ifdef CONFIG_INTEL_IOMMU_SVM +#define MAX_NR_PASID_BITS (20) +static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu) +{ + /* + * Convert ecap_pss to extend context entry pts encoding, also + * respect the soft pasid_max value set by the iommu. + * - number of PASID bits = ecap_pss + 1 + * - number of PASID table entries = 2^(pts + 5) + * Therefore, pts = ecap_pss - 4 + * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15 + */ + if (ecap_pss(iommu->ecap) < 5) + return 0; + + /* pasid_max is encoded as actual number of entries not the bits */ + return find_first_bit((unsigned long *)&iommu->pasid_max, + MAX_NR_PASID_BITS) - 5; +} + int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5217,7 +5255,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!(ctx_lo & CONTEXT_PASIDE)) { context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | + intel_iommu_get_pts(iommu); + wmb(); /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both * extended to permit requests-with-PASID if the PASIDE bit diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 0037153c80a6..2d9c5dd06e42 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -450,7 +450,7 @@ bool mei_cldev_enabled(struct mei_cl_device *cldev) EXPORT_SYMBOL_GPL(mei_cldev_enabled); /** - * mei_cldev_enable_device - enable me client device + * mei_cldev_enable - enable me client device * create connection with me client * * @cldev: me client device diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 391936c1aa04..b0395601c6ae 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -1541,7 +1541,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1; if (rets < 0) - return rets; + goto err; if (rets == 0) { cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); @@ -1575,11 +1575,8 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->buf.size, cb->buf_idx); rets = mei_write_message(dev, &mei_hdr, buf->data + cb->buf_idx); - if (rets) { - cl->status = rets; - list_move_tail(&cb->list, &cmpl_list->list); - return rets; - } + if (rets) + goto err; cl->status = 0; cl->writing_state = MEI_WRITING; @@ -1587,14 +1584,21 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->completed = mei_hdr.msg_complete == 1; if (first_chunk) { - if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) - return -EIO; + if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) { + rets = -EIO; + goto err; + } } if (mei_hdr.msg_complete) list_move_tail(&cb->list, &dev->write_waiting_list.list); return 0; + +err: + cl->status = rets; + list_move_tail(&cb->list, &cmpl_list->list); + return rets; } /** diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 25d1eb4933d0..7e8cf213fd81 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1012,15 +1012,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } - /* Insert TSB and checksum infos */ - if (priv->tsb_en) { - skb = bcm_sysport_insert_tsb(skb, dev); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } - } - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -1028,13 +1019,21 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. */ - if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { ret = NETDEV_TX_OK; goto out; } - skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ? - ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len; + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { + skb = bcm_sysport_insert_tsb(skb, dev); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } + } + + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); if (dma_mapping_error(kdev, mapping)) { diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 92be2cd8f817..9906fda76087 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -1,5 +1,5 @@ /** - * macb_pci.c - Cadence GEM PCI wrapper. + * Cadence GEM PCI wrapper. * * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com * @@ -45,32 +45,27 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct macb_platform_data plat_data; struct resource res[2]; - /* sanity check */ - if (!id) - return -EINVAL; - /* enable pci device */ - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err < 0) { - dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X", - err); - return -EACCES; + dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err); + return err; } pci_set_master(pdev); /* set up resources */ memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res)); - res[0].start = pdev->resource[0].start; - res[0].end = pdev->resource[0].end; + res[0].start = pci_resource_start(pdev, 0); + res[0].end = pci_resource_end(pdev, 0); res[0].name = PCI_DRIVER_NAME; res[0].flags = IORESOURCE_MEM; - res[1].start = pdev->irq; + res[1].start = pci_irq_vector(pdev, 0); res[1].name = PCI_DRIVER_NAME; res[1].flags = IORESOURCE_IRQ; - dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n", - (void *)(uintptr_t)pci_resource_start(pdev, 0)); + dev_info(&pdev->dev, "EMAC physical base addr: %pa\n", + &res[0].start); /* set up macb platform data */ memset(&plat_data, 0, sizeof(plat_data)); @@ -100,7 +95,7 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) plat_info.num_res = ARRAY_SIZE(res); plat_info.data = &plat_data; plat_info.size_data = sizeof(plat_data); - plat_info.dma_mask = DMA_BIT_MASK(32); + plat_info.dma_mask = pdev->dma_mask; /* register platform device */ plat_dev = platform_device_register_full(&plat_info); @@ -120,7 +115,6 @@ err_hclk_register: clk_unregister(plat_data.pclk); err_pclk_register: - pci_disable_device(pdev); return err; } @@ -130,7 +124,6 @@ static void macb_remove(struct pci_dev *pdev) struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); platform_device_unregister(plat_dev); - pci_disable_device(pdev); clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); } diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index bbc8bd16cb97..dcbce6cac63e 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -77,7 +77,7 @@ config OCTEON_MGMT_ETHERNET config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI - select PTP_1588_CLOCK + imply PTP_1588_CLOCK ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index 0f0de5b63622..d04a6c163445 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi, if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = sin6_scope_id; dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgb_our_interface(lldi, get_real_dev, - ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + if (dst->error || + (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) { dst_release(dst); - dst = NULL; + return NULL; } } -out: return dst; } EXPORT_SYMBOL(cxgb_find_route6); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e1633bf5a22..225e9a4877d7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5155,7 +5155,9 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) || + !adapter->vxlan_port || + udp_hdr(skb)->dest != adapter->vxlan_port) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 624ba9058dc4..c9b7ad65e563 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -733,6 +733,7 @@ static int dpaa_eth_cgr_init(struct dpaa_priv *priv) priv->cgr_data.cgr.cb = dpaa_eth_cgscn; /* Enable Congestion State Change Notifications and CS taildrop */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; @@ -2291,7 +2292,8 @@ static int dpaa_open(struct net_device *net_dev) net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); if (!net_dev->phydev) { netif_err(priv, ifup, net_dev, "init_phy() failed\n"); - return -ENODEV; + err = -ENODEV; + goto phy_init_failed; } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { @@ -2314,6 +2316,7 @@ mac_start_failed: for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) fman_port_disable(mac_dev->port[i]); +phy_init_failed: dpaa_eth_napi_disable(priv); return err; @@ -2420,6 +2423,7 @@ static int dpaa_ingress_cgr_init(struct dpaa_priv *priv) } /* Enable CS TD, but disable Congestion State Change Notifications. */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; cs_th = DPAA_INGRESS_CS_THRESHOLD; diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index cbeea915f026..8037426ec50f 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work) DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); + korina_free_ring(dev); + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); return; @@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev) tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; writel(tmp, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); cancel_work_sync(&lp->restart_task); + korina_free_ring(dev); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 015198c14fa8..504461a464c5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bcd955339058..edbe200ac2fa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev) /* Configure tx cq's and rings */ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { - u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1; + u8 num_tx_rings_p_up = t == TX ? + priv->num_tx_rings_p_up : priv->tx_ring_num[t]; for (i = 0; i < priv->tx_ring_num[t]; i++) { /* Configure cq */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 3c37e216bbf3..eac527e25ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..bffa6f345f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/kmod.h> +#include <linux/etherdevice.h> #include <net/devlink.h> #include <linux/mlx4/device.h> @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c548beaaf910..56185a0b827d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, @@ -4473,6 +4453,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4489,10 +4470,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4517,7 +4499,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 7f6c225666c1..f0b460f47f29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -723,6 +723,9 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) int i; struct ieee_ets ets; + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + memset(&ets, 0, sizeof(ets)); ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; for (i = 0; i < ets.ets_cap; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 352462af8d51..33a399a8b5d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -171,7 +171,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - NUM_PCIE_COUNTERS + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -219,14 +218,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_tas_stats_desc[i].format); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -339,14 +330,6 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters, - pcie_tas_stats_desc, i); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3691451c728c..d088effd7160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, } if (fs->flow_type & FLOW_MAC_EXT && !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), fs->m_ext.h_dest); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cbfa38fc72c0..1236b27b1493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -291,36 +291,12 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) &qcnt->rx_out_of_buffer); } -static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; - struct mlx5_core_dev *mdev = priv->mdev; - int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); - void *out; - u32 *in; - - in = mlx5_vzalloc(sz); - if (!in) - return; - - out = pcie_stats->pcie_perf_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - out = pcie_stats->pcie_tas_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - kvfree(in); -} - void mlx5e_update_stats(struct mlx5e_priv *priv) { mlx5e_update_q_counter(priv); mlx5e_update_vport_counters(priv); mlx5e_update_pport_counters(priv); mlx5e_update_sw_counters(priv); - mlx5e_update_pcie_counters(priv); } void mlx5e_update_stats_work(struct work_struct *work) @@ -3805,14 +3781,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); @@ -3822,6 +3791,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rep.netdev = netdev; mlx5_eswitch_register_vport_rep(esw, 0, &rep); } + + if (netdev->reg_state != NETREG_REGISTERED) + return; + + /* Device already registered: sync netdev system state */ + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + queue_work(priv->wq, &priv->set_rx_mode_work); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) @@ -3966,10 +3947,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); rtnl_lock(); if (netif_running(netdev)) @@ -3977,6 +3954,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) netif_device_detach(netdev); rtnl_unlock(); + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f202f872f57f..ba5db1dd23a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -39,7 +39,7 @@ #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ (*(u32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ - be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) @@ -276,32 +276,6 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; -#define PCIE_PERF_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_PERF_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \ - counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_TAS_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c) -#define PCIE_TAS_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \ - counter_set.pcie_tas_cntrs_grp_data_layout.c) - -struct mlx5e_pcie_stats { - __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; - __be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; -}; - -static const struct counter_desc pcie_perf_stats_desc[] = { - { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, - { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, -}; - -static const struct counter_desc pcie_tas_stats_desc[] = { - { "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) }, - { "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) }, -}; - struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -386,8 +360,6 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) -#define NUM_PCIE_TAS_COUNTERS ARRAY_SIZE(pcie_tas_stats_desc) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ @@ -397,7 +369,6 @@ static const struct counter_desc sq_stats_desc[] = { NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS (NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) @@ -406,7 +377,6 @@ struct mlx5e_stats { struct mlx5e_qcounter_stats qcnt; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct mlx5e_pcie_stats pcie; struct rtnl_link_stats64 vf_vport; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d6807c3cc461..f14d9c9ba773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 466e161010f7..03293ed1cc22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -695,6 +695,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; } + + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return 0; err_reps: @@ -718,6 +724,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a263d8904a4c..0ac7a2fc916c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1263,6 +1263,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { + unlock_ref_node(&fte->node); kfree(fte); goto unlock_fg; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 54e5a786f191..6547f22e6b9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,6 +503,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); @@ -575,7 +582,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -583,7 +589,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return -ENOMEM; } - cpumask_set_cpu(cpumask_local_spread(i, numa_node), + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); @@ -1189,6 +1195,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; + mlx5_drain_health_wq(dev); + mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1351,10 +1359,9 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain health wq */ + /* In case of kernel call save the pci state */ if (state) { pci_save_state(pdev); - mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index f9b97f5946f8..44389c90056a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -326,6 +326,7 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f341c1bc7001..00fafabab1d0 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -819,6 +819,7 @@ static struct sh_eth_cpu_data sh7734_data = { .tsu = 1, .hw_crc = 1, .select_mii = 1, + .shift_rd0 = 1, }; /* SH7763 */ @@ -1656,7 +1657,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) else goto out; - if (!likely(mdp->irq_enabled)) { + if (unlikely(!mdp->irq_enabled)) { sh_eth_write(ndev, 0, EESIPR); goto out; } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index de2947ccc5ad..5eb0e684fd76 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1323,7 +1323,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx) } /* don't fail init if RSS setup doesn't work */ - efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = (rc == 0); return 0; } diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 87bdc56b4e3a..18ebaea44e82 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -975,6 +975,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXFH: { info->data = 0; + if (!efx->rss_active) /* No RSS */ + return 0; switch (info->flow_type) { case UDP_V4_FLOW: if (efx->rx_hash_udp_4tuple) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 1a635ced62d0..1c62c1a00fca 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -860,6 +860,7 @@ struct vfdi_status; * @rx_hash_key: Toeplitz hash key for RSS * @rx_indir_table: Indirection table for RSS * @rx_scatter: Scatter mode enabled for receives + * @rss_active: RSS enabled on hardware * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -998,6 +999,7 @@ struct efx_nic { u8 rx_hash_key[40]; u32 rx_indir_table[128]; bool rx_scatter; + bool rss_active; bool rx_hash_udp_4tuple; unsigned int_error_count; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index a3901bc96586..4e54e5dc9fcb 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -403,6 +403,7 @@ static int siena_init_nic(struct efx_nic *efx) efx_writeo(efx, &temp, FR_AZ_RX_CFG); siena_rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = true; /* Enable event logging */ rc = efx_mcdi_log_ctrl(efx, true, false, 0); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index c35597586121..3dc7d279f805 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -60,8 +60,9 @@ struct oxnas_dwmac { struct regmap *regmap; }; -static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) +static int oxnas_dwmac_init(struct platform_device *pdev, void *priv) { + struct oxnas_dwmac *dwmac = priv; unsigned int value; int ret; @@ -105,20 +106,20 @@ static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) return 0; } +static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct oxnas_dwmac *dwmac = priv; + + clk_disable_unprepare(dwmac->clk); +} + static int oxnas_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - struct device_node *sysctrl; struct oxnas_dwmac *dwmac; int ret; - sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0); - if (!sysctrl) { - dev_err(&pdev->dev, "failed to get sys-ctrl node\n"); - return -EINVAL; - } - ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) return ret; @@ -128,72 +129,48 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) - return -ENOMEM; + if (!dwmac) { + ret = -ENOMEM; + goto err_remove_config_dt; + } dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; + plat_dat->init = oxnas_dwmac_init; + plat_dat->exit = oxnas_dwmac_exit; - dwmac->regmap = syscon_node_to_regmap(sysctrl); + dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "oxsemi,sys-ctrl"); if (IS_ERR(dwmac->regmap)) { dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); - return PTR_ERR(dwmac->regmap); + ret = PTR_ERR(dwmac->regmap); + goto err_remove_config_dt; } dwmac->clk = devm_clk_get(&pdev->dev, "gmac"); - if (IS_ERR(dwmac->clk)) - return PTR_ERR(dwmac->clk); + if (IS_ERR(dwmac->clk)) { + ret = PTR_ERR(dwmac->clk); + goto err_remove_config_dt; + } - ret = oxnas_dwmac_init(dwmac); + ret = oxnas_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) - return ret; + goto err_remove_config_dt; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - clk_disable_unprepare(dwmac->clk); + goto err_dwmac_exit; - return ret; -} -static int oxnas_dwmac_remove(struct platform_device *pdev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - int ret = stmmac_dvr_remove(&pdev->dev); - - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -#ifdef CONFIG_PM_SLEEP -static int oxnas_dwmac_suspend(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = stmmac_suspend(dev); - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -static int oxnas_dwmac_resume(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = oxnas_dwmac_init(dwmac); - if (ret) - return ret; + return 0; - ret = stmmac_resume(dev); +err_dwmac_exit: + oxnas_dwmac_exit(pdev, plat_dat->bsp_priv); +err_remove_config_dt: + stmmac_remove_config_dt(pdev, plat_dat); return ret; } -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops, - oxnas_dwmac_suspend, oxnas_dwmac_resume); static const struct of_device_id oxnas_dwmac_match[] = { { .compatible = "oxsemi,ox820-dwmac" }, @@ -203,10 +180,10 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); static struct platform_driver oxnas_dwmac_driver = { .probe = oxnas_dwmac_probe, - .remove = oxnas_dwmac_remove, + .remove = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", - .pm = &oxnas_dwmac_pm_ops, + .pm = &stmmac_pltfr_pm_ops, .of_match_table = oxnas_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bb40382e205d..39eb7a65bb9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3339,13 +3339,6 @@ int stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); - ret = register_netdev(ndev); - if (ret) { - netdev_err(priv->dev, "%s: ERROR %i registering the device\n", - __func__, ret); - goto error_netdev_register; - } - /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. Viceversa the driver'll try to @@ -3372,11 +3365,21 @@ int stmmac_dvr_probe(struct device *device, } } - return 0; + ret = register_netdev(ndev); + if (ret) { + netdev_err(priv->dev, "%s: ERROR %i registering the device\n", + __func__, ret); + goto error_netdev_register; + } + + return ret; -error_mdio_register: - unregister_netdev(ndev); error_netdev_register: + if (priv->hw->pcs != STMMAC_PCS_RGMII && + priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); +error_mdio_register: netif_napi_del(&priv->napi); error_hw_init: clk_disable_unprepare(priv->pclk); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index fda01f770eff..b0344c213752 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 value = MII_WRITE | MII_BUSY; + u32 value = MII_BUSY; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, & priv->hw->mii.clk_csr_mask; if (priv->plat->has_gmac4) value |= MII_GMAC4_WRITE; + else + value |= MII_WRITE; /* Wait until any existing MII operation is complete */ if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 031093e1c25f..dbfbb33ac66c 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -99,6 +99,11 @@ struct ipvl_port { int count; }; +struct ipvl_skb_cb { + bool tx_pkt; +}; +#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0])) + static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) { return rcu_dereference(d->rx_handler_data); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b4e990743e1d..83ce74acf82d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work) unsigned int mac_hash; int ret; u8 pkt_type; - bool hlocal, dlocal; + bool tx_pkt; __skb_queue_head_init(&list); @@ -207,8 +207,11 @@ void ipvlan_process_multicast(struct work_struct *work) spin_unlock_bh(&port->backlog.lock); while ((skb = __skb_dequeue(&list)) != NULL) { + struct net_device *dev = skb->dev; + bool consumed = false; + ethh = eth_hdr(skb); - hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); + tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; mac_hash = ipvlan_mac_hash(ethh->h_dest); if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) @@ -216,41 +219,45 @@ void ipvlan_process_multicast(struct work_struct *work) else pkt_type = PACKET_MULTICAST; - dlocal = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (hlocal && (ipvlan->dev == skb->dev)) { - dlocal = true; + if (tx_pkt && (ipvlan->dev == skb->dev)) continue; - } if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; - + if (!(ipvlan->dev->flags & IFF_UP)) + continue; ret = NET_RX_DROP; len = skb->len + ETH_HLEN; nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - goto acct; - - nskb->pkt_type = pkt_type; - nskb->dev = ipvlan->dev; - if (hlocal) - ret = dev_forward_skb(ipvlan->dev, nskb); - else - ret = netif_rx(nskb); -acct: + local_bh_disable(); + if (nskb) { + consumed = true; + nskb->pkt_type = pkt_type; + nskb->dev = ipvlan->dev; + if (tx_pkt) + ret = dev_forward_skb(ipvlan->dev, nskb); + else + ret = netif_rx(nskb); + } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + local_bh_enable(); } rcu_read_unlock(); - if (dlocal) { + if (tx_pkt) { /* If the packet originated here, send it out. */ skb->dev = port->dev; skb->pkt_type = pkt_type; dev_queue_xmit(skb); } else { - kfree_skb(skb); + if (consumed) + consume_skb(skb); + else + kfree_skb(skb); } + if (dev) + dev_put(dev); } } @@ -470,15 +477,24 @@ out: } static void ipvlan_multicast_enqueue(struct ipvl_port *port, - struct sk_buff *skb) + struct sk_buff *skb, bool tx_pkt) { if (skb->protocol == htons(ETH_P_PAUSE)) { kfree_skb(skb); return; } + /* Record that the deferred packet is from TX or RX path. By + * looking at mac-addresses on packet will lead to erronus decisions. + * (This would be true for a loopback-mode on master device or a + * hair-pin mode of the switch.) + */ + IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; + spin_lock(&port->backlog.lock); if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { + if (skb->dev) + dev_hold(skb->dev); __skb_queue_tail(&port->backlog, skb); spin_unlock(&port->backlog.lock); schedule_work(&port->wq); @@ -537,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) } else if (is_multicast_ether_addr(eth->h_dest)) { ipvlan_skb_crossing_ns(skb, NULL); - ipvlan_multicast_enqueue(ipvlan->port, skb); + ipvlan_multicast_enqueue(ipvlan->port, skb, true); return NET_XMIT_SUCCESS; } @@ -634,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, */ if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); - ipvlan_multicast_enqueue(port, nskb); + ipvlan_multicast_enqueue(port, nskb, false); } } } else { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 693ec5b66222..8b0f99300cbc 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -135,6 +135,7 @@ err: static void ipvlan_port_destroy(struct net_device *dev) { struct ipvl_port *port = ipvlan_port_get_rtnl(dev); + struct sk_buff *skb; dev->priv_flags &= ~IFF_IPVLAN_MASTER; if (port->mode == IPVLAN_MODE_L3S) { @@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev) } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); - __skb_queue_purge(&port->backlog); + while ((skb = __skb_dequeue(&port->backlog)) != NULL) { + if (skb->dev) + dev_put(skb->dev); + kfree_skb(skb); + } kfree(port); } diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6c646e228833..6e98ede997d3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1367,6 +1367,7 @@ static struct usb_driver asix_driver = { .probe = usbnet_probe, .suspend = asix_suspend, .resume = asix_resume, + .reset_resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7532646c3b7b..23dfb0eac098 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -967,6 +967,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, */ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); if (!ipv6_ndisc_frame(skb) && !need_strict) { + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1011,6 +1012,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } + vrf_rx_stats(vrf_dev, skb->len); + skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index b776a0ab106c..9d9b4e0def2a 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -218,7 +218,7 @@ static int slic_ds26522_probe(struct spi_device *spi) ret = slic_ds26522_init_configure(spi); if (ret == 0) - pr_info("DS26522 cs%d configurated\n", spi->chip_select); + pr_info("DS26522 cs%d configured\n", spi->chip_select); return ret; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b40cfb076f02..2fc86dc7a8df 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1193,8 +1193,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if (ctrl->stripe_size) - blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); + if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) + blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; @@ -1250,19 +1250,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_hw_sectors = min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { - unsigned int max_hw_sectors; - - ctrl->stripe_size = 1 << (id->vs[3] + page_shift); - max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); - if (ctrl->max_hw_sectors) { - ctrl->max_hw_sectors = min(max_hw_sectors, - ctrl->max_hw_sectors); - } else { - ctrl->max_hw_sectors = max_hw_sectors; - } - } - nvme_set_queue_limits(ctrl, ctrl->admin_q); ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 771e2e761872..aa0bc60810a7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1491,19 +1491,20 @@ static int nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) { struct nvme_fc_queue *queue = &ctrl->queues[1]; - int i, j, ret; + int i, ret; for (i = 1; i < ctrl->queue_count; i++, queue++) { ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); - if (ret) { - for (j = i-1; j >= 0; j--) - __nvme_fc_delete_hw_queue(ctrl, - &ctrl->queues[j], j); - return ret; - } + if (ret) + goto delete_queues; } return 0; + +delete_queues: + for (; i >= 0; i--) + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); + return ret; } static int @@ -2401,8 +2402,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, WARN_ON_ONCE(!changed); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl); + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); kref_get(&ctrl->ctrl.kref); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bd5321441d12..6377e14586dc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -135,7 +135,6 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; - u32 stripe_size; u16 oncs; u16 vid; atomic_t abort_limit; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3d21a154dce7..19beeb7b2ac2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -712,15 +712,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); nvme_req(req)->result = cqe.result; blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); - } - /* If the controller ignores the cq head doorbell and continuously - * writes to the queue, it is theoretically possible to wrap around - * the queue twice and mistakenly return IRQ_NONE. Linux only - * requires that 0.1% of your interrupts are handled, so this isn't - * a big problem. - */ if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; @@ -1909,10 +1902,10 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!dev->bar) goto release; - return 0; + return 0; release: - pci_release_mem_regions(pdev); - return -ENODEV; + pci_release_mem_regions(pdev); + return -ENODEV; } static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index b71e95044b43..a5c09e703bd8 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -2160,30 +2160,6 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, return nvme_trans_status_code(hdr, nvme_sc); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) -{ - u8 immed, no_flush; - - immed = cmd[1] & 0x01; - no_flush = cmd[4] & 0x04; - - if (immed != 0) { - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - } else { - if (no_flush == 0) { - /* Issue NVME FLUSH command prior to START STOP UNIT */ - int res = nvme_trans_synchronize_cache(ns, hdr); - if (res) - return res; - } - - return 0; - } -} - static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { @@ -2439,9 +2415,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case SECURITY_PROTOCOL_OUT: retcode = nvme_trans_security_protocol(ns, hdr, cmd); break; - case START_STOP: - retcode = nvme_trans_start_stop(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index ec1ad2aa0a4c..95ae52390478 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -382,7 +382,6 @@ static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); - u64 val; u32 val32; u16 status = 0; @@ -392,8 +391,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; case NVME_FEAT_KATO: - val = le64_to_cpu(req->cmd->prop_set.value); - val32 = val & 0xffff; + val32 = le32_to_cpu(req->cmd->common.cdw10[1]); req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index bcb8ebeb01c5..4e8e6a22bce1 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -845,7 +845,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, rport->lport = nport->lport; nport->rport = rport; - return ret ? ret : count; + return count; } @@ -952,7 +952,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->lport = nport->lport; nport->tport = tport; - return ret ? ret : count; + return count; } diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 965911d9b36a..398ea7f54826 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -981,8 +981,8 @@ static int __nvmem_cell_read(struct nvmem_device *nvmem, * @cell: nvmem cell to be read. * @len: pointer to length of cell which will be populated on successful read. * - * Return: ERR_PTR() on error or a valid pointer to a char * buffer on success. - * The buffer should be freed by the consumer with a kfree(). + * Return: ERR_PTR() on error or a valid pointer to a buffer on success. The + * buffer should be freed by the consumer with a kfree(). */ void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) { diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index ac27b9bac3b9..8e7b120696fa 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -71,7 +71,7 @@ static struct nvmem_config imx_ocotp_nvmem_config = { static const struct of_device_id imx_ocotp_dt_ids[] = { { .compatible = "fsl,imx6q-ocotp", (void *)128 }, - { .compatible = "fsl,imx6sl-ocotp", (void *)32 }, + { .compatible = "fsl,imx6sl-ocotp", (void *)64 }, { .compatible = "fsl,imx6sx-ocotp", (void *)128 }, { }, }; diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index b5305f08b184..2bdb6c389328 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -21,11 +21,11 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { void __iomem *base = context; - u32 *val = _val; - int i = 0, words = bytes / 4; + u8 *val = _val; + int i = 0, words = bytes; while (words--) - *val++ = readl(base + reg + (i++ * 4)); + *val++ = readb(base + reg + i++); return 0; } @@ -34,11 +34,11 @@ static int qfprom_reg_write(void *context, unsigned int reg, void *_val, size_t bytes) { void __iomem *base = context; - u32 *val = _val; - int i = 0, words = bytes / 4; + u8 *val = _val; + int i = 0, words = bytes; while (words--) - writel(*val++, base + reg + (i++ * 4)); + writeb(*val++, base + reg + i++); return 0; } @@ -53,7 +53,7 @@ static int qfprom_remove(struct platform_device *pdev) static struct nvmem_config econfig = { .name = "qfprom", .owner = THIS_MODULE, - .stride = 4, + .stride = 1, .word_size = 1, .reg_read = qfprom_reg_read, .reg_write = qfprom_reg_write, diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index a579126832af..620c231a2889 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -212,7 +212,7 @@ static int meson_pmx_request_gpio(struct pinctrl_dev *pcdev, { struct meson_pinctrl *pc = pinctrl_dev_get_drvdata(pcdev); - meson_pmx_disable_other_groups(pc, range->pin_base + offset, -1); + meson_pmx_disable_other_groups(pc, offset, -1); return 0; } diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index aea310a91821..c9a146948192 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -382,26 +382,21 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) { int ret = 0; u32 pin_reg; - unsigned long flags; - bool level_trig; - u32 active_level; + unsigned long flags, irq_flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - * When level_trig is set EDGE and active_level is set HIGH in BIOS - * default settings, ignore incoming settings from client and use - * BIOS settings to configure GPIO register. + /* Ignore the settings coming from the client and + * read the values from the ACPI tables + * while setting the trigger type */ - level_trig = !(pin_reg & (LEVEL_TRIGGER << LEVEL_TRIG_OFF)); - active_level = pin_reg & (ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); - if(level_trig && - ((active_level >> ACTIVE_LEVEL_OFF) == ACTIVE_HIGH)) - type = IRQ_TYPE_EDGE_FALLING; + irq_flags = irq_get_trigger_type(d->irq); + if (irq_flags != IRQ_TYPE_NONE) + type = irq_flags; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 12f7d1eb65bc..07409fde02b2 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -56,6 +56,17 @@ static const struct samsung_pin_bank_type bank_type_alive = { .reg_offset = { 0x00, 0x04, 0x08, 0x0c, }, }; +/* Exynos5433 has the 4bit widths for PINCFG_TYPE_DRV bitfields. */ +static const struct samsung_pin_bank_type exynos5433_bank_type_off = { + .fld_width = { 4, 1, 2, 4, 2, 2, }, + .reg_offset = { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, }, +}; + +static const struct samsung_pin_bank_type exynos5433_bank_type_alive = { + .fld_width = { 4, 1, 2, 4, }, + .reg_offset = { 0x00, 0x04, 0x08, 0x0c, }, +}; + static void exynos_irq_mask(struct irq_data *irqd) { struct irq_chip *chip = irq_data_get_irq_chip(irqd); @@ -1335,82 +1346,82 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { /* pin banks of exynos5433 pin-controller - ALIVE */ static const struct samsung_pin_bank_data exynos5433_pin_banks0[] = { - EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), - EXYNOS_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), - EXYNOS_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), - EXYNOS_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), + EXYNOS5433_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), + EXYNOS5433_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), + EXYNOS5433_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), + EXYNOS5433_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), }; /* pin banks of exynos5433 pin-controller - AUD */ static const struct samsung_pin_bank_data exynos5433_pin_banks1[] = { - EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), - EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), + EXYNOS5433_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), }; /* pin banks of exynos5433 pin-controller - CPIF */ static const struct samsung_pin_bank_data exynos5433_pin_banks2[] = { - EXYNOS_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), + EXYNOS5433_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), }; /* pin banks of exynos5433 pin-controller - eSE */ static const struct samsung_pin_bank_data exynos5433_pin_banks3[] = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), }; /* pin banks of exynos5433 pin-controller - FINGER */ static const struct samsung_pin_bank_data exynos5433_pin_banks4[] = { - EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00), }; /* pin banks of exynos5433 pin-controller - FSYS */ static const struct samsung_pin_bank_data exynos5433_pin_banks5[] = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), - EXYNOS_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), - EXYNOS_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), - EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), - EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), + EXYNOS5433_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), }; /* pin banks of exynos5433 pin-controller - IMEM */ static const struct samsung_pin_bank_data exynos5433_pin_banks6[] = { - EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), }; /* pin banks of exynos5433 pin-controller - NFC */ static const struct samsung_pin_bank_data exynos5433_pin_banks7[] = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), }; /* pin banks of exynos5433 pin-controller - PERIC */ static const struct samsung_pin_bank_data exynos5433_pin_banks8[] = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), - EXYNOS_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), - EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), - EXYNOS_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), - EXYNOS_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), - EXYNOS_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), - EXYNOS_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), - EXYNOS_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), - EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), - EXYNOS_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), - EXYNOS_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), - EXYNOS_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30), - EXYNOS_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), - EXYNOS_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), - EXYNOS_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), - EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), + EXYNOS5433_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), + EXYNOS5433_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), + EXYNOS5433_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), + EXYNOS5433_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), + EXYNOS5433_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), + EXYNOS5433_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), + EXYNOS5433_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30), + EXYNOS5433_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), + EXYNOS5433_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), + EXYNOS5433_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), + EXYNOS5433_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), }; /* pin banks of exynos5433 pin-controller - TOUCH */ static const struct samsung_pin_bank_data exynos5433_pin_banks9[] = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), }; /* diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index 5821525a2c84..a473092fb8d2 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -90,6 +90,37 @@ .pctl_res_idx = pctl_idx, \ } \ +#define EXYNOS5433_PIN_BANK_EINTG(pins, reg, id, offs) \ + { \ + .type = &exynos5433_bank_type_off, \ + .pctl_offset = reg, \ + .nr_pins = pins, \ + .eint_type = EINT_TYPE_GPIO, \ + .eint_offset = offs, \ + .name = id \ + } + +#define EXYNOS5433_PIN_BANK_EINTW(pins, reg, id, offs) \ + { \ + .type = &exynos5433_bank_type_alive, \ + .pctl_offset = reg, \ + .nr_pins = pins, \ + .eint_type = EINT_TYPE_WKUP, \ + .eint_offset = offs, \ + .name = id \ + } + +#define EXYNOS5433_PIN_BANK_EINTW_EXT(pins, reg, id, offs, pctl_idx) \ + { \ + .type = &exynos5433_bank_type_alive, \ + .pctl_offset = reg, \ + .nr_pins = pins, \ + .eint_type = EINT_TYPE_WKUP, \ + .eint_offset = offs, \ + .name = id, \ + .pctl_res_idx = pctl_idx, \ + } \ + /** * struct exynos_weint_data: irq specific data for all the wakeup interrupts * generated by the external wakeup interrupt controller. diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5fe8be089b8b..59aa8e302bc3 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1034,7 +1034,7 @@ config SURFACE_PRO3_BUTTON config SURFACE_3_BUTTON tristate "Power/home/volume buttons driver for Microsoft Surface 3 tablet" - depends on ACPI && KEYBOARD_GPIO + depends on ACPI && KEYBOARD_GPIO && I2C ---help--- This driver handles the power/home/volume buttons on the Microsoft Surface 3 tablet. diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 61f39abf5dc8..82d67715ce76 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -177,43 +177,43 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev logolamp_led = { .name = "fujitsu::logolamp", .brightness_get = logolamp_get, - .brightness_set = logolamp_set + .brightness_set_blocking = logolamp_set }; static enum led_brightness kblamps_get(struct led_classdev *cdev); -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev kblamps_led = { .name = "fujitsu::kblamps", .brightness_get = kblamps_get, - .brightness_set = kblamps_set + .brightness_set_blocking = kblamps_set }; static enum led_brightness radio_led_get(struct led_classdev *cdev); -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev radio_led = { .name = "fujitsu::radio_led", .brightness_get = radio_led_get, - .brightness_set = radio_led_set + .brightness_set_blocking = radio_led_set }; static enum led_brightness eco_led_get(struct led_classdev *cdev); -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev eco_led = { .name = "fujitsu::eco_led", .brightness_get = eco_led_get, - .brightness_set = eco_led_set + .brightness_set_blocking = eco_led_set }; #endif @@ -267,48 +267,48 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) #if IS_ENABLED(CONFIG_LEDS_CLASS) /* LED class callbacks */ -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); } else if (brightness >= LED_HALF) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); } else { - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); } } -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); } -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); } -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness) { int curr; curr = call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0); if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); } static enum led_brightness logolamp_get(struct led_classdev *cdev) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index 8130dfe89745..4971aa54756a 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -770,6 +770,7 @@ static int cvm_oct_probe(struct platform_device *pdev) /* Initialize the device private structure. */ struct octeon_ethernet *priv = netdev_priv(dev); + SET_NETDEV_DEV(dev, &pdev->dev); dev->netdev_ops = &cvm_oct_pow_netdev_ops; priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED; priv->port = CVMX_PIP_NUM_INPUT_PORTS; @@ -816,6 +817,7 @@ static int cvm_oct_probe(struct platform_device *pdev) } /* Initialize the device private structure. */ + SET_NETDEV_DEV(dev, &pdev->dev); priv = netdev_priv(dev); priv->netdev = dev; priv->of_node = cvm_oct_node_for_port(pip, interface, diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 0aa9e7d697a5..25dbd8c7aec7 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -239,6 +239,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Check for duplicate endpoint addresses */ + for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { + if (ifp->endpoint[i].desc.bEndpointAddress == + d->bEndpointAddress) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1fa5c0f29c64..a56c75e09786 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -103,8 +103,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); -static void hub_usb3_port_prepare_disable(struct usb_hub *hub, - struct usb_port *port_dev); +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -903,34 +902,6 @@ static int hub_set_port_link_state(struct usb_hub *hub, int port1, } /* - * USB-3 does not have a similar link state as USB-2 that will avoid negotiating - * a connection with a plugged-in cable but will signal the host when the cable - * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices - */ -static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) -{ - struct usb_port *port_dev = hub->ports[port1 - 1]; - struct usb_device *hdev = hub->hdev; - int ret = 0; - - if (!hub->error) { - if (hub_is_superspeed(hub->hdev)) { - hub_usb3_port_prepare_disable(hub, port_dev); - ret = hub_set_port_link_state(hub, port_dev->portnum, - USB_SS_PORT_LS_U3); - } else { - ret = usb_clear_port_feature(hdev, port1, - USB_PORT_FEAT_ENABLE); - } - } - if (port_dev->child && set_state) - usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); - if (ret && ret != -ENODEV) - dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); - return ret; -} - -/* * Disable a port and mark a logical connect-change event, so that some * time later hub_wq will disconnect() any existing usb_device on the port * and will re-enumerate if there actually is a device attached. @@ -4162,6 +4133,34 @@ static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, #endif /* CONFIG_PM */ +/* + * USB-3 does not have a similar link state as USB-2 that will avoid negotiating + * a connection with a plugged-in cable but will signal the host when the cable + * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices + */ +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) +{ + struct usb_port *port_dev = hub->ports[port1 - 1]; + struct usb_device *hdev = hub->hdev; + int ret = 0; + + if (!hub->error) { + if (hub_is_superspeed(hub->hdev)) { + hub_usb3_port_prepare_disable(hub, port_dev); + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U3); + } else { + ret = usb_clear_port_feature(hdev, port1, + USB_PORT_FEAT_ENABLE); + } + } + if (port_dev->child && set_state) + usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); + if (ret && ret != -ENODEV) + dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); + return ret; +} + /* USB 2.0 spec, 7.1.7.3 / fig 7-29: * diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index b95930f20d90..c55db4aa54d6 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3753,7 +3753,7 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, hs_ep->desc_list = dma_alloc_coherent(hsotg->dev, MAX_DMA_DESC_NUM_GENERIC * sizeof(struct dwc2_dma_desc), - &hs_ep->desc_list_dma, GFP_KERNEL); + &hs_ep->desc_list_dma, GFP_ATOMIC); if (!hs_ep->desc_list) { ret = -ENOMEM; goto error2; diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index a786256535b6..11fe68a4627b 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -247,8 +247,6 @@ MODULE_DEVICE_TABLE(of, dwc2_of_match_table); static void dwc2_get_device_property(struct dwc2_hsotg *hsotg, char *property, u8 size, u64 *value) { - u8 val8; - u16 val16; u32 val32; switch (size) { @@ -256,17 +254,7 @@ static void dwc2_get_device_property(struct dwc2_hsotg *hsotg, *value = device_property_read_bool(hsotg->dev, property); break; case 1: - if (device_property_read_u8(hsotg->dev, property, &val8)) - return; - - *value = val8; - break; case 2: - if (device_property_read_u16(hsotg->dev, property, &val16)) - return; - - *value = val16; - break; case 4: if (device_property_read_u32(hsotg->dev, property, &val32)) return; @@ -1100,13 +1088,13 @@ static void dwc2_set_gadget_dma(struct dwc2_hsotg *hsotg) /* Buffer DMA */ dwc2_set_param_bool(hsotg, &p->g_dma, false, "gadget-dma", - true, false, + dma_capable, false, dma_capable); /* DMA Descriptor */ dwc2_set_param_bool(hsotg, &p->g_dma_desc, false, "gadget-dma-desc", - p->g_dma, false, + !!hw->dma_desc_enable, false, !!hw->dma_desc_enable); } @@ -1130,8 +1118,14 @@ static void dwc2_set_parameters(struct dwc2_hsotg *hsotg, dwc2_set_param_bool(hsotg, &p->host_dma, false, "host-dma", - true, false, + dma_capable, false, dma_capable); + dwc2_set_param_host_rx_fifo_size(hsotg, + params->host_rx_fifo_size); + dwc2_set_param_host_nperio_tx_fifo_size(hsotg, + params->host_nperio_tx_fifo_size); + dwc2_set_param_host_perio_tx_fifo_size(hsotg, + params->host_perio_tx_fifo_size); } dwc2_set_param_dma_desc_enable(hsotg, params->dma_desc_enable); dwc2_set_param_dma_desc_fs_enable(hsotg, params->dma_desc_fs_enable); @@ -1140,12 +1134,6 @@ static void dwc2_set_parameters(struct dwc2_hsotg *hsotg, params->host_support_fs_ls_low_power); dwc2_set_param_enable_dynamic_fifo(hsotg, params->enable_dynamic_fifo); - dwc2_set_param_host_rx_fifo_size(hsotg, - params->host_rx_fifo_size); - dwc2_set_param_host_nperio_tx_fifo_size(hsotg, - params->host_nperio_tx_fifo_size); - dwc2_set_param_host_perio_tx_fifo_size(hsotg, - params->host_perio_tx_fifo_size); dwc2_set_param_max_transfer_size(hsotg, params->max_transfer_size); dwc2_set_param_max_packet_count(hsotg, diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index de5a8570be04..14b760209680 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -45,9 +45,7 @@ #define DWC3_XHCI_RESOURCES_NUM 2 #define DWC3_SCRATCHBUF_SIZE 4096 /* each buffer is assumed to be 4KiB */ -#define DWC3_EVENT_SIZE 4 /* bytes */ -#define DWC3_EVENT_MAX_NUM 64 /* 2 events/endpoint */ -#define DWC3_EVENT_BUFFERS_SIZE (DWC3_EVENT_SIZE * DWC3_EVENT_MAX_NUM) +#define DWC3_EVENT_BUFFERS_SIZE 4096 #define DWC3_EVENT_TYPE_MASK 0xfe #define DWC3_EVENT_TYPE_DEV 0 @@ -311,9 +309,8 @@ #define DWC3_DCFG_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DCFG_SUPERSPEED (4 << 0) #define DWC3_DCFG_HIGHSPEED (0 << 0) -#define DWC3_DCFG_FULLSPEED2 (1 << 0) +#define DWC3_DCFG_FULLSPEED (1 << 0) #define DWC3_DCFG_LOWSPEED (2 << 0) -#define DWC3_DCFG_FULLSPEED1 (3 << 0) #define DWC3_DCFG_NUMP_SHIFT 17 #define DWC3_DCFG_NUMP(n) (((n) >> DWC3_DCFG_NUMP_SHIFT) & 0x1f) @@ -405,9 +402,8 @@ #define DWC3_DSTS_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DSTS_SUPERSPEED (4 << 0) #define DWC3_DSTS_HIGHSPEED (0 << 0) -#define DWC3_DSTS_FULLSPEED2 (1 << 0) +#define DWC3_DSTS_FULLSPEED (1 << 0) #define DWC3_DSTS_LOWSPEED (2 << 0) -#define DWC3_DSTS_FULLSPEED1 (3 << 0) /* Device Generic Command Register */ #define DWC3_DGCMD_SET_LMP 0x01 diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 29e80cc9b634..eb1b9cb3f9d1 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/platform_data/dwc3-omap.h> @@ -510,7 +511,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) /* check the DMA Status */ reg = dwc3_omap_readl(omap->base, USBOTGSS_SYSCONFIG); - + irq_set_status_flags(omap->irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(dev, omap->irq, dwc3_omap_interrupt, dwc3_omap_interrupt_thread, IRQF_SHARED, "dwc3-omap", omap); @@ -531,7 +532,7 @@ static int dwc3_omap_probe(struct platform_device *pdev) } dwc3_omap_enable_irqs(omap); - + enable_irq(omap->irq); return 0; err2: @@ -552,6 +553,7 @@ static int dwc3_omap_remove(struct platform_device *pdev) extcon_unregister_notifier(omap->edev, EXTCON_USB, &omap->vbus_nb); extcon_unregister_notifier(omap->edev, EXTCON_USB_HOST, &omap->id_nb); dwc3_omap_disable_irqs(omap); + disable_irq(omap->irq); of_platform_depopulate(omap->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 2b73339f286b..cce0a220b6b0 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +#define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_INTEL_BXT_DSM_UUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -73,16 +74,6 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) { struct platform_device *dwc3 = dwc->dwc3; struct pci_dev *pdev = dwc->pci; - int ret; - - struct property_entry sysdev_property[] = { - PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), - { }, - }; - - ret = platform_device_add_properties(dwc3, sysdev_property); - if (ret) - return ret; if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == PCI_DEVICE_ID_AMD_NL_USB) { @@ -105,6 +96,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) PROPERTY_ENTRY_BOOL("snps,disable_scramble_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), { }, }; @@ -115,7 +107,8 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) int ret; struct property_entry properties[] = { - PROPERTY_ENTRY_STRING("dr-mode", "peripheral"), + PROPERTY_ENTRY_STRING("dr_mode", "peripheral"), + PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), { } }; @@ -167,6 +160,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) PROPERTY_ENTRY_BOOL("snps,usb3_lpm_capable"), PROPERTY_ENTRY_BOOL("snps,has-lpm-erratum"), PROPERTY_ENTRY_BOOL("snps,dis_enblslpm_quirk"), + PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), { }, }; @@ -274,6 +268,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 4878d187c7d4..9bb1f8526f3e 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -39,18 +39,13 @@ static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep); static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req); -static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, - u32 len, u32 type, bool chain) +static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, + dma_addr_t buf_dma, u32 len, u32 type, bool chain) { - struct dwc3_gadget_ep_cmd_params params; struct dwc3_trb *trb; struct dwc3_ep *dep; - int ret; - dep = dwc->eps[epnum]; - if (dep->flags & DWC3_EP_BUSY) - return 0; trb = &dwc->ep0_trb[dep->trb_enqueue]; @@ -71,15 +66,23 @@ static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, trb->ctrl |= (DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_LST); - if (chain) + trace_dwc3_prepare_trb(dep, trb); +} + +static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum) +{ + struct dwc3_gadget_ep_cmd_params params; + struct dwc3_ep *dep; + int ret; + + dep = dwc->eps[epnum]; + if (dep->flags & DWC3_EP_BUSY) return 0; memset(¶ms, 0, sizeof(params)); params.param0 = upper_32_bits(dwc->ep0_trb_addr); params.param1 = lower_32_bits(dwc->ep0_trb_addr); - trace_dwc3_prepare_trb(dep, trb); - ret = dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_STARTTRANSFER, ¶ms); if (ret < 0) return ret; @@ -280,8 +283,9 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) complete(&dwc->ep0_in_setup); - ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8, + dwc3_ep0_prepare_one_trb(dwc, 0, dwc->ctrl_req_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); + ret = dwc3_ep0_start_trans(dwc, 0); WARN_ON(ret < 0); } @@ -912,9 +916,9 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, dwc->ep0_next_event = DWC3_EP0_COMPLETE; - ret = dwc3_ep0_start_trans(dwc, epnum, - dwc->ctrl_req_addr, 0, - DWC3_TRBCTL_CONTROL_DATA, false); + dwc3_ep0_prepare_one_trb(dwc, epnum, dwc->ctrl_req_addr, + 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, epnum); WARN_ON(ret < 0); } } @@ -993,9 +997,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->direction = !!dep->number; if (req->request.length == 0) { - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && (dep->number == 0)) { u32 transfer_size = 0; @@ -1011,7 +1016,7 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, if (req->request.length > DWC3_EP0_BOUNCE_SIZE) { transfer_size = ALIGN(req->request.length - maxpacket, maxpacket); - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, transfer_size, DWC3_TRBCTL_CONTROL_DATA, @@ -1023,18 +1028,20 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, dwc->ep0_bounced = true; - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ep0_bounce_addr, transfer_size, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else { ret = usb_gadget_map_request_by_dev(dwc->sysdev, &req->request, dep->number); if (ret) return; - ret = dwc3_ep0_start_trans(dwc, dep->number, req->request.dma, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, req->request.length, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } WARN_ON(ret < 0); @@ -1048,8 +1055,9 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) type = dwc->three_stage_setup ? DWC3_TRBCTL_CONTROL_STATUS3 : DWC3_TRBCTL_CONTROL_STATUS2; - return dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, type, false); + return dwc3_ep0_start_trans(dwc, dep->number); } static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index efddaf5d11d1..204c754cc647 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -180,11 +180,11 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number == 0) + if (dwc->ep0_bounced && dep->number <= 1) dwc->ep0_bounced = false; - else - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, req->direction); + + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); trace_dwc3_gadget_giveback(req); @@ -1720,7 +1720,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) reg |= DWC3_DCFG_LOWSPEED; break; case USB_SPEED_FULL: - reg |= DWC3_DCFG_FULLSPEED1; + reg |= DWC3_DCFG_FULLSPEED; break; case USB_SPEED_HIGH: reg |= DWC3_DCFG_HIGHSPEED; @@ -2232,9 +2232,14 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, dep = dwc->eps[epnum]; - if (!(dep->flags & DWC3_EP_ENABLED) && - !(dep->flags & DWC3_EP_END_TRANSFER_PENDING)) - return; + if (!(dep->flags & DWC3_EP_ENABLED)) { + if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING)) + return; + + /* Handle only EPCMDCMPLT when EP disabled */ + if (event->endpoint_event != DWC3_DEPEVT_EPCMDCMPLT) + return; + } if (epnum == 0 || epnum == 1) { dwc3_ep0_interrupt(dwc, event); @@ -2531,8 +2536,7 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_HIGH; break; - case DWC3_DSTS_FULLSPEED2: - case DWC3_DSTS_FULLSPEED1: + case DWC3_DSTS_FULLSPEED: dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(64); dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_FULL; diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 41ab61f9b6e0..002822d98fda 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1694,9 +1694,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) value = min(w_length, (u16) 1); break; - /* function drivers must handle get/set altsetting; if there's - * no get() method, we know only altsetting zero works. - */ + /* function drivers must handle get/set altsetting */ case USB_REQ_SET_INTERFACE: if (ctrl->bRequestType != USB_RECIP_INTERFACE) goto unknown; @@ -1705,7 +1703,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) f = cdev->config->interface[intf]; if (!f) break; - if (w_value && !f->set_alt) + + /* + * If there's no get_alt() method, we know only altsetting zero + * works. There is no need to check if set_alt() is not NULL + * as we check this in usb_add_function(). + */ + if (w_value && !f->get_alt) break; value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index aab3fc1dbb94..5e746adc8a2d 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2091,8 +2091,8 @@ static int __ffs_data_do_entity(enum ffs_entity_type type, case FFS_STRING: /* - * Strings are indexed from 1 (0 is magic ;) reserved - * for languages list or some such) + * Strings are indexed from 1 (0 is reserved + * for languages list) */ if (*valuep > helper->ffs->strings_count) helper->ffs->strings_count = *valuep; @@ -2252,7 +2252,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - !d->Reserved1) + d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) @@ -3666,6 +3666,7 @@ static void ffs_closed(struct ffs_data *ffs) { struct ffs_dev *ffs_obj; struct f_fs_opts *opts; + struct config_item *ci; ENTER(); ffs_dev_lock(); @@ -3689,8 +3690,11 @@ static void ffs_closed(struct ffs_data *ffs) || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) goto done; - unregister_gadget_item(ffs_obj->opts-> - func_inst.group.cg_item.ci_parent->ci_parent); + ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; + ffs_dev_unlock(); + + unregister_gadget_item(ci); + return; done: ffs_dev_unlock(); } diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 3151d2a0fe59..5f8139b8e601 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -593,7 +593,7 @@ static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) } status = usb_ep_enable(hidg->out_ep); if (status < 0) { - ERROR(cdev, "Enable IN endpoint FAILED!\n"); + ERROR(cdev, "Enable OUT endpoint FAILED!\n"); goto fail; } hidg->out_ep->driver_data = hidg; diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index e8f4102d19df..6bde4396927c 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1126,7 +1126,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* data and/or status stage for control request */ } else if (dev->state == STATE_DEV_SETUP) { - /* IN DATA+STATUS caller makes len <= wLength */ + len = min_t(size_t, len, dev->setup_wLength); if (dev->setup_in) { retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { @@ -1734,10 +1734,12 @@ static struct usb_gadget_driver gadgetfs_driver = { * such as configuration notifications. */ -static int is_valid_config (struct usb_config_descriptor *config) +static int is_valid_config(struct usb_config_descriptor *config, + unsigned int total) { return config->bDescriptorType == USB_DT_CONFIG && config->bLength == USB_DT_CONFIG_SIZE + && total >= USB_DT_CONFIG_SIZE && config->bConfigurationValue != 0 && (config->bmAttributes & USB_CONFIG_ATT_ONE) != 0 && (config->bmAttributes & USB_CONFIG_ATT_WAKEUP) == 0; @@ -1762,7 +1764,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } spin_unlock_irq(&dev->lock); - if (len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) + if ((len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) || + (len > PAGE_SIZE * 4)) return -EINVAL; /* we might need to change message format someday */ @@ -1786,7 +1789,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* full or low speed config */ dev->config = (void *) kbuf; total = le16_to_cpu(dev->config->wTotalLength); - if (!is_valid_config (dev->config) || total >= length) + if (!is_valid_config(dev->config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; @@ -1795,10 +1799,13 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) if (kbuf [1] == USB_DT_CONFIG) { dev->hs_config = (void *) kbuf; total = le16_to_cpu(dev->hs_config->wTotalLength); - if (!is_valid_config (dev->hs_config) || total >= length) + if (!is_valid_config(dev->hs_config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; + } else { + dev->hs_config = NULL; } /* could support multiple configs, using another encoding! */ @@ -1811,7 +1818,6 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) || dev->dev->bDescriptorType != USB_DT_DEVICE || dev->dev->bNumConfigurations != 1) goto fail; - dev->dev->bNumConfigurations = 1; dev->dev->bcdUSB = cpu_to_le16 (0x0200); /* triggers gadgetfs_bind(); then we can enumerate. */ diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 9483489080f6..0402177f93cd 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1317,7 +1317,11 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver) if (!ret) break; } - if (!ret && !udc->driver) + if (ret) + ret = -ENODEV; + else if (udc->driver) + ret = -EBUSY; + else goto found; } else { list_for_each_entry(udc, &udc_list, list) { diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 02b14e91ae6c..c60abe3a68f9 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -330,7 +330,7 @@ static void nuke(struct dummy *dum, struct dummy_ep *ep) /* caller must hold lock */ static void stop_activity(struct dummy *dum) { - struct dummy_ep *ep; + int i; /* prevent any more requests */ dum->address = 0; @@ -338,8 +338,8 @@ static void stop_activity(struct dummy *dum) /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ - list_for_each_entry(ep, &dum->gadget.ep_list, ep.ep_list) - nuke(dum, ep); + for (i = 0; i < DUMMY_ENDPOINTS; ++i) + nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index be9e63836881..414e3c376dbb 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -43,7 +43,6 @@ struct at91_usbh_data { struct gpio_desc *overcurrent_pin[AT91_MAX_USBH_PORTS]; u8 ports; /* number of ports on root hub */ u8 overcurrent_supported; - u8 vbus_pin_active_low[AT91_MAX_USBH_PORTS]; u8 overcurrent_status[AT91_MAX_USBH_PORTS]; u8 overcurrent_changed[AT91_MAX_USBH_PORTS]; }; @@ -266,8 +265,7 @@ static void ohci_at91_usb_set_power(struct at91_usbh_data *pdata, int port, int if (!valid_port(port)) return; - gpiod_set_value(pdata->vbus_pin[port], - pdata->vbus_pin_active_low[port] ^ enable); + gpiod_set_value(pdata->vbus_pin[port], enable); } static int ohci_at91_usb_get_power(struct at91_usbh_data *pdata, int port) @@ -275,8 +273,7 @@ static int ohci_at91_usb_get_power(struct at91_usbh_data *pdata, int port) if (!valid_port(port)) return -EINVAL; - return gpiod_get_value(pdata->vbus_pin[port]) ^ - pdata->vbus_pin_active_low[port]; + return gpiod_get_value(pdata->vbus_pin[port]); } /* @@ -533,18 +530,17 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev) pdata->ports = ports; at91_for_each_port(i) { - pdata->vbus_pin[i] = devm_gpiod_get_optional(&pdev->dev, - "atmel,vbus-gpio", - GPIOD_IN); + if (i >= pdata->ports) + break; + + pdata->vbus_pin[i] = + devm_gpiod_get_index_optional(&pdev->dev, "atmel,vbus", + i, GPIOD_OUT_HIGH); if (IS_ERR(pdata->vbus_pin[i])) { err = PTR_ERR(pdata->vbus_pin[i]); dev_err(&pdev->dev, "unable to claim gpio \"vbus\": %d\n", err); continue; } - - pdata->vbus_pin_active_low[i] = gpiod_get_value(pdata->vbus_pin[i]); - - ohci_at91_usb_set_power(pdata, i, 1); } at91_for_each_port(i) { @@ -552,8 +548,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev) break; pdata->overcurrent_pin[i] = - devm_gpiod_get_optional(&pdev->dev, - "atmel,oc-gpio", GPIOD_IN); + devm_gpiod_get_index_optional(&pdev->dev, "atmel,oc", + i, GPIOD_IN); if (IS_ERR(pdata->overcurrent_pin[i])) { err = PTR_ERR(pdata->overcurrent_pin[i]); dev_err(&pdev->dev, "unable to claim gpio \"overcurrent\": %d\n", err); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 321de2e0161b..8414ed2a02de 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -979,6 +979,40 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) xhci->devs[slot_id] = NULL; } +/* + * Free a virt_device structure. + * If the virt_device added a tt_info (a hub) and has children pointing to + * that tt_info, then free the child first. Recursive. + * We can't rely on udev at this point to find child-parent relationships. + */ +void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) +{ + struct xhci_virt_device *vdev; + struct list_head *tt_list_head; + struct xhci_tt_bw_info *tt_info, *next; + int i; + + vdev = xhci->devs[slot_id]; + if (!vdev) + return; + + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); + list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { + /* is this a hub device that added a tt_info to the tts list */ + if (tt_info->slot_id == slot_id) { + /* are any devices using this tt_info? */ + for (i = 1; i < HCS_MAX_SLOTS(xhci->hcs_params1); i++) { + vdev = xhci->devs[i]; + if (vdev && (vdev->tt_info == tt_info)) + xhci_free_virt_devices_depth_first( + xhci, i); + } + } + } + /* we are now at a leaf device */ + xhci_free_virt_device(xhci, slot_id); +} + int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device *udev, gfp_t flags) { @@ -1795,7 +1829,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int size; int i, j, num_ports; - del_timer_sync(&xhci->cmd_timer); + cancel_delayed_work_sync(&xhci->cmd_timer); /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -1828,8 +1862,8 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) } } - for (i = 1; i < MAX_HC_SLOTS; ++i) - xhci_free_virt_device(xhci, i); + for (i = HCS_MAX_SLOTS(xhci->hcs_params1); i > 0; i--) + xhci_free_virt_devices_depth_first(xhci, i); dma_pool_destroy(xhci->segment_pool); xhci->segment_pool = NULL; @@ -2342,9 +2376,9 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_LIST_HEAD(&xhci->cmd_list); - /* init command timeout timer */ - setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, - (unsigned long)xhci); + /* init command timeout work */ + INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout); + init_completion(&xhci->cmd_ring_stop_completion); page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 1094ebd2838f..bac961cd24ad 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -579,8 +579,10 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto disable_ldos; irq = platform_get_irq(pdev, 0); - if (irq < 0) + if (irq < 0) { + ret = irq; goto disable_clk; + } /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index e96ae80d107e..954abfd5014d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -165,7 +165,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index bdf6b13d9b67..25f522b09dd9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -279,23 +279,76 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) readl(&xhci->dba->doorbell[0]); } -static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) +{ + return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); +} + +static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) +{ + return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command, + cmd_list); +} + +/* + * Turn all commands on command ring with status set to "aborted" to no-op trbs. + * If there are other commands waiting then restart the ring and kick the timer. + * This must be called with command ring stopped and xhci->lock held. + */ +static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, + struct xhci_command *cur_cmd) +{ + struct xhci_command *i_cmd; + u32 cycle_state; + + /* Turn all aborted commands in list to no-ops, then restart */ + list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) { + + if (i_cmd->status != COMP_CMD_ABORT) + continue; + + i_cmd->status = COMP_CMD_STOP; + + xhci_dbg(xhci, "Turn aborted command %p to no-op\n", + i_cmd->command_trb); + /* get cycle state from the original cmd trb */ + cycle_state = le32_to_cpu( + i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; + /* modify the command trb to no-op command */ + i_cmd->command_trb->generic.field[0] = 0; + i_cmd->command_trb->generic.field[1] = 0; + i_cmd->command_trb->generic.field[2] = 0; + i_cmd->command_trb->generic.field[3] = cpu_to_le32( + TRB_TYPE(TRB_CMD_NOOP) | cycle_state); + + /* + * caller waiting for completion is called when command + * completion event is received for these no-op commands + */ + } + + xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; + + /* ring command ring doorbell to restart the command ring */ + if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && + !(xhci->xhc_state & XHCI_STATE_DYING)) { + xhci->current_cmd = cur_cmd; + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_ring_cmd_db(xhci); + } +} + +/* Must be called with xhci->lock held, releases and aquires lock back */ +static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { u64 temp_64; int ret; xhci_dbg(xhci, "Abort command ring\n"); - temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; + reinit_completion(&xhci->cmd_ring_stop_completion); - /* - * Writing the CMD_RING_ABORT bit should cause a cmd completion event, - * however on some host hw the CMD_RING_RUNNING bit is correctly cleared - * but the completion event in never sent. Use the cmd timeout timer to - * handle those cases. Use twice the time to cover the bit polling retry - */ - mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT)); + temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); @@ -315,17 +368,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) udelay(1000); ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 3 * 1000 * 1000); - if (ret == 0) - return 0; - - xhci_err(xhci, "Stopped the command ring failed, " - "maybe the host is dead\n"); - del_timer(&xhci->cmd_timer); - xhci->xhc_state |= XHCI_STATE_DYING; - xhci_halt(xhci); - return -ESHUTDOWN; + if (ret < 0) { + xhci_err(xhci, "Stopped the command ring failed, " + "maybe the host is dead\n"); + xhci->xhc_state |= XHCI_STATE_DYING; + xhci_halt(xhci); + return -ESHUTDOWN; + } + } + /* + * Writing the CMD_RING_ABORT bit should cause a cmd completion event, + * however on some host hw the CMD_RING_RUNNING bit is correctly cleared + * but the completion event in never sent. Wait 2 secs (arbitrary + * number) to handle those cases after negation of CMD_RING_RUNNING. + */ + spin_unlock_irqrestore(&xhci->lock, flags); + ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion, + msecs_to_jiffies(2000)); + spin_lock_irqsave(&xhci->lock, flags); + if (!ret) { + xhci_dbg(xhci, "No stop event for abort, ring start fail?\n"); + xhci_cleanup_command_queue(xhci); + } else { + xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci)); } - return 0; } @@ -1207,101 +1273,62 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci) xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT); } -/* - * Turn all commands on command ring with status set to "aborted" to no-op trbs. - * If there are other commands waiting then restart the ring and kick the timer. - * This must be called with command ring stopped and xhci->lock held. - */ -static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, - struct xhci_command *cur_cmd) -{ - struct xhci_command *i_cmd, *tmp_cmd; - u32 cycle_state; - - /* Turn all aborted commands in list to no-ops, then restart */ - list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list, - cmd_list) { - - if (i_cmd->status != COMP_CMD_ABORT) - continue; - - i_cmd->status = COMP_CMD_STOP; - - xhci_dbg(xhci, "Turn aborted command %p to no-op\n", - i_cmd->command_trb); - /* get cycle state from the original cmd trb */ - cycle_state = le32_to_cpu( - i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; - /* modify the command trb to no-op command */ - i_cmd->command_trb->generic.field[0] = 0; - i_cmd->command_trb->generic.field[1] = 0; - i_cmd->command_trb->generic.field[2] = 0; - i_cmd->command_trb->generic.field[3] = cpu_to_le32( - TRB_TYPE(TRB_CMD_NOOP) | cycle_state); - - /* - * caller waiting for completion is called when command - * completion event is received for these no-op commands - */ - } - - xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; - - /* ring command ring doorbell to restart the command ring */ - if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && - !(xhci->xhc_state & XHCI_STATE_DYING)) { - xhci->current_cmd = cur_cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); - xhci_ring_cmd_db(xhci); - } - return; -} - - -void xhci_handle_command_timeout(unsigned long data) +void xhci_handle_command_timeout(struct work_struct *work) { struct xhci_hcd *xhci; int ret; unsigned long flags; u64 hw_ring_state; - bool second_timeout = false; - xhci = (struct xhci_hcd *) data; - /* mark this command to be cancelled */ + xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer); + spin_lock_irqsave(&xhci->lock, flags); - if (xhci->current_cmd) { - if (xhci->current_cmd->status == COMP_CMD_ABORT) - second_timeout = true; - xhci->current_cmd->status = COMP_CMD_ABORT; + + /* + * If timeout work is pending, or current_cmd is NULL, it means we + * raced with command completion. Command is handled so just return. + */ + if (!xhci->current_cmd || delayed_work_pending(&xhci->cmd_timer)) { + spin_unlock_irqrestore(&xhci->lock, flags); + return; } + /* mark this command to be cancelled */ + xhci->current_cmd->status = COMP_CMD_ABORT; /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { - spin_unlock_irqrestore(&xhci->lock, flags); + /* Prevent new doorbell, and start command abort */ + xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; xhci_dbg(xhci, "Command timeout\n"); - ret = xhci_abort_cmd_ring(xhci); + ret = xhci_abort_cmd_ring(xhci, flags); if (unlikely(ret == -ESHUTDOWN)) { xhci_err(xhci, "Abort command ring failed\n"); xhci_cleanup_command_queue(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); xhci_dbg(xhci, "xHCI host controller is dead.\n"); + + return; } - return; + + goto time_out_completed; } - /* command ring failed to restart, or host removed. Bail out */ - if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "command timed out twice, ring start fail?\n"); + /* host removed. Bail out */ + if (xhci->xhc_state & XHCI_STATE_REMOVING) { + xhci_dbg(xhci, "host removed, ring start fail?\n"); xhci_cleanup_command_queue(xhci); - return; + + goto time_out_completed; } /* command timeout on stopped ring, ring can't be aborted */ xhci_dbg(xhci, "Command timeout on stopped ring\n"); xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd); + +time_out_completed: spin_unlock_irqrestore(&xhci->lock, flags); return; } @@ -1333,7 +1360,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list); - del_timer(&xhci->cmd_timer); + cancel_delayed_work(&xhci->cmd_timer); trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event); @@ -1341,7 +1368,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, /* If CMD ring stopped we own the trbs between enqueue and dequeue */ if (cmd_comp_code == COMP_CMD_STOP) { - xhci_handle_stopped_cmd_ring(xhci, cmd); + complete_all(&xhci->cmd_ring_stop_completion); return; } @@ -1359,8 +1386,11 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, */ if (cmd_comp_code == COMP_CMD_ABORT) { xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; - if (cmd->status == COMP_CMD_ABORT) + if (cmd->status == COMP_CMD_ABORT) { + if (xhci->current_cmd == cmd) + xhci->current_cmd = NULL; goto event_handled; + } } cmd_type = TRB_FIELD_TO_TYPE(le32_to_cpu(cmd_trb->generic.field[3])); @@ -1421,7 +1451,9 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (cmd->cmd_list.next != &xhci->cmd_list) { xhci->current_cmd = list_entry(cmd->cmd_list.next, struct xhci_command, cmd_list); - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + } else if (xhci->current_cmd == cmd) { + xhci->current_cmd = NULL; } event_handled: @@ -1939,8 +1971,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, struct xhci_ep_ctx *ep_ctx; u32 trb_comp_code; u32 remaining, requested; - bool on_data_stage; + u32 trb_type; + trb_type = TRB_FIELD_TO_TYPE(le32_to_cpu(ep_trb->generic.field[3])); slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); xdev = xhci->devs[slot_id]; ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1; @@ -1950,14 +1983,11 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, requested = td->urb->transfer_buffer_length; remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); - /* not setup (dequeue), or status stage means we are at data stage */ - on_data_stage = (ep_trb != ep_ring->dequeue && ep_trb != td->last_trb); - switch (trb_comp_code) { case COMP_SUCCESS: - if (ep_trb != td->last_trb) { + if (trb_type != TRB_STATUS) { xhci_warn(xhci, "WARN: Success on ctrl %s TRB without IOC set?\n", - on_data_stage ? "data" : "setup"); + (trb_type == TRB_DATA) ? "data" : "setup"); *status = -ESHUTDOWN; break; } @@ -1967,15 +1997,25 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, *status = 0; break; case COMP_STOP_SHORT: - if (on_data_stage) + if (trb_type == TRB_DATA || trb_type == TRB_NORMAL) td->urb->actual_length = remaining; else xhci_warn(xhci, "WARN: Stopped Short Packet on ctrl setup or status TRB\n"); goto finish_td; case COMP_STOP: - if (on_data_stage) + switch (trb_type) { + case TRB_SETUP: + td->urb->actual_length = 0; + goto finish_td; + case TRB_DATA: + case TRB_NORMAL: td->urb->actual_length = requested - remaining; - goto finish_td; + goto finish_td; + default: + xhci_warn(xhci, "WARN: unexpected TRB Type %d\n", + trb_type); + goto finish_td; + } case COMP_STOP_INVAL: goto finish_td; default: @@ -1987,7 +2027,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, /* else fall through */ case COMP_STALL: /* Did we transfer part of the data (middle) phase? */ - if (on_data_stage) + if (trb_type == TRB_DATA || trb_type == TRB_NORMAL) td->urb->actual_length = requested - remaining; else if (!td->urb_length_set) td->urb->actual_length = 0; @@ -1995,14 +2035,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, } /* stopped at setup stage, no data transferred */ - if (ep_trb == ep_ring->dequeue) + if (trb_type == TRB_SETUP) goto finish_td; /* * if on data stage then update the actual_length of the URB and flag it * as set, so it won't be overwritten in the event for the last TRB. */ - if (on_data_stage) { + if (trb_type == TRB_DATA || + trb_type == TRB_NORMAL) { td->urb_length_set = true; td->urb->actual_length = requested - remaining; xhci_dbg(xhci, "Waiting for status stage event\n"); @@ -3790,9 +3831,9 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (xhci->cmd_list.next == &cmd->cmd_list && - !timer_pending(&xhci->cmd_timer)) { + !delayed_work_pending(&xhci->cmd_timer)) { xhci->current_cmd = cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } queue_trb(xhci, xhci->cmd_ring, false, field1, field2, field3, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1cd56417cbec..0c8deb9ed42d 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3787,8 +3787,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying, removing or halted */ + if (xhci->xhc_state) { /* dying, removing or halted */ + ret = -ESHUTDOWN; goto out; + } if (!udev->slot_id) { xhci_dbg_trace(xhci, trace_xhci_dbg_address, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 8ccc11a974b8..2d7b6374b58d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1568,7 +1568,8 @@ struct xhci_hcd { #define CMD_RING_STATE_STOPPED (1 << 2) struct list_head cmd_list; unsigned int cmd_ring_reserved_trbs; - struct timer_list cmd_timer; + struct delayed_work cmd_timer; + struct completion cmd_ring_stop_completion; struct xhci_command *current_cmd; struct xhci_ring *event_ring; struct xhci_erst erst; @@ -1934,7 +1935,7 @@ void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, struct xhci_dequeue_state *deq_state); void xhci_stop_endpoint_command_watchdog(unsigned long arg); -void xhci_handle_command_timeout(unsigned long data); +void xhci_handle_command_timeout(struct work_struct *work); void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, unsigned int stream_id); diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 310238c6b5cd..896798071817 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -469,6 +469,7 @@ static const struct musb_platform_ops bfin_ops = { .init = bfin_musb_init, .exit = bfin_musb_exit, + .fifo_offset = bfin_fifo_offset, .readb = bfin_readb, .writeb = bfin_writeb, .readw = bfin_readw, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 9e226468a13e..fca288bbc800 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2050,6 +2050,7 @@ struct musb_pending_work { struct list_head node; }; +#ifdef CONFIG_PM /* * Called from musb_runtime_resume(), musb_resume(), and * musb_queue_resume_work(). Callers must take musb->lock. @@ -2077,6 +2078,7 @@ static int musb_run_resume_work(struct musb *musb) return error; } +#endif /* * Called to run work if device is active or else queue the work to happen diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index a611e2f67bdc..ade902ea1221 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -216,6 +216,7 @@ struct musb_platform_ops { void (*pre_root_reset_end)(struct musb *musb); void (*post_root_reset_end)(struct musb *musb); int (*phy_callback)(enum musb_vbus_id_status status); + void (*clear_ep_rxintr)(struct musb *musb, int epnum); }; /* @@ -626,6 +627,12 @@ static inline void musb_platform_post_root_reset_end(struct musb *musb) musb->ops->post_root_reset_end(musb); } +static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum) +{ + if (musb->ops->clear_ep_rxintr) + musb->ops->clear_ep_rxintr(musb, epnum); +} + /* * gets the "dr_mode" property from DT and converts it into musb_mode * if the property is not found or not recognized returns MUSB_OTG diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index feae1561b9ab..9f125e179acd 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -267,6 +267,17 @@ static void otg_timer(unsigned long _musb) pm_runtime_put_autosuspend(dev); } +void dsps_musb_clear_ep_rxintr(struct musb *musb, int epnum) +{ + u32 epintr; + struct dsps_glue *glue = dev_get_drvdata(musb->controller->parent); + const struct dsps_musb_wrapper *wrp = glue->wrp; + + /* musb->lock might already been held */ + epintr = (1 << epnum) << wrp->rxep_shift; + musb_writel(musb->ctrl_base, wrp->epintr_status, epintr); +} + static irqreturn_t dsps_interrupt(int irq, void *hci) { struct musb *musb = hci; @@ -622,6 +633,7 @@ static struct musb_platform_ops dsps_ops = { .set_mode = dsps_musb_set_mode, .recover = dsps_musb_recover, + .clear_ep_rxintr = dsps_musb_clear_ep_rxintr, }; static u64 musb_dmamask = DMA_BIT_MASK(32); diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index f6cdbad00dac..ac3a4952abb4 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2374,12 +2374,11 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) int is_in = usb_pipein(urb->pipe); int status = 0; u16 csr; + struct dma_channel *dma = NULL; musb_ep_select(regs, hw_end); if (is_dma_capable()) { - struct dma_channel *dma; - dma = is_in ? ep->rx_channel : ep->tx_channel; if (dma) { status = ep->musb->dma_controller->channel_abort(dma); @@ -2395,10 +2394,9 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) /* giveback saves bulk toggle */ csr = musb_h_flush_rxfifo(ep, 0); - /* REVISIT we still get an irq; should likely clear the - * endpoint's irq status here to avoid bogus irqs. - * clearing that status is platform-specific... - */ + /* clear the endpoint's irq status here to avoid bogus irqs */ + if (is_dma_capable() && dma) + musb_platform_clear_ep_rxintr(musb, ep->epnum); } else if (ep->epnum) { musb_h_tx_flush_fifo(ep); csr = musb_readw(epio, MUSB_TXCSR); diff --git a/drivers/usb/musb/musbhsdma.h b/drivers/usb/musb/musbhsdma.h index f7b13fd25257..a3dcbd55e436 100644 --- a/drivers/usb/musb/musbhsdma.h +++ b/drivers/usb/musb/musbhsdma.h @@ -157,5 +157,5 @@ struct musb_dma_controller { void __iomem *base; u8 channel_count; u8 used_channels; - u8 irq; + int irq; }; diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 5f17a3b9916d..80260b08398b 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -50,6 +50,7 @@ #define CYBERJACK_PRODUCT_ID 0x0100 /* Function prototypes */ +static int cyberjack_attach(struct usb_serial *serial); static int cyberjack_port_probe(struct usb_serial_port *port); static int cyberjack_port_remove(struct usb_serial_port *port); static int cyberjack_open(struct tty_struct *tty, @@ -77,6 +78,7 @@ static struct usb_serial_driver cyberjack_device = { .description = "Reiner SCT Cyberjack USB card reader", .id_table = id_table, .num_ports = 1, + .attach = cyberjack_attach, .port_probe = cyberjack_port_probe, .port_remove = cyberjack_port_remove, .open = cyberjack_open, @@ -100,6 +102,14 @@ struct cyberjack_private { short wrsent; /* Data already sent */ }; +static int cyberjack_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_out < serial->num_ports) + return -ENODEV; + + return 0; +} + static int cyberjack_port_probe(struct usb_serial_port *port) { struct cyberjack_private *priv; diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c index 8282a6a18fee..22f23a429a95 100644 --- a/drivers/usb/serial/f81534.c +++ b/drivers/usb/serial/f81534.c @@ -1237,6 +1237,7 @@ static int f81534_attach(struct usb_serial *serial) static int f81534_port_probe(struct usb_serial_port *port) { struct f81534_port_private *port_priv; + int ret; port_priv = devm_kzalloc(&port->dev, sizeof(*port_priv), GFP_KERNEL); if (!port_priv) @@ -1246,10 +1247,11 @@ static int f81534_port_probe(struct usb_serial_port *port) mutex_init(&port_priv->mcr_mutex); /* Assign logic-to-phy mapping */ - port_priv->phy_num = f81534_logic_to_phy_port(port->serial, port); - if (port_priv->phy_num < 0 || port_priv->phy_num >= F81534_NUM_PORT) - return -ENODEV; + ret = f81534_logic_to_phy_port(port->serial, port); + if (ret < 0) + return ret; + port_priv->phy_num = ret; usb_set_serial_port_data(port, port_priv); dev_dbg(&port->dev, "%s: port_number: %d, phy_num: %d\n", __func__, port->port_number, port_priv->phy_num); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 97cabf803c2f..b2f2e87aed94 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -1043,6 +1043,7 @@ static int garmin_write_bulk(struct usb_serial_port *port, "%s - usb_submit_urb(write bulk) failed with status = %d\n", __func__, status); count = status; + kfree(buffer); } /* we are done with this urb, so let the host driver diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index dcc0c58aaad5..d50e5773483f 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2751,6 +2751,11 @@ static int edge_startup(struct usb_serial *serial) EDGE_COMPATIBILITY_MASK1, EDGE_COMPATIBILITY_MASK2 }; + if (serial->num_bulk_in < 1 || serial->num_interrupt_in < 1) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev = serial->dev; /* create our private serial structure */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c339163698eb..9a0db2965fbb 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1499,8 +1499,7 @@ static int do_boot_mode(struct edgeport_serial *serial, dev_dbg(dev, "%s - Download successful -- Device rebooting...\n", __func__); - /* return an error on purpose */ - return -ENODEV; + return 1; } stayinbootmode: @@ -1508,7 +1507,7 @@ stayinbootmode: dev_dbg(dev, "%s - STAYING IN BOOT MODE\n", __func__); serial->product_info.TiMode = TI_MODE_BOOT; - return 0; + return 1; } static int ti_do_config(struct edgeport_port *port, int feature, int on) @@ -2546,6 +2545,13 @@ static int edge_startup(struct usb_serial *serial) int status; u16 product_id; + /* Make sure we have the required endpoints when in download mode. */ + if (serial->interface->cur_altsetting->desc.bNumEndpoints > 1) { + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) + return -ENODEV; + } + /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (!edge_serial) @@ -2553,14 +2559,18 @@ static int edge_startup(struct usb_serial *serial) mutex_init(&edge_serial->es_lock); edge_serial->serial = serial; + INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); usb_set_serial_data(serial, edge_serial); status = download_fw(edge_serial); - if (status) { + if (status < 0) { kfree(edge_serial); return status; } + if (status > 0) + return 1; /* bind but do not register any ports */ + product_id = le16_to_cpu( edge_serial->serial->dev->descriptor.idProduct); @@ -2572,7 +2582,6 @@ static int edge_startup(struct usb_serial *serial) } } - INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); edge_heartbeat_schedule(edge_serial); return 0; @@ -2580,6 +2589,9 @@ static int edge_startup(struct usb_serial *serial) static void edge_disconnect(struct usb_serial *serial) { + struct edgeport_serial *edge_serial = usb_get_serial_data(serial); + + cancel_delayed_work_sync(&edge_serial->heartbeat_work); } static void edge_release(struct usb_serial *serial) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 344b4eea4bd5..d57fb5199218 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -68,6 +68,16 @@ struct iuu_private { u32 clk; }; +static int iuu_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || serial->num_bulk_out < num_ports) + return -ENODEV; + + return 0; +} + static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; @@ -1196,6 +1206,7 @@ static struct usb_serial_driver iuu_device = { .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, + .attach = iuu_attach, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index e49ad0c63ad8..83523fcf6fb9 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -699,6 +699,19 @@ MODULE_FIRMWARE("keyspan_pda/keyspan_pda.fw"); MODULE_FIRMWARE("keyspan_pda/xircom_pgs.fw"); #endif +static int keyspan_pda_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int keyspan_pda_port_probe(struct usb_serial_port *port) { @@ -776,6 +789,7 @@ static struct usb_serial_driver keyspan_pda_device = { .break_ctl = keyspan_pda_break_ctl, .tiocmget = keyspan_pda_tiocmget, .tiocmset = keyspan_pda_tiocmset, + .attach = keyspan_pda_attach, .port_probe = keyspan_pda_port_probe, .port_remove = keyspan_pda_port_remove, }; diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 2363654cafc9..813035f51fe7 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -51,6 +51,7 @@ /* Function prototypes */ +static int kobil_attach(struct usb_serial *serial); static int kobil_port_probe(struct usb_serial_port *probe); static int kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); @@ -86,6 +87,7 @@ static struct usb_serial_driver kobil_device = { .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, + .attach = kobil_attach, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, @@ -113,6 +115,16 @@ struct kobil_private { }; +static int kobil_attach(struct usb_serial *serial) +{ + if (serial->num_interrupt_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing interrupt-out endpoint\n"); + return -ENODEV; + } + + return 0; +} + static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index d52caa03679c..91bc170b408a 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -65,8 +65,6 @@ struct moschip_port { struct urb *write_urb_pool[NUM_URBS]; }; -static struct usb_serial_driver moschip7720_2port_driver; - #define USB_VENDOR_ID_MOSCHIP 0x9710 #define MOSCHIP_DEVICE_ID_7720 0x7720 #define MOSCHIP_DEVICE_ID_7715 0x7715 @@ -970,25 +968,6 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) tty_port_tty_wakeup(&mos7720_port->port->port); } -/* - * mos77xx_probe - * this function installs the appropriate read interrupt endpoint callback - * depending on whether the device is a 7720 or 7715, thus avoiding costly - * run-time checks in the high-frequency callback routine itself. - */ -static int mos77xx_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - if (id->idProduct == MOSCHIP_DEVICE_ID_7715) - moschip7720_2port_driver.read_int_callback = - mos7715_interrupt_callback; - else - moschip7720_2port_driver.read_int_callback = - mos7720_interrupt_callback; - - return 0; -} - static int mos77xx_calc_num_ports(struct usb_serial *serial) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); @@ -1917,6 +1896,11 @@ static int mos7720_startup(struct usb_serial *serial) u16 product; int ret_val; + if (serial->num_bulk_in < 2 || serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk endpoints\n"); + return -ENODEV; + } + product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; @@ -1941,19 +1925,18 @@ static int mos7720_startup(struct usb_serial *serial) tmp->interrupt_in_endpointAddress; serial->port[1]->interrupt_in_urb = NULL; serial->port[1]->interrupt_in_buffer = NULL; + + if (serial->port[0]->interrupt_in_urb) { + struct urb *urb = serial->port[0]->interrupt_in_urb; + + urb->complete = mos7715_interrupt_callback; + } } /* setting configuration feature to one */ usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - /* start the interrupt urb */ - ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); - if (ret_val) - dev_err(&dev->dev, - "%s - Error %d submitting control urb\n", - __func__, ret_val); - #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); @@ -1961,6 +1944,13 @@ static int mos7720_startup(struct usb_serial *serial) return ret_val; } #endif + /* start the interrupt urb */ + ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); + if (ret_val) { + dev_err(&dev->dev, "failed to submit interrupt urb: %d\n", + ret_val); + } + /* LSR For Port 1 */ read_mos_reg(serial, 0, MOS7720_LSR, &data); dev_dbg(&dev->dev, "LSR:%x\n", data); @@ -1970,6 +1960,8 @@ static int mos7720_startup(struct usb_serial *serial) static void mos7720_release(struct usb_serial *serial) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); + #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT /* close the parallel port */ @@ -2019,11 +2011,6 @@ static int mos7720_port_probe(struct usb_serial_port *port) if (!mos7720_port) return -ENOMEM; - /* Initialize all port interrupt end point to port 0 int endpoint. - * Our device has only one interrupt endpoint common to all ports. - */ - port->interrupt_in_endpointAddress = - port->serial->port[0]->interrupt_in_endpointAddress; mos7720_port->port = port; usb_set_serial_port_data(port, mos7720_port); @@ -2053,7 +2040,6 @@ static struct usb_serial_driver moschip7720_2port_driver = { .close = mos7720_close, .throttle = mos7720_throttle, .unthrottle = mos7720_unthrottle, - .probe = mos77xx_probe, .attach = mos7720_startup, .release = mos7720_release, .port_probe = mos7720_port_probe, @@ -2067,7 +2053,7 @@ static struct usb_serial_driver moschip7720_2port_driver = { .chars_in_buffer = mos7720_chars_in_buffer, .break_ctl = mos7720_break, .read_bulk_callback = mos7720_bulk_in_callback, - .read_int_callback = NULL /* dynamically assigned in probe() */ + .read_int_callback = mos7720_interrupt_callback, }; static struct usb_serial_driver * const serial_drivers[] = { diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 9a220b8e810f..ea27fb23967a 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -214,7 +214,6 @@ MODULE_DEVICE_TABLE(usb, id_table); struct moschip_port { int port_num; /*Actual port number in the device(1,2,etc) */ - struct urb *write_urb; /* write URB for this port */ struct urb *read_urb; /* read URB for this port */ __u8 shadowLCR; /* last LCR value received */ __u8 shadowMCR; /* last MCR value received */ @@ -1037,9 +1036,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) serial, serial->port[0]->interrupt_in_urb->interval); - /* start interrupt read for mos7840 * - * will continue as long as mos7840 is connected */ - + /* start interrupt read for mos7840 */ response = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); @@ -1186,7 +1183,6 @@ static void mos7840_close(struct usb_serial_port *port) } } - usb_kill_urb(mos7840_port->write_urb); usb_kill_urb(mos7840_port->read_urb); mos7840_port->read_urb_busy = false; @@ -1199,12 +1195,6 @@ static void mos7840_close(struct usb_serial_port *port) } } - if (mos7840_port->write_urb) { - /* if this urb had a transfer buffer already (old tx) free it */ - kfree(mos7840_port->write_urb->transfer_buffer); - usb_free_urb(mos7840_port->write_urb); - } - Data = 0x0; mos7840_set_uart_reg(port, MODEM_CONTROL_REGISTER, Data); @@ -2113,6 +2103,17 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) return mos7840_num_ports; } +static int mos7840_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -2388,6 +2389,7 @@ static struct usb_serial_driver moschip7840_4port_device = { .tiocmset = mos7840_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, + .attach = mos7840_attach, .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index f6c6900bccf0..a180b17d2432 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -38,6 +38,7 @@ static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int omninet_write_room(struct tty_struct *tty); static void omninet_disconnect(struct usb_serial *serial); +static int omninet_attach(struct usb_serial *serial); static int omninet_port_probe(struct usb_serial_port *port); static int omninet_port_remove(struct usb_serial_port *port); @@ -56,6 +57,7 @@ static struct usb_serial_driver zyxel_omninet_device = { .description = "ZyXEL - omni.net lcd plus usb", .id_table = id_table, .num_ports = 1, + .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, .open = omninet_open, @@ -104,6 +106,17 @@ struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; +static int omninet_attach(struct usb_serial *serial) +{ + /* The second bulk-out endpoint is used for writing. */ + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int omninet_port_probe(struct usb_serial_port *port) { struct omninet_data *od; diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index a4b88bc038b6..b8bf52bf7a94 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -134,6 +134,7 @@ static int oti6858_chars_in_buffer(struct tty_struct *tty); static int oti6858_tiocmget(struct tty_struct *tty); static int oti6858_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); +static int oti6858_attach(struct usb_serial *serial); static int oti6858_port_probe(struct usb_serial_port *port); static int oti6858_port_remove(struct usb_serial_port *port); @@ -158,6 +159,7 @@ static struct usb_serial_driver oti6858_device = { .write_bulk_callback = oti6858_write_bulk_callback, .write_room = oti6858_write_room, .chars_in_buffer = oti6858_chars_in_buffer, + .attach = oti6858_attach, .port_probe = oti6858_port_probe, .port_remove = oti6858_port_remove, }; @@ -324,6 +326,20 @@ static void send_data(struct work_struct *work) usb_serial_port_softint(port); } +static int oti6858_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int oti6858_port_probe(struct usb_serial_port *port) { struct oti6858_private *priv; diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ae682e4eeaef..46fca6b75846 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -220,9 +220,17 @@ static int pl2303_probe(struct usb_serial *serial, static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; + unsigned char num_ports = serial->num_ports; enum pl2303_type type = TYPE_01; unsigned char *buf; + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 659cb8606bd9..5709cc93b083 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -408,16 +408,12 @@ static void qt2_close(struct usb_serial_port *port) { struct usb_serial *serial; struct qt2_port_private *port_priv; - unsigned long flags; int i; serial = port->serial; port_priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&port_priv->urb_lock, flags); usb_kill_urb(port_priv->write_urb); - port_priv->urb_in_use = false; - spin_unlock_irqrestore(&port_priv->urb_lock, flags); /* flush the port transmit buffer */ i = usb_control_msg(serial->dev, diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index ef0dbf0703c5..475e6c31b266 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -154,6 +154,19 @@ static int spcp8x5_probe(struct usb_serial *serial, return 0; } +static int spcp8x5_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int spcp8x5_port_probe(struct usb_serial_port *port) { const struct usb_device_id *id = usb_get_serial_data(port->serial); @@ -477,6 +490,7 @@ static struct usb_serial_driver spcp8x5_device = { .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, .probe = spcp8x5_probe, + .attach = spcp8x5_attach, .port_probe = spcp8x5_port_probe, .port_remove = spcp8x5_port_remove, }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 8db9d071d940..64b85b8dedf3 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -579,6 +579,13 @@ static int ti_startup(struct usb_serial *serial) goto free_tdev; } + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + status = -ENODEV; + goto free_tdev; + } + return 0; free_tdev: diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index af3c7eecff91..16cc18369111 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2109,6 +2109,13 @@ UNUSUAL_DEV( 0x152d, 0x2566, 0x0114, 0x0114, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported-by George Cherian <george.cherian@cavium.com> */ +UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999, + "JMicron", + "JMS56x", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* * Entrega Technologies U1-SC25 (later Xircom PortGear PGSCSI) * and Mac USB Dock USB-SCSI */ diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index be1ee89ee917..36d75c367d22 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -27,6 +27,45 @@ static LIST_HEAD(parent_list); static DEFINE_MUTEX(parent_list_lock); static struct class_compat *mdev_bus_compat_class; +static LIST_HEAD(mdev_list); +static DEFINE_MUTEX(mdev_list_lock); + +struct device *mdev_parent_dev(struct mdev_device *mdev) +{ + return mdev->parent->dev; +} +EXPORT_SYMBOL(mdev_parent_dev); + +void *mdev_get_drvdata(struct mdev_device *mdev) +{ + return mdev->driver_data; +} +EXPORT_SYMBOL(mdev_get_drvdata); + +void mdev_set_drvdata(struct mdev_device *mdev, void *data) +{ + mdev->driver_data = data; +} +EXPORT_SYMBOL(mdev_set_drvdata); + +struct device *mdev_dev(struct mdev_device *mdev) +{ + return &mdev->dev; +} +EXPORT_SYMBOL(mdev_dev); + +struct mdev_device *mdev_from_dev(struct device *dev) +{ + return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL; +} +EXPORT_SYMBOL(mdev_from_dev); + +uuid_le mdev_uuid(struct mdev_device *mdev) +{ + return mdev->uuid; +} +EXPORT_SYMBOL(mdev_uuid); + static int _find_mdev_device(struct device *dev, void *data) { struct mdev_device *mdev; @@ -42,7 +81,7 @@ static int _find_mdev_device(struct device *dev, void *data) return 0; } -static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) +static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) { struct device *dev; @@ -56,9 +95,9 @@ static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) } /* Should be called holding parent_list_lock */ -static struct parent_device *__find_parent_device(struct device *dev) +static struct mdev_parent *__find_parent_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; list_for_each_entry(parent, &parent_list, next) { if (parent->dev == dev) @@ -69,8 +108,8 @@ static struct parent_device *__find_parent_device(struct device *dev) static void mdev_release_parent(struct kref *kref) { - struct parent_device *parent = container_of(kref, struct parent_device, - ref); + struct mdev_parent *parent = container_of(kref, struct mdev_parent, + ref); struct device *dev = parent->dev; kfree(parent); @@ -78,7 +117,7 @@ static void mdev_release_parent(struct kref *kref) } static -inline struct parent_device *mdev_get_parent(struct parent_device *parent) +inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) { if (parent) kref_get(&parent->ref); @@ -86,7 +125,7 @@ inline struct parent_device *mdev_get_parent(struct parent_device *parent) return parent; } -static inline void mdev_put_parent(struct parent_device *parent) +static inline void mdev_put_parent(struct mdev_parent *parent) { if (parent) kref_put(&parent->ref, mdev_release_parent); @@ -95,7 +134,7 @@ static inline void mdev_put_parent(struct parent_device *parent) static int mdev_device_create_ops(struct kobject *kobj, struct mdev_device *mdev) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; ret = parent->ops->create(kobj, mdev); @@ -122,7 +161,7 @@ static int mdev_device_create_ops(struct kobject *kobj, */ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; /* @@ -153,10 +192,10 @@ static int mdev_device_remove_cb(struct device *dev, void *data) * Add device to list of registered parent devices. * Returns a negative value on error, otherwise 0. */ -int mdev_register_device(struct device *dev, const struct parent_ops *ops) +int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) { int ret; - struct parent_device *parent; + struct mdev_parent *parent; /* check for mandatory ops */ if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups) @@ -229,7 +268,7 @@ EXPORT_SYMBOL(mdev_register_device); void mdev_unregister_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; bool force_remove = true; mutex_lock(&parent_list_lock); @@ -266,7 +305,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) { int ret; struct mdev_device *mdev; - struct parent_device *parent; + struct mdev_parent *parent; struct mdev_type *type = to_mdev_type(kobj); parent = mdev_get_parent(type->parent); @@ -316,6 +355,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) dev_dbg(&mdev->dev, "MDEV: created\n"); mutex_unlock(&parent->lock); + + mutex_lock(&mdev_list_lock); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + return ret; create_failed: @@ -329,12 +373,30 @@ create_err: int mdev_device_remove(struct device *dev, bool force_remove) { - struct mdev_device *mdev; - struct parent_device *parent; + struct mdev_device *mdev, *tmp; + struct mdev_parent *parent; struct mdev_type *type; int ret; + bool found = false; mdev = to_mdev_device(dev); + + mutex_lock(&mdev_list_lock); + list_for_each_entry(tmp, &mdev_list, next) { + if (tmp == mdev) { + found = true; + break; + } + } + + if (found) + list_del(&mdev->next); + + mutex_unlock(&mdev_list_lock); + + if (!found) + return -ENODEV; + type = to_mdev_type(mdev->type_kobj); parent = mdev->parent; mutex_lock(&parent->lock); @@ -342,6 +404,11 @@ int mdev_device_remove(struct device *dev, bool force_remove) ret = mdev_device_remove_ops(mdev, force_remove); if (ret) { mutex_unlock(&parent->lock); + + mutex_lock(&mdev_list_lock); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + return ret; } @@ -349,7 +416,8 @@ int mdev_device_remove(struct device *dev, bool force_remove) device_unregister(dev); mutex_unlock(&parent->lock); mdev_put_parent(parent); - return ret; + + return 0; } static int __init mdev_init(void) diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index d35097cbf3d7..a9cefd70a705 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -16,10 +16,33 @@ int mdev_bus_register(void); void mdev_bus_unregister(void); +struct mdev_parent { + struct device *dev; + const struct mdev_parent_ops *ops; + struct kref ref; + struct mutex lock; + struct list_head next; + struct kset *mdev_types_kset; + struct list_head type_list; +}; + +struct mdev_device { + struct device dev; + struct mdev_parent *parent; + uuid_le uuid; + void *driver_data; + struct kref ref; + struct list_head next; + struct kobject *type_kobj; +}; + +#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) + struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; - struct parent_device *parent; + struct mdev_parent *parent; struct list_head next; struct attribute_group *group; }; @@ -29,8 +52,8 @@ struct mdev_type { #define to_mdev_type(_kobj) \ container_of(_kobj, struct mdev_type, kobj) -int parent_create_sysfs_files(struct parent_device *parent); -void parent_remove_sysfs_files(struct parent_device *parent); +int parent_create_sysfs_files(struct mdev_parent *parent); +void parent_remove_sysfs_files(struct mdev_parent *parent); int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type); void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type); diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 1a53deb2ee10..802df210929b 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -92,7 +92,7 @@ static struct kobj_type mdev_type_ktype = { .release = mdev_type_release, }; -struct mdev_type *add_mdev_supported_type(struct parent_device *parent, +struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, struct attribute_group *group) { struct mdev_type *type; @@ -158,7 +158,7 @@ static void remove_mdev_supported_type(struct mdev_type *type) kobject_put(&type->kobj); } -static int add_mdev_supported_type_groups(struct parent_device *parent) +static int add_mdev_supported_type_groups(struct mdev_parent *parent) { int i; @@ -183,7 +183,7 @@ static int add_mdev_supported_type_groups(struct parent_device *parent) } /* mdev sysfs functions */ -void parent_remove_sysfs_files(struct parent_device *parent) +void parent_remove_sysfs_files(struct mdev_parent *parent) { struct mdev_type *type, *tmp; @@ -196,7 +196,7 @@ void parent_remove_sysfs_files(struct parent_device *parent) kset_unregister(parent->mdev_types_kset); } -int parent_create_sysfs_files(struct parent_device *parent) +int parent_create_sysfs_files(struct mdev_parent *parent) { int ret; diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index ffc36758cb84..fa848a701b8b 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -27,7 +27,7 @@ static int vfio_mdev_open(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; if (unlikely(!parent->ops->open)) @@ -46,7 +46,7 @@ static int vfio_mdev_open(void *device_data) static void vfio_mdev_release(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (likely(parent->ops->release)) parent->ops->release(mdev); @@ -58,7 +58,7 @@ static long vfio_mdev_unlocked_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->ioctl)) return -EINVAL; @@ -70,7 +70,7 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->read)) return -EINVAL; @@ -82,7 +82,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->write)) return -EINVAL; @@ -93,7 +93,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->mmap)) return -EINVAL; diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index dcd7c2a99618..324c52e3a1a4 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1142,6 +1142,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return ret; vdev->barmap[index] = pci_iomap(pdev, index, 0); + if (!vdev->barmap[index]) { + pci_release_selected_regions(pdev, 1 << index); + return -ENOMEM; + } } vma->vm_private_data = vdev; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 5ffd1d9ad4bd..357243d76f10 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -193,7 +193,10 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, if (!vdev->has_vga) return -EINVAL; - switch (pos) { + if (pos > 0xbfffful) + return -EINVAL; + + switch ((u32)pos) { case 0xa0000 ... 0xbffff: count = min(count, (size_t)(0xc0000 - pos)); iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f3726ba12aa6..9266271a787a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -268,28 +268,38 @@ static void vfio_lock_acct(struct task_struct *task, long npage) { struct vwork *vwork; struct mm_struct *mm; + bool is_current; if (!npage) return; - mm = get_task_mm(task); + is_current = (task->mm == current->mm); + + mm = is_current ? task->mm : get_task_mm(task); if (!mm) - return; /* process exited or nothing to do */ + return; /* process exited */ if (down_write_trylock(&mm->mmap_sem)) { mm->locked_vm += npage; up_write(&mm->mmap_sem); - mmput(mm); + if (!is_current) + mmput(mm); return; } + if (is_current) { + mm = get_task_mm(task); + if (!mm) + return; + } + /* * Couldn't get mmap_sem lock, so must setup to update * mm->locked_vm later. If locked_vm were atomic, we * wouldn't need this silliness */ vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); - if (!vwork) { + if (WARN_ON(!vwork)) { mmput(mm); return; } @@ -393,77 +403,71 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base) { - unsigned long limit; - bool lock_cap = ns_capable(task_active_pid_ns(dma->task)->user_ns, - CAP_IPC_LOCK); - struct mm_struct *mm; - long ret, i = 0, lock_acct = 0; + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); + long ret, pinned = 0, lock_acct = 0; bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; - mm = get_task_mm(dma->task); - if (!mm) + /* This code path is only user initiated */ + if (!current->mm) return -ENODEV; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); + ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base); if (ret) - goto pin_pg_remote_exit; + return ret; + pinned++; rsvd = is_invalid_reserved_pfn(*pfn_base); - limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; /* * Reserved pages aren't counted against the user, externally pinned * pages are already counted against the user. */ if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && mm->locked_vm + 1 > limit) { + if (!lock_cap && current->mm->locked_vm + 1 > limit) { put_pfn(*pfn_base, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); - ret = -ENOMEM; - goto pin_pg_remote_exit; + return -ENOMEM; } lock_acct++; } - i++; - if (likely(!disable_hugepages)) { - /* Lock all the consecutive pages from pfn_base */ - for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; i < npage; - i++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - unsigned long pfn = 0; + if (unlikely(disable_hugepages)) + goto out; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, &pfn); - if (ret) - break; + /* Lock all the consecutive pages from pfn_base */ + for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; + pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { + unsigned long pfn = 0; - if (pfn != *pfn_base + i || - rsvd != is_invalid_reserved_pfn(pfn)) { + ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); + if (ret) + break; + + if (pfn != *pfn_base + pinned || + rsvd != is_invalid_reserved_pfn(pfn)) { + put_pfn(pfn, dma->prot); + break; + } + + if (!rsvd && !vfio_find_vpfn(dma, iova)) { + if (!lock_cap && + current->mm->locked_vm + lock_acct + 1 > limit) { put_pfn(pfn, dma->prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); break; } - - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && - mm->locked_vm + lock_acct + 1 > limit) { - put_pfn(pfn, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) " - "exceeded\n", __func__, - limit << PAGE_SHIFT); - break; - } - lock_acct++; - } + lock_acct++; } } - vfio_lock_acct(dma->task, lock_acct); - ret = i; +out: + vfio_lock_acct(current, lock_acct); -pin_pg_remote_exit: - mmput(mm); - return ret; + return pinned; } static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, @@ -473,10 +477,10 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, long unlocked = 0, locked = 0; long i; - for (i = 0; i < npage; i++) { + for (i = 0; i < npage; i++, iova += PAGE_SIZE) { if (put_pfn(pfn++, dma->prot)) { unlocked++; - if (vfio_find_vpfn(dma, iova + (i << PAGE_SHIFT))) + if (vfio_find_vpfn(dma, iova)) locked++; } } diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 2d3b691f3fc4..038ac6934fe9 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -308,6 +308,11 @@ static int cobalt_lcdfb_probe(struct platform_device *dev) info->screen_size = resource_size(res); info->screen_base = devm_ioremap(&dev->dev, res->start, info->screen_size); + if (!info->screen_base) { + framebuffer_release(info); + return -ENOMEM; + } + info->fbops = &cobalt_lcd_fbops; info->fix = cobalt_lcdfb_fix; info->fix.smem_start = res->start; diff --git a/drivers/xen/arm-device.c b/drivers/xen/arm-device.c index 778acf80aacb..85dd20e05726 100644 --- a/drivers/xen/arm-device.c +++ b/drivers/xen/arm-device.c @@ -58,9 +58,13 @@ static int xen_map_device_mmio(const struct resource *resources, xen_pfn_t *gpfns; xen_ulong_t *idxs; int *errs; - struct xen_add_to_physmap_range xatp; for (i = 0; i < count; i++) { + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .space = XENMAPSPACE_dev_mmio + }; + r = &resources[i]; nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) @@ -87,9 +91,7 @@ static int xen_map_device_mmio(const struct resource *resources, idxs[j] = XEN_PFN_DOWN(r->start) + j; } - xatp.domid = DOMID_SELF; xatp.size = nr; - xatp.space = XENMAPSPACE_dev_mmio; set_xen_guest_handle(xatp.gpfns, gpfns); set_xen_guest_handle(xatp.idxs, idxs); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index c03f9c86c7e3..3c41470c7fc4 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -369,8 +369,7 @@ static void evtchn_fifo_resume(void) } ret = init_control_block(cpu, control_block); - if (ret < 0) - BUG(); + BUG_ON(ret < 0); } /* diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index e8c7f09d01be..6890897a6f30 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -125,7 +125,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) while (*new) { struct user_evtchn *this; - this = container_of(*new, struct user_evtchn, node); + this = rb_entry(*new, struct user_evtchn, node); parent = *new; if (this->port < evtchn->port) @@ -157,7 +157,7 @@ static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) while (node) { struct user_evtchn *evtchn; - evtchn = container_of(node, struct user_evtchn, node); + evtchn = rb_entry(node, struct user_evtchn, node); if (evtchn->port < port) node = node->rb_left; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 478fb91e3df2..f905d6eeb048 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -275,6 +275,10 @@ retry: rc = 0; } else rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); + + if (!rc) + swiotlb_set_max_segment(PAGE_SIZE); + return rc; error: if (repeat--) { @@ -392,7 +396,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && - !swiotlb_force) { + (swiotlb_force != SWIOTLB_FORCE)) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed * by the function. */ @@ -552,7 +556,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = xen_phys_to_bus(paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h index e74f9c1fbd80..867a2e425208 100644 --- a/drivers/xen/xenbus/xenbus_comms.h +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -42,7 +42,6 @@ int xb_write(const void *data, unsigned len); int xb_read(void *data, unsigned len); int xb_data_to_read(void); int xb_wait_for_data_to_read(void); -int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; extern enum xenstore_init xen_store_domain_type; diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 6c0ead4be784..79130b310247 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -302,6 +302,29 @@ static void watch_fired(struct xenbus_watch *watch, mutex_unlock(&adap->dev_data->reply_mutex); } +static int xenbus_command_reply(struct xenbus_file_priv *u, + unsigned int msg_type, const char *reply) +{ + struct { + struct xsd_sockmsg hdr; + const char body[16]; + } msg; + int rc; + + msg.hdr = u->u.msg; + msg.hdr.type = msg_type; + msg.hdr.len = strlen(reply) + 1; + if (msg.hdr.len > sizeof(msg.body)) + return -E2BIG; + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &msg, sizeof(msg.hdr) + msg.hdr.len); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + + return rc; +} + static int xenbus_write_transaction(unsigned msg_type, struct xenbus_file_priv *u) { @@ -316,12 +339,12 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } - } else if (msg_type == XS_TRANSACTION_END) { + } else if (u->u.msg.tx_id != 0) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; if (&trans->list == &u->transactions) - return -ESRCH; + return xenbus_command_reply(u, XS_ERROR, "ENOENT"); } reply = xenbus_dev_request_and_reply(&u->u.msg); @@ -372,12 +395,12 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) path = u->u.buffer + sizeof(u->u.msg); token = memchr(path, 0, u->u.msg.len); if (token == NULL) { - rc = -EILSEQ; + rc = xenbus_command_reply(u, XS_ERROR, "EINVAL"); goto out; } token++; if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) { - rc = -EILSEQ; + rc = xenbus_command_reply(u, XS_ERROR, "EINVAL"); goto out; } @@ -411,23 +434,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) } /* Success. Synthesize a reply to say all is OK. */ - { - struct { - struct xsd_sockmsg hdr; - char body[3]; - } __packed reply = { - { - .type = msg_type, - .len = sizeof(reply.body) - }, - "OK" - }; - - mutex_lock(&u->reply_mutex); - rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); - wake_up(&u->read_waitq); - mutex_unlock(&u->reply_mutex); - } + rc = xenbus_command_reply(u, msg_type, "OK"); out: return rc; diff --git a/fs/block_dev.c b/fs/block_dev.c index 6254cee8f8f3..5db5d1340d69 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -328,6 +328,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); struct block_device *bdev = I_BDEV(inode); + struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; bool is_read = (iov_iter_rw(iter) == READ); @@ -353,6 +354,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) dio->multi_bio = false; dio->should_dirty = is_read && (iter->type == ITER_IOVEC); + blk_start_plug(&plug); for (;;) { bio->bi_bdev = bdev; bio->bi_iter.bi_sector = pos >> 9; @@ -394,6 +396,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) submit_bio(bio); bio = bio_alloc(GFP_KERNEL, nr_pages); } + blk_finish_plug(&plug); if (!dio->is_sync) return -EIOCBQUEUED; diff --git a/fs/buffer.c b/fs/buffer.c index d21771fcf7d3..0e87401cf335 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1660,7 +1660,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) head = page_buffers(page); bh = head; do { - if (!buffer_mapped(bh)) + if (!buffer_mapped(bh) || (bh->b_blocknr < block)) goto next; if (bh->b_blocknr >= block + len) break; diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 6eeea1dcba41..95cd4c3b06c3 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -248,7 +248,8 @@ retry: goto out; if (fscrypt_dummy_context_enabled(inode)) { - memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); + memset(raw_key, 0x42, keysize/2); + memset(raw_key+keysize/2, 0x24, keysize - (keysize/2)); goto got_key; } diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 6ed7c2eebeec..d6cd7ea4851d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -179,6 +179,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!parent->i_sb->s_cop->is_encrypted(parent)) return 1; @@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); } +static int __dax_invalidate_mapping_entry(struct address_space *mapping, + pgoff_t index, bool trunc) +{ + int ret = 0; + void *entry; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = get_unlocked_mapping_entry(mapping, index, NULL); + if (!entry || !radix_tree_exceptional_entry(entry)) + goto out; + if (!trunc && + (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) + goto out; + radix_tree_delete(page_tree, index); + mapping->nrexceptional--; + ret = 1; +out: + put_unlocked_mapping_entry(mapping, index, entry); + spin_unlock_irq(&mapping->tree_lock); + return ret; +} /* * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree * entry to get unlocked before deleting it. */ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { - void *entry; + int ret = __dax_invalidate_mapping_entry(mapping, index, true); - spin_lock_irq(&mapping->tree_lock); - entry = get_unlocked_mapping_entry(mapping, index, NULL); /* * This gets called from truncate / punch_hole path. As such, the caller * must hold locks protecting against concurrent modifications of the @@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * caller has seen exceptional entry for this index, we better find it * at that index as well... */ - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) { - spin_unlock_irq(&mapping->tree_lock); - return 0; - } - radix_tree_delete(&mapping->page_tree, index); + WARN_ON_ONCE(!ret); + return ret; +} + +/* + * Invalidate exceptional DAX entry if easily possible. This handles DAX + * entries for invalidate_inode_pages() so we evict the entry only if we can + * do so without blocking. + */ +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) +{ + int ret = 0; + void *entry, **slot; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = __radix_tree_lookup(page_tree, index, NULL, &slot); + if (!entry || !radix_tree_exceptional_entry(entry) || + slot_locked(mapping, slot)) + goto out; + if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) + goto out; + radix_tree_delete(page_tree, index); mapping->nrexceptional--; + ret = 1; +out: spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, entry, true); + if (ret) + dax_wake_mapping_entry_waiter(mapping, index, entry, true); + return ret; +} - return 1; +/* + * Invalidate exceptional DAX entry if it is clean. + */ +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index) +{ + return __dax_invalidate_mapping_entry(mapping, index, false); } /* @@ -488,15 +539,16 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * otherwise it will simply fall out of the page cache under memory * pressure without ever having been dirtied. */ -static int dax_load_hole(struct address_space *mapping, void *entry, +static int dax_load_hole(struct address_space *mapping, void **entry, struct vm_fault *vmf) { struct page *page; + int ret; /* Hole page already exists? Return it... */ - if (!radix_tree_exceptional_entry(entry)) { - vmf->page = entry; - return VM_FAULT_LOCKED; + if (!radix_tree_exceptional_entry(*entry)) { + page = *entry; + goto out; } /* This will replace locked radix tree entry with a hole page */ @@ -504,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry, vmf->gfp_mask | __GFP_ZERO); if (!page) return VM_FAULT_OOM; + out: vmf->page = page; - return VM_FAULT_LOCKED; + ret = finish_fault(vmf); + vmf->page = NULL; + *entry = page; + if (!ret) { + /* Grab reference for PTE that is now referencing the page */ + get_page(page); + return VM_FAULT_NOPAGE; + } + return ret; } static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, @@ -934,6 +995,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) return -EIO; + /* + * Write can allocate block for an area which has a hole page mapped + * into page tables. We have to tear down these mappings so that data + * written by write(2) is visible in mmap. + */ + if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + invalidate_inode_pages2_range(inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); + } + while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); struct blk_dax_ctl dax = { 0 }; @@ -992,23 +1064,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) flags |= IOMAP_WRITE; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. We'll eventually need to shift this down even further so that - * we can check if we allocated blocks over a hole first. - */ - if (mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, - (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); @@ -1023,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(dax_iomap_rw); +static int dax_fault_return(int error) +{ + if (error == 0) + return VM_FAULT_NOPAGE; + if (error == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} + /** * dax_iomap_fault - handle a page fault on a DAX file * @vma: The virtual memory area where the fault occurred @@ -1055,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (pos >= i_size_read(inode)) return VM_FAULT_SIGBUS; - entry = grab_mapping_entry(mapping, vmf->pgoff, 0); - if (IS_ERR(entry)) { - error = PTR_ERR(entry); - goto out; - } - if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; @@ -1071,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); if (error) - goto unlock_entry; + return dax_fault_return(error); if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { - error = -EIO; /* fs corruption? */ + vmf_ret = dax_fault_return(-EIO); /* fs corruption? */ + goto finish_iomap; + } + + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); + if (IS_ERR(entry)) { + vmf_ret = dax_fault_return(PTR_ERR(entry)); goto finish_iomap; } @@ -1096,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } if (error) - goto finish_iomap; + goto error_unlock_entry; __SetPageUptodate(vmf->cow_page); vmf_ret = finish_fault(vmf); if (!vmf_ret) vmf_ret = VM_FAULT_DONE_COW; - goto finish_iomap; + goto unlock_entry; } switch (iomap.type) { @@ -1114,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } error = dax_insert_mapping(mapping, iomap.bdev, sector, PAGE_SIZE, &entry, vma, vmf); + /* -EBUSY is fine, somebody else faulted on the same PTE */ + if (error == -EBUSY) + error = 0; break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { - vmf_ret = dax_load_hole(mapping, entry, vmf); - break; + vmf_ret = dax_load_hole(mapping, &entry, vmf); + goto unlock_entry; } /*FALLTHRU*/ default: @@ -1128,31 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, break; } + error_unlock_entry: + vmf_ret = dax_fault_return(error) | major; + unlock_entry: + put_locked_mapping_entry(mapping, vmf->pgoff, entry); finish_iomap: if (ops->iomap_end) { - if (error || (vmf_ret & VM_FAULT_ERROR)) { - /* keep previous error */ - ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PAGE_SIZE, - PAGE_SIZE, flags, &iomap); - } - } - unlock_entry: - if (vmf_ret != VM_FAULT_LOCKED || error) - put_locked_mapping_entry(mapping, vmf->pgoff, entry); - out: - if (error == -ENOMEM) - return VM_FAULT_OOM | major; - /* -EBUSY is fine, somebody else faulted on the same PTE */ - if (error < 0 && error != -EBUSY) - return VM_FAULT_SIGBUS | major; - if (vmf_ret) { - WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */ - return vmf_ret; + int copied = PAGE_SIZE; + + if (vmf_ret & VM_FAULT_ERROR) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PTE we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } - return VM_FAULT_NOPAGE | major; + return vmf_ret; } EXPORT_SYMBOL_GPL(dax_iomap_fault); @@ -1277,16 +1338,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, goto fallback; /* - * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX - * PMD or a HZP entry. If it can't (because a 4k page is already in - * the tree, for instance), it will return -EEXIST and we just fall - * back to 4k entries. - */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); - if (IS_ERR(entry)) - goto fallback; - - /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way * to look up our filesystem block. @@ -1294,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pos = (loff_t)pgoff << PAGE_SHIFT; error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); if (error) - goto unlock_entry; + goto fallback; + if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. + */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto finish_iomap; + vmf.pgoff = pgoff; vmf.flags = flags; vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; @@ -1310,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) - goto finish_iomap; + goto unlock_entry; result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, &entry); break; @@ -1319,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, break; } + unlock_entry: + put_locked_mapping_entry(mapping, pgoff, entry); finish_iomap: if (ops->iomap_end) { - if (result == VM_FAULT_FALLBACK) { - ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags, - &iomap); - } else { - error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE, - iomap_flags, &iomap); - if (error) - result = VM_FAULT_FALLBACK; - } + int copied = PMD_SIZE; + + if (result == VM_FAULT_FALLBACK) + copied = 0; + /* + * The fault is done by now and there's no way back (other + * thread may be already happily using PMD we have installed). + * Just ignore error from ->iomap_end since we cannot do much + * with it. + */ + ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, + &iomap); } - unlock_entry: - put_locked_mapping_entry(mapping, pgoff, entry); fallback: if (result == VM_FAULT_FALLBACK) { split_huge_pmd(vma, pmd, address); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0093ea2512a8..f073bfca694b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode, mutex_unlock(&ei->truncate_mutex); goto cleanup; } - } else { - *new = true; } + *new = true; ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b5f184493c57..d663d3d7c81c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -258,7 +258,6 @@ out: static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - EXT4_DATA_TRANS_BLOCKS(sb)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else - result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + } + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { int result; - handle_t *handle = NULL; struct inode *inode = file_inode(vma->vm_file); struct super_block *sb = inode->i_sb; bool write = flags & FAULT_FLAG_WRITE; @@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); - down_read(&EXT4_I(inode)->i_mmap_sem); - handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, - ext4_chunk_trans_blocks(inode, - PMD_SIZE / PAGE_SIZE)); - } else - down_read(&EXT4_I(inode)->i_mmap_sem); - - if (IS_ERR(handle)) - result = VM_FAULT_SIGBUS; - else { - result = dax_iomap_pmd_fault(vma, addr, pmd, flags, - &ext4_iomap_ops); } - - if (write) { - if (!IS_ERR(handle)) - ext4_journal_stop(handle); - up_read(&EXT4_I(inode)->i_mmap_sem); + down_read(&EXT4_I(inode)->i_mmap_sem); + result = dax_iomap_pmd_fault(vma, addr, pmd, flags, + &ext4_iomap_ops); + up_read(&EXT4_I(inode)->i_mmap_sem); + if (write) sb_end_pagefault(sb); - } else - up_read(&EXT4_I(inode)->i_mmap_sem); return result; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d3fea0bd89e2..6043306e8e21 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -510,18 +510,6 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) } } -void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) -{ - assert_spin_locked(&old->lock); - new->inode = old->inode; - new->mnt = old->mnt; - if (old->group) - fsnotify_get_group(old->group); - new->group = old->group; - new->mask = old->mask; - new->free_mark = old->free_mark; -} - /* * Nothing fancy, just initialize lists and locks and counters. */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e5ebc3770460..d346d42c54d1 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -256,6 +256,9 @@ xfs_ag_resv_init( goto out; } + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + pag->pagf_freeblks + pag->pagf_flcount); out: return error; } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 6fb2215f8ff7..50add5272807 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -409,13 +409,14 @@ xfs_refcountbt_calc_size( */ xfs_extlen_t xfs_refcountbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_refc_mxr[0] == 0) return 0; - return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_refcountbt_calc_size(mp, agblocks); } /* @@ -430,22 +431,24 @@ xfs_refcountbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - *ask += xfs_refcountbt_max_size(mp); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_buf_relse(agbp); + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 3be7768bd51a..9db008b955b7 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index de25771764ba..74e5a54bc428 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -550,13 +550,14 @@ xfs_rmapbt_calc_size( */ xfs_extlen_t xfs_rmapbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_rmap_mxr[0] == 0) return 0; - return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_rmapbt_calc_size(mp, agblocks); } /* @@ -571,25 +572,24 @@ xfs_rmapbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; - xfs_extlen_t pool_len; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - /* Reserve 1% of the AG or enough for 1 block per record. */ - pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); - *ask += pool_len; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_buf_relse(agbp); + /* Reserve 1% of the AG or enough for 1 block per record. */ + *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 2a9ac472fb15..19c08e933049 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 93d12fa2670d..242e8091296d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -631,6 +631,20 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + /* + * If we expanded the last AG, free the per-AG reservation + * so we can reinitialize it with the new size. + */ + if (new) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + error = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (error) + goto out; + } + /* Reserve AG metadata blocks. */ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index ff4d6311c7f4..70ca4f608321 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1597,7 +1597,8 @@ xfs_inode_free_cowblocks( * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. */ - if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fe86a668a57e..6e4c7446c3d4 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -526,13 +526,14 @@ xfs_cui_recover( xfs_refcount_finish_one_cleanup(tp, rcur, error); error = xfs_defer_finish(&tp, &dfops, NULL); if (error) - goto abort_error; + goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); +abort_defer: xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 276d3023d60f..de6195e38910 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -396,7 +396,7 @@ max_retries_show( int retries; struct xfs_error_cfg *cfg = to_error_cfg(kobject); - if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER) + if (cfg->max_retries == XFS_ERR_RETRY_FOREVER) retries = -1; else retries = cfg->max_retries; @@ -422,7 +422,7 @@ max_retries_store( return -EINVAL; if (val == -1) - cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; + cfg->max_retries = XFS_ERR_RETRY_FOREVER; else cfg->max_retries = val; return count; diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h index df13637e4017..939869c772b1 100644 --- a/include/asm-generic/asm-prototypes.h +++ b/include/asm-generic/asm-prototypes.h @@ -1,7 +1,13 @@ #include <linux/bitops.h> +#undef __memset extern void *__memset(void *, int, __kernel_size_t); +#undef __memcpy extern void *__memcpy(void *, const void *, __kernel_size_t); +#undef __memmove extern void *__memmove(void *, const void *, __kernel_size_t); +#undef memset extern void *memset(void *, int, __kernel_size_t); +#undef memcpy extern void *memcpy(void *, const void *, __kernel_size_t); +#undef memmove extern void *memmove(void *, const void *, __kernel_size_t); diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index 20d20f681a72..445fc45f4b5b 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -5,6 +5,7 @@ #ifndef _CRYPTO_CHACHA20_H #define _CRYPTO_CHACHA20_H +#include <crypto/skcipher.h> #include <linux/types.h> #include <linux/crypto.h> @@ -18,9 +19,8 @@ struct chacha20_ctx { void chacha20_block(u32 *state, void *stream); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); -int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); -int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); +int crypto_chacha20_crypt(struct skcipher_request *req); #endif diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 8735979ed341..e42f7063f245 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -66,7 +66,7 @@ struct skcipher_walk { int flags; unsigned int blocksize; - unsigned int chunksize; + unsigned int stride; unsigned int alignmask; }; diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 750b14f1ada4..562001cb412b 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -115,6 +115,9 @@ struct crypto_skcipher { * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. + * @walksize: Equal to the chunk size except in cases where the algorithm is + * considerably more efficient if it can operate on multiple chunks + * in parallel. Should be a multiple of chunksize. * @base: Definition of a generic crypto algorithm. * * All fields except @ivsize are mandatory and must be filled. @@ -131,6 +134,7 @@ struct skcipher_alg { unsigned int max_keysize; unsigned int ivsize; unsigned int chunksize; + unsigned int walksize; struct crypto_alg base; }; @@ -289,6 +293,19 @@ static inline unsigned int crypto_skcipher_alg_chunksize( return alg->chunksize; } +static inline unsigned int crypto_skcipher_alg_walksize( + struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blocksize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_blocksize; + + return alg->walksize; +} + /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle @@ -307,6 +324,23 @@ static inline unsigned int crypto_skcipher_chunksize( } /** + * crypto_skcipher_walksize() - obtain walk size + * @tfm: cipher handle + * + * In some cases, algorithms can only perform optimally when operating on + * multiple blocks in parallel. This is reflected by the walksize, which + * must be a multiple of the chunksize (or equal if the concern does not + * apply) + * + * Return: walk size in bytes + */ +static inline unsigned int crypto_skcipher_walksize( + struct crypto_skcipher *tfm) +{ + return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm)); +} + +/** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * diff --git a/include/dt-bindings/mfd/tps65217.h b/include/dt-bindings/mfd/tps65217.h deleted file mode 100644 index cafb9e60cf12..000000000000 --- a/include/dt-bindings/mfd/tps65217.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This header provides macros for TI TPS65217 DT bindings. - * - * Copyright (C) 2016 Texas Instruments - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __DT_BINDINGS_TPS65217_H__ -#define __DT_BINDINGS_TPS65217_H__ - -#define TPS65217_IRQ_USB 0 -#define TPS65217_IRQ_AC 1 -#define TPS65217_IRQ_PB 2 - -#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..fddd1a5eb322 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -116,6 +116,7 @@ */ #define __pure __attribute__((pure)) #define __aligned(x) __attribute__((aligned(x))) +#define __aligned_largest __attribute__((aligned)) #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) #define __attribute_const__ __attribute__((__const__)) diff --git a/include/linux/dax.h b/include/linux/dax.h index f97bcfe79472..24ad71173995 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); diff --git a/include/linux/filter.h b/include/linux/filter.h index 702314253797..a0934e6c9bab 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_buffer(void); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0cf34d6cc253..487246546ebe 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -323,8 +323,6 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(str extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); /* find (and take a reference) to a mark associated with group and vfsmount */ extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); -/* copy the values from old into new */ -extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old); /* set the ignored_mask of a mark */ extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask); /* set the mask of a mark (might pin the object into memory */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e0341af6950e..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -146,15 +146,6 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ }; -#define BLK_SCSI_MAX_CMDS (256) -#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) - -struct blk_scsi_cmd_filter { - unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; - unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; - struct kobject kobj; -}; - struct disk_part_tbl { struct rcu_head rcu_head; int len; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 228bd44efa4c..497f2b3a5a62 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -116,6 +116,16 @@ struct st_sensor_bdu { }; /** + * struct st_sensor_das - ST sensor device data alignment selection + * @addr: address of the register. + * @mask: mask to write the das flag for left alignment. + */ +struct st_sensor_das { + u8 addr; + u8 mask; +}; + +/** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt * @addr: address of the register. * @mask_int1: mask to enable/disable IRQ on INT1 pin. @@ -185,6 +195,7 @@ struct st_sensor_transfer_function { * @enable_axis: Enable one or more axis of the sensor. * @fs: Full scale register and full scale list available. * @bdu: Block data update register. + * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. @@ -200,6 +211,7 @@ struct st_sensor_settings { struct st_sensor_axis enable_axis; struct st_sensor_fullscale fs; struct st_sensor_bdu bdu; + struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; bool multi_read_bit; unsigned int bootime; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index ec819e9a115a..b6e048e1045f 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -13,34 +13,10 @@ #ifndef MDEV_H #define MDEV_H -/* Parent device */ -struct parent_device { - struct device *dev; - const struct parent_ops *ops; - - /* internal */ - struct kref ref; - struct mutex lock; - struct list_head next; - struct kset *mdev_types_kset; - struct list_head type_list; -}; - -/* Mediated device */ -struct mdev_device { - struct device dev; - struct parent_device *parent; - uuid_le uuid; - void *driver_data; - - /* internal */ - struct kref ref; - struct list_head next; - struct kobject *type_kobj; -}; +struct mdev_device; /** - * struct parent_ops - Structure to be registered for each parent device to + * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. * * @owner: The module owner. @@ -86,10 +62,9 @@ struct mdev_device { * @mdev: mediated device structure * @vma: vma structure * Parent device that support mediated device should be registered with mdev - * module with parent_ops structure. + * module with mdev_parent_ops structure. **/ - -struct parent_ops { +struct mdev_parent_ops { struct module *owner; const struct attribute_group **dev_attr_groups; const struct attribute_group **mdev_attr_groups; @@ -103,7 +78,7 @@ struct parent_ops { size_t count, loff_t *ppos); ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, size_t count, loff_t *ppos); - ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd, + long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); }; @@ -142,27 +117,22 @@ struct mdev_driver { }; #define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) -#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) - -static inline void *mdev_get_drvdata(struct mdev_device *mdev) -{ - return mdev->driver_data; -} -static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data) -{ - mdev->driver_data = data; -} +extern void *mdev_get_drvdata(struct mdev_device *mdev); +extern void mdev_set_drvdata(struct mdev_device *mdev, void *data); +extern uuid_le mdev_uuid(struct mdev_device *mdev); extern struct bus_type mdev_bus_type; -#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) - extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); extern void mdev_unregister_device(struct device *dev); extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); extern void mdev_unregister_driver(struct mdev_driver *drv); +extern struct device *mdev_parent_dev(struct mdev_device *mdev); +extern struct device *mdev_dev(struct mdev_device *mdev); +extern struct mdev_device *mdev_from_dev(struct device *dev); + #endif /* MDEV_H */ diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index ed30d5d713e3..5d81f739aa0a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -31,6 +31,7 @@ #define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 +#define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 #define VFIO_MINOR 196 diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 93bdb3485192..6533c16e27ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9f489365b3d3..52b437431c6a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1071,11 +1071,6 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -enum { - MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, - MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, -}; - static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ae55361e674..735b36335f29 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -123,7 +123,6 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, - MLX5_REG_MPCNT = 0x9051, }; enum mlx5_dcbx_oper_mode { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57bec544e20a..a852e9db6f0d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; -struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 rx_errors[0x20]; - - u8 tx_errors[0x20]; - - u8 l0_to_recovery_eieos[0x20]; - - u8 l0_to_recovery_ts[0x20]; - - u8 l0_to_recovery_framing[0x20]; - - u8 l0_to_recovery_retrain[0x20]; - - u8 crc_error_dllp[0x20]; - - u8 crc_error_tlp[0x20]; - - u8 reserved_at_140[0x680]; -}; - -struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 time_to_boot_image_start[0x20]; - - u8 time_to_link_image[0x20]; - - u8 calibration_time[0x20]; - - u8 time_to_first_perst[0x20]; - - u8 time_to_detect_state[0x20]; - - u8 time_to_l0[0x20]; - - u8 time_to_crs_en[0x20]; - - u8 time_to_plastic_image_start[0x20]; - - u8 time_to_iron_image_start[0x20]; - - u8 perst_handler[0x20]; - - u8 times_in_l1[0x20]; - - u8 times_in_l23[0x20]; - - u8 dl_down[0x20]; - - u8 config_cycle1usec[0x20]; - - u8 config_cycle2to7usec[0x20]; - - u8 config_cycle_8to15usec[0x20]; - - u8 config_cycle_16_to_63usec[0x20]; - - u8 config_cycle_64usec[0x20]; - - u8 correctable_err_msg_sent[0x20]; - - u8 non_fatal_err_msg_sent[0x20]; - - u8 fatal_err_msg_sent[0x20]; - - u8 reserved_at_2e0[0x4e0]; -}; - struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; -union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { - struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; - struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; - u8 reserved_at_0[0x7c0]; -}; - union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; -struct mlx5_ifc_mpcnt_reg_bits { - u8 reserved_at_0[0x8]; - u8 pcie_index[0x8]; - u8 reserved_at_10[0xa]; - u8 grp[0x6]; - - u8 clr[0x1]; - u8 reserved_at_21[0x1f]; - - union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; -}; - struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; - struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c56b39890a41..6b5818d6de32 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -73,13 +73,13 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ - PG_waiters, /* Page has waiters, check its waitqueue */ PG_error, PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_active, + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5dea8f6440e4..52bda854593b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -306,7 +306,9 @@ void radix_tree_iter_replace(struct radix_tree_root *, void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item); void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node); + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 183f37c8a5e1..4ee479f2f355 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -9,7 +9,13 @@ struct device; struct page; struct scatterlist; -extern int swiotlb_force; +enum swiotlb_force { + SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ + SWIOTLB_FORCE, /* swiotlb=force */ + SWIOTLB_NO_FORCE, /* swiotlb=noforce */ +}; + +extern enum swiotlb_force swiotlb_force; /* * Maximum allowable number of contiguous slabs to map, @@ -108,11 +114,14 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB extern void __init swiotlb_free(void); +unsigned int swiotlb_max_segment(void); #else static inline void swiotlb_free(void) { } +static inline unsigned int swiotlb_max_segment(void) { return 0; } #endif extern void swiotlb_print_info(void); extern int is_swiotlb_buffer(phys_addr_t paddr); +extern void swiotlb_set_max_segment(unsigned int); #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1b777e..0378e88f6fd3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b4c6b2..6061963cca98 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 7ea4c5e7c448..288c0c54a2b4 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -11,16 +11,16 @@ TRACE_EVENT(swiotlb_bounced, TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, - int swiotlb_force), + enum swiotlb_force swiotlb_force), TP_ARGS(dev, dev_addr, size, swiotlb_force), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( int, swiotlb_force ) + __string( dev_name, dev_name(dev) ) + __field( u64, dma_mask ) + __field( dma_addr_t, dev_addr ) + __field( size_t, size ) + __field( enum swiotlb_force, swiotlb_force ) ), TP_fast_assign( @@ -37,7 +37,10 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __entry->swiotlb_force ? "swiotlb_force" : "" ) + __print_symbolic(__entry->swiotlb_force, + { SWIOTLB_NORMAL, "NORMAL" }, + { SWIOTLB_FORCE, "FORCE" }, + { SWIOTLB_NO_FORCE, "NO_FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index acc63697a0cc..b2a31a55a612 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -93,6 +93,7 @@ struct usb_ext_prop_desc { * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC_V2 | * | 4 | length | LE32 | length of the whole data chunk | * | 8 | flags | LE32 | combination of functionfs_flags | + * | | eventfd | LE32 | eventfd file descriptor | * | | fs_count | LE32 | number of full-speed descriptors | * | | hs_count | LE32 | number of high-speed descriptors | * | | ss_count | LE32 | number of super-speed descriptors | diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 8b1dde96a0fa..7b44195da81b 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -231,9 +231,11 @@ static void untag_chunk(struct node *p) if (size) new = alloc_chunk(size); + mutex_lock(&entry->group->mark_mutex); spin_lock(&entry->lock); if (chunk->dead || !entry->inode) { spin_unlock(&entry->lock); + mutex_unlock(&entry->group->mark_mutex); if (new) free_chunk(new); goto out; @@ -251,6 +253,7 @@ static void untag_chunk(struct node *p) list_del_rcu(&chunk->hash); spin_unlock(&hash_lock); spin_unlock(&entry->lock); + mutex_unlock(&entry->group->mark_mutex); fsnotify_destroy_mark(entry, audit_tree_group); goto out; } @@ -258,8 +261,8 @@ static void untag_chunk(struct node *p) if (!new) goto Fallback; - fsnotify_duplicate_mark(&new->mark, entry); - if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.inode, NULL, 1)) { + if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode, + NULL, 1)) { fsnotify_put_mark(&new->mark); goto Fallback; } @@ -293,6 +296,7 @@ static void untag_chunk(struct node *p) owner->root = new; spin_unlock(&hash_lock); spin_unlock(&entry->lock); + mutex_unlock(&entry->group->mark_mutex); fsnotify_destroy_mark(entry, audit_tree_group); fsnotify_put_mark(&new->mark); /* drop initial reference */ goto out; @@ -309,6 +313,7 @@ Fallback: put_tree(owner); spin_unlock(&hash_lock); spin_unlock(&entry->lock); + mutex_unlock(&entry->group->mark_mutex); out: fsnotify_put_mark(entry); spin_lock(&hash_lock); @@ -386,18 +391,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk_entry = &chunk->mark; + mutex_lock(&old_entry->group->mark_mutex); spin_lock(&old_entry->lock); if (!old_entry->inode) { /* old_entry is being shot, lets just lie */ spin_unlock(&old_entry->lock); + mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(old_entry); free_chunk(chunk); return -ENOENT; } - fsnotify_duplicate_mark(chunk_entry, old_entry); - if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->inode, NULL, 1)) { + if (fsnotify_add_mark_locked(chunk_entry, old_entry->group, + old_entry->inode, NULL, 1)) { spin_unlock(&old_entry->lock); + mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); return -ENOSPC; @@ -413,6 +421,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk->dead = 1; spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); + mutex_unlock(&old_entry->group->mark_mutex); fsnotify_destroy_mark(chunk_entry, audit_tree_group); @@ -445,6 +454,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); + mutex_unlock(&old_entry->group->mark_mutex); fsnotify_destroy_mark(old_entry, audit_tree_group); fsnotify_put_mark(chunk_entry); /* drop initial reference */ fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 042fd7e8e030..f75c4d031eeb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1471,6 +1471,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, bool multi_instance) { int cpu, ret = 0; + bool dynstate; if (cpuhp_cb_check(state) || !name) return -EINVAL; @@ -1480,6 +1481,12 @@ int __cpuhp_setup_state(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); + dynstate = state == CPUHP_AP_ONLINE_DYN; + if (ret > 0 && dynstate) { + state = ret; + ret = 0; + } + if (ret || !invoke || !startup) goto out; @@ -1508,7 +1515,7 @@ out: * If the requested state is CPUHP_AP_ONLINE_DYN, return the * dynamically allocated state in case of success. */ - if (!ret && state == CPUHP_AP_ONLINE_DYN) + if (!ret && dynstate) return state; return ret; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6f382e07de77..0b92d605fb69 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -640,6 +640,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, update_node(node, private); } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); } } @@ -666,6 +667,7 @@ static void delete_node(struct radix_tree_root *root, root->rnode = NULL; } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); node = parent; @@ -767,6 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; @@ -1824,15 +1827,19 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); * __radix_tree_delete_node - try to free node after clearing a slot * @root: radix tree root * @node: node containing @index + * @update_node: callback for changing leaf nodes + * @private: private data to pass to @update_node * * After clearing the slot at @index in @node from radix tree * rooted at @root, call this function to attempt freeing the * node and shrinking the tree. */ void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node) + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private) { - delete_node(root, node, NULL, NULL); + delete_node(root, node, update_node, private); } /** diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cb1b54ee8527..975b8fc4f1e1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -53,7 +53,7 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -int swiotlb_force; +enum swiotlb_force swiotlb_force; /* * Used to do a quick range check in swiotlb_tbl_unmap_single and @@ -83,6 +83,12 @@ static unsigned int *io_tlb_list; static unsigned int io_tlb_index; /* + * Max segment that we can provide which (if pages are contingous) will + * not be bounced (unless SWIOTLB_FORCE is set). + */ +unsigned int max_segment; + +/* * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ @@ -106,8 +112,12 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) - swiotlb_force = 1; + if (!strcmp(str, "force")) { + swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } return 0; } @@ -120,6 +130,20 @@ unsigned long swiotlb_nr_tbl(void) } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); +unsigned int swiotlb_max_segment(void) +{ + return max_segment; +} +EXPORT_SYMBOL_GPL(swiotlb_max_segment); + +void swiotlb_set_max_segment(unsigned int val) +{ + if (swiotlb_force == SWIOTLB_FORCE) + max_segment = 1; + else + max_segment = rounddown(val, PAGE_SIZE); +} + /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) unsigned long swiotlb_size_or_default(void) @@ -201,6 +225,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) if (verbose) swiotlb_print_info(); + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); return 0; } @@ -279,6 +304,7 @@ swiotlb_late_init_with_default_size(size_t default_size) rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); if (rc) free_pages((unsigned long)vstart, order); + return rc; } @@ -333,6 +359,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) late_alloc = 1; + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); + return 0; cleanup4: @@ -347,6 +375,7 @@ cleanup2: io_tlb_end = 0; io_tlb_start = 0; io_tlb_nslabs = 0; + max_segment = 0; return -ENOMEM; } @@ -375,6 +404,7 @@ void __init swiotlb_free(void) PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; + max_segment = 0; } int is_swiotlb_buffer(phys_addr_t paddr) @@ -543,8 +573,15 @@ static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); + dma_addr_t start_dma_addr; + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + + start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -721,6 +758,9 @@ static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) { + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. @@ -763,7 +803,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (dma_capable(dev, dev_addr, size) && !swiotlb_force) + if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) return dev_addr; trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); @@ -904,7 +944,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || !dma_capable(hwdev, dev_addr, sg->length)) { phys_addr_t map = map_single(hwdev, sg_phys(sg), sg->length, dir, attrs); diff --git a/mm/filemap.c b/mm/filemap.c index 82f26cde830c..d0e4d1002059 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_lock. + * + * On x86 (and on many other architectures), we can clear PG_lock and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unneccssary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters, mem); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); diff --git a/mm/memory.c b/mm/memory.c index 7d23b5050248..9f2c15cdb32c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) ret = 0; count_vm_event(THP_FILE_MAPPED); out: - /* - * If we are going to fallback to pte mapping, do a - * withdraw with pmd lock held. - */ - if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) - vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, - vmf->pmd); spin_unlock(vmf->ptl); return ret; } @@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) - goto fault_handled; + return ret; } if (!vmf->pte) { ret = pte_alloc_one_map(vmf); if (ret) - goto fault_handled; + return ret; } /* Re-check under ptl */ - if (unlikely(!pte_none(*vmf->pte))) { - ret = VM_FAULT_NOPAGE; - goto fault_handled; - } + if (unlikely(!pte_none(*vmf->pte))) + return VM_FAULT_NOPAGE; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); @@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, vmf->address, vmf->pte); - ret = 0; -fault_handled: - /* preallocated pagetable is unused: free it */ - if (vmf->prealloc_pte) { - pte_free(vmf->vma->vm_mm, vmf->prealloc_pte); - vmf->prealloc_pte = 0; - } - return ret; + return 0; } @@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf) static int do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; + int ret; /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) - return VM_FAULT_SIGBUS; - if (!(vmf->flags & FAULT_FLAG_WRITE)) - return do_read_fault(vmf); - if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(vmf); - return do_shared_fault(vmf); + ret = VM_FAULT_SIGBUS; + else if (!(vmf->flags & FAULT_FLAG_WRITE)) + ret = do_read_fault(vmf); + else if (!(vma->vm_flags & VM_SHARED)) + ret = do_cow_fault(vmf); + else + ret = do_shared_fault(vmf); + + /* preallocated pagetable is unused: free it */ + if (vmf->prealloc_pte) { + pte_free(vma->vm_mm, vmf->prealloc_pte); + vmf->prealloc_pte = 0; + } + return ret; } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, diff --git a/mm/truncate.c b/mm/truncate.c index fd97f1dbce29..dd7b24e083c5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -24,20 +24,12 @@ #include <linux/rmap.h> #include "internal.h" -static void clear_exceptional_entry(struct address_space *mapping, - pgoff_t index, void *entry) +static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, + void *entry) { struct radix_tree_node *node; void **slot; - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) - return; - - if (dax_mapping(mapping)) { - dax_delete_mapping_entry(mapping, index); - return; - } spin_lock_irq(&mapping->tree_lock); /* * Regular page slots are stabilized by the page lock even @@ -55,6 +47,56 @@ unlock: spin_unlock_irq(&mapping->tree_lock); } +/* + * Unconditionally remove exceptional entry. Usually called from truncate path. + */ +static void truncate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + if (dax_mapping(mapping)) { + dax_delete_mapping_entry(mapping, index); + return; + } + clear_shadow_entry(mapping, index, entry); +} + +/* + * Invalidate exceptional entry if easily possible. This handles exceptional + * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and + * clean entries. + */ +static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + +/* + * Invalidate exceptional entry if clean. This handles exceptional entries for + * invalidate_inode_pages2() so for DAX it evicts only clean entries. + */ +static int invalidate_exceptional_entry2(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry_sync(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + /** * do_invalidatepage - invalidate part or all of a page * @page: the page which is affected @@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, } if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + invalidate_exceptional_entry(mapping, index, + page); continue; } @@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + if (!invalidate_exceptional_entry2(mapping, + index, page)) + ret = -EBUSY; continue; } diff --git a/mm/workingset.c b/mm/workingset.c index 241fa5d6b3b2..abb58ffa3c64 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -473,7 +473,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); - __radix_tree_delete_node(&mapping->page_tree, node); + __radix_tree_delete_node(&mapping->page_tree, node, + workingset_update_node, mapping); out_invalid: spin_unlock(&mapping->tree_lock); diff --git a/net/atm/lec.c b/net/atm/lec.c index 019557d0a11d..09cfe87f0a44 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1059,7 +1059,9 @@ static void __exit lane_module_cleanup(void) { int i; +#ifdef CONFIG_PROC_FS remove_proc_entry("lec", atm_proc_root); +#endif deregister_atm_ioctl(&lane_ioctl_ops); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 8e0c0635ee97..fb55327dcfea 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -75,6 +75,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -82,21 +83,41 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); + if (!skb) + goto err; + + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; } + msg = nla_data(nla); + memset(msg, 0, al); + goto out; +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } diff --git a/net/core/filter.c b/net/core/filter.c index e6c412b94dec..1969b3f118c1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -void bpf_warn_invalid_xdp_buffer(void) -{ - WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); -} -EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); - static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6447dc10371..fe4e1531976c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -468,8 +468,9 @@ ip_proto_again: if (hdr->flags & GRE_ACK) offset += sizeof(((struct pptp_gre_header *)0)->ack); - ppp_hdr = skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), _ppp_hdr); + ppp_hdr = __skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), + data, hlen, _ppp_hdr); if (!ppp_hdr) goto out_bad; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 18b5aae99bec..75e3ea7bda08 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3898,6 +3898,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) u32 filter_mask; int err; + if (nlmsg_len(nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); @@ -3947,6 +3950,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; + if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(cb->nlh); filter_mask = ifsm->filter_mask; if (!filter_mask) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3ff8938893ec..eae0332b0e8c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 68d622133f53..5b15459955f8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 57e1405e8282..53ae0c6315ad 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1225,8 +1225,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). */ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a82a11747b3f..0fcac8e7a2b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1914,7 +1914,8 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..22cbd61079b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -433,13 +433,6 @@ static struct ctl_table ipv4_table[] = { .extra2 = &tcp_adv_win_scale_max, }, { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f533ada..fe9da4fb96bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 70d0de404197..38122d04fadc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1373,7 +1373,7 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 8938b6ba57a0..3d73278b86ca 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -47,7 +47,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) return (struct l2tp_ip_sock *)sk; } -static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, + __be32 raddr, int dif, u32 tunnel_id) { struct sock *sk; @@ -61,6 +62,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -71,15 +73,6 @@ found: return sk; } -static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -183,8 +176,8 @@ pass_up: struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb), - tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, + inet_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip_lock); goto discard; @@ -280,7 +273,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ write_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0, sk->sk_bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip_lock); ret = -EADDRINUSE; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f092ac441fdd..331ccf5a7bad 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk) static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, + const struct in6_addr *raddr, int dif, u32 tunnel_id) { struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - const struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -72,7 +74,8 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && - (!addr || ipv6_addr_equal(addr, laddr)) && + (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && + (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -83,17 +86,6 @@ found: return sk; } -static inline struct sock *l2tp_ip6_bind_lookup(struct net *net, - struct in6_addr *laddr, - int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -197,8 +189,8 @@ pass_up: struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb), - tunnel_id); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip6_lock); goto discard; @@ -330,7 +322,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); write_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if, + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip6_lock); err = -EADDRINUSE; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2c21b7039136..0d8b716e509e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3287,7 +3287,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -3351,6 +3351,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2d4c4d3911c0..9c62b6325f7a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; - packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 08aa926cd5cf..2c0a00f7f1b7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb) * Returns 0 if it encounters a non-vlan or incomplete packet. * Returns 1 after successfully parsing vlan tag. */ -static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, + bool untag_vlan) { struct vlan_head *vh = (struct vlan_head *)skb->data; @@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); key_vh->tpid = vh->tpid; - __skb_pull(skb, sizeof(struct vlan_head)); + if (unlikely(untag_vlan)) { + int offset = skb->data - skb_mac_header(skb); + u16 tci; + int err; + + __skb_push(skb, offset); + err = __skb_vlan_pop(skb, &tci); + __skb_pull(skb, offset); + if (err) + return err; + __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci); + } else { + __skb_pull(skb, sizeof(struct vlan_head)); + } return 1; } @@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) key->eth.vlan.tpid = skb->vlan_proto; } else { /* Parse outer vlan tag in the non-accelerated case. */ - res = parse_vlan_tag(skb, &key->eth.vlan); + res = parse_vlan_tag(skb, &key->eth.vlan, true); if (res <= 0) return res; } /* Parse inner vlan tag. */ - res = parse_vlan_tag(skb, &key->eth.cvlan); + res = parse_vlan_tag(skb, &key->eth.cvlan, false); if (res <= 0) return res; @@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { - /* key_extract assumes that skb->protocol is set-up for - * layer 3 packets which is the case for other callers, - * in particular packets recieved from the network stack. - * Here the correct value can be set from the metadata - * extracted above. - */ - skb->protocol = key->eth.type; - } else { - struct ethhdr *eth; - - skb_reset_mac_header(skb); - eth = eth_hdr(skb); - - /* Normally, setting the skb 'protocol' field would be - * handled by a call to eth_type_trans(), but it assumes - * there's a sending device, which we may not have. - */ - if (eth_proto_is_802_3(eth->h_proto)) - skb->protocol = eth->h_proto; - else - skb->protocol = htons(ETH_P_802_2); - } + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets received from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + * For L2 packet key eth type would be zero. skb protocol + * would be set to correct value later during key-extact. + */ + skb->protocol = key->eth.type; return key_extract(skb, key); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3fbba79a4ef0..1ecdf809b5fa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 333f8e268431..970db7a41684 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -153,10 +153,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, switch (ip_tunnel_info_af(info)) { case AF_INET: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV4_ADDRS; skb_key.enc_ipv4.src = key->u.ipv4.src; skb_key.enc_ipv4.dst = key->u.ipv4.dst; break; case AF_INET6: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV6_ADDRS; skb_key.enc_ipv6.src = key->u.ipv6.src; skb_key.enc_ipv6.dst = key->u.ipv6.dst; break; diff --git a/net/socket.c b/net/socket.c index 8487bf136e5c..a8c2307590b8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -537,7 +537,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); - if (!err) { + if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); sock->sk->sk_uid = iattr->ia_uid; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 333c5dae0072..800caaa699a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error) while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (TIPC_SKB_CB(skb)->bytes_read) { kfree_skb(skb); - } else { - if (!tipc_sk_type_connectionless(sk) && - sk->sk_state != TIPC_DISCONNECTING) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, error); + continue; + } + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); } + tipc_sk_respond(sk, skb, error); } + + if (tipc_sk_type_connectionless(sk)) + return; + if (sk->sk_state != TIPC_DISCONNECTING) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, @@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error) tsk->portid, error); if (skb) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - if (!tipc_sk_type_connectionless(sk)) { - tipc_node_remove_conn(net, dnode, tsk->portid); - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - } + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); } } diff --git a/samples/Kconfig b/samples/Kconfig index a6d2a43bbf2e..b124f62ed6cb 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -105,4 +105,11 @@ config SAMPLE_BLACKFIN_GPTIMERS help Build samples of blackfin gptimers sample module. +config SAMPLE_VFIO_MDEV_MTTY + tristate "Build VFIO mtty example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual tty sample driver for use as a VFIO + mediated device + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index e17d66d77f09..86a137e451d9 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ - configfs/ connector/ v4l/ trace_printk/ blackfin/ + configfs/ connector/ v4l/ trace_printk/ blackfin/ \ + vfio-mdev/ diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile index a932edbe38eb..cbbd868a50a8 100644 --- a/samples/vfio-mdev/Makefile +++ b/samples/vfio-mdev/Makefile @@ -1,13 +1 @@ -# -# Makefile for mtty.c file -# -KERNEL_DIR:=/lib/modules/$(shell uname -r)/build - -obj-m:=mtty.o - -modules clean modules_install: - $(MAKE) -C $(KERNEL_DIR) SUBDIRS=$(PWD) $@ - -default: modules - -module: modules +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 6b633a4ea333..1fc57a5093a7 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -164,7 +164,7 @@ static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) struct mdev_state *mds; list_for_each_entry(mds, &mdev_devices_list, next) { - if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0) + if (uuid_le_cmp(mdev_uuid(mds->mdev), uuid) == 0) return mds; } @@ -341,7 +341,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: Fifo level trigger\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } } else { #if defined(DEBUG_INTR) @@ -355,7 +356,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, */ if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_RLSI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); break; @@ -374,7 +376,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: IER_THRI write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -445,7 +448,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR_OUT2 write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && @@ -453,7 +456,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR RTS/DTR write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } break; @@ -504,7 +507,8 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, #endif if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_THRI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -734,7 +738,7 @@ int mtty_create(struct kobject *kobj, struct mdev_device *mdev) for (i = 0; i < 2; i++) { snprintf(name, MTTY_STRING_LEN, "%s-%d", - dev_driver_string(mdev->parent->dev), i + 1); + dev_driver_string(mdev_parent_dev(mdev)), i + 1); if (!strcmp(kobj->name, name)) { nr_ports = i + 1; break; @@ -1298,10 +1302,8 @@ static ssize_t sample_mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = to_mdev_device(dev); - - if (mdev) - return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev)); + if (mdev_from_dev(dev)) + return sprintf(buf, "This is MDEV %s\n", dev_name(dev)); return sprintf(buf, "\n"); } @@ -1402,7 +1404,7 @@ struct attribute_group *mdev_type_groups[] = { NULL, }; -struct parent_ops mdev_fops = { +struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, .dev_attr_groups = mtty_dev_groups, .mdev_attr_groups = mdev_dev_groups, @@ -1447,6 +1449,7 @@ static int __init mtty_dev_init(void) if (IS_ERR(mtty_dev.vd_class)) { pr_err("Error: failed to register mtty_dev class\n"); + ret = PTR_ERR(mtty_dev.vd_class); goto failed1; } @@ -1458,7 +1461,8 @@ static int __init mtty_dev_init(void) if (ret) goto failed2; - if (mdev_register_device(&mtty_dev.dev, &mdev_fops) != 0) + ret = mdev_register_device(&mtty_dev.dev, &mdev_fops); + if (ret) goto failed3; mutex_init(&mdev_list_lock); diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 950fd2e64bb7..12262c0cc691 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -39,6 +39,9 @@ #include "hash-map.h" #endif +#if BUILDING_GCC_VERSION >= 7000 +#include "memmodel.h" +#endif #include "emit-rtl.h" #include "debug.h" #include "target.h" @@ -91,6 +94,9 @@ #include "tree-ssa-alias.h" #include "tree-ssa.h" #include "stringpool.h" +#if BUILDING_GCC_VERSION >= 7000 +#include "tree-vrp.h" +#endif #include "tree-ssanames.h" #include "print-tree.h" #include "tree-eh.h" @@ -287,6 +293,22 @@ static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct c return NULL; } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + cgraph_node_ptr alias; + + if (callback(node, data)) + return true; + + for (alias = node->same_body; alias; alias = alias->next) { + if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE) + if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable)) + return true; + } + + return false; +} + #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \ for ((node) = cgraph_first_function_with_gimple_body(); (node); \ (node) = cgraph_next_function_with_gimple_body(node)) @@ -399,6 +421,7 @@ typedef union gimple_statement_d gassign; typedef union gimple_statement_d gcall; typedef union gimple_statement_d gcond; typedef union gimple_statement_d gdebug; +typedef union gimple_statement_d ggoto; typedef union gimple_statement_d gphi; typedef union gimple_statement_d greturn; @@ -452,6 +475,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return stmt; @@ -496,6 +529,14 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt) typedef struct rtx_def rtx_insn; +static inline const char *get_decl_section_name(const_tree decl) +{ + if (DECL_SECTION_NAME(decl) == NULL_TREE) + return NULL; + + return TREE_STRING_POINTER(DECL_SECTION_NAME(decl)); +} + static inline void set_decl_section_name(tree node, const char *value) { if (value) @@ -511,6 +552,7 @@ typedef struct gimple_statement_base gassign; typedef struct gimple_statement_call gcall; typedef struct gimple_statement_base gcond; typedef struct gimple_statement_base gdebug; +typedef struct gimple_statement_base ggoto; typedef struct gimple_statement_phi gphi; typedef struct gimple_statement_base greturn; @@ -564,6 +606,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a<gphi>(stmt); @@ -611,6 +663,11 @@ inline bool is_a_helper<const gassign *>::test(const_gimple gs) #define INSN_DELETED_P(insn) (insn)->deleted() +static inline const char *get_decl_section_name(const_tree decl) +{ + return DECL_SECTION_NAME(decl); +} + /* symtab/cgraph related */ #define debug_cgraph_node(node) (node)->debug() #define cgraph_get_node(decl) cgraph_node::get(decl) @@ -619,6 +676,7 @@ inline bool is_a_helper<const gassign *>::test(const_gimple gs) #define cgraph_n_nodes symtab->cgraph_count #define cgraph_max_uid symtab->cgraph_max_uid #define varpool_get_node(decl) varpool_node::get(decl) +#define dump_varpool_node(file, node) (node)->dump(file) #define cgraph_create_edge(caller, callee, call_stmt, count, freq, nest) \ (caller)->create_edge((callee), (call_stmt), (count), (freq)) @@ -674,6 +732,11 @@ static inline cgraph_node_ptr cgraph_alias_target(cgraph_node_ptr node) return node->get_alias_target(); } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + return node->call_for_symbol_thunks_and_aliases(callback, data, include_overwritable); +} + static inline struct cgraph_node_hook_list *cgraph_add_function_insertion_hook(cgraph_node_hook hook, void *data) { return symtab->add_cgraph_insertion_hook(hook, data); @@ -731,6 +794,13 @@ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree l template <> template <> +inline bool is_a_helper<const ggoto *>::test(const_gimple gs) +{ + return gs->code == GIMPLE_GOTO; +} + +template <> +template <> inline bool is_a_helper<const greturn *>::test(const_gimple gs) { return gs->code == GIMPLE_RETURN; @@ -766,6 +836,16 @@ static inline const gcall *as_a_const_gcall(const_gimple stmt) return as_a<const gcall *>(stmt); } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return as_a<ggoto *>(stmt); +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return as_a<const ggoto *>(stmt); +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a<gphi *>(stmt); @@ -828,4 +908,9 @@ static inline void debug_gimple_stmt(const_gimple s) #define debug_gimple_stmt(s) debug_gimple_stmt(CONST_CAST_GIMPLE(s)) #endif +#if BUILDING_GCC_VERSION >= 7000 +#define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning) \ + get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep) +#endif + #endif diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 12541126575b..8ff203ad4809 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -328,9 +328,9 @@ static enum tree_code get_op(tree *rhs) op = LROTATE_EXPR; /* * This code limits the value of random_const to - * the size of a wide int for the rotation + * the size of a long for the rotation */ - random_const &= HOST_BITS_PER_WIDE_INT - 1; + random_const %= TYPE_PRECISION(long_unsigned_type_node); break; } diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index ee47924aef0d..827161bc269c 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -117,7 +117,7 @@ destroy_stream(struct snd_efw *efw, struct amdtp_stream *stream) conn = &efw->in_conn; amdtp_stream_destroy(stream); - cmp_connection_destroy(&efw->out_conn); + cmp_connection_destroy(conn); } static int diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c index 4ad3bd7fd445..f1657a4e0621 100644 --- a/sound/firewire/tascam/tascam-stream.c +++ b/sound/firewire/tascam/tascam-stream.c @@ -343,7 +343,7 @@ int snd_tscm_stream_init_duplex(struct snd_tscm *tscm) if (err < 0) amdtp_stream_destroy(&tscm->rx_stream); - return 0; + return err; } /* At bus reset, streaming is stopped and some registers are clear. */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9448daff9d8b..7d660ee1d5e8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2230,6 +2230,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1971, "Asus W2JC", ALC882_FIXUP_ASUS_W2JC), SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), + SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), @@ -6983,6 +6984,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 15d1d5c63c3c..c90607ebe155 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -384,6 +384,9 @@ static void snd_complete_urb(struct urb *urb) if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; + if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ @@ -534,6 +537,11 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) alive, ep->ep_num); clear_bit(EP_FLAG_STOPPING, &ep->flags); + ep->data_subs = NULL; + ep->sync_slave = NULL; + ep->retire_data_urb = NULL; + ep->prepare_data_urb = NULL; + return 0; } @@ -912,9 +920,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, /** * snd_usb_endpoint_start: start an snd_usb_endpoint * - * @ep: the endpoint to start - * @can_sleep: flag indicating whether the operation is executed in - * non-atomic context + * @ep: the endpoint to start * * A call to this function will increment the use count of the endpoint. * In case it is not already running, the URBs for this endpoint will be @@ -924,7 +930,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, * * Returns an error if the URB submission failed, 0 in all other cases. */ -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { int err; unsigned int i; @@ -938,8 +944,6 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) /* just to be sure */ deactivate_urbs(ep, false); - if (can_sleep) - wait_clear_urbs(ep); ep->active_mask = 0; ep->unlink_mask = 0; @@ -1020,10 +1024,6 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) if (--ep->use_count == 0) { deactivate_urbs(ep, false); - ep->data_subs = NULL; - ep->sync_slave = NULL; - ep->retire_data_urb = NULL; - ep->prepare_data_urb = NULL; set_bit(EP_FLAG_STOPPING, &ep->flags); } } diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 6428392d8f62..584f295d7c77 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -18,7 +18,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, struct audioformat *fmt, struct snd_usb_endpoint *sync_ep); -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep); +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep); void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 34c6d4f2c0b6..9aa5b1855481 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -218,7 +218,7 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface, } } -static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) +static int start_endpoints(struct snd_usb_substream *subs) { int err; @@ -231,7 +231,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting data EP @%p\n", ep); ep->data_subs = subs; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags); return err; @@ -260,7 +260,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting sync EP @%p\n", ep); ep->sync_slave = subs->data_endpoint; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags); return err; @@ -850,7 +850,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) /* for playback, submit the URBs now; otherwise, the first hwptr_done * updates for all URBs would happen at the same time when starting */ if (subs->direction == SNDRV_PCM_STREAM_PLAYBACK) - ret = start_endpoints(subs, true); + ret = start_endpoints(subs); unlock: snd_usb_unlock_shutdown(subs->stream->chip); @@ -1666,7 +1666,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream switch (cmd) { case SNDRV_PCM_TRIGGER_START: - err = start_endpoints(subs, false); + err = start_endpoints(subs); if (err < 0) return err; diff --git a/usr/Makefile b/usr/Makefile index 17a513268325..0b87e71c00fc 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -5,8 +5,10 @@ klibcdirs:; PHONY += klibcdirs -suffix_y = $(CONFIG_INITRAMFS_COMPRESSION) -AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/initramfs_data.cpio$(suffix_y)" +suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION)) +datafile_y = initramfs_data.cpio$(suffix_y) +AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)" + # Generate builtin.o based on initramfs_data.o obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o @@ -14,7 +16,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o # initramfs_data.o contains the compressed initramfs_data.cpio image. # The image is included using .incbin, a dependency which is not # tracked automatically. -$(obj)/initramfs_data.o: $(obj)/initramfs_data.cpio$(suffix_y) FORCE +$(obj)/initramfs_data.o: $(obj)/$(datafile_y) FORCE ##### # Generate the initramfs cpio archive @@ -38,10 +40,8 @@ endif quiet_cmd_initfs = GEN $@ cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input) -targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 \ - initramfs_data.cpio.lzma initramfs_data.cpio.xz \ - initramfs_data.cpio.lzo initramfs_data.cpio.lz4 \ - initramfs_data.cpio +targets := $(datafile_y) + # do not try to update files included in initramfs $(deps_initramfs): ; @@ -51,6 +51,6 @@ $(deps_initramfs): klibcdirs # 2) There are changes in which files are included (added or deleted) # 3) If gen_init_cpio are newer than initramfs_data.cpio # 4) arguments to gen_initramfs.sh changes -$(obj)/initramfs_data.cpio$(suffix_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs +$(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d $(call if_changed,initfs) |